'''
Example input:
sent = [['I', 'am', 'a', 'student', '.'], ['who', 'are', 'you', '?'], ['my', 'name', 'is', 'student']]
Build the vocabulary (the set of distinct words) from tokenized sentences like these.
'''
def build_vocab(sentences):
    """Build the index<->word mappings for a corpus of tokenized sentences.

    Args:
        sentences: An iterable of sentences, each an iterable of hashable
            tokens (typically strings).

    Returns:
        A ``(vocabulary_inv, vocabulary)`` tuple where ``vocabulary_inv`` is
        the list of distinct tokens (index -> word) and ``vocabulary`` is the
        inverse dict (word -> index).
    """
    # Count every token across all sentences without unpacking the outer
    # iterable into call arguments.
    word_counts = Counter(itertools.chain.from_iterable(sentences))
    # Index -> word. most_common() with no argument returns every element,
    # ordered from the most to the least frequent; tokens with equal counts
    # keep the order in which they were first seen (they are NOT sorted
    # alphabetically).
    vocabulary_inv = [word for word, _ in word_counts.most_common()]
    # Word -> index.
    vocabulary = {word: index for index, word in enumerate(vocabulary_inv)}
    return vocabulary_inv, vocabulary
# Maps sentences and labels to vectors based on a vocabulary
def build_input_data(sentences, labels, vocabulary):
    """Convert tokenized sentences and their labels to NumPy arrays.

    Args:
        sentences: An iterable of sentences, each an iterable of tokens.
        labels: The labels, in any form accepted by ``np.array``.
        vocabulary: A word -> index mapping, e.g. the dict returned by
            ``build_vocab``.

    Returns:
        An ``(x, y)`` tuple: ``x`` holds the vocabulary index of every token
        of every sentence, ``y`` is ``labels`` converted to an array.

    Raises:
        KeyError: If a token is missing from ``vocabulary``.
    """
    # NOTE(review): presumably all sentences have the same length (padded
    # upstream) so that ``x`` is a rectangular 2-D array; recent NumPy
    # releases reject ragged nested lists -- confirm against the caller.
    x = np.array(
        [[vocabulary[word] for word in sentence] for sentence in sentences]
    )
    y = np.array(labels)
    return x, y


# Copyright notice: this is an original article by u014221266, licensed under
# the CC 4.0 BY-SA agreement. When reposting, please include a link to the
# original source and this notice.