# Tokenizer function: splits each query into overlapping character 3-grams.
def get_ngrams(query):
    """Return the overlapping character 3-grams of ``query``.

    ``query`` is converted with ``str()`` first, so non-string values
    (e.g. numbers) are tokenized by their string form.

    Returns a list of 3-character substrings in left-to-right order,
    duplicates included. Inputs shorter than 3 characters produce an
    empty list.
    """
    text = str(query)
    n = 3
    # "+ 1" so the last window (the one ending on the final character)
    # is included; without it the trailing 3-gram is silently dropped.
    return [text[i:i + n] for i in range(len(text) - n + 1)]
#by zgd
def get_ngrams_zgd(input):
    """Count the 3-grams found in ``input``.

    ``input`` is any sliceable sequence whose items are strings (a list
    of tokens, or a plain string, in which case the items are single
    characters). Every window of three consecutive items is joined with
    single spaces to form a key.

    Returns a dict mapping each space-joined 3-gram to the number of
    times it occurs. Sequences with fewer than 3 items yield ``{}``.

    The parameter name ``input`` shadows the builtin; it is kept as-is
    so existing keyword callers keep working.
    """
    n = 3
    output = {}
    for i in range(len(input) - n + 1):
        ngram = " ".join(input[i:i + n])
        # get() supplies 0 for a first occurrence, so no membership test
        # is needed before incrementing.
        output[ngram] = output.get(ngram, 0) + 1
    return output


# Copyright notice: this is an original article by Homewm, licensed under
# CC 4.0 BY-SA. When reposting, include a link to the original source and
# this notice.