我想使用 word2vectors 计算两个句子之间的相似度,我试图获取一个句子的向量,以便我可以计算句子向量的平均值以找到余弦相似度。我已经尝试过这段代码,但它不起作用。它的输出给出带有 1 的句子向量。我想要句子_1_avg_向量和句子_2_avg_向量中句子的实际向量。
代码:
#DataSet#
sent1=[['What', 'step', 'step', 'guide', 'invest', 'share', 'market', 'india'],['What', 'story', 'Kohinoor', 'KohiNoor', 'Diamond']]
sent2=[['What', 'step', 'step', 'guide', 'invest', 'share', 'market'],['What', 'would', 'happen', 'Indian', 'government', 'stole', 'Kohinoor', 'KohiNoor', 'diamond', 'back']]
sentences=sent1+sent2
#''''Applying Word2vec''''#
word2vec_model=gensim.models.Word2Vec(sentences, size=100, min_count=5)
bin_file="vecmodel.csv"
word2vec_model.wv.save_word2vec_format(bin_file,binary=False)
#''''Making Sentence Vectors''''#
def avg_feature_vector(words, model, num_features, index2word_set):
#function to average all words vectors in a given paragraph
featureVec = np.ones((num_features,), dtype="float32")
#print(featureVec)
nwords = 0
#list containing names of words in the vocabulary
index2word_set = set(model.wv.index2word)# …Run Code Online (Sandbox Code Playgroud)