我正在使用 LDA 进行主题建模。
from sklearn.decomposition import LatentDirichletAllocation
我使用一组 10 个文档训练了模型。现在,我想把这些文档聚成 3 类。
类似如下:
'''
import numpy as np
# Toy corpus: five short documents whose text names one of three groups
# (groupa, groupb, groupc). The leading space in the first two strings is
# kept exactly as in the original sample.
data = [
    " a word in groupa doca",
    " a word in groupa docb",
    "a word in groupb docc",
    "a word in groupc docd",
    "a word in groupc doce",
]
NO_DOCUMENTS = len(data)  # number of documents in the corpus
print(NO_DOCUMENTS)
from sklearn.decomposition import LatentDirichletAllocation
from sklearn.feature_extraction.text import CountVectorizer
NUM_TOPICS = 2
vectorizer = CountVectorizer(min_df=0.001, …Run Code Online (Sandbox Code Playgroud)