LDA 算法
import jieba
import jieba.analyse as analyse
import gensim
from gensim import corpora, models, similarities# 停用詞表加載方法
def get_stopword_list():
    """Load the stop-word list from ``./stopword.txt``.

    The file stores one stop word per line and is read line by line.
    It is decoded as UTF-8 so the entries compare equal to the tokens
    they are later matched against.

    Returns:
        list[str]: The stop words in file order, with the line
        terminators removed.

    Raises:
        OSError: If ``./stopword.txt`` cannot be opened.
    """
    stop_word_path = './stopword.txt'
    # A context manager guarantees the file handle is closed once the
    # list has been built.
    with open(stop_word_path, encoding='utf-8') as stopword_file:
        stopword_list = [sw.replace('\n', '') for sw in stopword_file]
    return stopword_list
import jieba.analyse as analyse
import gensim
from gensim import corpora, models, similarities# 停用詞表加載方法
def get_stopword_list():
    """Load the stop-word list from ``./stopword.txt``.

    The file stores one stop word per line and is read line by line.
    It is decoded as UTF-8 so the entries compare equal to the tokens
    they are later matched against.

    Returns:
        list[str]: The stop words in file order, with the line
        terminators removed.

    Raises:
        OSError: If ``./stopword.txt`` cannot be opened.
    """
    stop_word_path = './stopword.txt'
    # A context manager guarantees the file handle is closed once the
    # list has been built.
    with open(stop_word_path, encoding='utf-8') as stopword_file:
        stopword_list = [sw.replace('\n', '') for sw in stopword_file]
    return stopword_list


# Stop words
stop_word = get_stopword_list()
text = input()

# Tokenize the input text and drop the stop words.
sentences = []
segs = jieba.lcut(text)
# Membership is tested once per token, so look tokens up in a set instead
# of scanning the stop-word list each time; the kept tokens are the same.
stop_word_set = set(stop_word)
segs = [seg for seg in segs if seg not in stop_word_set]
sentences.append(segs)

# Build the bag-of-words model.
dictionary = corpora.Dictionary(sentences)
corpus = [dictionary.doc2bow(sentence) for sentence in sentences]
result = ""
# Task: use the functions of the gensim module to build an LDA model and
# store the analysis result for the best topic word in `result`.
# ********** Begin *********#
lda = gensim.models.ldamodel.LdaModel(corpus=corpus, id2word=dictionary, num_topics=8)
# Describe topic 1 using only its single highest-weighted word.
result = lda.print_topic(1, topn=1)
# ********** End **********#
# Print only the part after the first '*' (per the gensim docs the string
# is formatted like weight*"word"), with no trailing newline.
print(result.split('*')[1], end="")
一定要評測兩遍
總結
- 上一篇: 九九乘法表代码
- 下一篇: 三因素三水平正交表l9_正交试验在减水剂