机器学习算法Python实现:基于情感词典的文本情感分析
生活随笔收集整理的這篇文章主要介紹了《机器学习算法Python实现:基于情感词典的文本情感分析》,小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
# -*- coding: utf-8 -*-
#本代碼是在jupyter notebook上實現,author:huzhifei, create time:2018/8/14
#本腳本主要實現了基于python通過已有的情感詞典對文本數據做情感分析的項目目的
#導入對應的包及相關的自定義的jieba詞典
import jieba
import numpy as np
# Load the custom jieba user dictionary (the Baidu segmentation word list).
jieba.load_userdict("C:\\Users\\Desktop\\中文分詞詞庫整理\\中文分詞詞庫整理\\百度分詞詞庫.txt")
# The helper that follows opens a dictionary file and returns it as a list.
def open_dict(Dict='hahah', path='C:\\Users\\Desktop\\Textming\\'):
    """Read a lexicon file and return its lines as a list of words.

    Args:
        Dict: Base name of the lexicon file, without the ``.txt`` suffix.
        path: Directory prefix. It is concatenated verbatim with the file
            name, so it must already end with a path separator.

    Returns:
        One list entry per line of the file, in file order, with the
        newline removed.

    Raises:
        OSError: If the file cannot be opened.
    """
    path = path + '%s.txt' % Dict
    # 'utf-8-sig' decodes plain UTF-8 unchanged but drops a leading BOM, so
    # the first entry of a BOM-prefixed file still matches its word.
    with open(path, 'r', encoding='utf-8-sig', errors='ignore') as dictionary:
        return [word.strip('\n') for word in dictionary]


def judgeodd(num):
    """Return ``'even'`` when *num* is divisible by two, otherwise ``'odd'``.

    Used on the number of negation words found in front of a sentiment
    word: an odd count flips the sign of the score (multiply by -1), an
    even count leaves it unchanged (multiply by 1).
    """
    return 'even' if num % 2 == 0 else 'odd'


deny_word = open_dict(Dict='deny')  # negation-word lexicon
posdict = open_dict(Dict='positive')  # positive sentiment lexicon
negdict = open_dict(Dict='negative')  # negative sentiment lexicon

# Degree-adverb lexicon.
# NOTE(review): this is read from a different directory than the default used
# for the other lexicons -- confirm that the path is intentional.
degree_word = open_dict(Dict='degree', path='C:\\Users\\AAS-1413\\Desktop\\Textming\\')

# Assign weights to the degree words. The degree file is split into sections
# by the marker lines 'extreme', 'very', 'more', 'ish' and 'last'; each table
# below is the run of entries between two consecutive markers. list.index
# raises ValueError if a marker line is missing from the file.
mostdict = degree_word[degree_word.index('extreme') + 1: degree_word.index('very')]  # weight 4: score is multiplied by 4
verydict = degree_word[degree_word.index('very') + 1: degree_word.index('more')]  # weight 3
moredict = degree_word[degree_word.index('more') + 1: degree_word.index('ish')]  # weight 2
ishdict = degree_word[degree_word.index('ish') + 1: degree_word.index('last')]  # weight 0.5
# Every review passed to sentiment_score_list is appended here.
# NOTE(review): the list is module-level and never cleared, so each call
# re-scores all reviews seen so far. The driver below calls the function a
# second time with an already exhausted file and relies on this; confirm
# before turning it into a local.
seg_sentence = []


def _apply_modifiers(score, preceding):
    """Scale *score* by the degree words in *preceding*, then apply negation.

    Each degree word multiplies the score by its weight (4.0, 3.0, 2.0 or
    0.5). Negation words are counted, and an odd count flips the sign.
    """
    negations = 0
    for w in preceding:
        if w in mostdict:
            score *= 4.0
        elif w in verydict:
            score *= 3.0
        elif w in moredict:
            score *= 2.0
        elif w in ishdict:
            score *= 0.5
        elif w in deny_word:
            # Negations come from deny_word for both polarities.
            negations += 1
    if judgeodd(negations) == 'odd':
        score *= -1.0
    return score


def _non_negative(pos, neg):
    """Return ``[pos, neg]`` with negative parts moved to the other side.

    A negated positive score counts as negative evidence and vice versa, so
    both results are >= 0 while ``pos - neg`` keeps its value.
    """
    pos_count = max(pos, 0) + max(-neg, 0)
    neg_count = max(neg, 0) + max(-pos, 0)
    return [pos_count, neg_count]


def _score_sentence(tokens):
    """Return ``[positive, negative]`` scores for one tokenised review."""
    pos_total = 0
    neg_total = 0
    anchor = 0  # index just past the previous sentiment word
    for idx, word in enumerate(tokens):
        if word in posdict:
            # Only the words between the previous sentiment word and this
            # one act as its degree / negation modifiers.
            pos_total += _apply_modifiers(1, tokens[anchor:idx])
            anchor = idx + 1
        elif word in negdict:
            neg_total += _apply_modifiers(1, tokens[anchor:idx])
            anchor = idx + 1
        elif word in ('!', '\uff01'):  # ASCII and full-width exclamation mark
            # Walk back from the exclamation mark; the nearest sentiment
            # word gets +2 on its polarity, then the scan stops.
            for prev in reversed(tokens[:idx]):
                if prev in posdict:
                    pos_total += 2
                    break
                if prev in negdict:
                    neg_total += 2
                    break
        # Any other word leaves the running totals untouched.
    return _non_negative(pos_total, neg_total)


def sentiment_score_list(data):
    """Score every review with the sentiment lexicons.

    Args:
        data: Iterable of review strings, e.g. an open text file yielding
            one review per line. Spaces in each review are replaced by
            commas before segmentation.

    Returns:
        A list with one entry per review held in ``seg_sentence``; each
        entry is ``[[positive_score, negative_score]]``.
    """
    seg_sentence.extend(line.replace(' ', ',') for line in data)
    # jieba.lcut splits the review into a list of words.
    return [[_score_sentence(jieba.lcut(sen, cut_all=False))]
            for sen in seg_sentence]


def sentiment_score(senti_score_list):
    """Label each scored review as positive, negative or neutral.

    Args:
        senti_score_list: Output of ``sentiment_score_list``: per review, a
            list of ``[positive, negative]`` rows. A review with no rows is
            treated as neutral.

    Returns:
        A string holding one label per review, each preceded by a newline.
        The verdict for every review is also printed.
    """
    labels = []
    for review in senti_score_list:
        # reshape(-1, 2) keeps the column slices valid for an empty review.
        scores = np.asarray(review, dtype=float).reshape(-1, 2)
        res = scores[:, 0].sum() - scores[:, 1].sum()  # positive minus negative total
        if res > 0:
            label = '好評'
        elif res < 0:
            label = '差評'
        else:
            label = '中評'
        print('該條評論是:' + label)
        labels.append('\n' + label)
    return ''.join(labels)


# Read the text to run the sentiment analysis on.
# Run the analysis exactly once: iterating the open file consumes it, so the
# labels are kept and reused for the report instead of scoring a second time.
# NOTE(review): no encoding is given here, so the platform default applies,
# while the lexicons are read as UTF-8 -- confirm the encoding of content.txt.
with open("content.txt", "r", errors='ignore') as data:
    result = sentiment_score(sentiment_score_list(data))

# Store the returned labels in a text file.
with open('s.txt', 'w', errors='ignore') as f:
    f.write(result)
總結
以上是生活随笔為你收集整理的机器学习算法Python实现:基于情感词典的文本情感分析的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: 机器学习算法Python实现:kmean
- 下一篇: 机器学习算法Python实现:gensi