生活随笔收集整理的這篇文章主要介紹了《少年的你》短评情感分析——机器学习之逻辑回归,小編覺得挺不錯的,現在分享給大家,幫大家做個參考。
原文網址:
https://segmentfault.com/a/1190000021947908
import pandas as pd
import jieba
import re
#邏輯回歸建模需要的庫
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
import numpy as np
from pandas import DataFrame

# Sample records: reviewer name + short review text, and the raw rating value.
df1 = [{"name":"整兒錢小姐","short":"少年的你值得一看"}]
df2 = [{"rating":[('50','力薦')]}]

# pd.merge only accepts DataFrame/Series operands, so the record lists are
# wrapped first. The two frames have no column in common, so rows are paired
# through their indexes instead of a key column.
# NOTE(review): assumes row i of df1 and row i of df2 describe the same
# review - confirm against the real data source.
data = pd.merge(
    DataFrame(df1),
    DataFrame(df2),
    how='outer',
    left_index=True,
    right_index=True,
)
print(data.shape)

# Map the raw rating to a star level
def rating(e):
    """Translate a raw rating value into a star level from 1 to 5.

    The value is searched for the markers '50', '40', '30', '20' and '10'
    (in that order); the first marker found decides the level.

    Args:
        e: Raw rating cell. Strings are searched directly; any other value
            (NaN, None, a list of tuples, ...) is converted with ``str``
            first so the substring search cannot raise.

    Returns:
        An int from 5 down to 1, or None when no marker is present.
    """
    # A bare ``'50' in e`` raises TypeError on float NaN (which the outer
    # merge can leave in the column) and never matches a marker nested
    # inside a container such as [('50', '力薦')].
    text = e if isinstance(e, str) else str(e)
    for marker, level in (('50', 5), ('40', 4), ('30', 3), ('20', 2), ('10', 1)):
        if marker in text:
            return level
    return None


data['new_rating'] = data['rating'].map(rating)
print(data.head())

# Drop neutral reviews
# Keep only reviews with a known, non-neutral level. A missing level compares
# unequal to 3 and would otherwise slip through and be labelled negative.
has_level = data['new_rating'].notna()
not_neutral = data['new_rating'] != 3
# .copy() makes new_data an independent frame, so adding columns to it below
# does not write into a view of `data`.
new_data = data[has_level & not_neutral].copy()

# Levels above 3 are positive (+1); the remaining levels are negative (-1).
new_data['sentiment'] = new_data['new_rating'].apply(lambda x : +1 if x>3 else -1)
print(new_data['sentiment'].value_counts())

# Word segmentation
def cut_word(text):
    """Segment *text* with jieba and return the tokens joined by single spaces.

    The input is passed through ``str`` first, and jieba is called with
    ``cut_all=False``.
    """
    return " ".join(jieba.cut(str(text), cut_all=False))


# Tokenise every short review into a space-separated string.
new_data['new_short'] = new_data['short'].apply(cut_word)

# Strip digits
def remove_num(new_short):
    """Return *new_short* with every run of digits removed."""
    digit_run = re.compile(r'\d+')
    return digit_run.sub('', new_short)


# Strip letters
def remove_word(new_short):
    """Return *new_short* with every run of ASCII letters removed.

    Both cases are matched; a lowercase-only pattern would leave words such
    as "IMAX" in the text.
    """
    return re.sub(r'[A-Za-z]+', '', new_short)


# Clean the segmented text: digits first, then Latin letters.
new_data['new_short'] = new_data['new_short'].apply(remove_num)
new_data['new_short'] = new_data['new_short'].apply(remove_word)

# Logistic-regression analysis and modelling
# Step 1: split the prepared data into a training set and a test set.
# The keyword is `random_state`; the misspelt `random_stat` is rejected by
# train_test_split as an unexpected argument.
train_data, test_data = train_test_split(new_data, train_size=0.8, random_state=0)

# Text feature extraction
# Step 2: extract features from the segmented text. This yields a sparse
# count matrix plus the feature names its columns correspond to.
transfer = CountVectorizer()
# Learn the vocabulary from the training text only, then reuse it for the test text.
train_word = transfer.fit_transform(train_data['new_short'])
test_word = transfer.transform(test_data['new_short'])

# Sparse matrix, shown as a dense array
print('new_data:\n', train_word.toarray())

# Feature names. get_feature_names() is gone in current scikit-learn; prefer
# get_feature_names_out() and fall back for old releases that lack it.
if hasattr(transfer, 'get_feature_names_out'):
    feature_names = list(transfer.get_feature_names_out())
else:
    feature_names = transfer.get_feature_names()
print('feature_name:\n', feature_names)
# Step 3: fit a logistic-regression model on the training features and labels.
x_train = train_word
x_test = test_word
# Take the labels from the same frames the features were built from. A second,
# independent train_test_split call stays row-aligned with train_word/test_word
# only while both calls keep identical arguments.
y_train = train_data['sentiment']
y_test = test_data['sentiment']

model = LogisticRegression()
model.fit(x_train, y_train)

# Compare the predictions with the true labels element by element.
y_predict = model.predict(x_test)
print('布爾比對:\n', y_predict == y_test)
score = model.score(x_test, y_test)
print('模型準確率:\n', score)

# Show a few test rows. A bare expression is discarded when run as a script,
# so print it explicitly.
example = test_data[50:55]
print(example[['short', 'new_rating', 'sentiment']])

# Column 1 of predict_proba belongs to model.classes_[1].
# NOTE(review): with labels -1/+1 that should be the positive class - confirm.
possibility = model.predict_proba(test_word)[:, 1]
# assign() returns a new frame, so the column is not written into a frame
# derived from the split.
test_data = test_data.assign(possibility=possibility)
print(test_data.head())
總結
以上是生活随笔為你收集整理的《少年的你》短评情感分析——机器学习之逻辑回归的全部內容,希望文章能夠幫你解決所遇到的問題。
如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。