机器学习算法Python实现：基于情感词典的文本情感分析

# -*- coding: utf-8 -*-
# Originally written in a Jupyter notebook; author: huzhifei; create time: /8/14
# Purpose: sentiment analysis of text data driven by existing sentiment
# lexicons (positive / negative / negation / degree word lists).

# Imports and the custom jieba user dictionary.
import jieba
import numpy as np

jieba.load_userdict("C:\\Users\\Desktop\\中文分词词库整理\\中文分词词库整理\\百度分词词库.txt")


def open_dict(Dict='hahah', path='C:\\Users\\Desktop\\Textming\\'):
    """Load a lexicon file and return its lines as a list of words.

    Args:
        Dict: Base name of the lexicon; the file read is ``path + Dict + '.txt'``.
        path: Directory prefix (including the trailing separator).

    Returns:
        One entry per line of the file, with newline characters stripped.

    Raises:
        OSError: If the lexicon file cannot be opened.
    """
    path = path + '%s.txt' % Dict
    # The context manager closes the handle; undecodable bytes are dropped
    # (errors='ignore') rather than aborting the load.
    with open(path, 'r', encoding='utf-8', errors='ignore') as dictionary:
        return [word.strip('\n') for word in dictionary]


def judgeodd(num):
    """Return ``'even'`` or ``'odd'`` for *num*.

    Used on the number of negation words found before a sentiment word: an
    odd count flips the word's polarity, an even count leaves it unchanged.
    """
    if num % 2 == 0:
        return 'even'
    else:
        return 'odd'


deny_word = open_dict(Dict='deny')  # negation lexicon
posdict = open_dict(Dict='positive')  # positive sentiment lexicon
negdict = open_dict(Dict='negative')  # negative sentiment lexicon
degree_word = open_dict(Dict='degree', path='C:\\Users\\AAS-1413\\Desktop\\Textming\\')  # degree-adverb lexicon

# The degree lexicon is one file split into sections by the marker lines
# 'extreme', 'very', 'more', 'ish' and 'last'; each section gets a multiplier.
# list.index raises ValueError if a marker line is missing.
mostdict = degree_word[degree_word.index('extreme') + 1: degree_word.index('very')]  # weight 4
verydict = degree_word[degree_word.index('very') + 1: degree_word.index('more')]  # weight 3
moredict = degree_word[degree_word.index('more') + 1: degree_word.index('ish')]  # weight 2
ishdict = degree_word[degree_word.index('ish') + 1: degree_word.index('last')]  # weight 0.5

# Reviews prepared by the most recent sentiment_score_list() call.
seg_sentence = []


def _weighted_hit(preceding_words, degree_weights, negation_words):
    """Score one sentiment word from the words that precede it.

    The base score is 1. Every degree word multiplies it by the weight of the
    first degree class that contains it; any other word found in
    *negation_words* counts as a negation, and an odd number of negations
    flips the sign of the score.
    """
    score = 1.0
    negations = 0
    for w in preceding_words:
        for words, weight in degree_weights:
            if w in words:
                score *= weight
                break
        else:
            if w in negation_words:
                negations += 1
    if judgeodd(negations) == 'odd':
        score = -score
    return score


def _fold_negative_scores(pos_score, neg_score):
    """Return ``[pos, neg]`` with both totals non-negative.

    A positive-word score that went negative (through negation) is counted as
    negative sentiment and vice versa, so ``pos - neg`` is unchanged.
    """
    pos_count = max(pos_score, 0.0) + max(-neg_score, 0.0)
    neg_count = max(neg_score, 0.0) + max(-pos_score, 0.0)
    return [pos_count, neg_count]


def _score_tokens(tokens, positive, negative, negations, degree_weights):
    """Compute the ``[positive, negative]`` score pair for one tokenised review."""
    pos_total = 0.0
    neg_total = 0.0
    window_start = 0  # index just after the previous sentiment word
    last_hit = None  # (is_positive, signed score) of the latest sentiment word
    for index, token in enumerate(tokens):
        is_positive = token in positive
        if is_positive or token in negative:
            # Only the words since the previous sentiment word modify this one.
            hit = _weighted_hit(tokens[window_start:index], degree_weights, negations)
            if is_positive:
                pos_total += hit
            else:
                neg_total += hit
            last_hit = (is_positive, hit)
            window_start = index + 1
        elif token in ('！', '!') and last_hit is not None:
            # An exclamation mark adds weight 2 to the nearest preceding
            # sentiment word, in the direction that word's score already has,
            # so a negated word is strengthened rather than flipped back.
            boosted_positive, hit = last_hit
            bonus = 2.0 if hit >= 0 else -2.0
            if boosted_positive:
                pos_total += bonus
            else:
                neg_total += bonus
        # NOTE: neutral words deliberately leave the running totals untouched.
    return _fold_negative_scores(pos_total, neg_total)


def sentiment_score_list(data):
    """Score every review in *data* against the sentiment lexicons.

    Args:
        data: Iterable of review strings (for example an open text file, one
            review per line). Spaces are replaced with '，' before the text is
            segmented with jieba.

    Returns:
        A list with one entry per review; each entry is a list holding a
        single ``[positive_score, negative_score]`` pair, the shape consumed
        by ``sentiment_score``.
    """
    # Refill (not append to) the module-level list so repeated calls do not
    # re-score reviews from earlier calls.
    seg_sentence[:] = [review.replace(' ', '，') for review in data]

    # Build the lookup sets once per call: membership tests run for every
    # token, and the lexicons themselves are plain lists.
    positive = set(posdict)
    negative = set(negdict)
    negations = set(deny_word)
    degree_weights = (
        (set(mostdict), 4.0),
        (set(verydict), 3.0),
        (set(moredict), 2.0),
        (set(ishdict), 0.5),
    )

    count2 = []
    for sen in seg_sentence:
        segtmp = jieba.lcut(sen, cut_all=False)  # segment the review into a word list
        count2.append([_score_tokens(segtmp, positive, negative, negations, degree_weights)])
    return count2


def sentiment_score(senti_score_list):
    """Label every scored review as positive, negative or neutral.

    Args:
        senti_score_list: Output of ``sentiment_score_list`` - for each review
            a list of ``[positive_score, negative_score]`` pairs.

    Returns:
        A string with one ``'\\n' + label`` segment per review, where the
        label is '好评' (positive total larger), '差评' (negative total larger)
        or '中评' (equal). Each verdict is also printed.
    """
    labels = []
    for review in senti_score_list:
        # reshape keeps the column indexing valid even for an empty review.
        score_array = np.asarray(review, dtype=float).reshape(-1, 2)
        Pos = np.sum(score_array[:, 0])  # total positive score
        Neg = np.sum(score_array[:, 1])  # total negative score
        res = Pos - Neg
        if res > 0:
            label = '好评'
        elif res < 0:
            label = '差评'
        else:
            label = '中评'
        print('该条评论是：' + label)
        labels.append('\n' + label)
    return ''.join(labels)


# Read the text to analyse and score it once.
with open("content.txt", "r", errors='ignore') as data:
    result = sentiment_score(sentiment_score_list(data))

# Store the labels returned above in a text file.
with open('s.txt', 'w', errors='ignore') as f:
    f.write(result)