Skip to content

Commit

Permalink
🚀 机器学习7.9
Browse files Browse the repository at this point in the history
  • Loading branch information
AlexFanw committed Jul 8, 2020
1 parent 4c17cd8 commit 655551d
Show file tree
Hide file tree
Showing 8 changed files with 3,068 additions and 81 deletions.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,50 +1,4 @@
# coding=utf-8
import pandas as pd
import jieba
import numpy as np
from snownlp import SnowNLP
from snownlp import sentiment
import pandas as pd
import numpy as np
import jieba

def GetScore(line, score, key):
    """Sum the lexicon scores of every known word found in *line*.

    The text is segmented with ``jieba.lcut``; each segment that occurs in
    *key* contributes the element of *score* stored at the same position.

    Args:
        line: Text to score.
        score: Sequence of numeric scores, parallel to *key*.
        key: Sequence of lexicon words, parallel to *score*.

    Returns:
        The sum of the matched scores (``0`` when no segment is in *key*).
    """
    segs = jieba.lcut(line)
    # Index the lexicon once so each segment costs one hash lookup instead of
    # two linear scans (``x in key`` followed by ``key.index(x)``).
    # ``setdefault`` keeps the first occurrence of a duplicated word, which is
    # the entry ``list.index`` would have picked.
    score_by_word = {}
    for word, value in zip(key, score):
        score_by_word.setdefault(word, value)
    return sum(score_by_word[seg] for seg in segs if seg in score_by_word)


# Load the BosonNLP sentiment lexicon as two parallel lists (word, score).
BosonNlpScore = pd.read_csv(
    "bosonnlp//BosonNLP_sentiment_score.txt",
    sep=" ",
    names=['key', 'score'],
)
key, score = (BosonNlpScore[column].values.tolist() for column in ('key', 'score'))

# Train SnowNLP's sentiment model on the neg/pos sample files, then save the
# trained model to the path inside the installed snownlp package.
sentiment.train('train&test/neg.txt','train&test/pos.txt')
sentiment.save('/Users/alexfan/anaconda3/lib/python3.7/site-packages/snownlp/sentiment/sentiment.marshal')

# Collect every comment in test.csv (the first column of each row).
test = pd.read_csv("train&test/test.csv")
test_list = test.values.tolist()
test_str = [row[0] for row in test_list]

# Collect the final result labels from test_label.csv.
test_label = pd.read_csv("train&test/test_label.csv")["情感倾向"].values.tolist()

# Score only the first 100 comments with SnowNLP.
# (A lexicon-based alternative using GetScore, clamped to +1/-1, was left
# disabled here by the author.)
result = [SnowNLP(text).sentiments for text in test_str[:100]]

print(test_label)
print(result)
from splitNegPos import split
if __name__ == '__main__':
    # Number of pos / neg / mid training samples to read, in that order.
    sample_sizes = (1000, 1000, 1000)
    splitToTxt = split()
    splitToTxt.startSplit(*sample_sizes)
Original file line number Diff line number Diff line change
@@ -1,34 +1,29 @@
# coding=utf-8
import pandas as pd
import jieba
import numpy as np

def _write_comments(path, comments):
    """Write each comment to *path*, one per line.

    The file is opened in a ``with`` block so the handle is closed even when
    a write fails, and encoded explicitly as UTF-8 so the Chinese text does
    not depend on the platform's default encoding.
    """
    with open(path, 'w', encoding='utf-8') as out:
        for comment in comments:
            out.write(comment)
            out.write('\n')


# Named ``train_df`` rather than ``all`` so the builtin ``all`` is not shadowed.
train_df = pd.read_csv("train&test/train.csv")

# Bucket the post text by sentiment label: 1 = pos, -1 = neg, 0 = mid.
pos = train_df.loc[train_df['情感倾向'] == 1]['微博中文内容'].values.tolist()
neg = train_df.loc[train_df['情感倾向'] == -1]['微博中文内容'].values.tolist()
mid = train_df.loc[train_df['情感倾向'] == 0]['微博中文内容'].values.tolist()

# Only the first 1000 pos/neg samples are written; every mid sample is kept.
_write_comments("train&test/pos.txt", pos[0:1000])
_write_comments("train&test/neg.txt", neg[0:1000])
_write_comments("train&test/mid.txt", mid)
class split:
    """Split the labelled training set into per-sentiment text files.

    On construction the training CSV is loaded and the text column
    (``微博中文内容``) is bucketed by the label column (``情感倾向``):
    ``1`` -> ``pos``, ``-1`` -> ``neg``, ``0`` -> ``mid``.
    """

    def __init__(self):
        """Load ``train&test/train.csv`` and build the three text lists."""
        self.all = pd.read_csv("train&test/train.csv")
        self.pos = self._texts_with_label(1)
        self.neg = self._texts_with_label(-1)
        self.mid = self._texts_with_label(0)

    def _texts_with_label(self, label):
        """Return the text of every training row whose label equals *label*."""
        rows = self.all.loc[self.all['情感倾向'] == label]
        return rows['微博中文内容'].values.tolist()

    @staticmethod
    def _write_lines(path, comments):
        """Write each comment to *path*, one per line.

        The ``with`` block closes the handle even if a write raises, and the
        explicit UTF-8 encoding keeps the Chinese text independent of the
        platform's default encoding.
        """
        with open(path, 'w', encoding='utf-8') as out:
            for comment in comments:
                out.write(comment)
                out.write('\n')

    def startSplit(self, posnum, negnum, midnum):
        """Write the leading samples of each sentiment class to text files.

        Args:
            posnum: Maximum number of positive samples for ``pos.txt``.
            negnum: Maximum number of negative samples for ``neg.txt``.
            midnum: Maximum number of neutral samples for ``mid.txt``.

        The files are created (or overwritten) under ``train&test/``; if a
        class has fewer samples than requested, all of them are written.
        """
        self._write_lines("train&test/pos.txt", self.pos[0:posnum])
        self._write_lines("train&test/neg.txt", self.neg[0:negnum])
        self._write_lines("train&test/mid.txt", self.mid[0:midnum])

Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# coding=utf-8
import
def main():
    """Print the fixed welcome banner to standard output."""
    greeting = "Welcome Back,Alex Fan"
    print(greeting)

Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# coding=utf-8
import pandas as pd
import jieba
from snownlp import SnowNLP
from snownlp import sentiment

def GetScore(line, score, key):
    """Sum the lexicon scores of every known word found in *line*.

    The text is segmented with ``jieba.lcut``; each segment that occurs in
    *key* contributes the element of *score* stored at the same position.

    Args:
        line: Text to score.
        score: Sequence of numeric scores, parallel to *key*.
        key: Sequence of lexicon words, parallel to *score*.

    Returns:
        The sum of the matched scores (``0`` when no segment is in *key*).
    """
    segs = jieba.lcut(line)
    # Index the lexicon once so each segment costs one hash lookup instead of
    # two linear scans (``x in key`` followed by ``key.index(x)``).
    # ``setdefault`` keeps the first occurrence of a duplicated word, which is
    # the entry ``list.index`` would have picked.
    score_by_word = {}
    for word, value in zip(key, score):
        score_by_word.setdefault(word, value)
    return sum(score_by_word[seg] for seg in segs if seg in score_by_word)

def bosonNlp():
    """Load the BosonNLP lexicon, then train and save the SnowNLP model.

    The lexicon is read into two parallel lists (``key`` and ``score``).
    SnowNLP's sentiment model is then trained on ``train&test/neg.txt`` and
    ``train&test/pos.txt`` and saved to the hard-coded path inside the
    installed snownlp package. Nothing is returned.
    """
    lexicon = pd.read_csv(
        "bosonnlp//BosonNLP_sentiment_score.txt",
        sep=" ",
        names=['key', 'score'],
    )
    # NOTE(review): ``key`` and ``score`` are built but not used or returned
    # by this function.
    key, score = (lexicon[column].values.tolist() for column in ('key', 'score'))
    sentiment.train('train&test/neg.txt','train&test/pos.txt')
    sentiment.save('/Users/alexfan/anaconda3/lib/python3.7/site-packages/snownlp/sentiment/sentiment.marshal')

def test():
    """Score the first 100 comments of the test set with SnowNLP.

    Reads every comment from ``train&test/test.csv`` (first column of each
    row) and the ``情感倾向`` labels from ``train&test/test_label.csv``, then
    collects ``SnowNLP(comment).sentiments`` for the first 100 comments.
    """
    frame = pd.read_csv("train&test/test.csv")
    test_str = [row[0] for row in frame.values.tolist()]

    # Final result labels from test_label.csv.
    test_label = pd.read_csv("train&test/test_label.csv")["情感倾向"].values.tolist()

    # (A lexicon-based alternative using GetScore, clamped to +1/-1, was left
    # disabled here by the author.)
    result = [SnowNLP(text).sentiments for text in test_str[:100]]
    # NOTE(review): neither ``test_label`` nor ``result`` is returned or
    # printed, so the function currently has no observable output.

0 comments on commit 655551d

Please sign in to comment.