forked from AlexFanw/HUSTER-CS
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
3,068 additions
and
81 deletions.
There are no files selected for viewing
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,50 +1,4 @@ | ||
# coding=utf-8 | ||
import pandas as pd | ||
import jieba | ||
import numpy as np | ||
from snownlp import SnowNLP | ||
from snownlp import sentiment | ||
import pandas as pd | ||
import numpy as np | ||
import jieba | ||
|
||
def GetScore(line, score, key):
    """Return the summed lexicon score of the words jieba finds in *line*.

    Args:
        line: Text to segment with ``jieba.lcut``.
        score: Sequence of numeric scores, parallel to *key*.
        key: Sequence of lexicon words; ``score[i]`` belongs to ``key[i]``.

    Returns:
        The sum of the scores of every segment that occurs in *key*
        (``0`` when nothing matches). A segment that appears several
        times in *line* is counted each time.
    """
    # Build the word -> score table once per call. The previous version
    # ran `x in key` and `key.index(x)` for every segment, i.e. two linear
    # scans of the whole lexicon per word.
    # setdefault keeps the FIRST occurrence of a duplicated word, which is
    # what list.index() returned.
    lookup = {}
    for word, value in zip(key, score):
        lookup.setdefault(word, value)
    return sum(lookup[seg] for seg in jieba.lcut(line) if seg in lookup)
|
||
|
||
# Load the BosonNLP lexicon as two parallel lists: words and their scores.
BosonNlpScore = pd.read_csv(
    "bosonnlp//BosonNLP_sentiment_score.txt",
    sep=" ",
    names=['key', 'score'],
)
key, score = (BosonNlpScore[column].values.tolist() for column in ('key', 'score'))

# Retrain the SnowNLP sentiment model on the neg/pos text files and save it.
sentiment.train('train&test/neg.txt', 'train&test/pos.txt')
sentiment.save('/Users/alexfan/anaconda3/lib/python3.7/site-packages/snownlp/sentiment/sentiment.marshal')

# Collect every comment from test.csv (first column of each row).
test = pd.read_csv("train&test/test.csv")
test_list = test.values.tolist()
test_str = [row[0] for row in test_list]

# Load the expected labels from test_label.csv.
test_label = pd.read_csv("train&test/test_label.csv")["情感倾向"].values.tolist()

# Score only the first 100 comments with SnowNLP.
result = [SnowNLP(text).sentiments for text in test_str[0:100]]

print(test_label)
print(result)
from splitNegPos import split | ||
if __name__ == '__main__':
    # The three numbers are how many pos, neg and mid samples to take
    # from the training set.
    splitToTxt = split()
    splitToTxt.startSplit(posnum=1000, negnum=1000, midnum=1000)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,34 +1,29 @@ | ||
# coding=utf-8 | ||
import pandas as pd | ||
import jieba | ||
import numpy as np | ||
|
||
# Split train.csv by sentiment label and dump each group to a text file,
# one comment per line.
# NOTE: `all` shadows the builtin; the name is kept because it is a
# module-level name other code may already reference.
all = pd.read_csv("train&test/train.csv")

# Label column '情感倾向': 1 -> pos, -1 -> neg, 0 -> mid.
pos = all.loc[all['情感倾向'] == 1]['微博中文内容'].values.tolist()
neg = all.loc[all['情感倾向'] == -1]['微博中文内容'].values.tolist()
mid = all.loc[all['情感倾向'] == 0]['微博中文内容'].values.tolist()

# pos and neg are capped at 1000 comments; mid is written in full.
for path, comments in (
    ("train&test/pos.txt", pos[0:1000]),
    ("train&test/neg.txt", neg[0:1000]),
    ("train&test/mid.txt", mid),
):
    # `with` closes the file even if a write fails. The explicit UTF-8
    # encoding keeps the Chinese text from depending on the platform's
    # default encoding.
    with open(path, 'w', encoding='utf-8') as file:
        for comment in comments:
            file.write(comment)
            file.write('\n')
class split:
    """Split the training CSV by sentiment label and dump comments to text files.

    Attributes are filled in ``__init__`` from ``train&test/train.csv``:
    ``all`` is the full DataFrame, and ``pos`` / ``neg`` / ``mid`` are lists
    of the '微博中文内容' column for rows whose '情感倾向' is 1 / -1 / 0.
    """

    def __init__(self):
        self.all = pd.read_csv("train&test/train.csv")
        self.pos = self._comments_with_label(1)
        self.neg = self._comments_with_label(-1)
        self.mid = self._comments_with_label(0)

    def _comments_with_label(self, label):
        """Return the comment texts of all rows whose label equals *label*."""
        rows = self.all.loc[self.all['情感倾向'] == label]
        return rows['微博中文内容'].values.tolist()

    @staticmethod
    def _write_lines(path, comments):
        """Write *comments* to *path*, one per line, replacing any old file."""
        # `with` closes the handle even if a write raises. The explicit
        # UTF-8 encoding keeps the Chinese text from depending on the
        # platform's default encoding.
        with open(path, 'w', encoding='utf-8') as file:
            for comment in comments:
                file.write(comment)
                file.write('\n')

    def startSplit(self, posnum, negnum, midnum):
        """Write the first *posnum* / *negnum* / *midnum* comments to disk.

        Args:
            posnum: Number of positive comments written to ``train&test/pos.txt``.
            negnum: Number of negative comments written to ``train&test/neg.txt``.
            midnum: Number of neutral comments written to ``train&test/mid.txt``.

        Each limit is applied as a slice, so a value larger than the number
        of available comments simply writes all of them.
        """
        self._write_lines("train&test/pos.txt", self.pos[0:posnum])
        self._write_lines("train&test/neg.txt", self.neg[0:negnum])
        self._write_lines("train&test/mid.txt", self.mid[0:midnum])
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,4 @@ | ||
# coding=utf-8 | ||
import | ||
def main():
    """Print the welcome banner to standard output."""
    greeting = "Welcome Back,Alex Fan"
    print(greeting)
|
||
|
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
# coding=utf-8 | ||
import pandas as pd | ||
import jieba | ||
from snownlp import SnowNLP | ||
from snownlp import sentiment | ||
|
||
def GetScore(line, score, key):
    """Return the summed lexicon score of the words jieba finds in *line*.

    Args:
        line: Text to segment with ``jieba.lcut``.
        score: Sequence of numeric scores, parallel to *key*.
        key: Sequence of lexicon words; ``score[i]`` belongs to ``key[i]``.

    Returns:
        The sum of the scores of every segment that occurs in *key*
        (``0`` when nothing matches). A segment that appears several
        times in *line* is counted each time.
    """
    # Build the word -> score table once per call. The previous version
    # ran `x in key` and `key.index(x)` for every segment, i.e. two linear
    # scans of the whole lexicon per word.
    # setdefault keeps the FIRST occurrence of a duplicated word, which is
    # what list.index() returned.
    lookup = {}
    for word, value in zip(key, score):
        lookup.setdefault(word, value)
    return sum(lookup[seg] for seg in jieba.lcut(line) if seg in lookup)
|
||
def bosonNlp(model_path='/Users/alexfan/anaconda3/lib/python3.7/site-packages/snownlp/sentiment/sentiment.marshal'):
    """Load the BosonNLP lexicon and retrain/save the SnowNLP sentiment model.

    Args:
        model_path: Where ``sentiment.save`` writes the trained model. The
            default is the original hard-coded location, which only exists
            on the author's machine; pass another path elsewhere.

    Returns:
        tuple: ``(key, score)`` — the lexicon words and their scores as two
        parallel lists, in the order ``GetScore`` takes them. Previously
        both lists were built and then discarded.
    """
    BosonNlpScore = pd.read_csv(
        "bosonnlp//BosonNLP_sentiment_score.txt",
        sep=" ",
        names=['key', 'score'],
    )
    key = BosonNlpScore['key'].values.tolist()
    score = BosonNlpScore['score'].values.tolist()

    # Train on the neg/pos text files (negative file first), then save.
    sentiment.train('train&test/neg.txt', 'train&test/pos.txt')
    sentiment.save(model_path)
    return key, score
|
||
def test(limit=100):
    """Score test comments with SnowNLP and load the expected labels.

    Reads ``train&test/test.csv`` (comment text is taken from the first
    column of each row) and ``train&test/test_label.csv`` (column
    '情感倾向').

    Args:
        limit: How many comments, counted from the start of the file, are
            scored. Defaults to 100, the value that was hard-coded before.

    Returns:
        tuple: ``(test_label, result)`` — every label from the label file,
        and the SnowNLP ``sentiments`` value of each scored comment.
        Previously both lists were computed and then dropped, so the
        function had no observable result.
    """
    # Named `frame` rather than `test` so the local does not shadow this
    # function's own name.
    frame = pd.read_csv("train&test/test.csv")
    test_str = [row[0] for row in frame.values.tolist()]

    test_label = pd.read_csv("train&test/test_label.csv")["情感倾向"].values.tolist()

    result = [SnowNLP(comment).sentiments for comment in test_str[0:limit]]
    return test_label, result