forked from AlexFanw/HUSTER-CS
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
14 changed files
with
121,642 additions
and
2 deletions.
There are no files selected for viewing
Binary file not shown.
Empty file.
104,546 changes: 104,546 additions & 0 deletions
104,546
机器学习/结课项目/疫情期间网民情绪识别/covid19/bosonnlp/BosonNLP_sentiment_score.txt
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,50 @@ | ||
import json | ||
print("alex") | ||
# coding=utf-8 | ||
import pandas as pd | ||
import jieba | ||
import numpy as np | ||
from snownlp import SnowNLP | ||
from snownlp import sentiment | ||
import pandas as pd | ||
import numpy as np | ||
import jieba | ||
|
||
def GetScore(line, score, key):
    """Return the summed lexicon score of the words jieba finds in ``line``.

    Args:
        line: Text to segment with ``jieba.lcut``.
        score: Sequence of scores, parallel to ``key``.
        key: Sequence of lexicon words; ``score[i]`` belongs to ``key[i]``.

    Returns:
        The sum of the scores of every segmented word that appears in
        ``key`` (``0`` when nothing matches). A word that occurs several
        times in ``line`` contributes once per occurrence.
    """
    # Build the word -> score table once per call so that each token costs a
    # single hash lookup instead of two linear scans over the whole lexicon.
    lookup = {}
    for word, value in zip(key, score):
        # Keep the first occurrence of a duplicated word, like list.index().
        lookup.setdefault(word, value)
    return sum(lookup[seg] for seg in jieba.lcut(line) if seg in lookup)
|
||
|
||
# Load the BosonNLP sentiment lexicon as two parallel lists (word, score);
# the file is read as space-separated "key score" rows.
BosonNlpScore = pd.read_csv("bosonnlp//BosonNLP_sentiment_score.txt", sep=" ", names=['key', 'score'])
key = BosonNlpScore['key'].values.tolist()
score = BosonNlpScore['score'].values.tolist()

# Retrain SnowNLP's sentiment classifier on the negative / positive corpora
# and store the model where the installed snownlp package loads it from.
# sentiment.data_path is that location, so no machine-specific absolute path
# is needed.
sentiment.train('train&test/neg.txt', 'train&test/pos.txt')
sentiment.save(sentiment.data_path)

# Collect every comment from test.csv (the first column of each row).
test = pd.read_csv("train&test/test.csv")
test_list = test.values.tolist()
test_str = [row[0] for row in test_list]

# Read the reference sentiment labels from test_label.csv.
test_label = pd.read_csv("train&test/test_label.csv")
test_label = test_label["情感倾向"].values.tolist()

# Score the first 100 comments with the retrained SnowNLP model.
# NOTE: GetScore(comment, score, key) is the lexicon-based alternative; it is
# not used for this run.
result = [SnowNLP(comment).sentiments for comment in test_str[0:100]]

print(test_label)
print(result)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
# coding=utf-8 | ||
import pandas as pd | ||
import jieba | ||
import numpy as np | ||
|
||
def _comments_with_label(frame, label):
    """Return the '微博中文内容' values of the rows whose '情感倾向' equals label."""
    return frame.loc[frame['情感倾向'] == label]['微博中文内容'].values.tolist()


def _write_comments(path, comments):
    """Write each comment to ``path`` followed by a newline."""
    # Explicit UTF-8 so the Chinese text is written the same way on every
    # platform; the context manager closes the file even if a write fails.
    with open(path, 'w', encoding='utf-8') as out:
        out.writelines(comment + '\n' for comment in comments)


# Named train_df rather than `all` so the builtin all() is not shadowed.
train_df = pd.read_csv("train&test/train.csv")

# Split the comments by sentiment label: 1 -> pos, -1 -> neg, 0 -> mid.
pos = _comments_with_label(train_df, 1)
neg = _comments_with_label(train_df, -1)
mid = _comments_with_label(train_df, 0)

# Only the first 1000 positive / negative comments are exported; every
# neutral comment is written.
_write_comments("train&test/pos.txt", pos[0:1000])
_write_comments("train&test/neg.txt", neg[0:1000])
_write_comments("train&test/mid.txt", mid)
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,6 @@ | ||
# coding=utf-8 | ||
def main():
    """Print the welcome banner."""
    greeting = "Welcome Back,Alex Fan"
    print(greeting)
|
||
# Run main() only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
15,000 changes: 15,000 additions & 0 deletions
15,000
机器学习/结课项目/疫情期间网民情绪识别/covid19/train&test/mid.txt
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
Large diffs are not rendered by default.
Oops, something went wrong.
File renamed without changes.
File renamed without changes.
File renamed without changes.
Binary file not shown.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters