-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
143 changed files
with
1,832,710 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,99 @@ | ||
import random | ||
import xlrd | ||
import excelSave as save | ||
import os | ||
import jieba | ||
from snownlp import SnowNLP | ||
|
||
# Append text to a txt file.
def insertToTxt(content, fileName):
    """Append ``content`` followed by a comma to the text file ``fileName``.

    The file is opened in append mode with UTF-8 encoding, so earlier
    content is kept and every call adds ``content + ","`` at the end.

    Args:
        content: text to append (must be a ``str``).
        fileName: path of the file to append to.
    """
    with open(fileName, "a", encoding='utf-8') as file:
        file.write(content + ",")
|
||
# Append qualifying hot words to the xls workbook.
def insert_data(elems, sentiments_temp):
    """Append qualifying ``(rid, word, frequency, sentiment)`` rows to test4.xls.

    Args:
        elems: sequence of ``(word, frequency)`` pairs.
        sentiments_temp: sentiment scores aligned with ``elems`` by index;
            each entry must be accepted by ``float()``.

    An entry is written only when ``frequency >= 3`` and its score is
    ``>= 0.7``. ``rid`` is the number of rows currently in the first sheet,
    so the workbook is re-read before each insertion (the previous insertion
    may have added a row). Entries that fail the filter no longer trigger a
    workbook read at all.

    Raises:
        IndexError: if ``sentiments_temp`` is shorter than ``elems``.
    """
    path = "test4.xls"
    for index, elem in enumerate(elems):
        word = elem[0]          # hot word
        frequency = elem[1]     # word frequency
        emotion = sentiments_temp[index]  # sentiment score

        # Filter first: opening the workbook is the expensive step.
        if not (frequency >= 3 and float(emotion) >= 0.7):
            continue

        workbook = xlrd.open_workbook(path)
        first_sheet = workbook.sheet_by_name(workbook.sheet_names()[0])
        rid = first_sheet.nrows  # rows already present in the sheet

        print("当前插入第%d条数据" % rid)
        save.write_excel_xls_append_norepeat(path, [[rid, word, frequency, emotion]])
|
||
# Read the whole corpus; bytes that are not valid UTF-8 are ignored.
# The context manager closes the handle, which the original never did.
with open('test2.txt', errors="ignore", encoding="utf-8") as file:
    file_context = file.read()
# NOTE(review): the original comment called this "full mode", but lcut is
# called with its defaults here (cut_all is not passed) - confirm the intent.
words1 = jieba.lcut(file_context)
words2 = jieba.lcut_for_search(file_context)  # search-engine mode
|
||
# Count word frequencies.
def _count_words(words):
    """Return ``(word, count)`` pairs sorted by descending count.

    Tokens shorter than two characters are skipped. Words with equal
    counts keep the order in which they were first seen.
    """
    from collections import Counter

    return Counter(word for word in words if len(word) >= 2).most_common()


data1 = _count_words(words1)
data2 = _count_words(words2)

print("----------")
|
||
# Words to be written to the txt file, each repeated `count` times.
txt_context = []
# Rounded sentiment score (as a string) of every word in data2, in the same
# order as data2, so the two lists can be matched by index.
sentiments_temp = []

print("---褒义---")
print('词语: 频次: 情感值:' )
for word, count in data2:
    # Read the sentiment once per word instead of on every use.
    score = SnowNLP(word).sentiments
    rounded = str(round(score, 2))
    sentiments_temp.append(rounded)
    try:
        if count >= 10 and score >= 0.7:
            # Print the word, its frequency and its rounded score.
            print(word + ' ' + str(count) + ' ' + rounded)
            # Queue the word for the txt file, once per occurrence.
            txt_context.extend([str(word)] * count)
    except ValueError:
        # NOTE(review): kept from the original; presumably guards against
        # console encoding errors raised by print() - confirm before removing.
        pass
|
||
# Save to txt.
random.shuffle(txt_context)
# Every word is followed by a comma. insertToTxt appends one more comma, so
# the written text ends with ",," when txt_context is not empty; that output
# format is kept unchanged here.
txt_str = "".join(word + "," for word in txt_context)
insertToTxt(txt_str, 'test3.txt')
print("存txt,ok")
|
||
# Save to xls.
# Create the workbook with a header row only when it does not exist yet;
# the data rows are appended in both cases.
if os.path.exists("test4.xls"):
    print("文件已存在")
else:
    print("文件不存在,重新创建")
    value_title = [["rid", "热词", "词频", "情感值"],]
    save.write_excel_xls("test4.xls", "统计数据", value_title)
insert_data(data2, sentiments_temp)
print("存xls,ok")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
# coding=utf-8 | ||
|
||
import sys | ||
import json | ||
import base64 | ||
import time | ||
|
||
# make it work in both python2 both python3 | ||
IS_PY3 = sys.version_info.major == 3 | ||
if IS_PY3: | ||
from urllib.request import urlopen | ||
from urllib.request import Request | ||
from urllib.error import URLError | ||
from urllib.parse import urlencode | ||
from urllib.parse import quote_plus | ||
# else: | ||
# import urllib2 | ||
# from urllib import quote_plus | ||
# from urllib2 import urlopen | ||
# from urllib2 import Request | ||
# from urllib2 import URLError | ||
# from urllib import urlencode | ||
|
||
# Skip HTTPS certificate verification.
# WARNING(security): this replaces the default HTTPS context factory for the
# whole process, so every HTTPS request made afterwards - including the one
# that sends SECRET_KEY to the token endpoint - is not protected against
# man-in-the-middle attacks. Left unchanged to preserve behaviour; remove
# these lines once the host's CA certificates are set up correctly.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
|
||
# NOTE(review): credentials are hard-coded in the source file; consider
# loading them from the environment or a config file kept out of version
# control.
API_KEY = 'v5vb'
SECRET_KEY = 'r8yy'

COMMENT_TAG_URL = "https://aip.baidubce.com/rpc/2.0/nlp/v2/comment_tag"

# TOKEN start
TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'
|
||
""" | ||
get token | ||
""" | ||
def fetch_token():
    """Request an OAuth access token with API_KEY and SECRET_KEY.

    Sends a ``client_credentials`` request to TOKEN_URL (5 second timeout)
    and parses the JSON response.

    Returns:
        The ``access_token`` value from the response.

    Raises:
        SystemExit: with status 1 when the request fails, when the response
            lacks ``access_token`` or ``scope``, or when ``scope`` does not
            contain ``brain_all_scope``. A message is printed first.
    """
    params = {'grant_type': 'client_credentials',
              'client_id': API_KEY,
              'client_secret': SECRET_KEY}
    post_data = urlencode(params)
    if IS_PY3:
        post_data = post_data.encode('utf-8')
    req = Request(TOKEN_URL, post_data)
    try:
        f = urlopen(req, timeout=5)
        try:
            result_str = f.read()
        finally:
            f.close()
    except URLError as err:
        # Previously execution continued with result_str unbound and crashed
        # with UnboundLocalError; stop cleanly instead.
        print(err)
        sys.exit(1)
    if IS_PY3:
        result_str = result_str.decode()

    result = json.loads(result_str)

    if 'access_token' in result and 'scope' in result:
        if 'brain_all_scope' not in result['scope'].split(' '):
            print('please ensure has check the ability')
            sys.exit(1)
        return result['access_token']

    print('please overwrite the correct API_KEY and SECRET_KEY')
    sys.exit(1)
|
||
""" | ||
call remote http server | ||
""" | ||
def make_request(url, comment):
    """Send ``comment`` to the comment-tag endpoint and print the opinions found.

    Args:
        url: full endpoint URL, including the access token.
        comment: review text to analyse.

    For every item in the response, one line is printed with a label chosen
    by ``item["sentiment"]`` (2 positive, 1 neutral, 0 negative) followed by
    ``item["prop"] + item["adj"]``. If the response carries a non-zero
    ``error_code`` the raw response is printed instead. If ``request``
    returned ``None`` (it has already printed the network error) nothing
    more is printed. The function always sleeps 0.5 s before returning.
    """
    print("---------------------------------------------------")
    print("评论文本:")
    print("\n评论观点:")

    payload = json.dumps(
        {
            "text": comment,
            # NOTE(review): the original comment described 13 as the
            # 3C/phone review category, but 11 is what is sent - confirm
            # against https://ai.baidu.com/docs#/NLP-Apply-API/09fc895f
            "type": 11
        })
    response = request(url, payload)

    # request() returns None on a network error; json.loads(None) would
    # raise TypeError, so skip parsing in that case.
    if response is not None:
        data = json.loads(response)

        if "error_code" not in data or data["error_code"] == 0:
            labels = {
                2: u" 积极的评论观点: ",  # positive opinion
                1: u" 中性的评论观点: ",  # neutral opinion
                0: u" 消极的评论观点: ",  # negative opinion
            }
            for item in data["items"]:
                label = labels.get(item["sentiment"])
                if label is not None:
                    print(label + item["prop"] + item["adj"])
        else:
            # print error response
            print(response)

    # Stay below the API's QPS limit.
    time.sleep(0.5)
|
||
""" | ||
call remote http server | ||
""" | ||
def request(url, data):
    """Send ``data`` (UTF-8 encoded) to ``url`` and return the response body.

    Args:
        url: target URL.
        data: request body as a ``str``.

    Returns:
        The response body (decoded to ``str`` when IS_PY3 is true), or
        ``None`` when ``urlopen`` raises ``URLError``; the error is printed.
    """
    req = Request(url, data.encode('utf-8'))
    try:
        f = urlopen(req)
        try:
            result_str = f.read()
        finally:
            f.close()
    except URLError as err:
        print(err)
        return None
    if IS_PY3:
        result_str = result_str.decode()
    return result_str
|
||
if __name__ == '__main__':

    # Sample comments kept from the original for manual testing:
    # comment1 = "手机已经收到,非常完美超出自己的想象,外观惊艳 黑色高端加外形时尚融为一体比较喜欢的类型。系统流畅优化的很好,操作界面简洁大方好上手。电池用量很满意,快充很不错。相机拍人拍物都美。总而言之一句话很喜欢的宝贝。"
    # comment2 = "外观精美大小正合适,做工精细,线条流畅,拍照完美,吃鸡最高画质无压力。连续玩了三个小时掉电百分之二十,电池强劲持久,无明显发热,操作流畅,准备再买一台给老婆生日礼物!"
    # comment3 = "大家千万不要在上当了,耗电特别快,手机激活后不支持7天无理由退货,请大家小心购买"

    # Read the text to analyse; the context manager closes the handle,
    # which the original never did.
    with open('test2.txt', errors="ignore", encoding="utf-8") as file:
        file_context = file.read()
    comment1 = file_context

    # get access token
    token = fetch_token()

    # concat url
    url = COMMENT_TAG_URL + "?charset=UTF-8&access_token=" + token

    make_request(url, comment1)
    # make_request(url, comment2)
    # make_request(url, comment3)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
import xlrd | ||
import xlwt | ||
from xlutils.copy import copy | ||
|
||
def write_excel_xls(path, sheet_name, value):
    """Create a new workbook at ``path`` with one sheet filled from ``value``.

    Args:
        path: destination file path.
        sheet_name: name of the single sheet that is created.
        value: rows to write; ``value[i][j]`` goes to row ``i``, column ``j``.
    """
    workbook = xlwt.Workbook()              # new workbook
    sheet = workbook.add_sheet(sheet_name)  # new sheet inside it
    for i, row in enumerate(value):
        for j, cell in enumerate(row):
            sheet.write(i, j, cell)         # write the cell at (row, column)
    workbook.save(path)
    print("xls格式表格写入数据成功!")
|
||
def read_excel_xls(path):
    """Return the rows of the first sheet of ``path``, skipping the first row.

    Args:
        path: workbook file to read.

    Returns:
        A list of rows, each a list of cell values for every column of the
        sheet. The list is empty when the sheet has no rows beyond the first;
        a notice is printed when it has exactly one row.
    """
    workbook = xlrd.open_workbook(path)
    # First sheet of the workbook.
    worksheet = workbook.sheet_by_name(workbook.sheet_names()[0])
    if worksheet.nrows == 1:
        print("目前是第一行")
        return []
    # Start at index 1, i.e. the second row.
    return [
        [worksheet.cell_value(i, j) for j in range(worksheet.ncols)]
        for i in range(1, worksheet.nrows)
    ]
|
||
def write_excel_xls_append_norepeat(path, value):
    """Append the rows of ``value`` to the first sheet of ``path``, skipping duplicates.

    Args:
        path: existing workbook file; it is overwritten on every appended row.
        value: rows to append. The first column of each row is ignored when
            checking for duplicates; the remaining columns are compared with
            the rows returned by ``read_excel_xls(path)``.

    A row whose remaining columns already appear in the file is reported as
    a duplicate and not written.
    """
    workbook = xlrd.open_workbook(path)
    worksheet = workbook.sheet_by_name(workbook.sheet_names()[0])  # first sheet
    rows_old = worksheet.nrows              # rows already in the sheet
    new_workbook = copy(workbook)           # xlrd object -> writable xlwt copy
    new_worksheet = new_workbook.get_sheet(0)

    # Comparison keys do not change during the loop, so build them once.
    value_keys = [row[1:] for row in value]

    rid = 0  # number of rows appended so far
    for row, key in zip(value, value_keys):
        # Re-read on every iteration: the workbook is saved after each
        # appended row, so the file is the reference for what already
        # exists, including rows appended earlier in this same call.
        existing_keys = [saved[1:] for saved in read_excel_xls(path)]

        if key in existing_keys:
            print("数据重复")
            continue

        for j, cell in enumerate(row):
            # Append below the pre-existing rows.
            new_worksheet.write(rid + rows_old, j, cell)
        rid += 1
        new_workbook.save(path)
        print("xls格式表格【追加】写入数据成功!")
Oops, something went wrong.