Skip to content

Commit

Permalink
upload
Browse files Browse the repository at this point in the history
  • Loading branch information
Feuoy committed Mar 9, 2020
1 parent fcafd28 commit e2bd25c
Show file tree
Hide file tree
Showing 143 changed files with 1,832,710 additions and 0 deletions.
99 changes: 99 additions & 0 deletions analysis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import os
import random
from collections import Counter

import jieba
import xlrd
from snownlp import SnowNLP

import excelSave as save

# Append text to a txt file.
def insertToTxt(content, fileName):
    """Append *content* plus a trailing comma separator to *fileName*.

    The file is opened in UTF-8 append mode and is created if missing.
    """
    with open(fileName, mode="a", encoding="utf-8") as sink:
        sink.write(content + ",")

# Append qualifying rows to the xls sheet.
def insert_data(elems, sentiments_temp):
    """Write hot words with frequency >= 3 and sentiment >= 0.7 to test4.xls.

    elems           -- list of (word, frequency) pairs
    sentiments_temp -- parallel list of sentiment scores (stringified floats)
    """
    path = "test4.xls"
    for index, elem in enumerate(elems):
        # Re-open the workbook each iteration on purpose: the save call
        # below may append a row, and nrows is used as the next row id.
        workbook = xlrd.open_workbook(path)       # open the workbook
        sheets = workbook.sheet_names()           # all sheet names
        worksheet = workbook.sheet_by_name(sheets[0])  # first sheet
        rows_old = worksheet.nrows                # rows already present
        rid = rows_old                            # next row id
        # hot word
        word = elem[0]
        # occurrence count
        frequency = elem[1]
        # sentiment score for this word
        emotion = sentiments_temp[index]

        if frequency >= 3 and float(emotion) >= 0.7:
            value1 = [
                [rid, word, frequency, emotion], ]
            print("当前插入第%d条数据" % rid)
            # CONSISTENCY FIX: use the `path` variable instead of a second
            # hard-coded copy of the filename.
            save.write_excel_xls_append_norepeat(path, value1)

# Read the raw comment text and tokenize it with jieba.
# FIX: use a context manager so the file handle is closed — it previously
# stayed open for the life of the script.
with open('test2.txt', errors="ignore", encoding="utf-8") as file:
    file_context = file.read()
words1 = jieba.lcut(file_context)             # default segmentation (original comment said "full mode")
words2 = jieba.lcut_for_search(file_context)  # search-engine-mode segmentation

# Count word frequencies, skipping single-character tokens, sorted by
# descending count. Counter.most_common() is equivalent to the original
# manual dict + sorted(items, key=count, reverse=True): both are stable,
# so ties keep first-seen order.
# NOTE(review): data1 is computed but never used below — kept for
# backward compatibility; confirm before removing.
data1 = Counter(w for w in words1 if len(w) >= 2).most_common()

data2 = Counter(w for w in words2 if len(w) >= 2).most_common()

print("----------")

# Words (each repeated once per occurrence) destined for the txt output.
txt_context = []
# Sentiment score (stringified, 2 decimals) for every word in data2.
sentiments_temp = []

print("---褒义---")
print('词语: 频次: 情感值:' )
for i in data2:
    s1 = SnowNLP(i[0])
    # Record the score for EVERY word (insert_data indexes this list).
    sentiments_temp.append(str(round(s1.sentiments, 2)))
    try:
        if i[1] >= 10 and s1.sentiments >= 0.7:
            # print word / frequency / sentiment
            print(i[0] + ' ' + str(i[1]) + ' ' + str(round(s1.sentiments, 2)))
            # Repeat the word frequency-many times for the word-cloud txt.
            txt_context.extend([str(i[0])] * i[1])
    except ValueError:
        pass

# Save to txt: shuffle, then concatenate with a comma after each word.
random.shuffle(txt_context)
# FIX: O(n) join instead of quadratic += concatenation in a loop.
txt_str = "".join(w + "," for w in txt_context)
insertToTxt(txt_str, 'test3.txt')
print("存txt,ok")

# Save to xls: ensure the workbook exists with a header row, then append
# the qualifying rows.
if not os.path.exists("test4.xls"):
    print("文件不存在,重新创建")
    header_row = [["rid", "热词", "词频", "情感值"],]
    save.write_excel_xls("test4.xls", "统计数据", header_row)
else:
    print("文件已存在")
insert_data(data2, sentiments_temp)
print("存xls,ok")
135 changes: 135 additions & 0 deletions baiduNLP.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# coding=utf-8

import sys
import json
import base64
import time

# Make the urllib imports work under both Python 2 and Python 3.
IS_PY3 = sys.version_info.major == 3
if IS_PY3:
    from urllib.request import urlopen
    from urllib.request import Request
    from urllib.error import URLError
    from urllib.parse import urlencode
    from urllib.parse import quote_plus
# Python 2 fallback, kept disabled for reference:
# else:
# import urllib2
# from urllib import quote_plus
# from urllib2 import urlopen
# from urllib2 import Request
# from urllib2 import URLError
# from urllib import urlencode

# Skip HTTPS certificate verification.
# SECURITY NOTE(review): this disables TLS certificate checking globally
# for the process — acceptable only for this ad-hoc script, do not reuse
# in production code.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

# Baidu AIP application credentials (placeholder/truncated values — supply
# real keys before running).
API_KEY = 'v5vb'
SECRET_KEY = 'r8yy'

# Comment-opinion extraction endpoint.
COMMENT_TAG_URL = "https://aip.baidubce.com/rpc/2.0/nlp/v2/comment_tag"

""" TOKEN start """
# OAuth 2.0 token endpoint.
TOKEN_URL = 'https://aip.baidubce.com/oauth/2.0/token'

def fetch_token():
    """Request an OAuth access token from the Baidu AIP token endpoint.

    Returns the access_token string on success; prints a diagnostic and
    exits the process on any failure (matching this script's error style).
    """
    params = {'grant_type': 'client_credentials',
              'client_id': API_KEY,
              'client_secret': SECRET_KEY}
    post_data = urlencode(params)
    if IS_PY3:
        post_data = post_data.encode('utf-8')
    req = Request(TOKEN_URL, post_data)
    try:
        f = urlopen(req, timeout=5)
        result_str = f.read()
    except URLError as err:
        # BUG FIX: execution previously fell through after the except and
        # crashed with NameError on the unbound result_str; exit explicitly.
        print(err)
        exit()
    if IS_PY3:
        result_str = result_str.decode()

    result = json.loads(result_str)

    if ('access_token' in result.keys() and 'scope' in result.keys()):
        if not 'brain_all_scope' in result['scope'].split(' '):
            print ('please ensure has check the ability')
            exit()
        return result['access_token']
    else:
        print ('please overwrite the correct API_KEY and SECRET_KEY')
        exit()

def make_request(url, comment):
    """POST *comment* to the comment-tag endpoint and print each opinion.

    Prints one line per extracted opinion, labelled by its sentiment code
    (2 = positive, 1 = neutral, 0 = negative). On an API error the raw
    response is printed instead. Sleeps 0.5s to stay under the QPS limit.
    """
    print("---------------------------------------------------")
    print("评论文本:")
    # print(" " + comment)
    print("\n评论观点:")

    response = request(url, json.dumps(
        {
            "text": comment,
            # 13 = 3C/phone-category comments; for other categories see
            # https://ai.baidu.com/docs#/NLP-Apply-API/09fc895f
            "type": 11
        }))

    data = json.loads(response)

    if "error_code" not in data or data["error_code"] == 0:
        # FIX: the error `else:` branch was detached from this check in the
        # original layout; it belongs to the error_code test, and the three
        # sentiment tests are mutually exclusive (elif).
        for item in data["items"]:
            if item["sentiment"] == 2:
                # positive opinion
                print(u" 积极的评论观点: " + item["prop"] + item["adj"])
            elif item["sentiment"] == 1:
                # neutral opinion
                print(u" 中性的评论观点: " + item["prop"] + item["adj"])
            elif item["sentiment"] == 0:
                # negative opinion
                print(u" 消极的评论观点: " + item["prop"] + item["adj"])
    else:
        # print error response
        print(response)

    # Avoid exceeding the API's QPS limit.
    time.sleep(0.5)

def request(url, data):
    """POST *data* (a str body) to *url* and return the decoded response.

    Returns None when the request fails with URLError (the error is
    printed, matching this script's best-effort error style).
    """
    req = Request(url, data.encode('utf-8'))
    # FIX: removed the unused has_error local.
    try:
        f = urlopen(req)
        result_str = f.read()
        if IS_PY3:
            result_str = result_str.decode()
        return result_str
    except URLError as err:
        print(err)

if __name__ == '__main__':

    # Sample comments kept from the original for reference:
    # comment1 = "手机已经收到,非常完美超出自己的想象,外观惊艳 黑色高端加外形时尚融为一体比较喜欢的类型。系统流畅优化的很好,操作界面简洁大方好上手。电池用量很满意,快充很不错。相机拍人拍物都美。总而言之一句话很喜欢的宝贝。"
    # comment2 = "外观精美大小正合适,做工精细,线条流畅,拍照完美,吃鸡最高画质无压力。连续玩了三个小时掉电百分之二十,电池强劲持久,无明显发热,操作流畅,准备再买一台给老婆生日礼物!"
    # comment3 = "大家千万不要在上当了,耗电特别快,手机激活后不支持7天无理由退货,请大家小心购买"

    # FIX: context manager closes the input file (it was left open before).
    with open('test2.txt', errors="ignore", encoding="utf-8") as src:
        file_context = src.read()
    comment1 = file_context

    # get access token
    token = fetch_token()

    # concat url
    url = COMMENT_TAG_URL + "?charset=UTF-8&access_token=" + token

    make_request(url, comment1)
    # make_request(url, comment2)
    # make_request(url, comment3)
55 changes: 55 additions & 0 deletions excelSave.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import xlrd
import xlwt
from xlutils.copy import copy

def write_excel_xls(path, sheet_name, value):
    """Create a new .xls workbook at *path* with one sheet and write *value*.

    value -- 2-D list; value[i][j] is written to row i, column j.
    """
    book = xlwt.Workbook()                     # fresh workbook
    sheet = book.add_sheet(sheet_name)         # single sheet
    for row_idx, row in enumerate(value):
        for col_idx, cell in enumerate(row):
            sheet.write(row_idx, col_idx, cell)
    book.save(path)
    print("xls格式表格写入数据成功!")

def read_excel_xls(path):
    """Read every data row (row 2 onward) of the first sheet at *path*.

    Returns a list of rows, each a list of cell values; the header row is
    skipped. Returns an empty list when only the header row exists.
    """
    rows = []
    book = xlrd.open_workbook(path)                       # open workbook
    first_sheet = book.sheet_by_name(book.sheet_names()[0])
    if first_sheet.nrows == 1:
        print("目前是第一行")
    else:
        # Start at row 1 to skip the header.
        for r in range(1, first_sheet.nrows):
            rows.append([first_sheet.cell_value(r, c)
                         for c in range(first_sheet.ncols)])
    return rows

def write_excel_xls_append_norepeat(path, value):
    """Append the rows of *value* to the first sheet of *path*, skipping
    any row whose content (ignoring column 0, the row id) already exists.

    value -- 2-D list of rows; column 0 is treated as an id and excluded
             from the duplicate comparison.
    """
    workbook = xlrd.open_workbook(path)            # open workbook
    sheets = workbook.sheet_names()                # all sheet names
    worksheet = workbook.sheet_by_name(sheets[0])  # first sheet
    rows_old = worksheet.nrows                     # existing row count
    new_workbook = copy(workbook)                  # xlrd -> writable xlwt copy
    new_worksheet = new_workbook.get_sheet(0)      # first sheet of the copy

    # PERF FIX: the existing rows and the id-stripped copies were previously
    # re-read from disk and recomputed on EVERY loop iteration; read once.
    existing = [row[1:] for row in read_excel_xls(path)]
    candidates = [row[1:] for row in value]

    rid = 0
    for i in range(len(value)):
        if candidates[i] not in existing:
            for j in range(len(value[i])):
                # Append below the pre-existing rows.
                new_worksheet.write(rid + rows_old, j, value[i][j])
            # Track the new row in memory so duplicates WITHIN this batch
            # are also rejected (the original achieved this by re-reading
            # the file it had just saved).
            existing.append(candidates[i])
            rid = rid + 1
            new_workbook.save(path)
            print("xls格式表格【追加】写入数据成功!")
        else:
            print("数据重复")
Loading

0 comments on commit e2bd25c

Please sign in to comment.