Skip to content

Commit

Permalink
add Delete_users.py to delete useless uids
Browse files Browse the repository at this point in the history
  • Loading branch information
starFalll committed Jun 16, 2018
1 parent 592b1db commit 0bf50ed
Show file tree
Hide file tree
Showing 6 changed files with 62 additions and 24 deletions.
14 changes: 11 additions & 3 deletions weibo/Connect_mysql.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,17 @@
#连接数据库
from sqlalchemy import create_engine
from weibo.sina_spider import loadconf_db
from yaml import load

# Load the configuration
def loadconf_db(file_path):
    """Read the YAML file at *file_path* and return its parsed content.

    Args:
        file_path: Path of a UTF-8 encoded YAML file.

    Returns:
        The parsed document (a dict for a mapping-style config file).
    """
    # Imported locally so this function does not depend on the module's
    # import list. An explicit Loader is mandatory since PyYAML 6, and
    # SafeLoader refuses to construct arbitrary Python objects from the file.
    from yaml import SafeLoader

    with open(file_path, 'r', encoding='utf-8') as f:
        cont = f.read()
    return load(cont, Loader=SafeLoader)

def Connect(file):
    """Load the YAML config at *file* and create an engine for the weibo database.

    Args:
        file: Path of the YAML configuration file. Its ``db`` mapping must
            contain the ``user`` and ``password`` entries.

    Returns:
        A ``(conf, engine)`` tuple: the parsed configuration and a SQLAlchemy
        engine for the ``weibo`` database on 127.0.0.1:3306 (utf8mb4 charset).
    """
    # Local import keeps this function self-contained.
    from urllib.parse import quote

    conf = loadconf_db(file)
    db = conf.get('db')
    # str(): YAML parses an all-digit user or password as an int.
    # quote(..., safe=''): percent-encode characters such as '@', ':' or '/'
    # that would otherwise be read as URL delimiters.
    user = quote(str(db['user']), safe='')
    password = quote(str(db['password']), safe='')
    connect_str = 'mysql+pymysql://' + user + ':' + password + '@127.0.0.1:3306/weibo?charset=utf8mb4'
    engine = create_engine(connect_str, encoding='utf-8')
    return conf, engine
12 changes: 9 additions & 3 deletions weibo/Create_all.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
#创建数据库
from sqlalchemy import create_engine, MetaData,Table, Column, Integer, String, ForeignKey,TEXT
import os
import pymysql

from weibo.sina_spider import loadconf_db
from weibo.Connect_mysql import loadconf_db


"""
创建数据库
Why use utf8mb4 ,not utf8
However, for MySQL versions 5.5.3 on forward, a new MySQL-specific encoding 'utf8mb4' has been introduced.
The rationale for this new encoding is due to the fact that MySQL’s utf-8 encoding
Expand All @@ -28,9 +31,11 @@ def main():
cur.close()
conn.close()

connect_str = 'mysql+pymysql://' + db['user'] + ':' + db['password'] + '@127.0.0.1:3306/weibo?charset=utf8mb4'
connect_str = 'mysql+pymysql://' + str(db['user']) + ':' + str(db['password']) + '@127.0.0.1:3306/weibo?charset=utf8mb4'
engine = create_engine(connect_str, encoding='utf-8')
metadata = MetaData()

#微博用户信息表
WBUser = Table('WBUser', metadata,
Column('userID', Integer, primary_key=True, autoincrement=True), # 主键,自动添加
Column("uid", String(20), unique=True, nullable=False), # 微博用户的uid
Expand All @@ -45,6 +50,7 @@ def main():
Column("Description", String(2500), default='', server_default=''), # 简介
mysql_charset='utf8mb4'
)
#微博用户动态表
WBData = Table('WBData', metadata,
Column('dataID', Integer, primary_key=True, autoincrement=True), # 主键,自动添加
Column('uid', String(20), ForeignKey(WBUser.c.uid), nullable=False), # 外键
Expand Down
33 changes: 33 additions & 0 deletions weibo/Delete_users.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#删除不在conf.yaml配置文件中的微博用户及其动态
from weibo.Connect_mysql import Connect
from sqlalchemy import create_engine, MetaData,Table, select ,delete

def DeleteUsers():
    """Delete every stored user, and that user's posts, whose uid is not in conf.yaml.

    Reads the ``uids`` mapping from ``conf.yaml``, compares it with the uids
    in the ``WBUser`` table and removes the rows of all other uids from
    ``WBData`` first and then from ``WBUser``.
    """
    conf, engine = Connect('conf.yaml')

    # uids listed in the configuration file. Normalised to strings because
    # YAML yields ints for bare numbers while the uid columns hold strings.
    wanted = {str(uid) for uid in conf.get('uids').values()}

    metadata = MetaData(engine)
    WBData = Table('WBData', metadata, autoload=True)
    WBUser = Table('WBUser', metadata, autoload=True)

    # engine.begin() commits on success, rolls back on error and always
    # releases the connection, so a failure cannot leave a user half-deleted.
    with engine.begin() as conn:
        stored = [row[0] for row in conn.execute(select([WBUser.c.uid]))]
        deluid = [uid for uid in stored if str(uid) not in wanted]  # uids to delete
        for uid in deluid:
            # Remove the user's posts first; the condition must reference
            # WBData's own uid column so only this user's posts are removed.
            conn.execute(WBData.delete().where(WBData.c.uid == str(uid)))
            # Then remove the user's profile row.
            conn.execute(WBUser.delete().where(WBUser.c.uid == str(uid)))


if __name__ == '__main__':
    DeleteUsers()
7 changes: 2 additions & 5 deletions weibo/conf.yaml
Original file line number Diff line number Diff line change
@@ -1,18 +1,15 @@
#change your mysql user and password
db:
host: 127.0.0.1
port: 3306
user: root
password: password
db_name: weibo
db_type: mysql

#please add your uid
uids:
1: 6073302163
2: 1845675654
3: 1497642751

#please add your account cookies (use a secondary account, not your main account, because it may be banned)
cookies:
1: _T_WM=17ddf36185fa05928df85390206cc99e; MLOGIN=0; M_WEIBOCN_PARAMS=uicode%3D10000011%26fid%3D102803; SUB=_2A252Jm93DeRhGeBK7lAV8yfIyzyIHXVV6XE_rDV6PUJbkdBeLVfVkW1NR7ex31922SwVAsr-r8BtHMH-tjyP5bYE; SUHB=079j4ILhM97JcL; SCF=Ai5UyFtcjzJlxDyblLOeU87DbFXQsoI_q8c_61YX8Q7XQY5dGTS4tzUsiYHhR6sJ_eeVuzSj6ve8Tcmz1QU1EOc.; SSOLoginState=1528962855
2: _T_WM=17ddf36185fa05928df85390206cc99e; SUB=_2A252Jm93DeRhGeBK7lAV8yfIyzyIHXVV6XE_rDV6PUJbkdBeLVfVkW1NR7ex31922SwVAsr-r8BtHMH-tjyP5bYE; SUHB=079j4ILhM97JcL; SCF=Ai5UyFtcjzJlxDyblLOeU87DbFXQsoI_q8c_61YX8Q7XQY5dGTS4tzUsiYHhR6sJ_eeVuzSj6ve8Tcmz1QU1EOc.; SSOLoginState=1529033043; ALF=1531625043
Expand All @@ -21,4 +18,4 @@ cookies:
user_agents:
1: 'Mozilla/5.0 (iPhone; CPU iPhone OS 9_1 like Mac OS X) AppleWebKit/601.1.46 (KHTML, like Gecko) Version/9.0 Mobile/13B143 Safari/601.1]'
2: 'Mozilla/5.0 (Linux; Android 5.0; SM-G900P Build/LRX21T) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.23 Mobile Safari/537.36'
3: 'Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.23 Mobile Safari/537.36'
3: 'Mozilla/5.0 (Linux; Android 5.1.1; Nexus 6 Build/LYZ28E) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.23 Mobile Safari/537.36'
3 changes: 2 additions & 1 deletion weibo/data_analysis/Data_analysis.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#根据微博用户动态进行词云和词频分析
import jieba
from wordcloud import WordCloud
from sqlalchemy import create_engine, MetaData,Table, Column, Integer, String, ForeignKey,update,select
Expand Down Expand Up @@ -57,7 +58,7 @@ def word_segmentation(content, stop_words):

#将数据库中的微博动态转化为字符串
def getstr(uid=1845675654):
engine = Connect('../conf.yaml') # 连接数据库
_,engine = Connect('../conf.yaml') # 连接数据库
conn = engine.connect()
metadata = MetaData(engine)
WBData = Table('WBData', metadata, autoload=True)
Expand Down
17 changes: 5 additions & 12 deletions weibo/sina_spider.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
#爬取微博用户资料和动态并保存在数据库中

import requests
import random
import re
import pymysql
from sqlalchemy import create_engine, MetaData,Table, Column, Integer, String, ForeignKey,update
from sqlalchemy.dialects.mysql import insert
from yaml import load
import os
import time
from weibo.Connect_mysql import Connect
"""
手动创建数据库的SQL语句(非必须,如果Create_all.py运行没问题则不用手动创建)
CREATE DATABASE weibo;
Expand Down Expand Up @@ -112,12 +113,6 @@ def getinfo(r,uid,table,conn):
conn.execute(ins)


# Load the configuration
def loadconf_db(file_path):
    """Read the YAML file at *file_path* and return its parsed content.

    Args:
        file_path: Path of a UTF-8 encoded YAML file.

    Returns:
        The parsed document (a dict for a mapping-style config file).
    """
    # Imported locally so this function does not depend on the module's
    # import list. An explicit Loader is mandatory since PyYAML 6, and
    # SafeLoader refuses to construct arbitrary Python objects from the file.
    from yaml import SafeLoader

    with open(file_path, 'r', encoding='utf-8') as f:
        cont = f.read()
    return load(cont, Loader=SafeLoader)


#获取个人动态信息并导入mysql
Expand Down Expand Up @@ -156,8 +151,7 @@ def getmain(res,uid,table,conn,url,headers,cookie):


def main():
conf = loadconf_db(os.path.abspath('conf.yaml')) # 获取配置文件的内容
db = conf.get('db')
conf,engine = Connect('conf.yaml') # 获取配置文件的内容
uids = conf.get('uids')
cookies = conf.get('cookies')
user_agents = conf.get('user_agents')
Expand All @@ -173,8 +167,7 @@ def main():
cookie = random.choice(cookies)
cookie = getcookies(cookie)

connect_str = 'mysql+pymysql://' + db['user'] + ':' + db['password'] + '@127.0.0.1:3306/weibo?charset=utf8mb4'
engine = create_engine(connect_str, encoding='utf-8')

conn = engine.connect()
metadata = MetaData(engine)
WBUser = Table('WBUser', metadata, autoload=True) # Table Reflection 个人信息表
Expand Down

0 comments on commit 0bf50ed

Please sign in to comment.