Skip to content

Commit

Permalink
no message
Browse files Browse the repository at this point in the history
  • Loading branch information
guanjun_xing committed Oct 23, 2019
1 parent 76b7334 commit ec3db39
Show file tree
Hide file tree
Showing 15 changed files with 693 additions and 132 deletions.
14 changes: 14 additions & 0 deletions .idea/deployment.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .idea/dyVideoListCrack.iml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion .idea/misc.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 15 additions & 0 deletions .idea/webServers.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

471 changes: 389 additions & 82 deletions .idea/workspace.xml

Large diffs are not rendered by default.

48 changes: 34 additions & 14 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,12 @@
# -*- coding: utf-8 -*-
import re
import requests
import sys, os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '.')))
import json
import os
from settings import PRO_DIR
print(PRO_DIR)


def get_sig_dytk_headers(uid, ua):
Expand All @@ -12,7 +17,10 @@ def get_sig_dytk_headers(uid, ua):
p2 = r'dytk ?: ?\'(?P<dytk>[0-9a-z]*?)\''
pattern2 = re.compile(p2)
html = requests.get('https://www.douyin.com/share/user/{}/?share_type=link'.format(uid), headers={
'user-agent': ua}).text
'user-agent': ua}, proxies={
'http': 'http://' + '118.190.122.25:10240',
'https': 'http://' + '118.190.122.25:10240'
}).text

tac = pattern1.search(html).group('tac')
dytk = pattern2.search(html).group('dytk')
Expand Down Expand Up @@ -188,37 +196,48 @@ def get_sig_dytk_headers(uid, ua):
"""
s2 = s2.replace('&&&', uid)

with open('/Users/apple/test/loach/loach/test/signature/test2.html', 'w', encoding='utf-8') as fw:
with open(os.path.join(PRO_DIR, 'test/signature/test2.html'), 'w', encoding='utf-8') as fw:
fw.write(s1 + s_tac + s2)
#
from selenium import webdriver
option = webdriver.ChromeOptions()
# option.add_argument('headless')

option.add_argument('start-maximized')
option.add_argument('--disable-dev-shm-usage')
option.add_argument('--disable-extensions')
option.add_argument('--disable-gpu')
option.add_argument('--no-sandbox')
option.add_argument('headless')
option.add_argument('disable-infobars')
option.add_argument('--user-agent={}'.format(ua))
driver = webdriver.Chrome('/Users/apple/test/loach/loach/test/signature/chromedriver', chrome_options=option)
driver = webdriver.Chrome(os.path.join(PRO_DIR, 'opt/chromedriver'), chrome_options=option)

js = "window.open('')"
driver.execute_script(js)
driver.get('file:///Users/apple/test/loach/loach/test/signature/test2.html')
driver.get('file:///'+ PRO_DIR +'/test/signature/test2.html')
# driver.get_cookies()
sig = driver.title
driver.close()
return sig, dytk


def get_all_video(user_id):
def get_all_video(user_id, page=1):
ua = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
sig, dytk = get_sig_dytk_headers(user_id, ua)

has_more = 1
max_cursor = 0
headers = {'user-agent': ua}
headers = {
'user-agent': ua
}
result = []
while has_more == 1:
while page > 0 :
r = requests.get(
'https://www.douyin.com/aweme/v1/aweme/post/?user_id={}&count=21&max_cursor={}&aid=1128&_signature={}&dytk={}'.format(
user_id, max_cursor, sig, dytk), headers=headers)
user_id, max_cursor, sig, dytk), headers=headers, proxies={
'http': 'http://' + '118.190.122.25:10240',
'https': 'http://' + '118.190.122.25:10240'
}
)
if r.status_code == 200:
data = json.loads(r.text)
if data['status_code'] == 0:
Expand All @@ -227,16 +246,17 @@ def get_all_video(user_id):
max_cursor = data['max_cursor']
headers = r.request.headers
result.append(dict([('result_code', 1), ('aweme_list', data['aweme_list'])]))
# time.sleep(1)
continue
if not has_more:
break
else:
result.append(dict([('result_code', 0), ('aweme_list', data['aweme_list'])]))
break
page -= 1

return result


if __name__ == '__main__':
r = get_all_video('6556303280')
r = get_all_video('110563717491')
import json
for item in r:
print(item)
Expand Down
47 changes: 27 additions & 20 deletions dolphin/service/douyin/kol.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,15 @@
import json
import os
import uuid
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../')))
from settings import PRO_DIR, CHROME_DRIVER, PROXIES
from selenium import webdriver
from selenium.webdriver.common.proxy import *
from fake_useragent import UserAgent
from settings import PRO_DIR

ua = UserAgent()
ua = UserAgent(path=os.path.join(PRO_DIR, 'opt/fake_useragent_0.1.10.json'))

class Kol(object):
proxy = Proxy(
Expand All @@ -22,7 +25,6 @@ class Kol(object):


def __init__(self):
self.uid = ''
self.driver=None

def set_up(self):
Expand All @@ -37,18 +39,19 @@ def set_up(self):
self.driver = webdriver.Chrome(CHROME_DRIVER, chrome_options=option)
self.driver.get('https://www.baidu.com')

def set_user(self, uid):
self.uid = uid

def __get_sig_dytk(self):
def __get_sig_dytk(self, uid):
# Extract tac and dytk from the page
p1 = r'<script>tac=\'(?P<tac>[\W\w]{150,300}?)\'</script>'
pattern1 = re.compile(p1)

p2 = r'dytk ?: ?\'(?P<dytk>[0-9a-z]*?)\''
pattern2 = re.compile(p2)
html = requests.get('https://www.douyin.com/share/user/{}/?share_type=link'.format(self.uid), headers={
'user-agent': self.ua}).text
html = requests.get('https://www.douyin.com/share/user/{}/?share_type=link'.format(uid), headers={
'user-agent': self.ua}, proxies={
'http': 'http://' + '118.190.122.25:10240',
'https': 'http://' + '118.190.122.25:10240'
}).text
tac = pattern1.search(html).group('tac')
dytk = pattern2.search(html).group('dytk')
# print('22',tac,dytk)
Expand Down Expand Up @@ -221,8 +224,8 @@ def __get_sig_dytk(self):
"""
s2 = s2.replace('&&&', self.uid)
file = os.path.join(PRO_DIR, './' + uuid.uuid4().hex + '.html')
s2 = s2.replace('&&&', uid)
file = os.path.join(PRO_DIR, './htmls' + uuid.uuid4().hex + '.html')
with open(file, 'w', encoding='utf-8') as fw:
fw.write(s1 + s_tac + s2)

Expand All @@ -231,17 +234,22 @@ def __get_sig_dytk(self):
os.remove(file)
return sig, dytk

def fetch_all_video(self):
sig, dytk = self.__get_sig_dytk()
def fetch_all_video(self, uid, page=1):
sig, dytk = self.__get_sig_dytk(uid)

has_more = 1
max_cursor = 0
headers = {'user-agent': self.ua}
headers = {
'user-agent': self.ua
}
result = []
while has_more == 1:
while page > 0:
r = requests.get(
'https://www.douyin.com/aweme/v1/aweme/post/?user_id={}&count=21&max_cursor={}&aid=1128&_signature={}&dytk={}'.format(
self.uid, max_cursor, sig, dytk), headers=headers, proxies={'http': PROXIES})
uid, max_cursor, sig, dytk), headers=headers, proxies={
'http': 'http://' + '118.190.122.25:10240',
'https': 'http://' + '118.190.122.25:10240'
}
)
if r.status_code == 200:
data = json.loads(r.text)
if data['status_code'] == 0:
Expand All @@ -250,11 +258,11 @@ def fetch_all_video(self):
max_cursor = data['max_cursor']
headers = r.request.headers
result.append(dict([('result_code', 1), ('aweme_list', data['aweme_list'])]))
# time.sleep(1)
continue
if not has_more:
break
else:
result.append(dict([('result_code', 0), ('aweme_list', data['aweme_list'])]))
break
page -= 1

return result

Expand All @@ -267,6 +275,5 @@ def checkout_user_agent(self):
kol.set_up()

if __name__ == '__main__':
kol.set_user('6556303280')
r = kol.fetch_all_video()
r = kol.fetch_all_video('89852104754')
print(r)
28 changes: 28 additions & 0 deletions dolphin/service/douyin/kolclient.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# -*- coding: utf-8 -*-
import json
import os
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../')))
import thriftpy2 as thriftpy
from thrift_connector import ClientPool, ThriftPyCyClient

from settings import PRO_DIR






# Load the Thrift IDL for the KOL service from the project tree; the path is
# resolved relative to PRO_DIR (imported from settings).
kol_thrift = thriftpy.load(os.path.join(PRO_DIR, "./dolphin/service/douyin/data/kol_thrift.thrift"), module_name="kol_thrift_thrift")
# Module-level client pool for the KolServer service defined in the IDL above.
# The host and port are hard-coded here.
# NOTE(review): presumably this must match the address the KOL server is bound
# to -- confirm against the deployment.
connection_pool = ClientPool(
kol_thrift.KolServer,
# Server host.
'192.168.1.94',
# Server port.
7000,
# Client implementation used for pooled connections (imported from
# thrift_connector).
connection_class=ThriftPyCyClient
)


# Manual smoke test: call fetch_all_works for one hard-coded user id, decode
# the response as JSON and print each decoded item.
if __name__ == '__main__':
data = connection_pool.fetch_all_works('89852104754')
for item in json.loads(data):
print(item)
21 changes: 11 additions & 10 deletions dolphin/service/douyin/kolserver.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,14 @@
# -*- coding: utf-8 -*-
import json
import os

import sys
import thriftpy
from thriftpy.rpc import make_server
from thriftpy.thrift import TProcessor
from thriftpy.protocol import TCyBinaryProtocolFactory
from thriftpy.transport import TCyBufferedTransportFactory
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../../../')))
import sys
import thriftpy2 as thriftpy
from thriftpy2.rpc import make_server
from thriftpy2.thrift import TProcessor
from thriftpy2.protocol import TCyBinaryProtocolFactory
from thriftpy2.transport import TCyBufferedTransportFactory
from dolphin.service.douyin.kol import kol
from settings import PRO_DIR

Expand All @@ -17,8 +18,7 @@ def ping(self):
return "pong"

def fetch_all_works(self, uid):
kol.set_user(uid)
return json.dumps(kol.fetch_all_video())
return json.dumps(kol.fetch_all_video(uid))

def checkout_user_agent(self):
return kol.checkout_user_agent()
Expand All @@ -30,10 +30,11 @@ def checkout_user_agent(self):
kol_thrift = thriftpy.load(os.path.join(PRO_DIR, './dolphin/service/douyin/data/kol_thrift.thrift'), module_name='kol_thrift_thrift')

app = TProcessor(kol_thrift.KolServer, KolDispatcher())

# server = TProcessor(kol_thrift.KolServer, KolDispatcher())
#
if __name__ == '__main__':
server = make_server(kol_thrift.KolServer, KolDispatcher(), '0.0.0.0', 6000, proto_factory=TCyBinaryProtocolFactory(), trans_factory=TCyBufferedTransportFactory())
server.serve()

# gunicorn_thrift dolphin.service.douyin.kolserver:app -k thriftpy_sync -b 0.0.0.0:6000 -w 4 --thrift-protocol-factory thriftpy.protocol:TCyBinaryProtocolFactory --thrift-transport-factory thriftpy.transport:TCyBufferedTransportFactory --thrift-client-timeout=5
# gunicorn_thrift dolphin.service.douyin.kolserver:app -k thriftpy_sync -b 0.0.0.0:6000 -w 4 --thrift-protocol-factory thriftpy.protocol:TCyBinaryProtocolFactory --thrift-transport-factory thriftpy.transport:TCyBufferedTransportFactory --thrift-client-timeout=5
# gunicorn_thrift dolphin.service.douyin.kolserver:app --bind 0.0.0.0:7000 -w 10 -k thriftpy_gevent --timeout 10 --thrift-protocol-factory thriftpy2.protocol:TCyBinaryProtocolFactory --thrift-transport-factory thriftpy2.transport:TCyBufferedTransportFactory --error-logfile aa.log -D -p douyin_server.pid
Empty file added htmls/1
Empty file.
Binary file added opt/chromedriver_linux64.zip
Binary file not shown.
1 change: 1 addition & 0 deletions opt/fake_useragent_0.1.10.json

Large diffs are not rendered by default.

5 changes: 4 additions & 1 deletion settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,10 @@


# Network proxy
PROXIES = 'http:100.98.76.31:1020'
PROXIES = {
'http': 'http://' + '118.190.122.25:10240',
'https': 'http://' + '118.190.122.25:10240'
}

PRO_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.append(PRO_DIR)
Expand Down
8 changes: 5 additions & 3 deletions test/main.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
#coding=utf-8
import os
import thriftpy
import sys
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '../')))
import thriftpy2 as thriftpy
from settings import PRO_DIR
from thriftpy.rpc import make_client
from thriftpy2.rpc import make_client
import json

if __name__ == '__main__':
kol_thrift = thriftpy.load(os.path.join(PRO_DIR, "./dolphin/service/douyin/data/kol_thrift.thrift"), module_name="kol_thrift_thrift")

client = make_client(kol_thrift.KolServer, '127.0.0.1', 6000)
client = make_client(kol_thrift.KolServer, '127.0.0.1', 7000)
data = client.fetch_all_works('58495581596')
for item in json.loads(data):
print(item)
Loading

0 comments on commit ec3db39

Please sign in to comment.