Skip to content

Commit

Permalink
news
Browse files Browse the repository at this point in the history
  • Loading branch information
zkeq committed Jul 9, 2022
1 parent 8b236fc commit 746fc94
Show file tree
Hide file tree
Showing 3 changed files with 95 additions and 0 deletions.
Empty file added api/fast.py
Empty file.
84 changes: 84 additions & 0 deletions api/index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# coding: utf-8
from bs4 import BeautifulSoup
import requests
import time


def get_zhihu_days(index):
    """Fetch one day's news digest from a Zhihu column article.

    Parameters:
        index: int offset into the column's article list (0 = most recent).

    Returns:
        (final_list, news_list) — ``final_list`` holds every non-empty
        paragraph of the article, with the first two entries swapped so the
        title comes before the date; ``news_list`` holds only the numbered
        news items with their leading "N、" counter stripped.

    Raises:
        requests.RequestException on network/HTTP errors; IndexError if the
        article has fewer than two non-empty paragraphs.
    """
    headers = {
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.66 Safari/537.36 Edg/103.0.1264.44"
    }
    base_url = f"https://www.zhihu.com/api/v4/columns/c_1261258401923026944/items?limit=1&offset={index}"
    # timeout so a stalled connection cannot hang the (serverless) caller;
    # raise_for_status surfaces HTTP errors before we try to parse JSON
    resp = requests.get(base_url, headers=headers, timeout=10)
    resp.raise_for_status()
    html = resp.json()['data'][0]['content']
    soup = BeautifulSoup(html, 'lxml')
    final_list = []
    news_list = []
    for p in soup.find_all('p'):
        text = p.text
        if text == '':
            continue
        final_list.append(text)
        if '、' in text:
            # drop the leading "N、" counter, keep the rest of the sentence
            news_list.append('、'.join(text.split('、')[1:]))
    # the article lists the date first; callers expect title first
    final_list[0], final_list[1] = final_list[1], final_list[0]
    return final_list, news_list


def get_163_days(index):
    """Fetch one day's news digest from the 163 (NetEase) media column.

    Parameters:
        index: int position in the column's article list (0 = most recent).

    Returns:
        (final_list, news_list) — ``final_list`` holds every plain-text line
        of the post body; ``news_list`` holds only the numbered news items
        with their leading "N、" counter stripped.

    Raises:
        requests.RequestException on network/HTTP errors; IndexError if
        ``index`` is out of range of the article list.
    """
    list_url = 'https://www.163.com/dy/media/T1603594732083.html'
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/103.0.5060.66 Safari/537.36 Edg/103.0.1264.44",
        "realIP": "218.109.147.57"
    }

    # timeout so a stalled connection cannot hang the (serverless) caller
    data = requests.get(list_url, headers=headers, timeout=10)
    print(data.url)  # debug: shows the final URL after any redirects
    soup = BeautifulSoup(data.text, 'lxml')
    days_list = soup.find_all('a', attrs={"class": "title"})
    new_url = days_list[index]['href']
    new_data = requests.get(new_url, headers=headers, timeout=10)
    soup = BeautifulSoup(new_data.text, 'lxml')
    day_news = soup.find('div', attrs={"class": "post_body"})
    list_all = str(day_news).split('<br/>')
    final_list = []
    news_list = []
    for i in list_all:
        # keep only plain-text fragments (no residual HTML tags)
        if "<" not in i and ">" not in i and i != '':
            # BUG FIX: str.replace returns a new string — the original
            # discarded the result, leaving zero-width spaces in the output
            i = i.replace('\u200b', '')
            if '、' in i:
                # drop the leading "N、" counter, keep the rest of the line
                news_list.append('、'.join(i.split('、')[1:]))
            final_list.append(i)
    return final_list, news_list


def main(index, origin):
    """Fetch a day's news digest and wrap it in a JSON-serializable payload.

    Parameters:
        index: int offset/position passed through to the fetcher.
        origin: 'zhihu' selects ``get_zhihu_days``; anything else selects
            ``get_163_days``.

    Returns:
        dict with keys ``suc`` (bool), ``time`` (local timestamp string),
        ``data`` (title/date/news/weiyu) and ``all_data``. On failure,
        ``suc`` is False, ``data`` fields are empty and ``all_data`` carries
        the error message.
    """
    fetch = get_zhihu_days if origin == 'zhihu' else get_163_days
    try:
        data, news_list = fetch(index)
        suc = True
    except Exception as e:
        # BUG FIX: the original stored the exception object in ``data`` and
        # then indexed it (data[0]), which raises TypeError — exceptions are
        # not subscriptable. Keep a readable, serializable message instead.
        data = str(e)
        suc = False
        news_list = []
    now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    if not suc:
        return {
            'suc': False,
            'time': now,
            'data': {'title': '', 'date': '', 'news': [], 'weiyu': ''},
            'all_data': data
        }
    return {
        'suc': True,
        'time': now,
        'data': {
            'title': data[0],   # title (swapped to front by the fetcher)
            'date': data[1],    # publication date line
            'news': news_list,  # numbered items with counters stripped
            'weiyu': data[-1]   # closing "微语" quote, always the last line
        },
        'all_data': data
    }
11 changes: 11 additions & 0 deletions vercel.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
{
"version": 2,
"public": true,
"builds": [
{ "src": "api/fast.py", "use": "@vercel/python" },
{ "src": "api/crawler.py", "use": "@vercel/python" }
],
"routes": [
{"src": "/news", "dest": "api/fast.py"}
]
}

0 comments on commit 746fc94

Please sign in to comment.