🎨 [error] Site structure changed
Hatcat123 committed Aug 6, 2020
1 parent c9a2d82 commit 8c935ed
Showing 1 changed file with 14 additions and 14 deletions.
28 changes: 14 additions & 14 deletions OthertCrawler/0x05quanjing/quanjingwang.py
@@ -1,23 +1,23 @@
-#-*- coding:utf-8 -*-
+# -*- coding:utf-8 -*-
 # Multithreaded; folders are created automatically, one folder per page
 
-import requests
-import threading
-import os
-import queue
 import re
+import threading
 import time
+import queue
+import os
-from bs4 import BeautifulSoup
 
+import requests
+from bs4 import BeautifulSoup
 
 string = 'https://www.quanjing.com/category/1286521/'
 url_queue = queue.Queue()
-pipei = re.compile('lowsrc="(.*?)" m=') #
+pipei = re.compile('lowsrc="(.*?)" m=')  #
 
 
 def get_url(page):
-    for i in range(1, page+1):
-        url = string +'{}.html'.format(i) # changed how the page URL is assembled
+    for i in range(1, page + 1):
+        url = string + '{}.html'.format(i)  # changed how the page URL is assembled
         url_queue.put(url)
     # print(url_queue.queue)
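
For orientation, get_url just seeds the shared queue with numbered page URLs. A minimal sketch of what get_url(2) would enqueue, reusing the string constant from the file (the trailing comments show the expected output):

import queue

string = 'https://www.quanjing.com/category/1286521/'
url_queue = queue.Queue()

for i in range(1, 2 + 1):
    url_queue.put(string + '{}.html'.format(i))

print(list(url_queue.queue))
# ['https://www.quanjing.com/category/1286521/1.html',
#  'https://www.quanjing.com/category/1286521/2.html']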

@@ -29,16 +29,16 @@ def spider(url_queue):
             floder_name = floder_count[1]
         else:
             floder_name = floder_count
-        os.mkdir('第{0}页'.format(floder_name)) # mkdir creates only a single directory level; makedirs can create nested directories, presumably splitting the argument on '/'
-        html = requests.get(url=url).text
+        os.mkdir('第{0}页'.format(floder_name))  # mkdir creates only a single directory level; makedirs can create nested directories, presumably splitting the argument on '/'
+        html = requests.get(url=url, verify=False).text
         soup = BeautifulSoup(html, 'lxml')
         ul = soup.find_all(attrs={"class": "gallery_list"})
         # print(ul)
-        lianjies = re.findall(pipei, str(ul)) # regex matching must be given a string
+        lianjies = re.findall(pipei, str(ul))  # regex matching must be given a string
         i = 1
         for lianjie in lianjies:
             # print(lianjie)
-            result = requests.get(url=lianjie).content
+            result = requests.get(url=lianjie, verify=False).content
             with open('第{0}页\{1}.jpg'.format(floder_name, i), 'ab') as f:
                 f.write(result)
             print('第{0}页第{1}张存储完成'.format(floder_name, i))
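
The comment on os.mkdir is worth a concrete illustration: mkdir creates exactly one directory level and raises FileNotFoundError when the parent is missing, while makedirs creates every missing level along the path. A minimal sketch with illustrative paths (not taken from the commit):

import os

os.mkdir('page_1')                    # fine: creates a single new level
# os.mkdir('pages/2020/page_1')       # FileNotFoundError: 'pages/2020' does not exist yet
os.makedirs('pages/2020/page_1', exist_ok=True)  # creates 'pages', then '2020', then 'page_1'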
@@ -52,7 +52,7 @@ def main():
     queue_list = []
     queue_count = 3
     for i in range(queue_count):
-        t = threading.Thread(target=spider, args=(url_queue, ))
+        t = threading.Thread(target=spider, args=(url_queue,))
         queue_list.append(t)
     for t in queue_list:
         t.start()
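
As far as the diff shows, the only behavioral change in this commit is the verify=False added to both requests.get calls; everything else is import ordering and PEP 8 whitespace. verify=False disables TLS certificate verification, and requests then emits an InsecureRequestWarning on every such call. A sketch of one common way to silence it, via the urllib3 package that requests depends on:

import requests
import urllib3

# Suppress the warning that each verify=False request would otherwise print.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

html = requests.get('https://www.quanjing.com/category/1286521/1.html',
                    verify=False).text

Skipping verification is a pragmatic workaround for a broken certificate chain on the target site, but it also removes protection against man-in-the-middle tampering.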
