Skip to content

Commit

Permalink
Add a delete_empty_dir function to make sure that every series of pictures is …
Browse files Browse the repository at this point in the history
…downloaded
  • Loading branch information
chenjiandongx committed May 1, 2017
1 parent 58f1cea commit 856ff66
Showing 1 changed file with 21 additions and 0 deletions.
21 changes: 21 additions & 0 deletions mm_crawler.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,24 @@ def make_dir(folder_name):
return False


def delete_empty_dir(dir):
    """Recursively remove empty directories at and below ``dir``.

    If the program is interrupted midway, a folder may already have been
    created without any pictures downloaded into it. Because the folder
    exists, that picture set would be skipped on the next run, so such
    empty folders are deleted here.

    Args:
        dir: Path of the directory tree to clean. The name shadows the
            builtin ``dir`` but is kept so existing callers keep working.

    Side effects:
        Prints a line for every directory removed, and prints a hint when
        ``dir`` does not exist. ``dir`` itself is removed too if it ends
        up empty. A path that exists but is not a directory is left
        untouched.
    """
    if not os.path.exists(dir):
        print("Please start your performance!")
        return

    # os.listdir() raises NotADirectoryError on a regular file, so bail
    # out early instead of crashing when handed a file path.
    if not os.path.isdir(dir):
        return

    # Clean the children first so that a directory holding only empty
    # sub-directories becomes empty itself.
    for entry in os.listdir(dir):
        path = os.path.join(dir, entry)
        if os.path.isdir(path):
            delete_empty_dir(path)

    # List again: the recursion above may have removed every child.
    if not os.listdir(dir):
        os.rmdir(dir)
        print("remove the empty dir: " + dir)


lock = threading.Lock() # global resource lock

def urls_crawler(url):
Expand Down Expand Up @@ -78,10 +96,13 @@ def urls_crawler(url):

if __name__ == "__main__":
    # Root folder that is swept for empty picture-set directories before
    # each crawl attempt.
    picture_root = r"E:\mmjpg"

    urls = ['http://mmjpg.com/mm/{cnt}'.format(cnt=cnt) for cnt in range(1, 953)]
    pool = Pool(processes=cpu_count())
    try:
        # Remove empty folders left by an interrupted run so those
        # picture sets are not skipped as "already downloaded".
        delete_empty_dir(picture_root)
        results = pool.map(urls_crawler, urls)
    except Exception as exception:
        # Report the failure instead of swallowing it silently, then wait
        # and retry the whole crawl once.
        print("crawl failed, retrying in 30 seconds: {}".format(exception))
        time.sleep(30)
        delete_empty_dir(picture_root)
        results = pool.map(urls_crawler, urls)
    finally:
        # Release the worker processes whether or not the crawl succeeded.
        pool.close()
        pool.join()

0 comments on commit 856ff66

Please sign in to comment.