Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
jasper2326 authored Sep 12, 2017
1 parent a0dfcf6 commit a2417b8
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 0 deletions.
7 changes: 7 additions & 0 deletions Ch1_crawl_links.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# author = [email protected]
# -*- coding: cp936 -*-
# coding: cp936

import urllib2
import re
import Ch1_download
30 changes: 30 additions & 0 deletions Ch1_crawl_via_id.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# author = [email protected]
# -*- coding: cp936 -*-
# coding: cp936

import itertools
import re
import Ch1_download

def crawl_via_id_0():
    """Download sequentially numbered place pages until one download fails.

    Page ids start at 1 and grow by one per request.  The crawl ends at the
    first id for which ``Ch1_download.download_3`` returns ``None``.
    """
    url_template = 'http://example.webscraping.com/places/default/view/-%d'
    page_id = 1
    # Keep requesting the next id for as long as the download yields a result.
    while Ch1_download.download_3(url_template % page_id) is not None:
        page_id += 1


def crawl_via_id_1(max_errors=5):
    """Crawl numbered place pages, tolerating short runs of failed downloads.

    Page ids start at 1 and grow by one per request.  A download counts as
    failed when ``Ch1_download.download_3`` returns ``None``.  Crawling stops
    once ``max_errors`` failures have happened in a row; any successful
    download resets the streak.

    :param max_errors: number of consecutive failed downloads after which the
        crawl stops.  Defaults to 5, the value that used to be hard-coded.
    """
    url_template = 'http://example.webscraping.com/places/default/view/-%d'
    num_errors = 0
    for page in itertools.count(1):
        html = Ch1_download.download_3(url_template % page)
        if html is not None:
            # A good page ends the current run of failures.
            num_errors = 0
            continue
        num_errors += 1
        # ">=" instead of "==": a limit below 1 must still stop on the first
        # failure rather than never matching and looping forever.
        if num_errors >= max_errors:
            break

0 comments on commit a2417b8

Please sign in to comment.