Small tweaks suggested by Brandon Rhodes.
- Drop stale() test from recycle_connection().
- Add docstrings to some toplevel things.
- Rename open_http_conn(), get_response() to make_request(), read_response().
- Check for all-digits status (see the sketch below).
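
The last bullet tightens status-line validation in read_response(): the line is split into at most three fields, and the middle field must now be all digits. A minimal standalone sketch of that check follows; parse_status_line() is a hypothetical helper written for illustration, not code from crawler/crawling.py.

# Standalone illustration of the stricter status-line check.
# parse_status_line() is a hypothetical helper, not part of crawler/crawling.py.

class BadStatusLine(Exception):
    """Raised when an HTTP status line cannot be parsed."""


def parse_status_line(status_line):
    # At most three fields: HTTP version, status code, reason phrase.
    status_parts = status_line.split(None, 2)
    if len(status_parts) != 3 or not status_parts[1].isdigit():
        raise BadStatusLine(status_line)
    http_version, status, reason = status_parts
    return http_version, status, reason


if __name__ == '__main__':
    print(parse_status_line('HTTP/1.1 200 OK'))   # ('HTTP/1.1', '200', 'OK')
    try:
        parse_status_line('HTTP/1.1 OK 200')       # middle field is not digits
    except BadStatusLine as exc:
        print('rejected:', exc)

The version in the diff also logs the bad line with logger.error() before raising; the sketch drops the logging to stay self-contained.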
Guido van Rossum committed Mar 9, 2014
1 parent bfb0157 commit 605111e
Showing 1 changed file with 11 additions and 10 deletions.
crawler/crawling.py (21 changes: 11 additions & 10 deletions)
@@ -88,10 +88,6 @@ def recycle_connection(self, conn):
         This also prunes the pool if it exceeds the size limits.
         """
-        if conn.stale():
-            conn.close()
-            return
-
         conns = self.connections.setdefault(conn.key, [])
         conns.append(conn)
         self.queue.append(conn)
@@ -120,6 +116,7 @@ def recycle_connection(self, conn):
 
 
 class Connection:
+    """A connection that can be recycled to the pool."""
 
     def __init__(self, pool, host, port, ssl):
         self.pool = pool
@@ -153,7 +150,8 @@ def close(self, recycle=False):
 
 
 @asyncio.coroutine
-def open_http_conn(url, pool, *, method='GET', headers=None, version='1.1'):
+def make_request(url, pool, *, method='GET', headers=None, version='1.1'):
+    """Start an HTTP request. Return a Connection."""
     parts = urllib.parse.urlparse(url)
     assert parts.scheme in ('http', 'https'), repr(url)
     ssl = parts.scheme == 'https'
@@ -176,11 +174,12 @@ def open_http_conn(url, pool, *, method='GET', headers=None, version='1.1'):
     # TODO: close conn if this fails.
     conn.writer.write('\r\n'.join(lines + ['', '']).encode('latin-1'))
 
-    return conn  # Caller must send body if desired, then call get_response().
+    return conn  # Caller must send body if desired, then call read_response().
 
 
 @asyncio.coroutine
-def get_response(conn):
+def read_response(conn):
+    """Read an HTTP response from a connection."""
 
     @asyncio.coroutine
     def getline():
@@ -190,7 +189,7 @@ def getline():
 
     status_line = yield from getline()
     status_parts = status_line.split(None, 2)
-    if len(status_parts) != 3:
+    if len(status_parts) != 3 or not status_parts[1].isdigit():
         logger.error('bad status_line %r', status_line)
         raise BadStatusLine(status_line)
     http_version, status, reason = status_parts
@@ -220,6 +219,7 @@ def getline():
 
 @asyncio.coroutine
 def length_handler(nbytes, input, output):
+    """Async handler for reading a body given a Content-Length header."""
     while nbytes > 0:
         buffer = yield from input.read(min(nbytes, 256*1024))
         if not buffer:
@@ -233,6 +233,7 @@ def length_handler(nbytes, input, output):
 
 @asyncio.coroutine
 def chunked_handler(input, output):
+    """Async handler for reading a body using Transfer-Encoding: chunked."""
     logger.info('parsing chunked response')
     nblocks = 0
     nbytes = 0
@@ -308,8 +309,8 @@ def fetch(self):
         self.tries += 1
         conn = None
         try:
-            conn = yield from open_http_conn(self.url, self.crawler.pool)
-            _, status, _, headers, output = yield from get_response(conn)
+            conn = yield from make_request(self.url, self.crawler.pool)
+            _, status, _, headers, output = yield from read_response(conn)
             self.status, self.headers = status, headers
             self.body = yield from output.read()
             h_conn = headers.get('connection', '').lower()
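
The inline comment in make_request() spells out the calling convention after the rename: the caller may send a request body, then calls read_response(). Below is a sketch of such a caller, modeled on the fetch() hunk above. It assumes crawling.py is importable and that pool is whatever connection-pool object the module provides (its class is not shown in this diff); the 5-tuple unpacking mirrors fetch().

# Sketch of the post-rename calling convention, modeled on fetch() above.
# Assumes crawling.py is importable; `pool` is the module's connection-pool
# object, which this diff does not show.

import asyncio
import crawling


@asyncio.coroutine
def fetch_url(url, pool):
    # make_request() opens (or reuses) a connection and writes the request
    # line and headers; a request body, if any, would be written here.
    conn = yield from crawling.make_request(url, pool)
    try:
        _, status, _, headers, output = yield from crawling.read_response(conn)
        body = yield from output.read()
    except Exception:
        conn.close()   # do not recycle a connection in an unknown state
        raise
    # close(recycle=...) comes from the def close(self, recycle=False)
    # signature shown in a hunk header above; the fetch() hunk also reads the
    # Connection: header before deciding whether to keep the connection alive.
    conn.close(recycle=True)
    return status, headers, body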

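The new docstring on chunked_handler() names the wire format that handler consumes. As a standalone reminder of that framing (this decoder is illustrative only, not the repository's handler): each chunk is a hexadecimal size line, CRLF, that many bytes of data, CRLF, and a zero-size chunk terminates the body.

# Illustration of Transfer-Encoding: chunked framing; not the repository's code.

import io


def decode_chunked(raw):
    """Decode a chunked-encoded body given as bytes; return the payload."""
    stream = io.BytesIO(raw)
    payload = bytearray()
    while True:
        # Chunk header: size in hex, optionally followed by ";extensions".
        size_field = stream.readline().split(b';', 1)[0].strip()
        size = int(size_field, 16)
        if size == 0:
            break                  # last chunk; optional trailers follow
        payload += stream.read(size)
        stream.readline()          # consume the CRLF ending the chunk data
    return bytes(payload)


if __name__ == '__main__':
    body = b'4\r\nWiki\r\n5\r\npedia\r\n0\r\n\r\n'
    assert decode_chunked(body) == b'Wikipedia'
    print(decode_chunked(body).decode())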