Skip to content

Commit

Permalink
add some comment
Browse files Browse the repository at this point in the history
  • Loading branch information
xianhu committed Oct 26, 2016
1 parent 2276933 commit 387136e
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions spider/utilities/util_parse.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ def get_html_content(response, charset=None):
"""
get html content from a response, charset can be None, "utf-8", "gb2312" and "gbk", etc
"""
# get info and content
# get info (the response headers) and content
info = response.info()
content = response.read()

# decompress the content by info
# decompress the content according to info: the Content-Encoding header name may also appear as content-encoding (case is ignored)
content_encoding = info.get("Content-Encoding", failobj="").lower()
content = zlib.decompress(content, zlib.MAX_WBITS | 16) if (content_encoding.find("gzip") >= 0) else (
zlib.decompress(content, zlib.MAX_WBITS) if (content_encoding.find("zlib") >= 0) else (
Expand Down

0 comments on commit 387136e

Please sign in to comment.