Skip to content

Commit

Permalink
修改
Browse files (browse the repository at this point in the history)
  • Loading branch information
qiyeboy committed Nov 14, 2017
1 parent ae29acd commit 5a1db42
Show file tree
Hide file tree
Showing 7 changed files with 12 additions and 9 deletions.
7 changes: 5 additions & 2 deletions ch01/1.4.1.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,14 @@ def run_proc(name):
print 'Child process %s (%s) Running...' % (name, os.getpid())
if __name__ == '__main__':
print 'Parent process %s.' % os.getpid()
p_list=[]
for i in range(5):
p = Process(target=run_proc, args=(str(i),))
p_list.append(p)
print 'Process will start.'
p.start()
p.join()
p_list[i].start()
for p in p_list:
p.join()
print 'Process end.'
'''
Expand Down
2 changes: 1 addition & 1 deletion ch05/5.1.1.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,5 @@
box_title = a.get('title')
list.append({'href':href,'box_title':box_title})
content.append({'title':h2_title,'content':list})
with open('qiye.json','wb') as fp:
with open('qiye.json','w') as fp:
json.dump(content,fp=fp,indent=4)
2 changes: 1 addition & 1 deletion ch06/DataOutput.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def output_html(self):
fout.write("<td>%s</td>"%data['title'])
fout.write("<td>%s</td>"%data['summary'])
fout.write("</tr>")
self.datas.remove(data)

fout.write("</table>")
fout.write("</body>")
fout.write("</html>")
Expand Down
2 changes: 1 addition & 1 deletion ch06/SpiderMan.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ def crawl(self,root_url):
#数据存储器储存文件
self.output.store_data(data)
print "已经抓取%s个链接"%self.manager.old_url_size()
except Exception,e:
except Exception as e:
print "crawl failed"
#数据存储器将文件输出成指定格式
self.output.output_html()
Expand Down
4 changes: 2 additions & 2 deletions ch07/ControlNode/DataOutput.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def store_data(self,data):
if data is None:
return
self.datas.append(data)
if len(self.datas)>0:
if len(self.datas)>10:
self.output_html(self.filepath)


Expand Down
Expand Up @@ -38,7 +38,7 @@ def output_html(self,path):
fout.write("<td>%s</td>"%data['title'])
fout.write("<td>%s</td>"%data['summary'])
fout.write("</tr>")
self.datas.remove(data)
self.datas=[]
fout.close()

def ouput_end(self,path):
Expand Down
2 changes: 1 addition & 1 deletion ch12/cnblogSpider/cnblogSpider/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@
'cnblogSpider.pipelines.CnblogspiderPipeline': 300,
'scrapy.pipelines.images.ImagesPipeline':1
}
IMAGES_STORE = 'F:\\cnblogs'
IMAGES_STORE = 'D:\\cnblogs'
IMAGES_URLS_FIELD = 'cimage_urls'
IMAGES_RESULT_FIELD = 'cimages'
IMAGES_EXPIRES = 30
Expand Down
2 changes: 1 addition & 1 deletion ch12/cnblogSpider/cnblogSpider/spiders/cnblogs_spider.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def parse(self, response):
url = paper.xpath(".//*[@class='postTitle']/a/@href").extract()[0]
title = paper.xpath(".//*[@class='postTitle']/a/text()").extract()[0]
time = paper.xpath(".//*[@class='dayTitle']/a/text()").extract()[0]
content = paper.xpath(".//*[@class='postCon']/a/text()").extract()[0]
content = paper.xpath(".//*[@class='postCon']/div/text()").extract()[0]
item = CnblogspiderItem(url=url, title=title, time=time, content=content)
request = scrapy.Request(url=url, callback=self.parse_body)
request.meta['item'] = item # 将item暂存
Expand Down

0 comments on commit 5a1db42

Please sign in to comment.