Skip to content

Commit

Permalink
修改engine逻辑
Browse files Browse the repository at this point in the history
修改engine逻辑
  • Loading branch information
yqh231 committed Nov 21, 2016
1 parent b6379a7 commit 1c6ed83
Showing 1 changed file with 44 additions and 16 deletions.
60 changes: 44 additions & 16 deletions myScrapy/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,27 @@

import threading
from lxml import etree
#from __Threading import ThreadManager, ScrapyWorker
from threading import Thread

from Schedule import schedule
from Spiders import spider
from Mylogging import INFO,WARNING
import __Threading
import Download


class engine_Manager(object):

def __init__(self):
    """Create the engine's request buffers and its thread/download managers."""
    self.request_object = None
    self.request_list = []
    # Inside a class body an identifier such as ``__Threading`` (two leading
    # underscores, no trailing ones) is name-mangled to
    # ``_engine_Manager__Threading``, so referring to the module by its bare
    # name here raises NameError. Fetch the module imported at the top of
    # the file through globals(); a string key is not subject to mangling.
    thread_module = globals()["__Threading"]
    self.thread = thread_module.ThreadManager()
    self.download = Download.Download()
    # Results returned by callbacks, held until they are handed back to
    # engine_start().
    self.request_buf = []

# Queue requests from a generator on the scheduler, then start the thread or
# download manager that matches the request method.
# NOTE(review): this span is a rendered diff with no +/- markers and no
# indentation; the sendToSchedule header and `if (not res):` appear to be the
# pre-change lines that engine_start / `if res is None:` replace -- confirm
# against the repository.
def sendToSchedule(self, res = ""):
if (not res):
def engine_start(self, res = None):
if res is None:
# No generator supplied: start from spider.start_request().
request_object = spider.start_request()
print type(request_object)
else:
# Otherwise iterate the object passed in by the caller.
request_object = res

# NOTE(review): the diff viewer collapses lines at the hunk marker below, so
# the loop/try that the later `except` and `break` belong to is not visible.
Expand All @@ -28,38 +31,63 @@ def sendToSchedule(self, res = ""):
# Pull the next request and hand it to the scheduler according to its method.
res_object = request_object.next()
if(res_object.method == "GET"):
schedule.AddTodo_Get(res_object)
INFO("[engine_Manager] send http for get!!")

elif(res_object.method == "POST"):
schedule.AddTodo_Post(res_object)
# NOTE(review): two DOWNLOAD branches are shown; presumably schedule.Download
# is the old call and schedule.PutToDownload its replacement -- confirm.
elif(res_object.method == "DOWNLOAD"):
schedule.Download(res_object)
INFO("[engine_Manager] send http for post!!")

elif(res_object.method == "DOWNLOAD"):
schedule.PutToDownload(res_object)
INFO("[engine_Manager] send download message!!")

# The generator has no more items: log and leave the loop.
except StopIteration:
INFO("[engine] [senToSchedule] generation is empty")
break

# Start the thread manager (GET/POST) or the download manager (DOWNLOAD) for
# res_object.method, then call its waitForallThreadcompelete().
# NOTE(review): with indentation lost it is unclear whether this runs per
# request or once after the loop; if it runs after the loop, res_object is
# unbound when the generator yields nothing -- confirm.
if res_object.method == "GET":
self.thread.add_func_get()
self.thread.start()
self.thread.waitForallThreadcompelete()

elif res_object.method == "POST":
self.thread.add_func_post()
self.thread.start()
self.thread.waitForallThreadcompelete()

elif res_object.method == "DOWNLOAD":
self.download.GetTodown()
self.download.start()
self.download.waitForallThreadcompelete()

# Public entry point; it calls the private _GetfromSchedule().
# NOTE(review): rendered diff without +/- markers or indentation; the
# parenthesised `while` and the `self.callback_func(...)` call appear to be the
# pre-change lines replaced by the lines that follow them -- confirm.
def GetfromSchedule(self):
self._GetfromSchedule()

while (not schedule.Judge_empty_get()):

# Loop while schedule.Judge_empty_get() is false: fetch one item with
# schedule.Get_result_Get() and pass it on as `lis`.
def _GetfromSchedule(self):

while not schedule.Judge_empty_get():
item = schedule.Get_result_Get()
print len(item)
self.callback_func(lis = item)
self._callback_func(lis = item)


# Run one scheduler item through its callback and buffer what the callback
# returns.
# NOTE(review): rendered diff without +/- markers or indentation; the
# callback_func header and the parenthesised `if` lines appear to be the
# pre-change versions of the lines that follow them -- confirm.
def callback_func(self, lis):
if (not isinstance(lis, list)):
def _callback_func(self, lis):
if not isinstance(lis, list):
# Anything that is not a list is ignored.
return
print len(lis)

# lis[1] is called with lis[0]; the names suggest a callback and its response.
response = lis[0]
callback = lis[1]

request_next = callback(response)
if (not request_next):
if request_next is None:
WARNING("[engine] [callback_func] nothing to call back")
return

# NOTE(review): presumably the removed direct hand-off; the buffered path below
# looks like its replacement -- confirm.
self.sendToSchedule(res = request_next)

# Hold the callback's result until schedule.Judge_empty_get() reports true.
self.request_buf.append(request_next)

# Then feed the buffered items to engine_start, most recently added first
# (list.pop() takes from the end).
if schedule.Judge_empty_get():
while len(self.request_buf):
tmp = self.request_buf.pop()
self.engine_start(tmp)


# Module-level engine_Manager instance, created when this module is imported.
engine = engine_Manager()
Expand Down

0 comments on commit 1c6ed83

Please sign in to comment.