Skip to content

Commit cece1eb

Browse files
author
Your Name
committed
repair bug
1 parent 44181b4 commit cece1eb

File tree

3 files changed

+11
-1
lines changed

3 files changed

+11
-1
lines changed

comm/request.py

+2
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,8 @@ def _analysis_404(self):
6363
def get_is_vul(self, url):
    """Report whether ``url`` answers like an existing page.

    Sends a request through ``self.send_http`` and treats the URL as a hit
    when the call did not return ``0``, the response status is 200, and the
    ``content-length`` header differs from ``self.not_found_page_length``
    (presumably the length of the site's "not found" page -- confirm
    against the code that sets it).  Hits are echoed to stdout.

    :param url: URL to probe; it is UTF-8 encoded for display.
    :return: ``True`` for a hit, ``False`` otherwise.
    """
    r = self.send_http(url)
    if r != 0 and r.status == 200 and r.getheader('content-length') != self.not_found_page_length:
        # Build the message first so an encoding failure cannot happen
        # while the shared output lock is held.
        message = '[!] %s' % url.encode('utf-8')
        # ``with`` guarantees the lock is released even if printing raises;
        # a bare acquire()/release() pair would leave it held and block
        # any thread that later tries to acquire it.
        with self.pool.threadLock:
            print(message)
        return True
    return False

comm/threadpool.py

+1
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ class ThreadPool(object):
2828
def __init__(self, thread_num):
    """Set up the pool's shared state, then initialise its threads.

    :param thread_num: passed unchanged to ``__init__thread_pool``
        (presumably the number of worker threads -- confirm there).
    """
    # Shared lock; callers elsewhere in the project hold it around their
    # ``print`` statements.
    self.threadLock = threading.Lock()
    self.pool = []
    self.tasks = Queue()
    # Kept last so every attribute above exists before the pool is built.
    self.__init__thread_pool(thread_num)
3233

3334
def __init__thread_pool(self, thread_num):

core/controllers/spider.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -24,13 +24,20 @@ def __init__(self, site, timeout, delay, cookie, depth, threads):
2424
super(Spider, self).__init__(site, timeout, delay, cookie, threads)
2525
self.depth = depth
2626
self.visited = []
27+
self.found = []
2728
self.tasks_queue = PriorityQueue()
2829

2930
def get_page_content(self, url):
31+
'''
32+
获取网页源代码,如果content-type不是html,或者content-type>1M不读取内容
33+
'''
3034
r = 0
3135
r = self.send_http(url)
3236
if r != 0 and r.status == 200 and r.getheader('content-length') != self.not_found_page_length:
37+
self.pool.threadLock.acquire()
3338
print '[!]' + url.encode('utf-8')
39+
self.pool.threadLock.release()
40+
self.found.append(url)
3441
if r.getheader('content-type') or r.getheader('content-type').find('html') != 1:
3542
return r.read()
3643
elif r.getheader('content-length') and int(r.getheader('content-length') < 102400):
@@ -106,5 +113,5 @@ def start(self):
106113
self.visited.append(url)
107114
fuzz_urls.put(url)
108115
print '[%s] Stop Spider' % time.strftime('%H:%M:%S')
109-
print '[%s] %s Founded' % (time.strftime('%H:%M:%S'), len(self.visited))
116+
print '[%s] %s Founded' % (time.strftime('%H:%M:%S'), len(self.found))
110117
result.spider = self.visited

0 commit comments

Comments
 (0)