forked from daacheng/PythonBasic
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathPythonForTaoBao.py
74 lines (64 loc) · 2.62 KB
/
PythonForTaoBao.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from selenium import webdriver
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from pyquery import PyQuery as pq
from pymongo import MongoClient
# Open the MongoDB connection and select the collection that scraped
# product records are saved into.
client = MongoClient(host='localhost', port=27017)
taobao = client['taobao']
collection_product = taobao['product']
def save_to_mongodb(product):
    """Insert one scraped product document into ``collection_product``.

    Prints ``successful`` when the insert yields a document id and
    ``faile`` when the insert raises.

    Args:
        product: Dict of product fields to store as a single document.
    """
    try:
        # Collection.insert() was deprecated in PyMongo 3 and removed in
        # PyMongo 4; insert_one() is the supported single-document insert.
        result = collection_product.insert_one(product)
    except Exception:
        # Best-effort persistence: a failed insert must not stop the crawl.
        print('faile')
    else:
        if result.inserted_id:
            print('successful')
def getProducts():
    """Parse the page currently loaded in ``browser`` and save each product.

    Every element matching ``#mainsrp-itemlist .items .item`` is turned into
    a dict, printed, and handed to ``save_to_mongodb``.
    """
    # (output key, CSS selector) pairs whose value is the element's text.
    text_fields = (
        ('price', '.price'),
        ('deal', '.deal-cnt'),
        ('title', '.title'),
        ('location', '.location'),
        ('shop', '.shop'),
    )
    document = pq(browser.page_source)
    for entry in document('#mainsrp-itemlist .items .item').items():
        # The image URL is read from the data-src attribute, not the text.
        product = {'image': entry.find('.pic .img').attr('data-src')}
        product.update(
            (key, entry.find(selector).text()) for key, selector in text_fields
        )
        print(product)
        save_to_mongodb(product)
def index_page(page):
    """Load the search results, jump to ``page`` and scrape its products.

    The search URL is opened first. For ``page`` greater than 1 the page
    number is typed into the pager form and submitted. Once the pager shows
    ``page`` as the active entry and at least one item is present, the
    products are extracted with ``getProducts``.

    Args:
        page: Page number of the search results to scrape.
    """
    print(page)
    try:
        browser.get(url)
        if page > 1:
            page_box = wait.until(EC.presence_of_element_located(
                (By.CSS_SELECTOR, '#mainsrp-pager div.form > input')))
            submit = wait.until(EC.element_to_be_clickable(
                (By.CSS_SELECTOR, '#mainsrp-pager div.form > span.btn.J_Submit')))
            page_box.clear()
            # Type the requested page number (previously a hard-coded 3,
            # which made every page other than 3 time out below).
            page_box.send_keys(str(page))
            submit.click()
        # Wait until the pager highlights the requested page ...
        wait.until(EC.text_to_be_present_in_element(
            (By.CSS_SELECTOR, '#mainsrp-pager li.item.active > span'), str(page)))
        # ... and the item list has rendered before parsing the page source.
        wait.until(EC.presence_of_element_located(
            (By.CSS_SELECTOR, '.m-itemlist .items .item')))
        getProducts()
    except Exception:
        # Any failure while loading or parsing the page is reported with the
        # same timeout message so the crawl can continue with the next page.
        print('超时')


# Shared browser session, explicit-wait helper (10 second limit) and search
# URL used by the functions above.
browser = webdriver.Chrome()
wait = WebDriverWait(browser, 10)
url = 'https://s.taobao.com/search?q=ipad'
try:
    for page in range(2, 5):
        index_page(page)
finally:
    # Always shut the browser down so the Chrome/chromedriver processes do
    # not outlive the script.
    browser.quit()