-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraping.py
90 lines (79 loc) · 3.33 KB
/
scraping.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from time import sleep
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def wait(selector, value, time: int = 20):
    """Block until an element matching the locator is present in the DOM.

    Uses the module-level ``driver``.

    Args:
        selector: Locator strategy, e.g. ``By.XPATH``.
        value: Locator expression understood by ``selector``.
        time: Timeout handed to ``WebDriverWait`` (seconds per the Selenium
            documentation).

    Returns:
        None. The located element is not handed back to the caller.
    """
    waiter = WebDriverWait(driver, time)
    locator = (selector, value)
    element_is_present = EC.presence_of_element_located(locator)
    # ``until`` polls the condition and raises once the timeout is exceeded.
    waiter.until(element_is_present)
def seperateString(aString):
    """Encode a search phrase so it can be used as a URL query value.

    Spaces are turned into ``+`` exactly as before. In addition, characters
    that have a special meaning inside a query string (``&``, ``#``, ``+``,
    ``=``, ...) and non-ASCII characters are percent-encoded, so they reach
    the server as part of the search term instead of breaking the URL.

    Args:
        aString: Raw search phrase, e.g. ``"iphone 13 pro"``.

    Returns:
        The encoded phrase, e.g. ``"iphone+13+pro"``. A phrase consisting only
        of letters, digits and spaces yields the same result as the previous
        space-to-plus replacement.
    """
    # Imported locally so the function stays self-contained.
    from urllib.parse import quote_plus

    return quote_plus(aString)
def scrollPage(size):
    """Scroll the browser window of the module-level ``driver``.

    Args:
        size: Value inserted as the second argument of ``window.scrollTo``
            (the first argument is always ``0``).
    """
    # Same script text as before: newline, the scrollTo call, newline.
    scroll_script = f"\nwindow.scrollTo(0, {size});\n"
    driver.execute_script(scroll_script)
# Location of the Chrome driver executable; the value below is a placeholder.
CHROME_DRIVER_PATH = r"path_of_the_driver"

# Browser session shared by the helper functions and by Scraping. It is
# created as soon as this module is imported.
# NOTE(review): recent Selenium 4 releases no longer accept the
# ``executable_path`` keyword and expect a ``Service`` object instead -
# confirm against the installed Selenium version.
driver = webdriver.Chrome(executable_path=CHROME_DRIVER_PATH)
class Scraping:
    """Scrape one product from akakce.com and store the results on disk.

    Creating an instance immediately runs the whole workflow for ``itemName``
    with the module-level ``driver``: search, open the first hit, then write
    prices, attributes, comments and a product image below ``project3/``.
    """

    def __init__(self, itemName):
        """Remember the search term and start scraping right away.

        Args:
            itemName: Free-text product name to search for.
        """
        self.driver = driver
        self.itemName = itemName
        self.searchForItem()

    def searchForItem(self):
        """Run the search, open the first result and scrape its page."""
        itemN = seperateString(self.itemName)
        self.driver.get("https://www.akakce.com/arama/?q=" + itemN)
        # Do not click before the first entry of the result list exists.
        wait(By.XPATH, '//*[@id="APL"]/li[1]')
        firstResultImage = self.driver.find_element(
            By.XPATH, '//*[@id="APL"]/li[1]/a/figure/img'
        )
        firstResultImage.click()
        # Fixed pause after the click before the page is inspected.
        sleep(3)
        self.verifySearched()

    def verifySearched(self):
        """Scrape the current page if it contains the ``DT_w`` element.

        When the element is missing, a message is printed and nothing is
        scraped. Errors raised by the individual scraping steps are not
        caught here and propagate to the caller.
        """
        # find_elements returns an empty list instead of raising, so the
        # "page not found" case needs no exception handling.
        if not self.driver.find_elements(By.XPATH, '//*[@id="DT_w"]'):
            print("Cannot get device page, Please try again later....")
            return
        self.getPrices()
        self.getAttributes()
        self.getComments()
        self.getDeviceImg()

    def getPrices(self):
        """Write price and link of every offer to ``project3/prices.txt``.

        Each ``li`` below the ``PL`` element produces one record of the form
        ``<price text>_<href>`` followed by a newline.
        """
        priceXpath = '//*[@id="PL"]/li[{i}]//*[@class="pt_v8"]'
        linkXpath = '//*[@id="PL"]/li[{i}]/a'
        offerCount = len(self.driver.find_elements(By.XPATH, '//*[@id="PL"]/li'))
        # UTF-8 like the other output files, so non-ASCII characters in the
        # price text do not depend on the platform's default encoding.
        with open("project3/prices.txt", "w", encoding="UTF-8") as file:
            # XPath positions are 1-based.
            for i in range(1, offerCount + 1):
                price = self.driver.find_element(
                    By.XPATH, priceXpath.format(i=i)
                ).text
                # str() keeps the concatenation below valid even when the
                # attribute lookup yields None.
                pricehrf = str(
                    self.driver.find_element(
                        By.XPATH, linkXpath.format(i=i)
                    ).get_attribute("href")
                )
                file.write(price + "_" + pricehrf + "\n")

    @staticmethod
    def _formatAttributeLines(attributes):
        """Split the attribute text into lines and mark value-less labels.

        Args:
            attributes: Text of the attribute section, lines separated by
                ``"\\n"``.

        Returns:
            List of lines; a line ending in ``":"`` gets ``" Var"`` appended
            (presumably Turkish for "present" - verify), all others are
            unchanged.
        """
        return [
            line + " Var" if line.endswith(":") else line
            for line in attributes.split("\n")
        ]

    def getAttributes(self):
        """Write the text of the ``DT_w`` element to ``project3/attributes.txt``."""
        attributes = self.driver.find_element(By.XPATH, '//*[@id="DT_w"]').text
        with open("project3/attributes.txt", "w", encoding="UTF-8") as file:
            file.writelines(
                line + "\n" for line in self._formatAttributeLines(attributes)
            )

    def getComments(self):
        """Write the text of every ``cm`` element below ``UCL`` to ``project3/comments.txt``."""
        comments = self.driver.find_elements(
            By.XPATH, '//*[@id="UCL"]//*[@class="cm"]'
        )
        with open("project3/comments.txt", "w", encoding="UTF-8") as file:
            for com in comments:
                file.write(com.text + "\n")

    def getDeviceImg(self):
        """Save a PNG screenshot of the product image to ``project3/static/filename.png``."""
        # Scroll before taking the element screenshot.
        scrollPage(190)
        image = self.driver.find_element(
            By.XPATH, '//*[@id="PI_v8"]/a/img'
        ).screenshot_as_png
        with open('project3/static/filename.png', 'wb') as file:
            file.write(image)
# def detailedAttributes(self):
# try:
# detailedAtt = self.driver.find_element(By.XPATH,"//div[@class='icSTw_v8 wbb_v8']").text
# with open("details.txt","w",encoding="UTF-8") as file:
# file.write(detailedAtt)
# except Exception:
# return "There is no more attribute...."