forked from thepycoach/automation
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path1.news-extract-data.py
32 lines (26 loc) · 1.01 KB
/
1.news-extract-data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
import pandas as pd
# Scrape the football headlines listed on The Sun's football page and save
# them (title, subtitle, link) to 'headline.csv' in the working directory.

# Page to scrape and location of the local chromedriver executable.
web = 'https://www.thesun.co.uk/sport/football/'
path = '/Users/frankandrade/Downloads/chromedriver'  # set this to your own chromedriver path

# Creating the driver
driver_service = Service(executable_path=path)
driver = webdriver.Chrome(service=driver_service)

# Everything that uses the browser runs inside try/finally so the Chrome
# session is always shut down, even when loading the page, locating an
# element, or writing the CSV raises.
try:
    driver.get(web)

    # Finding Elements: one container per headline teaser on the page.
    containers = driver.find_elements(by='xpath', value='//div[@class="teaser__copy-container"]')

    # Parallel lists: index i of each list describes the same headline.
    titles = []
    subtitles = []
    links = []

    for container in containers:
        # The leading './' makes each XPath relative to the current container
        # rather than the whole document. find_element raises if a container
        # lacks one of these children, which aborts the run (and, thanks to
        # the finally clause below, still closes the browser).
        title = container.find_element(by='xpath', value='./a/h2').text
        subtitle = container.find_element(by='xpath', value='./a/p').text
        link = container.find_element(by='xpath', value='./a').get_attribute('href')
        titles.append(title)
        subtitles.append(subtitle)
        links.append(link)

    # Exporting data to a CSV file
    my_dict = {'title': titles, 'subtitle': subtitles, 'link': links}
    df_headlines = pd.DataFrame(my_dict)
    df_headlines.to_csv('headline.csv')
finally:
    # Close the browser window and stop the chromedriver process.
    driver.quit()