Skip to content

Commit

Permalink
cleaned up and made it look nice
Browse files Browse the repository at this point in the history
  • Loading branch information
louiskwt committed Dec 20, 2024
1 parent 345a5a0 commit a57e201
Showing 1 changed file with 10 additions and 41 deletions.
51 changes: 10 additions & 41 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,15 @@
import requests
import csv
from bs4 import BeautifulSoup
from datetime import date
from scrapper import Scrapper
from utils import process_bbc_headlines, process_guadian_headlines

bbc_scrapper = Scrapper("https://www.bbc.com/news", "https://www.bbc.com", 'sc-2e6baa30-0 gILusN', "bbc_news")
def scrap_news():
    """Scrape headlines from BBC News and The Guardian and export each set to CSV.

    Builds one Scrapper per site configured with (listing URL, base URL for
    resolving relative links, the anchor CSS class to match, the attribute
    holding the headline text/link, and the CSV file prefix), runs the scrape
    with a site-specific headline post-processor, then writes the results.
    Network and file side effects only; returns None.
    """
    bbc_scrapper = Scrapper("https://www.bbc.com/news", "https://www.bbc.com", 'sc-2e6baa30-0 gILusN', 'href', "bbc_news")
    guardian_scrapper = Scrapper("https://www.theguardian.com/international", "https://www.theguardian.com", 'dcr-ezvrjj', 'aria-label', 'guardian_news')

    # Each site gets its own post-processing callback because the headline
    # markup differs between the two pages.
    bbc_scrapper.scrape(process_bbc_headlines)
    bbc_scrapper.export_news_csv()

    guardian_scrapper.scrape(process_guadian_headlines)
    guardian_scrapper.export_news_csv()


# def guardian_scrapper():
# try:
# url = "https://www.theguardian.com/international"
# respose = requests.get(url)
# respose.raise_for_status()
# soup = BeautifulSoup(respose.content, 'html.parser')
# headlines = []
# for link in soup.find_all('a', class_='dcr-ezvrjj'):
# href = 'https://www.theguardian.com' + link['href']
# headlines.append({'title': link['aria-label'], 'link': href})
# return headlines
# except requests.exceptions.RequestException as e:
# print(f'Error scraping the guardians: {e}')
# return []

# bbc_headlines = bbc_scrapper()
# guardian_headlines = guardian_scrapper()

# today = date.today().isoformat()

# if bbc_headlines:
# with open(f"bbc_news-{today}.csv", "w", newline="") as file:
# writer = csv.DictWriter(file, fieldnames=bbc_headlines[0].keys())
# writer.writeheader()
# for headline in bbc_headlines:
# writer.writerow(headline)

# if guardian_headlines:
# with open(f'guardian_news-{today}.csv', 'w', newline="") as file:
# writer = csv.DictWriter(file, fieldnames=guardian_headlines[0].keys())
# writer.writeheader()
# for headline in guardian_headlines:
# writer.writerow(headline)
# Script entry point: run the scrape-and-export pipeline only when executed
# directly, so importing this module stays side-effect free.
if __name__ == '__main__':
    scrap_news()

0 comments on commit a57e201

Please sign in to comment.