Skip to content

Commit

Permalink
cleaned up and made it look nice
Browse files Browse the repository at this point in the history
  • Loading branch information
louiskwt committed Dec 20, 2024
1 parent 345a5a0 commit a57e201
Showing 1 changed file with 10 additions and 41 deletions.
51 changes: 10 additions & 41 deletions main.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,15 @@
import requests
import csv
from bs4 import BeautifulSoup
from datetime import date
from scrapper import Scrapper
from utils import process_bbc_headlines, process_guadian_headlines

bbc_scrapper = Scrapper("https://www.bbc.com/news", "https://www.bbc.com", 'sc-2e6baa30-0 gILusN', "bbc_news")
def scrap_news():
    """Scrape headlines from BBC News and The Guardian and export each set to CSV.

    Builds one Scrapper per site configured with (listing URL, base URL for
    resolving relative links, the anchor CSS class to match, the attribute
    holding the headline text/link, and the CSV file prefix), runs the scrape
    with a site-specific headline post-processor, then writes the results.
    Network and file side effects only; returns None.
    """
    bbc_scrapper = Scrapper("https://www.bbc.com/news", "https://www.bbc.com", 'sc-2e6baa30-0 gILusN', 'href', "bbc_news")
    guardian_scrapper = Scrapper("https://www.theguardian.com/international", "https://www.theguardian.com", 'dcr-ezvrjj', 'aria-label', 'guardian_news')

    # Each site gets its own post-processing callback because the headline
    # markup differs between the two pages.
    bbc_scrapper.scrape(process_bbc_headlines)
    bbc_scrapper.export_news_csv()

    guardian_scrapper.scrape(process_guadian_headlines)
    guardian_scrapper.export_news_csv()


# def guardian_scrapper():
# try:
# url = "https://www.theguardian.com/international"
# respose = requests.get(url)
# respose.raise_for_status()
# soup = BeautifulSoup(respose.content, 'html.parser')
# headlines = []
# for link in soup.find_all('a', class_='dcr-ezvrjj'):
# href = 'https://www.theguardian.com' + link['href']
# headlines.append({'title': link['aria-label'], 'link': href})
# return headlines
# except requests.exceptions.RequestException as e:
# print(f'Error scraping the guardians: {e}')
# return []

# bbc_headlines = bbc_scrapper()
# guardian_headlines = guardian_scrapper()

# today = date.today().isoformat()

# if bbc_headlines:
# with open(f"bbc_news-{today}.csv", "w", newline="") as file:
# writer = csv.DictWriter(file, fieldnames=bbc_headlines[0].keys())
# writer.writeheader()
# for headline in bbc_headlines:
# writer.writerow(headline)

# if guardian_headlines:
# with open(f'guardian_news-{today}.csv', 'w', newline="") as file:
# writer = csv.DictWriter(file, fieldnames=guardian_headlines[0].keys())
# writer.writeheader()
# for headline in guardian_headlines:
# writer.writerow(headline)
# Script entry point: run the scrape-and-export pipeline only when executed
# directly, so importing this module stays side-effect free.
if __name__ == '__main__':
    scrap_news()

0 comments on commit a57e201

Please sign in to comment.