Skip to content

Commit

Permalink
Added naukri JobLoader
Browse files Browse the repository at this point in the history
  • Loading branch information
Saavrm26 committed Jul 11, 2023
1 parent 9ebca86 commit 384e8c8
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 4 deletions.
5 changes: 3 additions & 2 deletions scrapping/scrapping/itemloaders/indeed.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def add_values(self, dictionary: dict[str, Any]):
def load_item(self) -> ItemLoader.load_item:
return self.Loader.load_item

def __init__(self, scrappedItems: dict[str, Any]):
def __init__(self, scrapped_items: dict[str, Any]):
self.Loader = ItemLoader(item=Job())
self.add_values(scrappedItems)
self.add_values(scrapped_items)

27 changes: 27 additions & 0 deletions scrapping/scrapping/itemloaders/naukri.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from typing import Any
from scrapy.loader import ItemLoader
from scrapping.items.Job import Job


class JobLoader:
    """Populate a scrapy ``ItemLoader`` for a ``Job`` item from a dict of scraped values.

    Construction immediately normalizes and loads every entry of
    *scrapped_items* into an internal ``ItemLoader``; call :meth:`load_item`
    to obtain the populated ``Job`` item.
    """

    def add_values(self, dictionary: dict[str, Any]) -> None:
        """Normalize and load each truthy value from *dictionary*.

        Rules applied per entry:
        - Falsy values (``None``, ``''``, ``[]``, ``0``) are skipped entirely.
        - Lists are frozen into tuples.
        - Strings are stripped of surrounding whitespace.
        - Spaces in keys become underscores so keys match ``Job`` field names.
        """
        for key, value in dictionary.items():
            if not value:
                continue

            # isinstance is the idiomatic check; type(x) is T rejects subclasses.
            if isinstance(value, list):
                value = tuple(value)

            if isinstance(value, str):
                value = value.strip()

            key = key.replace(' ', '_')

            self.Loader.replace_value(key, value)

    def load_item(self):
        """Return the populated Job item.

        BUG FIX: the original returned the bound method ``self.Loader.load_item``
        (and annotated the return as ``ItemLoader.load_item``, which is not a
        type), forcing callers to invoke it twice — e.g. the spider yielded
        ``JobLoader(d).load_item()()``. Now the item is returned directly;
        call sites simplify to ``JobLoader(d).load_item()``.
        """
        return self.Loader.load_item()

    def __init__(self, scrapped_items: dict[str, Any]):
        # Build a fresh loader around an empty Job, then bulk-load the
        # scraped values through the normalization in add_values().
        self.Loader = ItemLoader(item=Job())
        self.add_values(scrapped_items)
8 changes: 6 additions & 2 deletions scrapping/scrapping/spiders/naukri.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import scrapy
from scrapping.utils.color_printing import prYellow, prRed, prGreen
from scrapping.itemloaders.naukri import JobLoader
from scrapping.utils.types import JOB_TITLE, COMPANY_NAME, COMPANY_ABOUT_URL, SALARY, LOCATION, JOB_URL

meta = {
Expand All @@ -8,6 +9,7 @@


class NaukriSpider(scrapy.Spider):
# * THOUGHT: COMPRESS AND STORE JOB DESCRIPTION IN AN ELASTIC SEARCH DATABASE, THIS WILL ELIMINATE THE NEED OF SCRAPING BENEFITS, JOB_TYPE
name = "naukri"

def __init__(self, title: str, location: str, *args, **kwargs):
Expand Down Expand Up @@ -40,7 +42,7 @@ def parse_job_cards(self, response):

def parse_job_page(self, response):
try:
prYellow("Inside parse_job_page")
prYellow("Inside Naukri Spider parse_job_page")
prGreen(response.url)
jd_body = response.css(".jd-container .leftSec")
scrapped_items = {}
Expand All @@ -63,6 +65,8 @@ def parse_job_page(self, response):

prGreen(scrapped_items)

yield JobLoader(scrapped_items).load_item()()

except Exception as e:
prRed(e)
prYellow("At parse_job_page")
prYellow("At Naukri Spider parse_job_page")

0 comments on commit 384e8c8

Please sign in to comment.