
Commit

sample queries
lilydia committed Dec 7, 2020
1 parent e01989b commit 784da2b
Showing 8 changed files with 23,278 additions and 0 deletions.
2,391 changes: 2,391 additions & 0 deletions examples/finance.csv

Large diffs are not rendered by default.

132 changes: 132 additions & 0 deletions examples/finance_query.py
@@ -0,0 +1,132 @@
from linkedin_jobs_scraper import LinkedinScraper
from linkedin_jobs_scraper.events import Events, EventData
from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, TypeFilters, ExperienceLevelFilters
import csv
import pandas

searches = ['real estate', 'rental and lease', 'insurance and claims']

def on_data(data: EventData):
    # print('[ON_DATA]', data.title, data.company, data.date, data.link, len(data.description))
    # Append to the module-level lists that the main loop below (re)initializes before each run
    title.append(data.title)
    company.append(data.company)
    date.append(data.date)
    link.append(data.link)


def on_error(error):
    print('[ON_ERROR]', error)


def on_end():
    print('[ON_END]')


for counter in range(2):
    if counter == 0:
        for search in searches:

            title = []
            company = []
            date = []
            link = []
            industry = []
            occupation = []
            jobType = []

            scraper = LinkedinScraper(
                chrome_options=None,  # You can pass your custom Chrome options here
                max_workers=1,  # How many threads will be spawned to run queries concurrently (one Chrome driver for each thread)
                slow_mo=0.5,  # Slow down the scraper to avoid 'Too many requests (429)' errors
            )

            # Add event listeners
            scraper.on(Events.DATA, on_data)
            scraper.on(Events.ERROR, on_error)
            scraper.on(Events.END, on_end)

            queries = [
                Query(
                    options=QueryOptions(
                        optimize=True,  # Blocks requests for resources like images and stylesheets
                        limit=0  # Limit the number of jobs to scrape
                    )
                ),
                Query(
                    query=search,
                    options=QueryOptions(
                        locations=['Toronto, Ontario, Canada'],
                        optimize=True,
                        limit=250,
                        filters=QueryFilters(
                            relevance=RelevanceFilters.RELEVANT,
                            time=TimeFilters.WEEK,
                            type=[TypeFilters.FULL_TIME],
                        )
                    )
                ),
            ]

            scraper.run(queries)

            # Tag every scraped row with its industry, search term, and job type
            for i in range(len(title)):
                industry.append('Finance')
                occupation.append(search)
                jobType.append('Full Time')

            df = pandas.DataFrame(data={"Title": title, "Company": company, "Date": date, "Link": link, "Industry": industry, "Occupation": occupation, "Type": jobType})
            df.to_csv("C:/Users/User/py-linkedin-jobs-scraper/examples/finance.csv", mode='a', sep=',', header=False, index=False)

    if counter == 1:
        for search in searches:

            title = []
            company = []
            date = []
            link = []
            industry = []
            occupation = []
            jobType = []

            scraper = LinkedinScraper(
                chrome_options=None,  # You can pass your custom Chrome options here
                max_workers=1,  # How many threads will be spawned to run queries concurrently (one Chrome driver for each thread)
                slow_mo=0.6,  # Slow down the scraper to avoid 'Too many requests (429)' errors
            )

            # Add event listeners
            scraper.on(Events.DATA, on_data)
            scraper.on(Events.ERROR, on_error)
            scraper.on(Events.END, on_end)

            queries = [
                Query(
                    options=QueryOptions(
                        optimize=True,  # Blocks requests for resources like images and stylesheets
                        limit=0  # Limit the number of jobs to scrape
                    )
                ),
                Query(
                    query=search,
                    options=QueryOptions(
                        locations=['Toronto, Ontario, Canada'],
                        optimize=True,
                        limit=50,
                        filters=QueryFilters(
                            relevance=RelevanceFilters.RELEVANT,
                            time=TimeFilters.WEEK,
                            type=[TypeFilters.PART_TIME],
                        )
                    )
                ),
            ]

            scraper.run(queries)

            # Tag every scraped row with its industry, search term, and job type
            for i in range(len(title)):
                industry.append('Finance')
                occupation.append(search)
                jobType.append('Part Time')

            df = pandas.DataFrame(data={"Title": title, "Company": company, "Date": date, "Link": link, "Industry": industry, "Occupation": occupation, "Type": jobType})
            df.to_csv("C:/Users/User/py-linkedin-jobs-scraper/examples/finance.csv", mode='a', sep=',', header=False, index=False)
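
The two passes above differ only in slow_mo, limit, the job-type filter, and the label written to the Type column, and finance.csv is appended to with header=False, so the file needs its header row created separately. Below is a minimal sketch of how the same scrape could be driven from one parameterized loop, reusing only the calls already in the script; the header-initialization step, the per-pass tuples, and the CSV_PATH/COLUMNS names are illustrative, not part of this commit (the script's unfiltered Query(limit=0) is omitted for brevity).

import os

import pandas

from linkedin_jobs_scraper import LinkedinScraper
from linkedin_jobs_scraper.events import Events, EventData
from linkedin_jobs_scraper.query import Query, QueryOptions, QueryFilters
from linkedin_jobs_scraper.filters import RelevanceFilters, TimeFilters, TypeFilters

CSV_PATH = "C:/Users/User/py-linkedin-jobs-scraper/examples/finance.csv"
COLUMNS = ["Title", "Company", "Date", "Link", "Industry", "Occupation", "Type"]
searches = ['real estate', 'rental and lease', 'insurance and claims']

# Write the header row once so the later appends with header=False stay aligned
if not os.path.exists(CSV_PATH):
    pandas.DataFrame(columns=COLUMNS).to_csv(CSV_PATH, index=False)

# (slow_mo, limit, type filter, label) for each pass, mirroring the two branches above
passes = [
    (0.5, 250, TypeFilters.FULL_TIME, 'Full Time'),
    (0.6, 50, TypeFilters.PART_TIME, 'Part Time'),
]

for slow_mo, limit, type_filter, label in passes:
    for search in searches:
        rows = []  # filled by the DATA event listener below

        def on_data(data: EventData, search=search, label=label):
            rows.append([data.title, data.company, data.date, data.link, 'Finance', search, label])

        scraper = LinkedinScraper(chrome_options=None, max_workers=1, slow_mo=slow_mo)
        scraper.on(Events.DATA, on_data)
        scraper.on(Events.ERROR, lambda error: print('[ON_ERROR]', error))
        scraper.on(Events.END, lambda: print('[ON_END]'))

        scraper.run([
            Query(
                query=search,
                options=QueryOptions(
                    locations=['Toronto, Ontario, Canada'],
                    optimize=True,
                    limit=limit,
                    filters=QueryFilters(
                        relevance=RelevanceFilters.RELEVANT,
                        time=TimeFilters.WEEK,
                        type=[type_filter],
                    )
                )
            ),
        ])

        # Append this pass's rows under the header written above
        pandas.DataFrame(rows, columns=COLUMNS).to_csv(CSV_PATH, mode='a', header=False, index=False)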
