Skip to content

Commit

Permalink
module
Browse files Browse the repository at this point in the history
  • Loading branch information
RoopashreeRamachandraiah committed Mar 21, 2024
1 parent 0078347 commit 8b847a9
Showing 1 changed file with 5 additions and 13 deletions.
18 changes: 5 additions & 13 deletions ks1_papers.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,33 +13,25 @@
# Find all hyperlinks present on webpage
links = soup.find_all('a')

# Create a 'Materials' folder inside the current working directory to hold
# the downloaded files. exist_ok=True lets the script be re-run without
# failing when the folder already exists (os.mkdir raised FileExistsError).
cwd = os.getcwd()
path = cwd + '/Materials/'
os.makedirs(path, exist_ok=True)

# Running count of downloaded files, reported after each download.
i = 0

# Absolute URLs of the pages that will be scanned for materials.
urls = []

# From all links check for pdf link and
# if present download file
# Collect the URL of every link on the page whose href contains
# 'key-stage-1' (lowercase, as it appears in the hrefs being matched).
for link in links:
    href = link.get('href')
    # Anchors without an href attribute yield None from .get(); skip them
    # rather than failing on the substring test.
    if href and 'key-stage-1' in href:
        urls.append(baseurl + href)

#print(urls)
# Fetch and parse each collected page, then download the materials it links to.
for each_url in urls:
response=requests.get(each_url)
soup1=BeautifulSoup(response.text, 'html.parser')

material_links=soup1.find_all('a')
print('material_links')
print(material_links)



for m_link in material_links:
if ('.pdf' in m_link.get('href')):
i+=1
Expand All @@ -55,6 +47,6 @@
pdf = open(path+folder+'/'+name+".pdf", 'wb')
pdf.write(response.content)
pdf.close()
print("File ", i, " downloaded")



0 comments on commit 8b847a9

Please sign in to comment.