-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscraper.py
123 lines (92 loc) · 4.24 KB
/
scraper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import ast
import sys
import time
from collections import Counter

import numpy as np
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
def _split_course(text):
    """Split the text of a course cell into ``(code, name)``.

    The first line of *text* is returned as the code and the second line as
    the name. A cell with a single line yields an empty name instead of
    raising ``IndexError``.
    """
    lines = text.split("\n")
    name = lines[1] if len(lines) > 1 else ""
    return lines[0], name


def _rows_to_results(rows):
    """Build ``{course_code: [course_name, grade]}`` from result-table rows.

    For every row the course code/name is read from the second ``<td>``
    (index 1) and the grade from the sixth ``<td>`` (index 5).
    """
    results = {}
    for row in rows:
        cols = row.find_elements(By.TAG_NAME, "td")
        code, name = _split_course(cols[1].text)
        results[code] = [name, cols[5].text]
    return results


def _write_results(results, filename):
    """Store *results* in *filename* as the ``str()`` form of the dict."""
    with open(filename, "w") as f:
        f.write(str(results))


def _read_results(filename):
    """Load the dict previously stored by ``_write_results``.

    Returns an empty dict when *filename* does not exist yet or is empty, so
    the very first run works without a hand-made file.

    The content is parsed with ``ast.literal_eval`` instead of ``eval``: the
    file only ever holds a dict literal, and ``literal_eval`` never executes
    code that may have ended up in the file.

    Raises:
        ValueError, SyntaxError: if the file holds something other than a
            valid Python literal.
    """
    try:
        with open(filename, "r") as f:
            content = f.read()
    except FileNotFoundError:
        return {}
    if not content.strip():
        return {}
    return ast.literal_eval(content)


def _compare_results(prev, new):
    """Report grades that are new or changed between *prev* and *new*.

    Both arguments map ``course_code -> [course_name, grade]``. Every
    difference is printed, and the same information is returned as one string
    with one line per new/changed grade (empty string when nothing changed).
    """
    lines = []

    # New courses: checked unconditionally, so a course that was added while
    # another one disappeared (same dict size) is still detected.
    for key in sorted(set(new) - set(prev)):
        name, grade = new[key]
        print(f"Ny karakter i {key} {name}: {grade}")
        lines.append(f"{grade} i {key} {name} \n")

    # Changed grades. Previous grades containing "bes" (passed/not passed)
    # are ignored because of unicode issues with those strings.
    for key, (prev_name, prev_grade) in prev.items():
        if key not in new:
            # The course is no longer listed; nothing to compare against.
            continue
        new_name, new_grade = new[key]
        if prev_grade != new_grade and "bes" not in prev_grade.lower():
            print(f"Endret karakter i {key} {prev_name}: {prev_grade} => {new_grade}")
            lines.append(f"{new_grade} i {key} {new_name} \n")

    if lines:
        print("Ferdig")
    else:
        print("Ingen endringer")
    return "".join(lines)


def scrape_studentweb():
    """Fetch the grades from Studentweb and report what changed.

    Logs in through Feide with a headless Chrome browser, reads the results
    table, compares it with the results stored in ``results.txt`` by the
    previous run, and overwrites that file with the fresh results.

    Returns:
        A string with one line per new or changed grade; empty when nothing
        changed.
    """
    # SETUP
    my_username = ""  # WRITE YOUR FEIDE-USERNAME HERE
    my_password = ""  # WRITE YOUR FEIDE-PASSWORD HERE
    my_school = "NTNU"  # WRITE YOUR SCHOOL NAME HERE
    # WRITE THE FULL NAME OF YOUR SCHOOL HERE
    my_school_fullname = "norges teknisk-naturvitenskap"
    url = "https://idp.feide.no/simplesaml/module.php/feide/login.php?asLen=169&AuthState=_41e67986da33cb1be903b3796b1c61ba1118d6230c%3Ahttps%3A%2F%2Fidp.feide.no%2Fsimplesaml%2Fsaml2%2Fidp%2FSSOService.php%3Fspentityid%3Dhttps%253A%252F%252Ffsweb.no%252Fstudentweb%26cookieTime%3D1538497429"
    results_file = "results.txt"

    # The browser runs invisibly. To watch the procedure live, create the
    # driver with a plain ``webdriver.Chrome()`` instead.
    options = webdriver.ChromeOptions()
    options.add_argument('headless')
    driver = webdriver.Chrome(options=options)

    # try/finally guarantees the browser is shut down even when a page
    # element is missing or the login fails half-way.
    try:
        driver.get(url)
        ac = webdriver.ActionChains(driver)

        # PART 1 - CHOOSE SCHOOL
        schoolpath = "//input[@id='org_selector-selectized']"
        driver.find_element(By.XPATH, schoolpath).send_keys(my_school, Keys.ENTER)

        # PART 2 - LOG IN TO STUDENTWEB WITH FEIDE-CREDENTIALS
        loginpaths = {
            'username': "//input[@id='username']",
            'password': "//input[@id='password']",
        }
        driver.find_element(By.XPATH, loginpaths['username']).send_keys(my_username)
        password_field = driver.find_element(By.XPATH, loginpaths['password'])
        password_field.send_keys(my_password, Keys.ENTER)

        # PART 3 - CHOOSE SCHOOL AGAIN FROM NEW LIST
        element = driver.find_element(By.ID, "institusjonsvalg")
        ac.move_to_element(element).click().perform()
        # send_keys is only queued on the chain here; it is carried out by
        # the perform() call on the next line.
        ac.send_keys(my_school_fullname, Keys.ENTER)
        ac.move_to_element(element).move_by_offset(70, 0).click().perform()

        # PART 4 - CLICK "LOG ON USING FEIDE"
        driver.find_element(By.LINK_TEXT, 'Log on using Feide').click()

        # PART 5 - OPEN URL WITH GRADES
        driver.get("https://fsweb.no/studentweb/resultater.jsf")

        # PART 6 - EXTRACT GRADES
        table = driver.find_element(
            By.ID, 'resultatlisteForm:HeleResultater:resultaterPanel')
        table_body = table.find_elements(By.TAG_NAME, "tbody")[0]
        graded = table_body.find_elements(By.CLASS_NAME, "resultatTop")
        passed = table_body.find_elements(By.CLASS_NAME, "none")
        # The row elements are only usable while the browser session is
        # alive, so they are turned into plain data before the driver quits.
        new = _rows_to_results(graded + passed)
    finally:
        driver.quit()

    # PART 7 - FILE-HANDLING AND COMPARING PREVIOUS RESULTS WITH NEW
    prev = _read_results(results_file)
    _write_results(new, results_file)
    return _compare_results(prev, new)