Skip to content
This repository has been archived by the owner on Oct 17, 2021. It is now read-only.

Commit

Permalink
Merge pull request #1 from angga1518/angga/enrich-courses-data-for-ul…
Browse files Browse the repository at this point in the history
…as-kelas

feat: scrape course code and curriculum
  • Loading branch information
angga1518 authored Sep 7, 2021
2 parents cedd550 + c5f7259 commit 1596b6b
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 1 deletion.
2 changes: 2 additions & 0 deletions backend/models/period.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ def serialize(self):


class Course(mongo.EmbeddedDocument):
course_code = mongo.StringField(max_length=16)
curriculum = mongo.StringField(max_length=32)
name = mongo.StringField(max_length=128)
credit = mongo.IntField()
term = mongo.IntField()
Expand Down
22 changes: 21 additions & 1 deletion backend/scraper/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
# Endpoint requested right before the schedule page in
# scrape_courses_with_credentials; it takes no placeholders.
CHANGEROLE_URL = f"{BASE_URL}/Authentication/ChangeRole"
# The doubled braces below are escaped inside the f-string, so the resulting
# constants keep single-brace placeholders to be filled later with str.format().
# Placeholder: period.
DETAIL_SCHEDULE_URL = f"{BASE_URL}/Schedule/Index?period={{period}}&search="
# Placeholders: fac, org, period.
GENERAL_SCHEDULE_URL = f"{BASE_URL}/Schedule/IndexOthers?fac={{fac}}&org={{org}}&per={{period}}&search="
# Placeholders: course, curr (filled from a course's course_code and curriculum).
DETAIL_COURSES_URL = f"{BASE_URL}/Course/Detail?course={{course}}&curr={{curr}}"
# NOTE(review): how this value is used is not visible in this excerpt — confirm
# what it identifies before changing it.
DEFAULT_CREDENTIAL = "01.00.12.01"

def scrape_courses_with_credentials(period, username, password):
Expand All @@ -31,6 +32,7 @@ def scrape_courses_with_credentials(period, username, password):
r = req.get(CHANGEROLE_URL)
r = req.get(DETAIL_SCHEDULE_URL.format(period=period))
courses = create_courses(r.text, is_detail=True)
generate_desc_prerequisite(courses, req)
return courses


Expand Down Expand Up @@ -92,6 +94,14 @@ def get_period_and_kd_org(html):

return None, None

def generate_desc_prerequisite(courses, req):
    """Fetch the detail page of the first course and print two labels from it.

    For the first item in ``courses`` the detail URL is built from its
    ``course_code`` and ``curriculum`` attributes, downloaded through ``req``
    and parsed. The text nodes matching "Deskripsi Mata Kuliah" and
    "Prasyarat Mata Kuliah" are looked up and printed as
    ``"<desc> :: <prerequisite>"`` (``None`` is printed for a missing node).

    Args:
        courses: Iterable of objects exposing ``course_code`` and
            ``curriculum`` attributes.
        req: Object whose ``get(url)`` returns a response with a ``text``
            attribute (presumably an authenticated HTTP session — confirm
            against the caller).

    Returns:
        None. The courses are not modified; the result is only printed.
    """
    for entry in courses:
        detail_url = DETAIL_COURSES_URL.format(
            course=entry.course_code,
            curr=entry.curriculum,
        )
        response = req.get(detail_url)
        page = BeautifulSoup(response.text, 'html.parser')

        desc_node = page.find(text="Deskripsi Mata Kuliah")
        prerequisite_node = page.find(text="Prasyarat Mata Kuliah")
        print(f"{desc_node!s} :: {prerequisite_node!s}")

        # Only the first course is inspected; the loop stops after one pass.
        break

def create_courses(html, is_detail=False):
soup = BeautifulSoup(html, 'html.parser')
Expand All @@ -107,6 +117,14 @@ def create_courses(html, is_detail=False):
if m:
credit, term = m.group().split(' SKS, Term ')

c = str(class_.text).split(" - ")
if c:
course_code = c[0].strip()

c = str(class_.text).split("Kurikulum")
if c:
curriculum = c[1].strip()

classes = []
for sib in class_.parent.find_next_siblings('tr'):
if (sib.get('class') == None):
Expand Down Expand Up @@ -157,7 +175,9 @@ def create_courses(html, is_detail=False):
name=course_name,
credit=credit,
term=term,
classes=classes
classes=classes,
course_code=course_code,
curriculum=curriculum
))

return courses

0 comments on commit 1596b6b

Please sign in to comment.