Skip to content

Commit

Permalink
Merge pull request coursera-dl#477 from coursera-dl/fix-0-courses-ava…
Browse files Browse the repository at this point in the history
…ilable

This uses the observation from @aruljohn from issues coursera-dl#472 and coursera-dl#474 and
potentially fixes issues coursera-dl#472, coursera-dl#473, coursera-dl#474.

Signed-off-By: Rogério Theodoro de Brito <[email protected]>
  • Loading branch information
rbrito authored Dec 21, 2017
2 parents d8f676d + b6c77a6 commit e8838f2
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
16 changes: 15 additions & 1 deletion edx_dl/parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,21 @@ def extract_courses_from_html(self, page, BASE_URL):
Extracts courses (Course) from the html page
"""
soup = BeautifulSoup(page)
courses_soup = soup.find_all('div', 'course')

# First, try with new course structure (as of December 2017). If
# that doesn't work, we fall back to an older course structure
# (released with version 0.1.6). If even that doesn't work, then we
# try with the oldest course structure (that was current before
# version 0.1.6).
#
# rbrito---This code is ugly.

courses_soup = soup.find_all('article', 'course')
if len(courses_soup) == 0:
courses_soup = soup.find_all('div', 'course')
if len(courses_soup) == 0:
courses_soup = soup.find_all('div', 'course audit')

courses = []

for course_soup in courses_soup:
Expand Down
1 change: 1 addition & 0 deletions test_parsing.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ def test_extract_sections(file, num_sections_expected, num_subsections_expected)
@pytest.mark.parametrize(
'filename,site,num_courses_expected,num_available_courses_expected', [
('test/html/dashboard-version-with-articles.html', 'https://courses.edx.org', 18, 14),
('test/html/dashboard-version-with-divs.html', 'https://courses.edx.org', 18, 14),
]
)
def test_extract_courses_from_html(filename, site, num_courses_expected, num_available_courses_expected):
Expand Down

0 comments on commit e8838f2

Please sign in to comment.