Skip to content

Commit

Permalink
spaces + misc pages
Browse files Browse the repository at this point in the history
  • Loading branch information
ssafari-PARC committed Sep 25, 2019
1 parent 025776e commit b64e6d2
Showing 1 changed file with 6 additions and 1 deletion.
7 changes: 6 additions & 1 deletion confluence_dumper.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,12 @@ def handle_html_references(html_content, page_duplicate_file_names, page_file_ma
xpath_expr = '//a[contains(@href, "/display/")]'
for link_element in html_tree.xpath(xpath_expr):
if not link_element.get('class'):
page_title = link_element.attrib['href'].split('/')[4]
print("LINK - "+link_element.attrib['href'])
try:
page_title = link_element.attrib['href'].split('/')[4]
except:
page_title = link_element.attrib['href'].split('/')[3]

page_title = page_title.replace('+', ' ')
decoded_page_title = utils.decode_url(page_title)
offline_link = provide_unique_file_name(page_duplicate_file_names, page_file_matching, decoded_page_title,
Expand Down

0 comments on commit b64e6d2

Please sign in to comment.