Skip to content

Commit

Permalink
Supported relative paths for Confluence root directory.
Browse files: browse the repository at this point in the history
  • Loading branch information
thomai committed Sep 2, 2016
1 parent 873d673 commit 072765b
Showing 1 changed file with 4 additions and 4 deletions.
8 changes: 4 additions & 4 deletions confluence_dumper.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def handle_html_references(html_content, duplicate_file_names, page_file_matchin
# Example: /display/TES/pictest1
# => pictest1.html
# TODO: This code does not work for "Recent space activity" areas in space pages because they use a different URL format.
xpath_expr = '//a[starts-with(@href, "/display/")]'
xpath_expr = '//a[contains(@href, "/display/")]'
for link_element in html_tree.xpath(xpath_expr):
if not link_element.get('class'):
page_title = link_element.attrib['href'].split('/')[3]
Expand All @@ -115,7 +115,7 @@ def handle_html_references(html_content, duplicate_file_names, page_file_matchin
decoded_page_title)

# Fix links to other Confluence pages when page ids are used
xpath_expr = '//a[starts-with(@href, "/pages/viewpage.action?pageId=")]'
xpath_expr = '//a[contains(@href, "/pages/viewpage.action?pageId=")]'
for link_element in html_tree.xpath(xpath_expr):
if not link_element.get('class'):
page_id = link_element.attrib['href'].split('/pages/viewpage.action?pageId=')[1]
Expand All @@ -132,8 +132,8 @@ def handle_html_references(html_content, duplicate_file_names, page_file_matchin
# Fix file paths for img tags
# TODO: Handle non-<img> tags as well if necessary.
# TODO: Support files with different versions as well if necessary.
possible_image_xpaths = ['//img[starts-with(@src, "/download/")]',
'//img[starts-with(@src, "/rest/documentConversion/latest/conversion/thumbnail/")]']
possible_image_xpaths = ['//img[contains(@src, "/download/")]',
'//img[contains(@src, "/rest/documentConversion/latest/conversion/thumbnail/")]']
xpath_expr = '|'.join(possible_image_xpaths)
for img_element in html_tree.xpath(xpath_expr):
# Replace file path
Expand Down

0 comments on commit 072765b

Please sign in to comment.