def escape_slashes(unescaped_string):
    """ Substitutes every forward slash with the literal text ``SLASH``.

    Used so that a string can serve as a single file name without the
    slashes being read as path separators. The substitution is one-way: a
    string that already contains ``SLASH`` cannot be told apart from an
    escaped one afterwards.

    :param unescaped_string: String which may contain forward slashes
    :returns: The string with each '/' replaced by 'SLASH'
    """
    # Cut the string at every slash and glue the pieces back together
    # with the placeholder in between.
    segments = unescaped_string.split('/')
    return 'SLASH'.join(segments)