Skip to content

Commit

Permalink
Generate URI map on the fly
Browse files Browse the repository at this point in the history
Generate the keyword to URI map on the fly, if the user
does not request to load it from a file instead
  • Loading branch information
hakonhagland committed Nov 5, 2024
1 parent 2040a62 commit d9105c6
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 26 deletions.
2 changes: 1 addition & 1 deletion scripts/python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ fodt-extract-xml-tag = "fodt.splitter:extract_xml_tag"
fodt-fix-ignored-keywords = "fodt.fix_ignored:fix_ignored"
fodt-fix-footer-style = "fodt.fix_footer_style:fix_footer_style"
fodt-fix-letter-k-footer = "fodt.fix_letter_k_footer:fix_letter_k_footer"
fodt-gen-kw-uri-map = "fodt.keyword_linker:gen_kw_uri_map"
fodt-gen-kw-uri-map = "fodt.keyword_linker:gen_kw_uri_map_cli"
fodt-link-keywords = "fodt.keyword_linker2:link_keywords"
fodt-remove-bookmarks-from-master-styles = "fodt.remove_bookmarks:remove_bookmarks_from_master_styles"
fodt-remove-chapters = "fodt.splitter:remove_chapters"
Expand Down
31 changes: 18 additions & 13 deletions scripts/python/src/fodt/keyword_linker.py
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,21 @@ def keyword_uri(self, keyword: str) -> str:
uri = ExtractURI(kw_file, keyword).extract()
return uri

def get_kw_uri_map(maindir: Path, keyword_dir: Path) -> dict[str, str]:
    """Build the keyword-name to URI map by scanning the keyword directories.

    Every sub-directory of ``keyword_dir`` is expected to be named ``xx.yy``,
    where ``xx`` is the chapter number and ``yy`` is the section number. For
    each such directory a ``ProcessChapter`` is run on the directory's
    keywords file and is handed the shared map (presumably filling it in;
    see ``ProcessChapter.process``).

    Args:
        maindir: Main directory, passed through to ``ProcessChapter``.
        keyword_dir: Directory holding one ``xx.yy`` sub-directory per section.

    Returns:
        The map that was handed to every ``ProcessChapter`` run. It is empty
        when ``keyword_dir`` contains no ``xx.yy`` sub-directories.
    """
    kw_uri_map: dict[str, str] = {}
    # Path.iterdir() yields entries in arbitrary order; sort so that the
    # processing order (and the log output) is the same on every run.
    for item1 in sorted(keyword_dir.iterdir()):
        if not item1.is_dir():
            continue
        chapter_str = item1.name
        parts = chapter_str.split(".")
        if len(parts) != 2:
            # A stray directory that is not of the form xx.yy would otherwise
            # abort the whole run with a ValueError on unpacking.
            logging.warning(f"Skipping directory with unexpected name: {item1}")
            continue
        chapter, section = parts
        kw_file = item1 / FileNames.keywords
        logging.info(f"Processing chapter {chapter_str}")
        ProcessChapter(maindir, chapter, section, kw_file, kw_uri_map).process()
    return kw_uri_map


# fodt-gen-kw-uri-map
# -------------------
#
Expand All @@ -138,26 +153,16 @@ def keyword_uri(self, keyword: str) -> str:
@click.command()
@ClickOptions.maindir()
@ClickOptions.keyword_dir
def gen_kw_uri_map(maindir: str|None, keyword_dir: str|None) -> None:
def gen_kw_uri_map_cli(maindir: str|None, keyword_dir: str|None) -> None:
logging.basicConfig(level=logging.INFO)
keyword_dir = helpers.get_keyword_dir(keyword_dir)
maindir = helpers.get_maindir(maindir)
kw_uri_map = {}
# Assume all directories in keyword_dir are keyword directories on the form xx.yy
# where xx is the chapter number and yy is the section number.
for item1 in keyword_dir.iterdir():
if not item1.is_dir():
continue
chapter_str = item1.name
(chapter, section) = chapter_str.split(".")
kw_file = item1 / FileNames.keywords
logging.info(f"Processing chapter {chapter_str}")
ProcessChapter(maindir, chapter, section, kw_file, kw_uri_map).process()
kw_uri_map = get_kw_uri_map(maindir, keyword_dir)

with open(maindir / Directories.meta / FileNames.kw_uri_map, "w", encoding='utf8') as f:
for kw in sorted(kw_uri_map.keys()):
f.write(f"{kw} {kw_uri_map[kw]}\n")
logging.info(f"Generated keyword URI map to {maindir / Directories.meta / FileNames.kw_uri_map}")

if __name__ == "__main__":
gen_kw_uri_map()
gen_kw_uri_map_cli()
52 changes: 40 additions & 12 deletions scripts/python/src/fodt/keyword_linker2.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

from fodt.constants import ClickOptions, Directories, FileNames, FileExtensions
from fodt.exceptions import HandlerDoneException, ParsingException
from fodt import helpers
from fodt import helpers, keyword_linker
from fodt.xml_helpers import XMLHelper

class FileHandler(xml.sax.handler.ContentHandler):
Expand Down Expand Up @@ -136,17 +136,23 @@ def update_example_stack(self, attrs: xml.sax.xmlreader.AttributesImpl) -> None:
self.is_example_p.append(False)

class InsertLinks():
def __init__(self, maindir: Path, kw_dir: Path, kw_uri_map: dict[str, str]) -> None:
def __init__(
self, maindir: Path, subsection: str, kw_dir: Path, kw_uri_map: dict[str, str]
) -> None:
self.maindir = maindir
self.kw_dir = kw_dir
self.kw_uri_map = kw_uri_map
self.subsection = subsection

def insert_links(self) -> None:
for item in self.kw_dir.iterdir():
if not item.is_dir():
continue
if self.subsection:
if item.name != self.subsection:
logging.info(f"Skipping directory: {item}")
continue
logging.info(f"Processing directory: {item}")
breakpoint()
for item2 in item.iterdir():
if item2.suffix == f".{FileExtensions.fodt}":
keyword_name = item2.name.removesuffix(f".{FileExtensions.fodt}")
Expand Down Expand Up @@ -193,29 +199,51 @@ def load_kw_uri_map(maindir: Path) -> dict[str, str]:
#
# SHELL USAGE:
#
# fodt-link-keyword --maindir=<main_dir>
# fodt-link-keywords \
# --maindir=<main_dir> --keyword_dir=<keyword_dir> --subsection=<subsection> --use-map-file
#
# DESCRIPTION:
#
# Links all keyword names found inside <p> tags in the subsection documents to the
# corresponding keyword subsection subdocument.
# Uses the mapping file "meta/kw_uri_map.txt" generated by the script
# "fodt-gen-kw-uri-map".
#
# If the option --use-map-file is given, the script will use the mapping file
# "meta/kw_uri_map.txt" (generated by running the script "fodt-gen-kw-uri-map"), else
# it will generate the mapping on the fly. The mapping is a map from keyword name to
# the URI of the keyword subsection subdocument. This map is needed to generate the
# links.
#
# EXAMPLE:
# If --subsection is not given, the script will process all subsections. If --subsection
# is given, the script will only process the specified subsection.
#
# fodt-link-keywords
# EXAMPLES:
#
# Will use the default value: --maindir=../../parts
# fodt-link-keywords --subsection=5.3
#
# Will use the default values: --maindir=../../parts, --keyword_dir=../../keyword-names,
# and will process only the keywords in subsection 5.3, and will generate the mapping on the fly.
#
# fodt-link-keywords
#
# Same as above, but will process all subsections.
#
@click.command()
@ClickOptions.maindir()
def link_keywords(maindir: str|None) -> None:
@ClickOptions.keyword_dir
@click.option('--subsection', help='The subsection to process')
@click.option('--use-map-file', is_flag=True, help='Use the mapping file "meta/kw_uri_map.txt"')
def link_keywords(
maindir: str|None, keyword_dir: str|None, subsection: str|None, use_map_file: bool
) -> None:
logging.basicConfig(level=logging.INFO)
maindir = helpers.get_maindir(maindir)
kw_uri_map = load_kw_uri_map(maindir)
keyword_dir = helpers.get_keyword_dir(keyword_dir)
if use_map_file:
kw_uri_map = load_kw_uri_map(maindir)
else:
kw_uri_map = keyword_linker.get_kw_uri_map(maindir, keyword_dir)
kw_dir = maindir / Directories.chapters / Directories.subsections
InsertLinks(maindir, kw_dir, kw_uri_map).insert_links()
InsertLinks(maindir, subsection, kw_dir, kw_uri_map).insert_links()

if __name__ == "__main__":
link_keywords()

0 comments on commit d9105c6

Please sign in to comment.