Skip to content

Commit

Permalink
Improve type annotations (add more and fix wrong ones)
Browse files Browse the repository at this point in the history
The origins of these are three-fold:

* Merging in stubs from https://github.com/python/typeshed/tree/main/stubs/Markdown using "merge-pyi"
   - Note: we can consider these annotations to be the important ones because it's what people have been adding according to their own need
* Double-checking around places where stubs were already added from the above, particularly conflicts with annotations that got added in this repository already
   + Taking the opportunity to declare a generic "Registry of T" class
* Running mypy and eliminating the most glaring errors it reported
  • Loading branch information
oprypin authored Oct 30, 2023
1 parent c53d5d7 commit 99425b4
Show file tree
Hide file tree
Showing 16 changed files with 130 additions and 100 deletions.
1 change: 1 addition & 0 deletions docs/changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
### Fixed

* Fix a performance problem with HTML extraction where large HTML input could trigger quadratic line counting behavior (PR#1392).
* Improve and expand type annotations in the code base (#1394).

## [3.5] -- 2023-10-06

Expand Down
13 changes: 7 additions & 6 deletions markdown/blockparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,12 @@
from __future__ import annotations

import xml.etree.ElementTree as etree
from typing import TYPE_CHECKING, Sequence, Any
from typing import TYPE_CHECKING, Iterable, Any
from . import util

if TYPE_CHECKING: # pragma: no cover
from markdown import Markdown
from .blockprocessors import BlockProcessor


class State(list):
Expand All @@ -59,7 +60,7 @@ def set(self, state: Any):
""" Set a new state. """
self.append(state)

def reset(self):
def reset(self) -> None:
""" Step back one step in nested state. """
self.pop()

Expand Down Expand Up @@ -92,11 +93,11 @@ def __init__(self, md: Markdown):
[`blockprocessors`][markdown.blockprocessors].
"""
self.blockprocessors = util.Registry()
self.blockprocessors: util.Registry[BlockProcessor] = util.Registry()
self.state = State()
self.md = md

def parseDocument(self, lines: Sequence[str]) -> etree.ElementTree:
def parseDocument(self, lines: Iterable[str]) -> etree.ElementTree:
""" Parse a Markdown document into an `ElementTree`.
Given a list of lines, an `ElementTree` object (not just a parent
Expand All @@ -116,7 +117,7 @@ def parseDocument(self, lines: Sequence[str]) -> etree.ElementTree:
self.parseChunk(self.root, '\n'.join(lines))
return etree.ElementTree(self.root)

def parseChunk(self, parent: etree.Element, text: str):
def parseChunk(self, parent: etree.Element, text: str) -> None:
""" Parse a chunk of Markdown text and attach to given `etree` node.
While the `text` argument is generally assumed to contain multiple
Expand All @@ -134,7 +135,7 @@ def parseChunk(self, parent: etree.Element, text: str):
"""
self.parseBlocks(parent, text.split('\n\n'))

def parseBlocks(self, parent: etree.Element, blocks: Sequence[str]):
def parseBlocks(self, parent: etree.Element, blocks: list[str]) -> None:
""" Process blocks of Markdown text and attach to given `etree` node.
Given a list of `blocks`, each `blockprocessor` is stepped through
Expand Down
12 changes: 6 additions & 6 deletions markdown/blockprocessors.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def lastChild(self, parent: etree.Element) -> etree.Element | None:
else:
return None

def detab(self, text: str, length: int = None) -> str:
def detab(self, text: str, length: int | None = None) -> tuple[str, str]:
""" Remove a tab from the front of each line of the given text. """
if length is None:
length = self.tab_length
Expand All @@ -105,7 +105,7 @@ def looseDetab(self, text: str, level: int = 1) -> str:
lines[i] = lines[i][self.tab_length*level:]
return '\n'.join(lines)

def test(self, parent: etree.Element, block: list[str]) -> bool:
def test(self, parent: etree.Element, block: str) -> bool:
""" Test for block type. Must be overridden by subclasses.
As the parser loops through processors, it will call the `test`
Expand Down Expand Up @@ -214,7 +214,7 @@ def run(self, parent, blocks):
self.create_item(sibling, block)
self.parser.state.reset()

def create_item(self, parent: etree.Element, block: str):
def create_item(self, parent: etree.Element, block: str) -> None:
""" Create a new `li` and parse the block with it as the parent. """
li = etree.SubElement(parent, 'li')
self.parser.parseBlocks(li, [block])
Expand Down Expand Up @@ -329,7 +329,7 @@ class OListProcessor(BlockProcessor):

TAG: str = 'ol'
""" The tag used for the wrapping element. """
STARTSWITH: int = '1'
STARTSWITH: str = '1'
"""
The integer (as a string) with which the list starts. For example, if a list is initialized as
`3. Item`, then the `ol` tag will be assigned an HTML attribute of `start="3"`. Default: `"1"`.
Expand All @@ -342,7 +342,7 @@ class OListProcessor(BlockProcessor):
This is the list of types which can be mixed.
"""

def __init__(self, parser):
def __init__(self, parser: BlockParser):
super().__init__(parser)
# Detect an item (`1. item`). `group(1)` contains contents of item.
self.RE = re.compile(r'^[ ]{0,%d}\d+\.[ ]+(.*)' % (self.tab_length - 1))
Expand Down Expand Up @@ -448,7 +448,7 @@ class UListProcessor(OListProcessor):
TAG: str = 'ul'
""" The tag used for the wrapping element. """

def __init__(self, parser):
def __init__(self, parser: BlockParser):
super().__init__(parser)
# Detect an item (`* item`, `+ item` or `- item`). `group(1)` contains contents of item.
self.RE = re.compile(r'^[ ]{0,%d}[*+-][ ]+(.*)' % (self.tab_length - 1))
Expand Down
12 changes: 8 additions & 4 deletions markdown/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import sys
import logging
import importlib
from typing import TYPE_CHECKING, Any, TextIO, Callable
from typing import TYPE_CHECKING, Any, Callable, ClassVar, Mapping, Sequence, TextIO
from . import util
from .preprocessors import build_preprocessors
from .blockprocessors import build_block_parser
Expand Down Expand Up @@ -76,7 +76,7 @@ class Markdown:

doc_tag = "div" # Element used to wrap document - later removed

output_formats: dict[str, Callable[Element]] = {
output_formats: ClassVar[dict[str, Callable[[Element], str]]] = {
'html': to_html_string,
'xhtml': to_xhtml_string,
}
Expand Down Expand Up @@ -156,7 +156,11 @@ def build_parser(self) -> Markdown:
self.postprocessors = build_postprocessors(self)
return self

def registerExtensions(self, extensions: list[Extension | str], configs: dict[str, dict[str, Any]]) -> Markdown:
def registerExtensions(
self,
extensions: Sequence[Extension | str],
configs: Mapping[str, Mapping[str, Any]]
) -> Markdown:
"""
Load a list of extensions into an instance of the `Markdown` class.
Expand Down Expand Up @@ -188,7 +192,7 @@ def registerExtensions(self, extensions: list[Extension | str], configs: dict[st
)
return self

def build_extension(self, ext_name: str, configs: dict[str, Any]) -> Extension:
def build_extension(self, ext_name: str, configs: Mapping[str, Any]) -> Extension:
"""
Build extension from a string name, then return an instance using the given `configs`.
Expand Down
10 changes: 5 additions & 5 deletions markdown/extensions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@

from __future__ import annotations

from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, Mapping, Sequence
from ..util import parseBoolValue

if TYPE_CHECKING: # pragma: no cover
Expand All @@ -37,7 +37,7 @@
class Extension:
""" Base class for extensions to subclass. """

config: dict[str, list[Any, str]] = {}
config: Mapping[str, list] = {}
"""
Default configuration for an extension.
Expand Down Expand Up @@ -91,7 +91,7 @@ def getConfigInfo(self) -> list[tuple[str, str]]:
"""
return [(key, self.config[key][1]) for key in self.config.keys()]

def setConfig(self, key: str, value: Any):
def setConfig(self, key: str, value: Any) -> None:
"""
Set a configuration option.
Expand All @@ -112,7 +112,7 @@ def setConfig(self, key: str, value: Any):
value = parseBoolValue(value, preserve_none=True)
self.config[key][0] = value

def setConfigs(self, items: dict[str, Any] | list[tuple[str, Any]]):
def setConfigs(self, items: Mapping[str, Any] | Sequence[tuple[str, Any]]):
"""
Loop through a collection of configuration options, passing each to
[`setConfig`][markdown.extensions.Extension.setConfig].
Expand All @@ -129,7 +129,7 @@ def setConfigs(self, items: dict[str, Any] | list[tuple[str, Any]]):
for key, value in items:
self.setConfig(key, value)

def extendMarkdown(self, md: Markdown):
def extendMarkdown(self, md: Markdown) -> None:
"""
Add the various processors and patterns to the Markdown Instance.
Expand Down
2 changes: 1 addition & 1 deletion markdown/extensions/attr_list.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,7 @@ def run(self, doc: Element):
self.assign_attrs(elem, m.group(1))
elem.tail = elem.tail[m.end():]

def assign_attrs(self, elem: Element, attrs: dict[str, str]):
def assign_attrs(self, elem: Element, attrs: str) -> None:
""" Assign `attrs` to element. """
for k, v in get_attrs(attrs):
if k == '.':
Expand Down
14 changes: 7 additions & 7 deletions markdown/extensions/footnotes.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,14 +98,14 @@ def extendMarkdown(self, md):
# Insert a postprocessor after amp_substitute processor
md.postprocessors.register(FootnotePostprocessor(self), 'footnote', 25)

def reset(self):
def reset(self) -> None:
""" Clear footnotes on reset, and prepare for distinct document. """
self.footnotes = OrderedDict()
self.footnotes: OrderedDict[str, str] = OrderedDict()
self.unique_prefix += 1
self.found_refs = {}
self.used_refs = set()

def unique_ref(self, reference, found=False):
def unique_ref(self, reference, found: bool = False):
""" Get a unique reference if there are duplicates. """
if not found:
return reference
Expand Down Expand Up @@ -144,7 +144,7 @@ def finder(element):
res = finder(root)
return res

def setFootnote(self, id, text):
def setFootnote(self, id, text) -> None:
""" Store a footnote for later retrieval. """
self.footnotes[id] = text

Expand All @@ -159,7 +159,7 @@ def makeFootnoteId(self, id):
else:
return 'fn{}{}'.format(self.get_separator(), id)

def makeFootnoteRefId(self, id, found=False):
def makeFootnoteRefId(self, id, found: bool = False):
""" Return footnote back-link id. """
if self.getConfig("UNIQUE_IDS"):
return self.unique_ref('fnref%s%d-%s' % (self.get_separator(), self.unique_prefix, id), found)
Expand Down Expand Up @@ -329,7 +329,7 @@ class FootnotePostTreeprocessor(Treeprocessor):
def __init__(self, footnotes):
self.footnotes = footnotes

def add_duplicates(self, li, duplicates):
def add_duplicates(self, li, duplicates) -> None:
""" Adjust current `li` and add the duplicates: `fnref2`, `fnref3`, etc. """
for link in li.iter('a'):
# Find the link that needs to be duplicated.
Expand All @@ -355,7 +355,7 @@ def get_num_duplicates(self, li):
link_id = '{}ref{}{}'.format(fn, self.footnotes.get_separator(), rest)
return self.footnotes.found_refs.get(link_id, 0)

def handle_duplicates(self, parent):
def handle_duplicates(self, parent) -> None:
""" Find duplicate footnotes and format and add the duplicates. """
for li in list(parent):
# Check the number of duplicate footnotes and insert
Expand Down
2 changes: 1 addition & 1 deletion markdown/extensions/meta.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def extendMarkdown(self, md):
self.md = md
md.preprocessors.register(MetaPreprocessor(md), 'meta', 27)

def reset(self):
def reset(self) -> None:
self.md.Meta = {}


Expand Down
10 changes: 5 additions & 5 deletions markdown/extensions/smarty.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ def _addPatterns(self, md, patterns, serie, priority):
name = 'smarty-%s-%d' % (serie, ind)
self.inlinePatterns.register(pattern, name, priority-ind)

def educateDashes(self, md):
def educateDashes(self, md) -> None:
emDashesPattern = SubstituteTextPattern(
r'(?<!-)---(?!-)', (self.substitutions['mdash'],), md
)
Expand All @@ -203,13 +203,13 @@ def educateDashes(self, md):
self.inlinePatterns.register(emDashesPattern, 'smarty-em-dashes', 50)
self.inlinePatterns.register(enDashesPattern, 'smarty-en-dashes', 45)

def educateEllipses(self, md):
def educateEllipses(self, md) -> None:
ellipsesPattern = SubstituteTextPattern(
r'(?<!\.)\.{3}(?!\.)', (self.substitutions['ellipsis'],), md
)
self.inlinePatterns.register(ellipsesPattern, 'smarty-ellipses', 10)

def educateAngledQuotes(self, md):
def educateAngledQuotes(self, md) -> None:
leftAngledQuotePattern = SubstituteTextPattern(
r'\<\<', (self.substitutions['left-angle-quote'],), md
)
Expand All @@ -219,7 +219,7 @@ def educateAngledQuotes(self, md):
self.inlinePatterns.register(leftAngledQuotePattern, 'smarty-left-angle-quotes', 40)
self.inlinePatterns.register(rightAngledQuotePattern, 'smarty-right-angle-quotes', 35)

def educateQuotes(self, md):
def educateQuotes(self, md) -> None:
lsquo = self.substitutions['left-single-quote']
rsquo = self.substitutions['right-single-quote']
ldquo = self.substitutions['left-double-quote']
Expand All @@ -243,7 +243,7 @@ def educateQuotes(self, md):

def extendMarkdown(self, md):
configs = self.getConfigs()
self.inlinePatterns = Registry()
self.inlinePatterns: Registry[HtmlInlineProcessor] = Registry()
if configs['smart_ellipses']:
self.educateEllipses(md)
if configs['smart_quotes']:
Expand Down
12 changes: 6 additions & 6 deletions markdown/extensions/toc.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def get_name(el):
return ''.join(text).strip()


def stashedHTML2text(text, md, strip_entities=True):
def stashedHTML2text(text, md, strip_entities: bool = True):
""" Extract raw HTML from stash, reduce to plain text and swap with placeholder. """
def _html_sub(m):
""" Substitute raw html with plain text. """
Expand Down Expand Up @@ -198,7 +198,7 @@ def iterparent(self, node):
yield node, child
yield from self.iterparent(child)

def replace_marker(self, root, elem):
def replace_marker(self, root, elem) -> None:
""" Replace marker with elem. """
for (p, c) in self.iterparent(root):
text = ''.join(c.itertext()).strip()
Expand All @@ -219,14 +219,14 @@ def replace_marker(self, root, elem):
p[i] = elem
break

def set_level(self, elem):
def set_level(self, elem) -> None:
""" Adjust header level according to base level. """
level = int(elem.tag[-1]) + self.base_level
if level > 6:
level = 6
elem.tag = 'h%d' % level

def add_anchor(self, c, elem_id):
def add_anchor(self, c, elem_id) -> None:
anchor = etree.Element("a")
anchor.text = c.text
anchor.attrib["href"] = "#" + elem_id
Expand All @@ -238,7 +238,7 @@ def add_anchor(self, c, elem_id):
c.remove(c[0])
c.append(anchor)

def add_permalink(self, c, elem_id):
def add_permalink(self, c, elem_id) -> None:
permalink = etree.Element("a")
permalink.text = ("%spara;" % AMP_SUBSTITUTE
if self.use_permalinks is True
Expand Down Expand Up @@ -399,7 +399,7 @@ def extendMarkdown(self, md):
tocext = self.TreeProcessorClass(md, self.getConfigs())
md.treeprocessors.register(tocext, 'toc', 5)

def reset(self):
def reset(self) -> None:
self.md.toc = ''
self.md.toc_tokens = []

Expand Down
6 changes: 3 additions & 3 deletions markdown/htmlparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,7 +156,7 @@ def get_endtag_text(self, tag: str) -> str:
# Failed to extract from raw data. Assume well formed and lowercase.
return '</{}>'.format(tag)

def handle_starttag(self, tag: str, attrs: dict[str, str]):
def handle_starttag(self, tag: str, attrs: list[tuple[str, str]]):
# Handle tags that should always be empty and do not specify a closing tag
if tag in self.empty_tags:
self.handle_startendtag(tag, attrs)
Expand Down Expand Up @@ -235,7 +235,7 @@ def handle_empty_tag(self, data: str, is_block: bool):
else:
self.cleandoc.append(data)

def handle_startendtag(self, tag: str, attrs: dict[str, str]):
def handle_startendtag(self, tag: str, attrs: list[tuple[str, str]]):
self.handle_empty_tag(self.get_starttag_text(), is_block=self.md.is_block_level(tag))

def handle_charref(self, name: str):
Expand Down Expand Up @@ -277,7 +277,7 @@ def parse_html_declaration(self, i: int) -> int:
# As `__starttag_text` is private, all references to it must be in this subclass.
# The last few lines of `parse_starttag` are reversed so that `handle_starttag`
# can override `cdata_mode` in certain situations (in a code span).
__starttag_text = None
__starttag_text: str | None = None

def get_starttag_text(self) -> str:
"""Return full source of start tag: `<...>`."""
Expand Down
Loading

0 comments on commit 99425b4

Please sign in to comment.