-
Notifications
You must be signed in to change notification settings - Fork 35
/
test_hyperlinks.py
76 lines (65 loc) · 2.05 KB
/
test_hyperlinks.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
""" Test that consecutive links pointing to the same address are merged.
:author: Shay Hill
:created: 3/17/2021
Such links will look like this (after removing proofErr, rsid, and other noise).
<w:p>
<w:hyperlink r:id="rId7"> <!-- points to http://www.shayallenhill.com -->
<w:r>
<w:t>hy</w:t>
</w:r>
</w:hyperlink>
<w:hyperlink r:id="rId8"> <!-- points to http://www.shayallenhill.com -->
<w:r>
<w:t>per</w:t>
</w:r>
</w:hyperlink>
<w:hyperlink r:id="rId9"> <!-- points to http://www.shayallenhill.com -->
<w:r>
<w:t>link</w:t>
</w:r>
</w:hyperlink>
</w:p>
docx2python first condenses these into a single hyperlink element:
<w:p>
<w:hyperlink r:id="rId7"> <!-- points to http://www.shayallenhill.com -->
<w:r>
<w:t>hy</w:t>
</w:r>
<w:r>
<w:t>per</w:t>
</w:r>
<w:r>
<w:t>link</w:t>
</w:r>
</w:hyperlink>
</w:p>
It then merges the adjacent runs inside that hyperlink into a single run:
<w:p>
<w:hyperlink r:id="rId7"> <!-- points to http://www.shayallenhill.com -->
<w:r>
<w:t>hyperlink</w:t>
</w:r>
</w:hyperlink>
</w:p>
This module tests the final result.
"""
from docx2python.main import docx2python
from tests.conftest import RESOURCES
class TestHyperlink:
    """Checks on the text extracted from ``hyperlink.docx``."""

    def test_prints(self) -> None:
        """Adjacent hyperlinks sharing a target are emitted as one anchor."""
        # Innermost list of the expected value: plain text, then the merged
        # anchor, then the trailing period.
        innermost = [
            "This is a link to ",
            '<a href="http://www.shayallenhill.com/">my website</a>',
            ".",
        ]
        # The expected body_runs value wraps that list four more times,
        # giving five levels of nesting in total.
        expected = [[[[innermost]]]]

        with docx2python(RESOURCES / "hyperlink.docx") as extraction:
            assert extraction.body_runs == expected