@@ -36,11 +36,11 @@ def parse_to_record(self) -> DemonstrationRecord:
36
36
"""
37
37
boundary = self .__find_boundary ()
38
38
self .parts_dict = self .__split_file_by_boundary (boundary )
39
- self .comments = self .__get_comments (
40
- self .parts_dict ['main.htm' ]['Content' ])
41
- self .steps = self .__get_steps (self .parts_dict ['main.htm' ]['Content' ])
39
+ self .comments = self .__get_comments (self .parts_dict ["main.htm" ]["Content" ])
40
+ self .steps = self .__get_steps (self .parts_dict ["main.htm" ]["Content" ])
42
41
record = DemonstrationRecord (
43
- list (set (self .applications )), len (self .steps ), ** self .steps )
42
+ list (set (self .applications )), len (self .steps ), ** self .steps
43
+ )
44
44
45
45
return record
46
46
@@ -54,14 +54,14 @@ def __find_boundary(self) -> str:
54
54
if boundary_start != - 1 :
55
55
boundary_start += len ("boundary=" )
56
56
boundary_end = self .content .find ("\n " , boundary_start )
57
- boundary = self .content [boundary_start :boundary_end ].strip ('\ " ' )
57
+ boundary = self .content [boundary_start :boundary_end ].strip ('"' )
58
58
return boundary
59
59
else :
60
60
raise ValueError ("Boundary not found in the .mht file." )
61
61
62
62
def __split_file_by_boundary (self , boundary : str ) -> dict :
63
63
"""
64
- Split the file by the boundary into parts,
64
+ Split the file by the boundary into parts,
65
65
Store the parts in a dictionary, including the content type,
66
66
content location and content transfer encoding.
67
67
boundary: The boundary of the file.
@@ -72,27 +72,36 @@ def __split_file_by_boundary(self, boundary: str) -> dict:
72
72
for part in parts :
73
73
content_type_start = part .find ("Content-Type:" )
74
74
content_location_start = part .find ("Content-Location:" )
75
- content_transfer_encoding_start = part .find (
76
- "Content-Transfer-Encoding:" )
75
+ content_transfer_encoding_start = part .find ("Content-Transfer-Encoding:" )
77
76
part_info = {}
78
77
if content_location_start != - 1 :
79
78
content_location_end = part .find ("\n " , content_location_start )
80
- content_location = part [content_location_start :content_location_end ].split (":" )[
81
- 1 ].strip ()
79
+ content_location = (
80
+ part [content_location_start :content_location_end ]
81
+ .split (":" )[1 ]
82
+ .strip ()
83
+ )
82
84
83
85
# add the content location
84
86
if content_type_start != - 1 :
85
87
content_type_end = part .find ("\n " , content_type_start )
86
- content_type = part [content_type_start :content_type_end ].split (":" )[
87
- 1 ].strip ()
88
+ content_type = (
89
+ part [content_type_start :content_type_end ].split (":" )[1 ].strip ()
90
+ )
88
91
part_info ["Content-Type" ] = content_type
89
92
90
93
# add the content transfer encoding
91
94
if content_transfer_encoding_start != - 1 :
92
95
content_transfer_encoding_end = part .find (
93
- "\n " , content_transfer_encoding_start )
94
- content_transfer_encoding = part [content_transfer_encoding_start :content_transfer_encoding_end ].split (":" )[
95
- 1 ].strip ()
96
+ "\n " , content_transfer_encoding_start
97
+ )
98
+ content_transfer_encoding = (
99
+ part [
100
+ content_transfer_encoding_start :content_transfer_encoding_end
101
+ ]
102
+ .split (":" )[1 ]
103
+ .strip ()
104
+ )
96
105
part_info ["Content-Transfer-Encoding" ] = content_transfer_encoding
97
106
98
107
content = part [content_location_end :].strip ()
@@ -112,25 +121,30 @@ def __get_steps(self, content: str) -> dict:
112
121
"""
113
122
114
123
user_action_data = re .search (
115
- r'<UserActionData>(.*?)</UserActionData>' , content , re .DOTALL )
124
+ r"<UserActionData>(.*?)</UserActionData>" , content , re .DOTALL
125
+ )
116
126
if user_action_data :
117
127
118
128
root = ET .fromstring (user_action_data .group (1 ))
119
129
steps = {}
120
130
121
- for each_action in root .findall (' EachAction' ):
131
+ for each_action in root .findall (" EachAction" ):
122
132
123
- action_number = each_action .get ('ActionNumber' )
124
- application = each_action .get ('FileName' )
125
- description = each_action .find ('Description' ).text
126
- action = each_action .find ('Action' ).text
127
- screenshot_file_name = each_action .find (
128
- 'ScreenshotFileName' ).text
133
+ action_number = each_action .get ("ActionNumber" )
134
+ application = each_action .get ("FileName" )
135
+ description = each_action .find ("Description" ).text
136
+ action = each_action .find ("Action" ).text
137
+ screenshot_file_name = each_action .find ("ScreenshotFileName" ).text
129
138
screenshot = self .__get_screenshot (screenshot_file_name )
130
139
step_key = f"step_{ int (action_number ) - 1 } "
131
140
132
141
step = DemonstrationStep (
133
- application , description , action , screenshot , self .comments .get (step_key ))
142
+ application ,
143
+ description ,
144
+ action ,
145
+ screenshot ,
146
+ self .comments .get (step_key ),
147
+ )
134
148
steps [step_key ] = step
135
149
self .applications .append (application )
136
150
return steps
@@ -143,16 +157,21 @@ def __get_comments(self, content: str) -> dict:
143
157
content: The content of the main.htm file.
144
158
return: A dictionary of comments for each step.
145
159
"""
146
- soup = BeautifulSoup (content , ' html.parser' )
160
+ soup = BeautifulSoup (content , " html.parser" )
147
161
body = soup .body
148
- steps_html = body .find ('div' , id = 'Steps' )
149
- steps = steps_html .find_all (lambda tag : tag .name == 'div' and tag .has_attr (
150
- 'id' ) and re .match (r'^Step\d+$' , tag ['id' ]))
162
+ steps_html = body .find ("div" , id = "Steps" )
163
+ steps = steps_html .find_all (
164
+ lambda tag : tag .name == "div"
165
+ and tag .has_attr ("id" )
166
+ and re .match (r"^Step\d+$" , tag ["id" ])
167
+ )
151
168
152
169
comments = {}
153
170
for index , step in enumerate (steps ):
154
- comment_tag = step .find ('b' , text = 'Comment: ' )
155
- comments [f'step_{ index } ' ] = comment_tag .next_sibling if comment_tag else None
171
+ comment_tag = step .find ("b" , text = "Comment: " )
172
+ comments [f"step_{ index } " ] = (
173
+ comment_tag .next_sibling if comment_tag else None
174
+ )
156
175
return comments
157
176
158
177
def __get_screenshot (self , screenshot_file_name : str ) -> str :
@@ -163,11 +182,12 @@ def __get_screenshot(self, screenshot_file_name: str) -> str:
163
182
return: The screenshot in base64 string.
164
183
"""
165
184
screenshot_part = self .parts_dict [screenshot_file_name ]
166
- content = screenshot_part [' Content' ]
167
- content_type = screenshot_part [' Content-Type' ]
168
- content_transfer_encoding = screenshot_part [' Content-Transfer-Encoding' ]
185
+ content = screenshot_part [" Content" ]
186
+ content_type = screenshot_part [" Content-Type" ]
187
+ content_transfer_encoding = screenshot_part [" Content-Transfer-Encoding" ]
169
188
170
- screenshot = 'data:{type};{encoding}, {content}' .format (
171
- type = content_type , encoding = content_transfer_encoding , content = content )
189
+ screenshot = "data:{type};{encoding}, {content}" .format (
190
+ type = content_type , encoding = content_transfer_encoding , content = content
191
+ )
172
192
173
193
return screenshot
0 commit comments