Skip to content

Commit

Permalink
Update get_paper_from_pdf.py
Browse files (browse the repository at this point in the history)
  • Loading branch information
WangRongsheng authored Mar 16, 2023
1 parent 5f4a61c commit 2998637
Showing 1 changed file with 2 additions and 2 deletions.
4 changes: 2 additions & 2 deletions get_paper_from_pdf.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -125,7 +125,7 @@ def get_title(self):
max_font_size = 0 # 初始化最大字体大小为0
max_string = "" # 初始化最大字体大小对应的字符串为空
max_font_sizes = [0]
- for page in doc: # 遍历每一页
+ for page_index, page in enumerate(doc): # 遍历每一页
text = page.get_text("dict") # 获取页面上的文本信息
blocks = text["blocks"] # 获取文本块列表
for block in blocks: # 遍历每个文本块
Expand All @@ -139,7 +139,7 @@ def get_title(self):
max_font_sizes.sort()
print("max_font_sizes", max_font_sizes[-10:])
cur_title = ''
- for page in doc: # 遍历每一页
+ for page_index, page in enumerate(doc): # 遍历每一页
text = page.get_text("dict") # 获取页面上的文本信息
blocks = text["blocks"] # 获取文本块列表
for block in blocks: # 遍历每个文本块
Expand Down

0 comments on commit 2998637

Please sign in to comment.