Skip to content

Commit

Permalink
The output directory (where the output JSON and PNG files are saved) …
Browse files Browse the repository at this point in the history
…is customizable.

JSON files can be generated only for erroneous PDFs
  • Loading branch information
crux82 committed Mar 23, 2022
1 parent 762c0ca commit d792326
Showing 1 changed file with 13 additions and 5 deletions.
18 changes: 13 additions & 5 deletions aclpubcheck/formatchecker.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import pdfplumber
from tqdm import tqdm
from termcolor import colored
import os


class Error(Enum):
Expand Down Expand Up @@ -49,7 +50,7 @@ def __init__(self):
self.top_offset = 1


def format_check(self, submission, paper_type):
def format_check(self, submission, paper_type, output_dir = ".", print_only_errors = False):
"""
Return True if the paper is correct, False otherwise.
"""
Expand All @@ -63,7 +64,7 @@ def format_check(self, submission, paper_type):

# TODO: A few papers take hours to check. Consider using a timeout
self.check_page_size()
self.check_page_margin()
self.check_page_margin(output_dir)
self.check_page_num(paper_type)
self.check_font()
self.check_references()
Expand All @@ -74,7 +75,7 @@ def format_check(self, submission, paper_type):
logs_json = {}
for k, v in self.logs.items():
logs_json[str(k)] = v
json.dump(logs_json, open(output_file, 'w')) # always write a log file even if it is empty

if self.logs:
print(f"Errors. Check {output_file} for details.")

Expand All @@ -100,6 +101,9 @@ def format_check(self, submission, paper_type):
if warnings == 1:
warning_text = "warning"

if print_only_errors == False or errors >= 1:
json.dump(logs_json, open(os.path.join(output_dir,output_file), 'w')) # always write a log file even if it is empty

# display to user
print()
print("We detected {0} {1} and {2} {3} in your paper.".format(*(errors, error_text, warnings, warning_text)))
Expand All @@ -110,6 +114,9 @@ def format_check(self, submission, paper_type):
return False

else:
if print_only_errors == False:
json.dump(logs_json, open(os.path.join(output_dir,output_file), 'w'))

print(colored("All Clear!", "green"))
return True

Expand All @@ -129,7 +136,7 @@ def check_page_size(self):
self.page_errors.update(pages)


def check_page_margin(self):
def check_page_margin(self, output_dir):
""" Checks if any text or figure is in the margin of pages. """

pages_image = defaultdict(list)
Expand Down Expand Up @@ -202,7 +209,8 @@ def check_page_margin(self):
bbox = (image["x0"], image["top"], image["x1"], image["bottom"])
im.draw_rect(bbox, fill=None, stroke="red", stroke_width=5)

im.save("errors-{0}-page-{1}.png".format(*(self.number, page+1)), format="PNG")
png_file_name = "errors-{0}-page-{1}.png".format(*(self.number, page+1))
im.save(os.path.join(output_dir, png_file_name), format="PNG")
#+ "Specific text: "+str([v for k, v in pages_text.values()])]


Expand Down

0 comments on commit d792326

Please sign in to comment.