# tesseract_test.yaml
# Forked from ramanathanlab/pdfwf.
# Test configuration that runs the tesseract parser over a small PDF set
# on the polaris compute platform.

# Directory containing the input PDFs to convert.
pdf_dir: /lus/eagle/projects/argonne_tpc/siebenschuh/aurora_gpt/small-pdf-dataset/

# Directory where the converted output is written.
out_dir: /eagle/projects/argonne_tpc/siebenschuh/aurora_gpt/output/tesseract/

# Settings for the PDF parser. The keys below must be nested under
# parser_settings; left at top level, `name` would collide with
# compute_settings' `name` and this mapping would be null.
parser_settings:
  # Name of the parser to use.
  name: tesseract
  # Image resolution (DPI).
  # NOTE(review): 50 DPI is low for OCR; presumably chosen to keep this
  # test run fast -- confirm before reusing for real conversions.
  dpi: 50
  # Language.
  lang: eng

# Compute settings for the workflow.
compute_settings:
  # Name of the compute platform to use.
  name: polaris
  # Number of compute nodes to request.
  num_nodes: 2
  # Shell commands that load the conda module and activate the environment.
  # Update the module version and environment name to match your own setup.
  worker_init: "module use /soft/modulefiles; module load conda/2024-04-29; conda activate adaparse"
  # Extra scheduler directive(s) used when submitting jobs. Quoted because
  # the value starts with '#', which would otherwise begin a YAML comment.
  scheduler_options: "#PBS -l filesystems=home:eagle"
  # Account (allocation) to charge; change this to your own.
  account: argonne_tpc
  # HPC queue to submit to.
  queue: debug
  # Amount of time to request for the job, as HH:MM:SS. Quoted so that
  # YAML 1.1 loaders cannot interpret the digits-and-colons value as a
  # sexagesimal number.
  walltime: "00:30:00"