forked from camelot-dev/excalibur
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.html
305 lines (277 loc) · 15.2 KB
/
index.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1, shrink-to-fit=no">
<!-- Human-readable summary for search results and link previews -->
<meta name="description" content="Excalibur is a web interface to extract tables from PDFs into CSV, Excel, JSON and HTML files.">
<!-- The original keyword list, kept under the meta name intended for keywords -->
<meta name="keywords" content="Excalibur, PDF, Parsing, Table, Extraction, OCR">
<!-- One comma between authors, not between first and last names -->
<meta name="author" content="Vinayak Mehta, Nikhil Sikka">
<title>Excalibur | PDF Table Extraction for Humans</title>
<!-- Bootstrap core CSS -->
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.1.3/css/bootstrap.min.css" integrity="sha256-eSi1q2PG6J7g7ib17yAaWMcrr5GrtohYChqibrV7PBE=" crossorigin="anonymous">
<!-- Custom fonts for this template -->
<link rel="stylesheet" href="https://use.fontawesome.com/releases/v5.5.0/css/all.css" integrity="sha384-B4dIYHKNBt8Bc12p+WXckhzcICo0wtJAoU8YZTY5qE0Id1GSseTk6S+L3BlXeVIU" crossorigin="anonymous">
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Montserrat:400,700">
<link rel="stylesheet" href="https://fonts.googleapis.com/css?family=Lato:400,700,400italic,700italic">
<!-- Custom styles for this template -->
<link rel="stylesheet" href="css/index.min.css">
<!-- Start of Async Drift Code -->
<script>
/*
 * Drift async loader (vendor snippet).
 * Builds a stub object on window.drift / window.driftt whose methods only
 * record their calls, then injects the remote Drift script into the page.
 * Kept byte-identical to the vendor code apart from these comments.
 */
"use strict";
!function() {
// Reuse an existing window.driftt if present; otherwise start with an empty array that doubles as the call queue.
var t = window.driftt = window.drift = window.driftt || [];
// Skip setup when `init` is already set (presumably by the fully loaded Drift library; confirm against vendor docs).
if (!t.init) {
// Guard against the snippet being pasted into the page twice.
if (t.invoked) return void (window.console && console.error && console.error("Drift snippet included twice."));
t.invoked = !0, t.methods = [ "identify", "config", "track", "reset", "debug", "show", "ping", "page", "hide", "off", "on" ],
// factory(e) returns a stub that pushes [e, ...args] onto the queue and returns the queue so calls can be chained.
t.factory = function(e) {
return function() {
var n = Array.prototype.slice.call(arguments);
return n.unshift(e), t.push(n), t;
};
}, t.methods.forEach(function(e) {
t[e] = t.factory(e);
}), t.load = function(t) {
// e = 300000 ms (5 minutes); n = current time rounded up to the next multiple of e.
// n becomes a path segment of the script URL (presumably a cache bucket; confirm against vendor docs).
var e = 3e5, n = Math.ceil(new Date() / e) * e, o = document.createElement("script");
// NOTE(review): the reflected DOM property is `crossOrigin`; assigning lowercase `crossorigin` looks like it only
// creates an expando and does not change how the script is fetched. Confirm the CDN sends CORS headers before "fixing" it.
o.type = "text/javascript", o.async = !0, o.crossorigin = "anonymous", o.src = "https://js.driftt.com/include/" + n + "/" + t + ".js";
// Insert the new script element before the first <script> in the document.
var i = document.getElementsByTagName("script")[0];
i.parentNode.insertBefore(o, i);
};
}
}();
drift.SNIPPET_VERSION = '0.3.1';
// The argument is interpolated into the script URL built by load() (presumably the Drift embed/account id).
drift.load('mkxyurrr8tf3');
</script>
<!-- End of Async Drift Code -->
</head>
<body id="page-top">
<!-- Navigation -->
<nav class="navbar navbar-expand-lg bg-secondary fixed-top text-uppercase" id="mainNav">
  <div class="container">
    <!-- Brand link targets the #page-top anchor on <body> -->
    <a class="navbar-brand js-scroll-trigger" href="#page-top">Excalibur</a>
    <!-- Toggle for the collapsible link list (#navbarResponsive) -->
    <button
      class="navbar-toggler navbar-toggler-right text-uppercase bg-primary text-white rounded"
      data-toggle="collapse"
      data-target="#navbarResponsive"
      type="button"
      aria-controls="navbarResponsive"
      aria-expanded="false"
      aria-label="Toggle navigation">
      Menu
      <i class="fas fa-bars"></i>
    </button>
    <!-- In-page section links -->
    <div class="collapse navbar-collapse" id="navbarResponsive">
      <ul class="navbar-nav ml-auto">
        <li class="nav-item mx-0 mx-lg-1">
          <a class="nav-link py-3 px-0 px-lg-3 rounded js-scroll-trigger" href="#about">About</a>
        </li>
        <li class="nav-item mx-0 mx-lg-1">
          <a class="nav-link py-3 px-0 px-lg-3 rounded js-scroll-trigger" href="#usage">Usage</a>
        </li>
        <li class="nav-item mx-0 mx-lg-1">
          <a class="nav-link py-3 px-0 px-lg-3 rounded js-scroll-trigger" href="#contact">Contact</a>
        </li>
      </ul>
    </div>
  </div>
</nav>
<!-- Header -->
<header class="masthead bg-primary text-white">
  <div class="container">
    <div class="row">
      <!-- Left column: logo, product name, tagline and GitHub star button -->
      <div class="col-md-6 col-sm-12 col-xs-12 text-center">
        <!-- height must be a non-negative integer ("auto" is invalid); assumes the circular logo is square (TODO confirm) -->
        <img src="https://excalibur-py.readthedocs.io/en/master/_static/excalibur-logo-circle.png" class="rounded-circle img-fluid mb-5 d-block mx-auto"
          width="150" height="150" alt="Excalibur">
        <hr class="star-light">
        <h1 class="text-uppercase mb-1 lead">Excalibur</h1>
        <h4 class="mb-4 mt-3">Extract tables from PDFs into CSVs</h4>
        <a class="github-button" href="https://github.com/camelot-dev/excalibur" data-size="large" data-show-count="true" aria-label="Star camelot-dev/excalibur on GitHub">Star</a>
      </div>
      <!-- Right column: install instructions and download link -->
      <div class="col-md-5 offset-md-1 offset-sm-0 col-sm-12 col-xs-12 mt-4">
        <div class="text-center">
          <h5 class="mb-1">Available for Windows, Mac and Linux</h5>
          <p class="lead">Excalibur can be easily installed using pip.</p>
          <p class="package-header__pip-instructions">
            <span id="pip-command">pip install excalibur-py</span>
            <!-- Explicit type keeps this an in-page action even if it is ever moved inside a form -->
            <button class="-js-copy-pip-command tooltipped tooltipped-s" type="button" data-clipboard-target="#pip-command" aria-label="Copy to clipboard" data-original-label="Copy to clipboard"><i class="fa fa-copy" aria-hidden="true"></i></button>
          </p>
        </div>
        <div class="text-center">
          <p class="lead">Or run directly with the executable!</p>
          <!-- rel="noopener" stops the new tab from reaching back through window.opener -->
          <a class="btn btn-secondary btn-xl" href="https://github.com/camelot-dev/excalibur/releases" target="_blank" rel="noopener">
            <i class="fas fa-download mr-2" aria-hidden="true"></i>
            Download Now!
          </a>
        </div>
      </div>
    </div>
  </div>
</header>
<!-- About Section -->
<section class="text-secondary mb-0" id="about">
  <div class="container-fluid">
    <h2 class="text-center text-uppercase text-secondary">About</h2>
    <hr class="star-dark mb-5">
  </div>
  <!-- Four feature cards. Icons are decorative, so they are hidden from assistive technology. -->
  <div class="container">
    <div class="row">
      <div class="col-lg-6">
        <div class="media mt-4">
          <h3><i class="fas fa-file-pdf text-accent mr-3" aria-hidden="true"></i></h3>
          <div class="media-body">
            <h5 class="mb-1 text-accent">The Portable Document Format</h5>
            <p class="lead text-helper">A PDF file defines instructions to place characters at precise <strong class="font-weight-bold">x,y</strong> coordinates relative to the bottom-left corner of the page. Words are simulated by placing some characters closer than others. Spaces are simulated by placing words relatively far apart. And finally tables are simulated by placing words as they would appear in a spreadsheet. The format has no internal representation of a table structure.</p>
          </div>
        </div>
      </div>
      <div class="col-lg-6">
        <div class="media mt-4">
          <h3><i class="fa fa-table text-accent mr-3" aria-hidden="true"></i></h3>
          <div class="media-body">
            <h5 class="mb-1 text-accent">Extracting tables from PDFs is hard</h5>
            <p class="lead text-helper">The Portable Document Format was not designed for tabular data. Sadly, a lot of open data is shared as PDFs and getting tables out for analysis is a pain. A simple copy-and-paste doesn't work. <strong class="font-weight-bold">Excalibur makes PDF table extraction very easy</strong>, by automatically detecting tables in PDFs and letting you save them into CSVs and Excel files through a web interface.</p>
          </div>
        </div>
      </div>
      <div class="col-lg-6">
        <div class="media mt-4">
          <h3><i class="fa fa-wrench text-accent mr-3" aria-hidden="true"></i></h3>
          <div class="media-body">
            <h5 class="mb-1 text-accent">Why another tool?</h5>
            <!-- rel="noopener" on the new-tab link keeps the opened page from reaching window.opener -->
            <p class="lead text-helper">There are both open and closed-source tools that are widely used for PDF table extraction. They either give a nice output or fail miserably. Excalibur is powered by <a href="https://camelot-py.readthedocs.io">Camelot</a> which gives users additional settings to tweak table extraction and get the best results. You can see how it performs better than other open-source tools and libraries <a href="https://github.com/socialcopsdev/camelot/wiki/Comparison-with-other-PDF-Table-Extraction-libraries-and-tools" target="_blank" rel="noopener">in this comparison</a>.</p>
          </div>
        </div>
      </div>
      <div class="col-lg-6">
        <div class="media mt-4">
          <h3><i class="fa fa-rocket text-accent mr-3" aria-hidden="true"></i></h3>
          <div class="media-body">
            <h5 class="mb-1 text-accent">Secure and built for scale</h5>
            <p class="lead text-helper">You get complete control over your data, since all file storage and processing happens on your own local or remote machine. Excalibur can also be configured with MySQL and <a href="http://www.celeryproject.org/" target="_blank" rel="noopener">Celery</a> to execute table extraction jobs in a parallel and distributed manner. By default, jobs are executed sequentially.</p>
          </div>
        </div>
      </div>
    </div>
  </div>
</section>
<!-- Usage Section -->
<section class="bg-primary features" id="usage">
  <div class="container">
    <h2 class="text-center text-uppercase text-white mb-0">Usage</h2>
    <hr class="star-light mb-5">
    <!--
      Five walkthrough steps, alternating image-left / image-right from the lg breakpoint up.
      The demo GIFs used to sit inside <a href="#"> wrappers with alt="", which produced
      focusable links with no accessible name that only jumped to the top of the page.
      The dead links are removed and each animation now has a text alternative.
    -->
    <div class="row align-items-center mb-5 text-white">
      <div class="col-md-6 order-lg-first">
        <img class="img-fluid rounded mb-3 mb-md-0" src="https://excalibur-py.readthedocs.io/en/master/_static/gifs/upload.gif" alt="Animated demo of uploading a PDF in the Excalibur web interface">
      </div>
      <div class="col-md-6 order-lg-last">
        <h3>Upload a PDF</h3>
        <p>You can upload a PDF using the web interface. You can also interact with previous uploads.
        </p>
      </div>
    </div>
    <div class="row align-items-center mb-5 text-white">
      <div class="col-md-6 order-lg-last">
        <img class="img-fluid rounded mb-3 mb-md-0" src="https://excalibur-py.readthedocs.io/en/master/_static/gifs/auto-detect.gif" alt="Animated demo of Excalibur automatically detecting tables in a PDF">
      </div>
      <div class="col-md-6 order-lg-first">
        <h3>Autodetect tables</h3>
        <p>Excalibur can automatically detect tables in your PDF.</p>
      </div>
    </div>
    <div class="row align-items-center mb-5 text-white">
      <div class="col-md-6 order-lg-first">
        <img class="img-fluid rounded mb-3 mb-md-0" src="https://excalibur-py.readthedocs.io/en/master/_static/gifs/table-and-column.gif" alt="Animated demo of drawing table areas and column separators on a PDF page">
      </div>
      <!-- Text column uses order-lg-last, mirroring the first row; the rendered order is unchanged -->
      <div class="col-md-6 order-lg-last">
        <h3>Or draw table areas and/or column separators</h3>
        <p>You can guide the tool by drawing table areas and column separators in cases where the tables are buried deep inside the text and autodetection fails.</p>
      </div>
    </div>
    <div class="row align-items-center mb-5 text-white">
      <div class="col-md-6 order-lg-last">
        <img class="img-fluid rounded mb-3 mb-md-0" src="https://excalibur-py.readthedocs.io/en/master/_static/gifs/saved-rule.gif" alt="Animated demo of loading saved table extraction settings">
      </div>
      <div class="col-md-6 order-lg-first">
        <h3>Or load saved settings</h3>
        <p>You can save table extraction settings for a PDF once, and apply them on new PDFs to extract tables with similar structures.</p>
      </div>
    </div>
    <div class="row align-items-center mb-5 text-white">
      <div class="col-md-6 order-lg-first">
        <img class="img-fluid rounded mb-3 mb-md-0" src="https://excalibur-py.readthedocs.io/en/master/_static/gifs/download.gif" alt="Animated demo of viewing extracted tables and downloading them">
      </div>
      <!-- Text column uses order-lg-last, mirroring the first row; the rendered order is unchanged -->
      <div class="col-md-6 order-lg-last">
        <h3>View and download data</h3>
        <p>Finally, you can view the extracted tables and download them as CSVs or Excel files. Excalibur also supports JSON and HTML.</p>
      </div>
    </div>
  </div>
</section>
<!-- Contact Section -->
<section id="contact">
  <div class="container">
    <h2 class="text-center text-uppercase text-secondary">Contact</h2>
    <hr class="star-dark mb-5">
  </div>
  <div class="container">
    <div class="row">
      <!-- Empty spacer column that centres the form on md and wider screens -->
      <div class="col-md-3"></div>
      <div class="col-md-6">
        <h4 class="lead">Do you have feedback or want us to build a new feature? Just holler!</h4>
        <hr>
        <form action="https://formspree.io/[email protected]" method="POST">
          <!--
            `for` must reference the input's id ("email"), not its name ("emailID").
            With the old value the label was not associated with the field.
            The submitted field name is left unchanged.
          -->
          <label for="email">E-mail</label>
          <input class="form-control form-control-lg mb-3" type="email" name="emailID" id="email" autocomplete="email" placeholder="[email protected]">
          <label for="comment">Message</label>
          <textarea class="form-control form-control-lg mb-3" name="comment" id="comment" cols="30" rows="5" placeholder="Give us feedback, tell us about your awesome use case or just say hello!"></textarea>
          <div class="text-right">
            <button type="submit" value="Send" class="btn btn-lg btn-block btn-accent">Submit</button>
          </div>
        </form>
      </div>
    </div>
  </div>
</section>
<!-- Footer -->
<footer class="footer text-center">
  <div class="container">
    <div class="row">
      <div class="col-md-12">
        <ul class="list-inline mb-0">
          <li class="list-inline-item">
            <!-- Icon-only link: aria-label supplies the accessible name and the glyph is hidden from assistive technology -->
            <a class="btn btn-outline-light btn-social text-center rounded-circle" href="https://github.com/camelot-dev/excalibur" target="_blank" rel="noopener" aria-label="Excalibur on GitHub">
              <i class="fab fa-fw fa-github" aria-hidden="true"></i>
            </a>
          </li>
        </ul>
        <!-- rel="noopener" keeps the new tab from reaching back through window.opener -->
        <a class="mt-2 d-inline-block text-white" href="https://github.com/camelot-dev/excalibur" target="_blank" rel="noopener">View on Github</a>
      </div>
    </div>
  </div>
</footer>
<div class="copyright py-4 text-center text-white">
  <div class="container">
    <!--
      <p> is flow content and may not be nested inside <small>.
      Bootstrap's .small utility on the paragraph gives the same reduced text size with valid markup.
    -->
    <p class="small mb-2">Copyright <a href="https://github.com/camelot-dev/" target="_blank" rel="noopener">© Camelot Dev</a> 2018</p>
    <!-- The heart glyph carries meaning, so a visually hidden text equivalent is supplied for screen readers -->
    <p class="small">Made with <i class="fa fa-heart mx-1" aria-hidden="true"></i><span class="sr-only">love</span> in New Delhi, India</p>
  </div>
</div>
<!-- Scroll to Top Button (Only visible on small and extra-small screen sizes) -->
<div class="scroll-to-top d-lg-none position-fixed ">
  <!-- Icon-only link: aria-label gives it an accessible name and the chevron is hidden from assistive technology -->
  <a class="js-scroll-trigger d-block text-center text-white rounded" href="#page-top" aria-label="Scroll to top">
    <i class="fa fa-chevron-up" aria-hidden="true"></i>
  </a>
</div>
<!-- Bootstrap core JavaScript: jQuery is loaded first, then the Bootstrap bundle -->
<!-- NOTE(review): unlike the CDN stylesheets in the head, these CDN scripts have no integrity/crossorigin attributes; consider adding SRI hashes. -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.min.js"></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/twitter-bootstrap/4.1.3/js/bootstrap.bundle.min.js"></script>
<!-- Plugin JavaScript: jQuery easing functions -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery-easing/1.4.1/jquery.easing.min.js"></script>
<!-- Clipboard: clipboard.js (the pip-command copy button carries a data-clipboard-target attribute) -->
<script src="https://cdnjs.cloudflare.com/ajax/libs/clipboard.js/2.0.0/clipboard.min.js"></script>
<!-- GitHub buttons script (presumably renders the .github-button "Star" link in the masthead) -->
<script async defer src="https://buttons.github.io/buttons.js"></script>
<!-- Custom scripts for this template -->
<script src="js/main.min.js"></script>
</body>
</html>