forked from tesseract-ocr/tesseract
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrunalltests.sh
executable file
·134 lines (129 loc) · 4.05 KB
/
runalltests.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
#!/bin/bash
# File: runalltests.sh
# Description: Script to run a set of UNLV test sets.
# Author: Ray Smith
# Created: Thu Jun 14 08:21:01 PDT 2007
#
# (C) Copyright 2007, Google Inc.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ---------------------------------------------------------------------------
# Preconditions. Each failed check prints a hint and exits with status 1.
# ---------------------------------------------------------------------------

# Exactly two positional arguments are required.
if [ "$#" -ne 2 ]; then
  echo "Usage:$0 unlv-data-dir version-id"
  exit 1
fi

# Everything below uses paths relative to the source-tree root, which is
# recognised here by the presence of an "api" directory.
[ -d api ] || {
  echo "Run $0 from the tesseract-ocr root directory!"
  exit 1
}

# At least one of api/tesseract or tesseract.exe must be readable.
if ! { [ -r api/tesseract ] || [ -r tesseract.exe ]; }; then
  echo "Please build tesseract before running $0"
  exit 1
fi

# At least one of the UNLV accuracy binaries must be readable.
if ! { [ -r testing/unlv/accuracy ] || [ -r testing/unlv/accuracy.exe ]; }; then
  echo "Please download the UNLV accuracy tools (and build) to testing/unlv"
  exit 1
fi
#deltapc new old calculates the %change from old to new
# Arguments:
#   $1 - new value
#   $2 - old (baseline) value
# Outputs:
#   100*(new-old)/old formatted with two decimals on stdout, without a
#   trailing newline. When the baseline is zero or empty the ratio is
#   undefined, so "0.00" is printed if the new value is zero as well and
#   "inf" otherwise, instead of depending on how the local awk treats a
#   division by zero.
deltapc() {
  # The values are handed to awk with -v rather than spliced into the program
  # text, so an empty or odd argument can never turn into awk source code.
  awk -v new="$1" -v old="$2" 'BEGIN {
    if (old + 0 == 0) {
      printf("%s", (new + 0 == 0) ? "0.00" : "inf");
    } else {
      printf("%.2f", 100.0 * (new - old) / old);
    }
  }'
}
#timesum computes the total cpu time
# Arguments:
#   $1 - file to read; the second whitespace-separated field of every line
#        is added up. When $1 is missing or empty, awk reads stdin instead.
#        (Presumably each line is "<page> <seconds>" - confirm against the
#        script that writes the .times files.)
# Outputs:
#   The sum, formatted with two decimals and a trailing newline, on stdout.
timesum() {
  # ${1:+"$1"} expands to the quoted file name when one was given and to
  # nothing at all otherwise, so names containing spaces or glob characters
  # reach awk intact while the no-argument case still falls back to stdin.
  awk '
    BEGIN { total = 0.0 }
    { total += $2 }
    END { printf("%.2f\n", total) }
  ' ${1:+"$1"}
}
# Positional arguments: $1 is the directory the test-set page lists are
# looked up under, $2 is the id used to name the report files.
imdir=$1
vid=$2

# Directory part of the path this script was invoked with; the helper
# scripts are run from there. An invocation without any slash has no
# directory part, so "./" is used in that case.
case "$0" in
  */*) bindir=${0%/*} ;;
  *)   bindir="./" ;;
esac

# Where the reports live, and the names of the test sets to process.
rdir=testing/reports
testsets="bus.3B doe3.3B mag.3B news.3B"

# Running totals over all test sets: new results first, baseline second.
totalerrs=0 totalwerrs=0 totalnswerrs=0
totalolderrs=0 totaloldwerrs=0 totaloldnswerrs=0
# For every test set whose page list is readable: run the OCR and scoring
# helper scripts, pull the error counts and accuracies out of the generated
# reports, compare them with the 1995 baseline, write a one-line summary
# file and add the counts to the running totals.
# $testsets is deliberately left unquoted: it is a space-separated list.
for set in $testsets
do
  if [ -r "$imdir/$set/pages" ]
  then
    # Run tesseract on all the pages.
    "$bindir/runtestset.sh" "$imdir/$set/pages"
    # Count the errors on all the pages.
    "$bindir/counttestset.sh" "$imdir/$set/pages"

    # Get the old character word and nonstop word errors.
    # cut -f splits on TAB by default.
    # NOTE(review): this assumes the 1995 baseline files are tab-separated
    # with the counts in fields 3, 6 and 9 - confirm against those files.
    olderrs=$(cut -f3 "testing/reports/1995.$set.sum")
    oldwerrs=$(cut -f6 "testing/reports/1995.$set.sum")
    oldnswerrs=$(cut -f9 "testing/reports/1995.$set.sum")

    # Get the new character word and nonstop word errors and accuracy.
    # Line 4 of a report supplies the error count and line 5 the accuracy,
    # both taken from columns 1-9 with blanks removed.
    cherrs=$(head -4 "testing/reports/$set.characc" | tail -1 |
      cut -c1-9 | tr -d '[:blank:]')
    chacc=$(head -5 "testing/reports/$set.characc" | tail -1 |
      cut -c1-9 | tr -d '[:blank:]')
    wderrs=$(head -4 "testing/reports/$set.wordacc" | tail -1 |
      cut -c1-9 | tr -d '[:blank:]')
    wdacc=$(head -5 "testing/reports/$set.wordacc" | tail -1 |
      cut -c1-9 | tr -d '[:blank:]')
    # The nonstop-word figures come from the second line containing "Total".
    nswderrs=$(grep Total "testing/reports/$set.wordacc" | head -2 | tail -1 |
      cut -c10-17 | tr -d '[:blank:]')
    nswdacc=$(grep Total "testing/reports/$set.wordacc" | head -2 | tail -1 |
      cut -c19-26 | tr -d '[:blank:]')

    # Compute the percent change.
    # Arguments are quoted so that an empty value stays in its own position
    # instead of silently shifting the baseline into the "new" slot.
    chdelta=$(deltapc "$cherrs" "$olderrs")
    wdelta=$(deltapc "$wderrs" "$oldwerrs")
    nswdelta=$(deltapc "$nswderrs" "$oldnswerrs")

    sumfile="$rdir/$vid.$set.sum"
    if [ -r "testing/reports/$set.times" ]
    then
      total_time=$(timesum "testing/reports/$set.times")
      if [ -r "testing/reports/prev/$set.times" ]
      then
        # Per-line time difference (current minus previous), sorted
        # numerically by that difference.
        paste "testing/reports/prev/$set.times" "testing/reports/$set.times" |
          awk '{ printf("%s %.2f\n", $1, $4-$2); }' |
          sort -k2n >"testing/reports/$set.timedelta"
      fi
    else
      total_time='0.0'
    fi

    # One space between every field. The two halves are separate printf
    # arguments so the word accuracy and the word delta can never run
    # together, whatever the indentation of this statement.
    printf '%s %s\n' \
      "$vid $set $cherrs $chacc $chdelta% $wderrs $wdacc" \
      "$wdelta% $nswderrs $nswdacc $nswdelta% ${total_time}s" >"$sumfile"

    # Sum totals over all the testsets.
    (( totalerrs += cherrs ))
    (( totalwerrs += wderrs ))
    (( totalnswerrs += nswderrs ))
    (( totalolderrs += olderrs ))
    (( totaloldwerrs += oldwerrs ))
    (( totaloldnswerrs += oldnswerrs ))
  fi
done
# Compute grand total percent change.
chdelta=$(deltapc "$totalerrs" "$totalolderrs")
wdelta=$(deltapc "$totalwerrs" "$totaloldwerrs")
nswdelta=$(deltapc "$totalnswerrs" "$totaloldnswerrs")

# Write the totals line. The "-" entries fill the positions that hold an
# accuracy figure in the per-set lines. The two halves are separate printf
# arguments so the word-error total and the following "-" stay distinct
# fields.
tfile="$rdir/$vid.total.sum"
printf '%s %s\n' \
  "$vid Total $totalerrs - $chdelta% $totalwerrs" \
  "- $wdelta% $totalnswerrs - $nswdelta%" >"$tfile"

# Concatenate the baseline summaries and every summary of this version
# (the totals file matches the second glob too) into one report. Only the
# variable parts are quoted, so the globs still expand.
cat "$rdir"/1995.*.sum "$rdir/$vid".*.sum >"$rdir/$vid.summary"