-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathdictfmt_index2suffix.in
101 lines (90 loc) · 1.64 KB
/
dictfmt_index2suffix.in
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#!/bin/sh
#
# By Aleksey Cheusov <[email protected]>
#
# Print the command-line help text to standard output.
# Takes no arguments; each help line is passed to printf as a separate
# argument under a fixed '%s\n' format.
usage () {
  printf '%s\n' \
    'Converts .index file from DICTD database to the index file .suffix' \
    'usage: dictfmt_index2suffix [OPTIONS] [files...]' \
    'OPTIONS:' \
    '--help display this screen' \
    '--utf8 for creating utf8 .index file' \
    'all other -X and --XXX options are passed to dictfmt -I'
}
# Run every tool started below in the C locale.
LC_ALL=C
export LC_ALL

# Start from a known state: without these assignments, variables named
# "utf8_mode" or "args" inherited from the caller's environment would leak
# into the awk mode switch and into the options handed to dictfmt.
utf8_mode=
args=

# Processing arguments
#   --help   print the usage text and exit successfully
#   --utf8   enable UTF-8 mode here and also forward the flag
#   -*       any other option is collected in $args (space-separated)
#   other    first non-option word ends option parsing; the remaining
#            positional parameters are left in "$@"
while [ $# != 0 ]; do
  case "$1" in
    --help)
      usage
      exit 0;;
    --utf8)
      utf8_mode=1
      args="$args $1";;
    -*)
      args="$args $1";;
    *)
      break;;
  esac
  shift
done

# $exit_ holds a command string that is eval-ed right after the main
# pipeline.  Under bash it succeeds only when every stage of that pipeline
# returned 0; other shells have no PIPESTATUS, so the script simply exits
# with the status of the last stage.
if test -n "${BASH:-}"; then
  exit_="echo \${PIPESTATUS[@]} | grep -E '^0( 0)*\$' >/dev/null"
else
  exit_='exit $?'
fi
# Main pipeline: awk rewrites every tab-separated input line so that the
# first field is reversed, the result is fed to "dictfmt -I" and adjacent
# duplicate lines are dropped by uniq.
# $args is left unquoted on purpose: it is a space-separated option list.
@AWK@ -v "utf8_mode=$utf8_mode" '
# Return the length in bytes of the UTF-8 sequence that starts str, judged
# by its first byte only:
#   0   str is empty
#   1-6 sequence length announced by the lead byte
#   -1  first byte is a continuation byte (0x80-0xBF) or is 0xFE/0xFF
function charlen_utf8 (str){
  if (str == ""){
    return 0
  }else if (str ~ /^[\1-\177]/){
    return 1
  }else if (str ~ /^[\200-\277]/){
    return -1
  }else if (str ~ /^[\300-\337]/){
    return 2
  }else if (str ~ /^[\340-\357]/){
    return 3
  }else if (str ~ /^[\360-\367]/){
    return 4
  }else if (str ~ /^[\370-\373]/){
    return 5
  }else if (str ~ /^[\374-\375]/){
    return 6
  }else{
    return -1;
  }
}

BEGIN {
  FS = OFS = "\t"
}

{
  # Byte-wise indexing below assumes that length() and substr() count
  # bytes; the script exports LC_ALL=C before starting awk.
  word_len = length($1)

  if (!utf8_mode){
    # Plain mode: emit the bytes of the first field back to front.
    for (i = word_len; i >= 1; --i){
      printf "%s", substr($1, i, 1)
    }
  }else{
    # UTF-8 mode: split the first field into multi-byte characters, then
    # emit the characters back to front so each one stays intact.
    i = 1
    count = 0
    # "<=" is required: with "<" a trailing single-byte character would
    # never be collected and would vanish from the output.
    while (i <= word_len){
      rest = substr($1, i)
      char_len = charlen_utf8(rest)
      if (char_len < 0){
        print "invalid UTF-8 input: `" rest "`" > "/dev/stderr"
        # Non-zero status lets the caller detect the failure.
        exit 1
      }
      inverse_char [++count] = substr($1, i, char_len)
      i += char_len
    }
    for (; count >= 1; --count){
      printf "%s", inverse_char [count]
    }
  }

  # Blank the original first field: the reversed text printed above is
  # followed by the remaining fields, each preceded by a tab.
  $1 = ""
  print $0
}' "$@" | dictfmt -I $args | uniq
# Quoted so the stored command string is not word-split or glob-expanded
# before eval parses it.
eval "$exit_"