forked from Sudomemo/sudomemo-locales
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfind_problems.sh
executable file
·113 lines (86 loc) · 3.98 KB
/
find_problems.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
#!/bin/bash
#
# sudomemo-locales Problem Finder
#
# Walks every locale directory (??_??) in the current working directory and,
# for each gettext domain present in en_US/LC_MESSAGES, checks the locale's
# catalog for:
#   * a missing .po file
#   * a text encoding that file(1) does not report as UTF-8, ASCII or empty
#   * format errors reported by msgfmt --check-format
#   * msgids missing from, or extra to, the en_US catalog
#   * entries identical to en_US (i.e. probably untranslated)
#
# Usage: ./find_problems.sh [LANG_REGEX]
#   LANG_REGEX  optional grep pattern selecting locale directory names,
#               e.g. ./find_problems.sh ja_JP   (default: every locale)
#
# Locale directories are resolved relative to the current working directory;
# ignore lists are read from ignores/<lang>/<domain>.txt next to this script.
# The first four checks abort the run with exit status 1; identical strings
# are only reported. Relies on grep -P, diff --*-line-format, file and msgfmt.

# Absolute directory containing this script (used to locate the ignore lists).
DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"

echo "# sudomemo-locales Problem Finder"
echo "Scans for problems as well as untranslated (or identical) strings"
echo

# Locale filter: first argument if given and non-empty, otherwise "." which
# matches every locale name.
LANGFILTER="${1:-.}"

# English variants are never diffed against en_US for untranslated strings.
# TODO: make it possible to optionally include them.
SKIP_DIFF_REGEX="en_(AU|GB|US)"

# PCRE that prints the text of a single-line msgid: the two dots consume the
# space and opening quote, \K drops the prefix from the match, and the lazy
# .+? stops at the first following double quote.
MSGID_REGEX='^msgid..\K.+?(?=")'

# The reference domain list does not depend on the locale, so build it once.
DOMAINS=()
for en_po in en_US/LC_MESSAGES/*.po; do
  # An unmatched glob is left as the literal pattern; skip it.
  [[ -e "$en_po" ]] || continue
  en_po="${en_po##*/}"
  DOMAINS+=("${en_po%.po}")
done
if [[ ${#DOMAINS[@]} -eq 0 ]]; then
  echo "Warning: no .po files found in en_US/LC_MESSAGES" >&2
fi

for lang in ??_??; do
  # Skip the literal pattern (no match) and anything that is not a directory.
  [[ -d "$lang" ]] || continue
  # "--" keeps a filter that starts with "-" from being parsed as an option.
  grep -q -- "$LANGFILTER" <<<"$lang" || continue

  echo "## $lang"
  echo

  for domain in "${DOMAINS[@]}"; do
    en_po="en_US/LC_MESSAGES/$domain.po"
    po="$lang/LC_MESSAGES/$domain.po"

    # Missing locale file?
    if [[ ! -f "$po" ]]; then
      echo "Error: $po is missing"
      exit 1
    fi

    # Wrong text encoding?
    # -b omits the file name so that a path containing "UTF-8", "ASCII" or
    # "empty" cannot mask a badly encoded file.
    CHECK_ENCODING=$(file -b "$po" | grep -Ev "(UTF-8|ASCII|empty)")
    if [[ -n "$CHECK_ENCODING" ]]; then
      echo "Error: $po has wrong encoding: should be detected as UTF-8, ASCII, or empty"
      file "$po"
      exit 1
    fi

    # Invalid?
    if ! msgfmt --check-format "$po" -o - >/dev/null; then
      echo "Error: Check the formatting for $po"
      exit 1
    fi

    # Missing/extra strings?
    ENG_MSGID_LIST=$(grep -Po "$MSGID_REGEX" "$en_po" | sort -u)
    NEW_MSGID_LIST=$(grep -Po "$MSGID_REGEX" "$po" | sort -u)
    # comm -23 prints lines unique to its first input, so swapping the
    # inputs turns "missing" into "extra".
    COMPARE_MISSING=$(comm -23 <(printf '%s\n' "$ENG_MSGID_LIST") <(printf '%s\n' "$NEW_MSGID_LIST"))
    COMPARE_EXTRA=$(comm -23 <(printf '%s\n' "$NEW_MSGID_LIST") <(printf '%s\n' "$ENG_MSGID_LIST"))

    if [[ -n "$COMPARE_MISSING" ]]; then
      echo "Error: $po is missing msgid's present in $en_po :"
      printf '%s\n' "$COMPARE_MISSING"
      echo "Please fix before continuing."
      exit 1
    fi

    if [[ -n "$COMPARE_EXTRA" ]]; then
      echo "Error: $po has extra msgid's compared to $en_po :"
      printf '%s\n' "$COMPARE_EXTRA"
      echo "Please fix before continuing."
      exit 1
    fi

    # Number of lines differs?
    # Disabled for now
    # ENG_LINE_COUNT=$(wc -l < "$en_po")
    # NEW_LINE_COUNT=$(wc -l < "$po")
    # if [ "$ENG_LINE_COUNT" -ne "$NEW_LINE_COUNT" ]; then
    #   echo "Line count for $po different from $en_po"
    #   echo "Check for missing/extra newlines inside and at the top/bottom of the file"
    #   echo "en_US: $ENG_LINE_COUNT"
    #   echo "$lang: $NEW_LINE_COUNT"
    #   exit 1;
    # fi;

    # Untranslated strings?
    if [[ $lang =~ $SKIP_DIFF_REGEX ]]; then
      echo "Skipping comparison for $domain.po: $lang is an English variant"
      continue
    fi

    # Join continuation lines (those starting with a double quote) onto the
    # previous line, replacing the line break and quote with the marker
    # "multiline", so each msgid/msgstr occupies exactly one line.
    en_strings=$(sed ':a;N;$!ba;s/\n"/multiline/g' "$en_po")
    lang_strings=$(sed ':a;N;$!ba;s/\n"/multiline/g' "$po")

    # Keep only the lines that are identical in both catalogs, then pick the
    # msgid that directly precedes each identical msgstr: those entries have
    # the same source and translation as en_US.
    RESULTS=$(diff --unchanged-line-format='%L' --old-line-format='' --new-line-format='' \
        <(printf '%s\n' "$en_strings") <(printf '%s\n' "$lang_strings") \
      | sed '/^$/d' \
      | grep -B1 msgstr \
      | grep -Po "$MSGID_REGEX" \
      | sort)

    # Drop msgids listed in the per-locale, per-domain ignore list, if any.
    ignore_file="$DIR/ignores/$lang/$domain.txt"
    if [[ -f "$ignore_file" ]]; then
      RESULTS=$(comm -23 <(printf '%s\n' "$RESULTS") <(sort -u "$ignore_file"))
    fi

    if [[ -n "$RESULTS" ]]; then
      echo "$po:"
      printf '%s\n' "$RESULTS"
      echo
    fi
  done
done