forked from cncf/devstats.archive
-
Notifications
You must be signed in to change notification settings - Fork 0
/
unicode.go
26 lines (22 loc) · 838 Bytes
/
unicode.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
package gha2db
import (
"strings"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
// StripUnicode reduces str to printable ASCII.
// The input is first run through NFKD normalization, then every rune with a
// code point below 32 or at/above 127 is removed from the result.
// From: https://rosettacode.org/wiki/Strip_control_codes_and_extended_characters_from_a_string#Go
func StripUnicode(str string) string {
	// drop reports whether a rune must be removed, i.e. it lies outside 32..126.
	drop := func(c rune) bool {
		return !(c >= 32 && c < 127)
	}
	chain := transform.Chain(norm.NFKD, transform.RemoveFunc(drop))
	// The consumed-byte count and the error are deliberately discarded: this
	// is a best-effort cleanup and whatever transform.String produced is
	// returned as-is.
	cleaned, _, _ := transform.String(chain, str)
	return cleaned
}
// normalizeNameReplacer rewrites each character that NormalizeName treats as a
// separator into an underscore. It is built once at package level rather than
// on every call; a *strings.Replacer is safe for concurrent use.
var normalizeNameReplacer = strings.NewReplacer(
	"-", "_",
	"/", "_",
	".", "_",
	" ", "_",
	",", "_",
	";", "_",
	":", "_",
	"`", "_",
)

// NormalizeName cleans a DB string so it can be used as a normalized name.
// Steps, applied in this order:
//  1. StripUnicode is applied to str.
//  2. Leading and trailing whitespace is trimmed.
//  3. The result is lowercased.
//  4. Each of the characters - / . space , ; : ` is replaced with "_".
func NormalizeName(str string) string {
	return normalizeNameReplacer.Replace(strings.ToLower(strings.TrimSpace(StripUnicode(str))))
}