diff --git a/string.go b/string.go index eba3452..b0be2c3 100644 --- a/string.go +++ b/string.go @@ -1762,3 +1762,44 @@ func (ks *LkkString) DBC2SBC(s string) string { func (ks *LkkString) SBC2DBC(s string) string { return width.Narrow.String(s) } + +// Levenshtein 计算两个字符串之间的编辑距离,返回值越小字符串越相似. +// 注意字符串最大长度为255. +func (ks *LkkString) Levenshtein(a, b string) int { + la := len(a) + lb := len(b) + + if a == b { + return 0 + } else if la > 255 || lb > 255 { + return -1 + } + + d := make([]int, la+1) + var lastdiag, olddiag, temp int + for i := 1; i <= la; i++ { + d[i] = i + } + for i := 1; i <= lb; i++ { + d[0] = i + lastdiag = i - 1 + for j := 1; j <= la; j++ { + olddiag = d[j] + min := d[j] + 1 + if (d[j-1] + 1) < min { + min = d[j-1] + 1 + } + if (a)[j-1] == (b)[i-1] { + temp = 0 + } else { + temp = 1 + } + if (lastdiag + temp) < min { + min = lastdiag + temp + } + d[j] = min + lastdiag = olddiag + } + } + return d[la] +} diff --git a/string_test.go b/string_test.go index 26ce893..515bfef 100644 --- a/string_test.go +++ b/string_test.go @@ -2866,3 +2866,26 @@ func BenchmarkString_SBC2DBC(b *testing.B) { KStr.SBC2DBC(helloWidth) } } + +func TestString_Levenshtein(t *testing.T) { + var res int + + res = KStr.Levenshtein(helloEng, strHello) + assert.Greater(t, res, 0) + + res = KStr.Levenshtein(helloEng, helloEngICase) + assert.Greater(t, res, 0) + + res = KStr.Levenshtein(strHello, strHello) + assert.Equal(t, res, 0) + + res = KStr.Levenshtein(strHello, tesHtmlDoc) + assert.Equal(t, res, -1) +} + +func BenchmarkString_Levenshtein(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + KStr.Levenshtein(helloEng, helloEngICase) + } +} diff --git a/testdata.go b/testdata.go index f8b281b..743cf7e 100644 --- a/testdata.go +++ b/testdata.go @@ -172,6 +172,9 @@ var tesStr35 = "Hi jac. $a=3*5, (can you hear me?)" var tesStr36 = "A 'quote' is bold" var tesStr37 = "A 'quote' is <b>bold</b>" var tesStr38 = "The quick brown fox jumped over the lazy dog" +var tesStr39 = "中国" +var tesStr40 = "中华人民共和国" +var tesStr41 = "中华" //中文名 var tesChineseName1 = "李四"