Skip to content

Commit

Permalink
add KStr.Levenshtein
Browse files Browse the repository at this point in the history
  • Loading branch information
kakuilan committed May 16, 2021
1 parent ba430ea commit 4c02daf
Show file tree
Hide file tree
Showing 3 changed files with 67 additions and 0 deletions.
41 changes: 41 additions & 0 deletions string.go
Original file line number Diff line number Diff line change
Expand Up @@ -1762,3 +1762,44 @@ func (ks *LkkString) DBC2SBC(s string) string {
// SBC2DBC returns s after applying the width.Narrow transform, which maps
// wide (full-width) characters to their narrow (half-width) counterparts.
func (ks *LkkString) SBC2DBC(s string) string {
	narrowed := width.Narrow.String(s)
	return narrowed
}

// Levenshtein returns the edit distance between a and b: the minimum number
// of single-character insertions, deletions and substitutions needed to turn
// one string into the other. The smaller the result, the more similar the
// strings are.
//
// The distance is counted in Unicode code points (runes), not bytes, so a
// multi-byte character such as a CJK ideograph counts as a single edit.
// Bytes that are not valid UTF-8 are each decoded as U+FFFD and therefore
// compare equal to one another.
//
// Identical strings always return 0. Otherwise each input may be at most 255
// characters long; if either one is longer, Levenshtein returns -1.
func (ks *LkkString) Levenshtein(a, b string) int {
	const maxLen = 255

	if a == b {
		return 0
	}
	// A rune occupies at most 4 bytes, so anything longer than 4*maxLen bytes
	// is certainly over the limit; reject it before paying for the conversion.
	if len(a) > 4*maxLen || len(b) > 4*maxLen {
		return -1
	}

	ra, rb := []rune(a), []rune(b)
	la, lb := len(ra), len(rb)
	if la > maxLen || lb > maxLen {
		return -1
	}

	// row[j] is the distance between the first j runes of a and the prefix of
	// b processed so far. A single row suffices because each cell depends only
	// on its left neighbour, the cell above it and the cell diagonally above.
	row := make([]int, la+1)
	for j := range row {
		row[j] = j
	}

	for i := 1; i <= lb; i++ {
		// diag carries the value diagonally above-left of the current cell.
		diag := row[0]
		row[0] = i
		for j := 1; j <= la; j++ {
			above := row[j]

			cost := 1
			if ra[j-1] == rb[i-1] {
				cost = 0
			}

			best := above + 1 // skip a rune of b
			if v := row[j-1] + 1; v < best {
				best = v // skip a rune of a
			}
			if v := diag + cost; v < best {
				best = v // match or substitute
			}

			row[j] = best
			diag = above
		}
	}
	return row[la]
}
23 changes: 23 additions & 0 deletions string_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2866,3 +2866,26 @@ func BenchmarkString_SBC2DBC(b *testing.B) {
KStr.SBC2DBC(helloWidth)
}
}

// TestString_Levenshtein exercises KStr.Levenshtein on differing inputs,
// identical inputs and an over-length input.
func TestString_Levenshtein(t *testing.T) {
	var res int

	// Inputs that are expected to differ must yield a positive distance.
	res = KStr.Levenshtein(helloEng, strHello)
	assert.Greater(t, res, 0)

	res = KStr.Levenshtein(helloEng, helloEngICase)
	assert.Greater(t, res, 0)

	// Multi-byte input: "中国" vs "中华".
	res = KStr.Levenshtein(tesStr39, tesStr41)
	assert.Greater(t, res, 0)

	// Identical inputs have distance zero.
	res = KStr.Levenshtein(strHello, strHello)
	assert.Equal(t, 0, res)

	// tesHtmlDoc is expected to exceed the length limit, which is reported as -1.
	res = KStr.Levenshtein(strHello, tesHtmlDoc)
	assert.Equal(t, -1, res)
}

// BenchmarkString_Levenshtein measures KStr.Levenshtein on the
// helloEng / helloEngICase pair.
func BenchmarkString_Levenshtein(b *testing.B) {
	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		_ = KStr.Levenshtein(helloEng, helloEngICase)
	}
}
3 changes: 3 additions & 0 deletions testdata.go
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,9 @@ var tesStr35 = "Hi jac. $a=3*5, (can you hear me?)"
// Text containing single quotes and an HTML <b> tag.
var tesStr36 = "A 'quote' is <b>bold</b>"

// The same text as tesStr36 with the quotes and angle brackets written as HTML entities.
var tesStr37 = "A &#39;quote&#39; is &lt;b&gt;bold&lt;/b&gt;"

// Plain English sentence.
var tesStr38 = "The quick brown fox jumped over the lazy dog"

// Chinese strings of different lengths that all start with the character "中".
var tesStr39 = "中国"
var tesStr40 = "中华人民共和国"
var tesStr41 = "中华"

// Chinese names
var tesChineseName1 = "李四"
Expand Down

0 comments on commit 4c02daf

Please sign in to comment.