-
Notifications
You must be signed in to change notification settings - Fork 0
/
EditDistance.cc
105 lines (91 loc) · 2.11 KB
/
EditDistance.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
///
/// @file EditDistance.cc
/// @author sgzed([email protected])
/// @date 2018-03-27 17:16:15
///
#include "EditDistance.h"
//获取一个字节高位开头为1的个数
size_t nBytesCode(const char ch)
{
if(ch&(1<<7))
{
int nBytes = 1;
for(int idx=0;idx!=6;++idx)
{
if(ch&(1<<(6-idx)))
++nBytes;
else
break;
}
cout << nBytes << endl;
return nBytes;
}
return 1;
}
size_t length(const string& str)
{
size_t ilen = 0;
for(size_t idx=0;idx!=str.size();++idx)
{
int nBytes = nBytesCode(str[idx]);
idx+=(nBytes-1);
++ilen;
}
return ilen;
}
size_t min(size_t a,size_t b,size_t c)
{
size_t tmp = a<b?a:b;
size_t ret = c<tmp?c:tmp;
// cout << ret;
return ret;
}
size_t editDistance(const string& lhs,const string& rhs)
{
size_t lhs_len = length(lhs);
size_t rhs_len = length(rhs);
size_t editDist[lhs_len+1][rhs_len+1];
for(size_t idx = 0;idx<=lhs_len;++idx)
editDist[idx][0] = idx;
for(size_t idx = 0;idx<=rhs_len;++idx)
editDist[0][idx] = idx;
//for(size_t row=1; row<=lhs_len ;++row)
//{
// for(size_t col=1;col<=rhs_len;++col)
// {
// int cost = lhs[row-1]==rhs[col-1]?0:1;
// int deletion = editDist[row-1][col]+1;
// int insertion = editDist[row][col-1]+1;
// int substitution = editDist[row-1][col-1]+cost;
//
// editDist[row][col] = min(deletion,insertion,substitution);
//// cout << editDist[row][col] << " ";
// }
// cout << endl;
// }
string sublhs,subrhs;
for(std::size_t dist_i = 1, lhs_idx = 0; dist_i <= lhs_len; ++dist_i, ++lhs_idx)
{
size_t nBytes = nBytesCode(lhs[lhs_idx]);
sublhs = lhs.substr(lhs_idx, nBytes);
lhs_idx += (nBytes - 1);
for(std::size_t dist_j = 1, rhs_idx = 0; dist_j <= rhs_len; ++dist_j, ++rhs_idx)
{
nBytes = nBytesCode(rhs[rhs_idx]);
subrhs = rhs.substr(rhs_idx, nBytes);
rhs_idx += (nBytes - 1);
if(sublhs == subrhs)
{
editDist[dist_i][dist_j] = editDist[dist_i - 1][dist_j - 1];
}
else
{
editDist[dist_i][dist_j] = min(
editDist[dist_i][dist_j-1]+1,
editDist[dist_i-1][dist_j]+1,
editDist[dist_i-1][dist_j-1]+1);
}
}
}
return editDist[lhs_len][rhs_len];
}