Skip to content

Commit

Permalink
Merge pull request HIT-SCIR#159 from luoleicn/master
Browse files Browse the repository at this point in the history
修正全角空格的bug (Fix the full-width space bug)
  • Loading branch information
Oneplus committed Mar 8, 2016
2 parents d1260f2 + 532e80a commit 631006e
Showing 1 changed file with 7 additions and 1 deletion.
8 changes: 7 additions & 1 deletion src/segmentor/preprocessor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -184,10 +184,16 @@ int Preprocessor::preprocess(const std::string& sentence,
width = 1;
}
else if ((sent[i]&0xE0)==0xC0) { width = 2; }
else if ((sent[i]&0xF0)==0xE0) { width = 3; }
else if ((sent[i]&0xF0)==0xE0) {
width = 3;
if (i + 3 <= len && sent[i] == 0xffffffe3 && sent[i + 1] == 0xffffff80 && sent[i + 2] == 0xffffff80) {
is_space = true;
}
}
else if ((sent[i]&0xF8)==0xF0) { width = 4; }
else { return -1; }


if (is_space) {
left_status = HAS_SPACE_ON_LEFT;
if (chartypes.size() > 0) { chartypes.back() |= HAS_SPACE_ON_RIGHT; }
Expand Down

0 comments on commit 631006e

Please sign in to comment.