Skip to content

Commit

Permalink
fixed issue hankcs#22
Browse files Browse the repository at this point in the history
  • Loading branch information
hankcs committed May 27, 2015
1 parent 90cd0a5 commit d56c9b6
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 10 deletions.
22 changes: 12 additions & 10 deletions src/main/java/com/hankcs/hanlp/collection/trie/DoubleArrayTrie.java
Original file line number Diff line number Diff line change
Expand Up @@ -1108,25 +1108,27 @@ public Searcher(int offset, char[] charArray)

public boolean next()
{
if (i == arrayLength)
{
++begin;
i = begin;
last = base[0];
}
int b = last;
int n;
int p;

for (; i < arrayLength; ++i)
for (; ; ++i)
{
p = b + (int) (charArray[i]) + 1; // 状态转移 p = base[char[i-1]] + char[i] + 1
if (i == arrayLength) // 指针到头了,将起点往前挪一个,重新开始,状态归零
{
++begin;
if (begin == arrayLength) break;
i = begin;
b = base[0];
}
p = b + (int) (charArray[i]) + 1; // 状态转移 p = base[char[i-1]] + char[i] + 1
if (b == check[p]) // base[char[i-1]] == check[base[char[i-1]] + char[i] + 1]
b = base[p];
b = base[p]; // 转移成功
else
{
i = begin;
i = begin; // 转移失败,也将起点往前挪一个,重新开始,状态归零
++begin;
if (begin == arrayLength) break;
b = base[0];
continue;
}
Expand Down
13 changes: 13 additions & 0 deletions src/test/java/com/hankcs/test/seg/TestSegment.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.dictionary.CoreBiGramTableDictionary;
import com.hankcs.hanlp.dictionary.CoreDictionary;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.dictionary.other.CharTable;
import com.hankcs.hanlp.dictionary.other.CharType;
Expand Down Expand Up @@ -278,4 +279,16 @@ public void testIssuse17() throws Exception
HanLP.Config.Normalization = true;
System.out.println(StandardTokenizer.segment("号 "));
}

/**
 * Regression check for issue 22.
 * <p>
 * Segments "三年" with the standard tokenizer and asserts that the second
 * term carries the same nature as the first nature recorded for "年" in the
 * core dictionary. The remaining calls only print segmentation results for
 * manual inspection.
 * <p>
 * Note: this method switches on number-quantifier recognition on
 * {@code StandardTokenizer.SEGMENT} and does not switch it back.
 */
public void testIssue22() throws Exception
{
    final String sample = "三年";

    // Dictionary entry whose first nature is the expected tag.
    CoreDictionary.Attribute yearEntry = CoreDictionary.get("年");
    System.out.println(yearEntry);

    List<Term> segmented = StandardTokenizer.segment(sample);
    System.out.println(segmented);

    // Index 1 is the second term produced for the sample.
    assertEquals(yearEntry.nature[0], segmented.get(1).nature);

    // Printed for eyeballing only; nothing is asserted on this output.
    List<Term> yuanResult = StandardTokenizer.segment("三元");
    System.out.println(yuanResult);

    // Segment the sample again with number-quantifier recognition enabled.
    StandardTokenizer.SEGMENT.enableNumberQuantifierRecognize(true);
    List<Term> mergedResult = StandardTokenizer.segment(sample);
    System.out.println(mergedResult);
}
}

0 comments on commit d56c9b6

Please sign in to comment.