Skip to content

Commit

Permalink
修复升级带来的问题:hankcs#358
Browse files Browse the repository at this point in the history
  • Loading branch information
hankcs committed Dec 1, 2016
1 parent b60d57c commit 3bcea1c
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -3756,7 +3756,7 @@ public static void parsePattern(List<NT> ntList, List<Vertex> vertexList, final
public void hit(int begin, int end, String keyword)
{
StringBuilder sbName = new StringBuilder();
for (int i = begin; i <= end; ++i)
for (int i = begin; i < end; ++i)
{
sbName.append(wordArray[i].realWord);
}
Expand Down
27 changes: 19 additions & 8 deletions src/test/java/com/hankcs/test/seg/TestSegment.java
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ public void testIssue2() throws Exception

public void testIssue3() throws Exception
{
assertEquals(CharType.CT_DELIMITER, CharType.get('*'));;
assertEquals(CharType.CT_DELIMITER, CharType.get('*'));
System.out.println(HanLP.segment("300g*2"));
System.out.println(HanLP.segment("300g*2"));
System.out.println(HanLP.segment("鱼300克*2/组"));
Expand Down Expand Up @@ -169,16 +169,15 @@ public void testSpeedOfSecondViterbi() throws Exception
String text = "王总和小丽结婚了";
Segment segment = new ViterbiSegment().enableAllNamedEntityRecognize(false)
.enableNameRecognize(false) // 人名识别需要二次维特比,比较慢
.enableCustomDictionary(false)
;
.enableCustomDictionary(false);
System.out.println(segment.seg(text));
long start = System.currentTimeMillis();
int pressure = 1000000;
for (int i = 0; i < pressure; ++i)
{
segment.seg(text);
}
double costTime = (System.currentTimeMillis() - start) / (double)1000;
double costTime = (System.currentTimeMillis() - start) / (double) 1000;
System.out.printf("分词速度:%.2f字每秒", text.length() * pressure / costTime);
}

Expand Down Expand Up @@ -243,13 +242,13 @@ public void testMultiThreading() throws Exception
text = sbBigText.toString();
long start = System.currentTimeMillis();
List<Term> termList1 = segment.seg(text);
double costTime = (System.currentTimeMillis() - start) / (double)1000;
double costTime = (System.currentTimeMillis() - start) / (double) 1000;
System.out.printf("单线程分词速度:%.2f字每秒\n", text.length() / costTime);

segment.enableMultithreading(4);
start = System.currentTimeMillis();
List<Term> termList2 = segment.seg(text);
costTime = (System.currentTimeMillis() - start) / (double)1000;
costTime = (System.currentTimeMillis() - start) / (double) 1000;
System.out.printf("四线程分词速度:%.2f字每秒\n", text.length() / costTime);

assertEquals(termList1.size(), termList2.size());
Expand Down Expand Up @@ -323,7 +322,7 @@ public void testIssue71() throws Exception

public void testIssue193() throws Exception
{
String[] testCase = new String[] {
String[] testCase = new String[]{
"以每台约200元的价格送到苹果售后维修中心换新机(苹果的保修基本是免费换新机)",
"可能以2500~2800元的价格回收",
"3700个益农信息社打通服务“最后一公里”",
Expand All @@ -336,7 +335,8 @@ public void testIssue193() throws Exception
"则应从排名第八的投标人开始依次递补三名投标人"
};
Segment segment = HanLP.newSegment().enableOrganizationRecognize(true).enableNumberQuantifierRecognize(true);
for (String sentence : testCase) {
for (String sentence : testCase)
{
List<Term> termList = segment.seg(sentence);
System.out.println(termList);
}
Expand Down Expand Up @@ -388,4 +388,15 @@ public void testIssue343() throws Exception
Segment segment = HanLP.newSegment().enableIndexMode(true);
System.out.println(segment.seg("1酷我音乐2酷我音乐3酷我4酷我音乐6酷7酷我音乐"));
}

/**
 * Smoke test for issue hankcs#358: segments a sentence with organization
 * recognition switched on and prints the resulting terms.
 *
 * <p>There are no assertions; the test passes as long as segmentation
 * completes without throwing.
 *
 * @throws Exception if segmentation fails
 */
public void testIssue358() throws Exception
{
    // NOTE(review): this turns on global debug output and never restores it;
    // confirm whether later tests are expected to run with debug enabled.
    HanLP.Config.enableDebug();
    String text = "受约束,需要遵守心理学会所定的道德原则,所需要时须说明该实验与所能得到的知识的关系";

    // Configure a private segmenter instead of reconfiguring the shared
    // StandardTokenizer.SEGMENT instance, so the switches flipped here cannot
    // leak into other tests. This matches how the neighbouring tests obtain
    // their segmenter (HanLP.newSegment()).
    // NOTE(review): assumes HanLP.newSegment() yields the same segmenter type
    // that backs StandardTokenizer.SEGMENT - confirm before merging.
    Segment segment = HanLP.newSegment()
            .enableAllNamedEntityRecognize(false)
            .enableCustomDictionary(false)
            .enableOrganizationRecognize(true);

    System.out.println(segment.seg(text));
}
}

0 comments on commit 3bcea1c

Please sign in to comment.