Skip to content

Commit

Permalink
初步实现了地址识别
Browse files — browse the repository at this point in the history
  • Loading branch information
hankcs committed Mar 12, 2015
1 parent 8c6ce7f commit 9d40c62
Showing 1 changed file with 16 additions and 0 deletions.
16 changes: 16 additions & 0 deletions src/main/java/com/hankcs/hanlp/HanLP.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@

import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence;
import com.hankcs.hanlp.dependency.MaxEntDependencyParser;
import com.hankcs.hanlp.dictionary.address.AddressDictionary;
import com.hankcs.hanlp.dictionary.py.Pinyin;
import com.hankcs.hanlp.dictionary.py.PinyinDictionary;
import com.hankcs.hanlp.dictionary.ts.SimplifiedChineseDictionary;
Expand All @@ -22,13 +23,15 @@
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment;
import com.hankcs.hanlp.seg.Viterbi.ViterbiSegment;
import com.hankcs.hanlp.seg.common.AddressTerm;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.summary.TextRankKeyword;
import com.hankcs.hanlp.summary.TextRankSentence;
import com.hankcs.hanlp.tokenizer.StandardTokenizer;

import java.io.File;
import java.io.InputStreamReader;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import java.util.logging.Level;
Expand Down Expand Up @@ -205,6 +208,8 @@ public static final class Config
MaxEntModelPath = root + p.getProperty("MaxEntModelPath", MaxEntModelPath);
CRFSegmentModelPath = root + p.getProperty("CRFSegmentModelPath", CRFSegmentModelPath);
CRFDependencyModelPath = root + p.getProperty("CRFDependencyModelPath", CRFDependencyModelPath);
AddressRoleDictionaryPath = root + p.getProperty("AddressRoleDictionaryPath", AddressRoleDictionaryPath);
AddressExamplePath = root + p.getProperty("AddressExamplePath", AddressExamplePath);
}
catch (Exception e)
{
Expand Down Expand Up @@ -397,6 +402,17 @@ public static List<String> extractKeyword(String document, int size)
return TextRankKeyword.getKeywordList(document, size);
}

/**
 * Address extraction entry point.
 * <p>
 * Converts the given text into a character array and hands it to
 * {@code AddressDictionary.extractAddress}; whatever that call returns is
 * passed back to the caller unchanged.
 *
 * @param text the text to scan; must not be {@code null}
 *             (a {@code null} argument raises a {@link NullPointerException})
 * @return the list of address terms reported by {@code AddressDictionary}
 */
public static LinkedList<AddressTerm> extractAddress(String text)
{
    // The dictionary-side API operates on a raw char[] rather than a String.
    final char[] charArray = text.toCharArray();
    final LinkedList<AddressTerm> addressList = AddressDictionary.extractAddress(charArray);
    return addressList;
}

/**
* 自动摘要
* @param document 目标文档
Expand Down

0 comments on commit 9d40c62

Please sign in to comment.