Commit

Optimize com/hankcs/hanlp/summary/TextRankSentence.java
hankcs committed Nov 3, 2015
1 parent d394b14 commit f46b0b2
Showing 1 changed file with 19 additions and 23 deletions.
src/main/java/com/hankcs/hanlp/summary/TextRankSentence.java: 42 changes (19 additions, 23 deletions)
@@ -184,15 +184,13 @@ static List<String> spiltSentence(String document)
     }
 
     /**
-     * Single-call convenience interface
-     * @param document the target document
-     * @param size the number of key sentences required
-     * @return the list of key sentences
+     * Convert a list of sentences into a document
+     * @param sentenceList
+     * @return
      */
-    public static List<String> getTopSentenceList(String document, int size)
+    private static List<List<String>> convertSentenceListToDocument(List<String> sentenceList)
     {
-        List<String> sentenceList = spiltSentence(document);
-        List<List<String>> docs = new ArrayList<List<String>>();
+        List<List<String>> docs = new ArrayList<List<String>>(sentenceList.size());
         for (String sentence : sentenceList)
         {
             List<Term> termList = StandardTokenizer.segment(sentence.toCharArray());
@@ -205,8 +203,20 @@ public static List<String> getTopSentenceList(String document, int size)
                 }
             }
             docs.add(wordList);
-//            System.out.println(wordList);
         }
+        return docs;
+    }
+
+    /**
+     * Single-call convenience interface
+     * @param document the target document
+     * @param size the number of key sentences required
+     * @return the list of key sentences
+     */
+    public static List<String> getTopSentenceList(String document, int size)
+    {
+        List<String> sentenceList = spiltSentence(document);
+        List<List<String>> docs = convertSentenceListToDocument(sentenceList);
         TextRankSentence textRank = new TextRankSentence(docs);
         int[] topSentence = textRank.getTopSentence(size);
         List<String> resultList = new LinkedList<String>();
@@ -231,21 +241,7 @@ public static String getSummary(String document, int max_length)
         int document_length = document.length();
         int sentence_length_avg = document_length/sentence_count;
         int size = max_length/sentence_length_avg + 1;
-        List<List<String>> docs = new ArrayList<List<String>>();
-        for (String sentence : sentenceList)
-        {
-            List<Term> termList = StandardTokenizer.segment(sentence.toCharArray());
-            List<String> wordList = new LinkedList<String>();
-            for (Term term : termList)
-            {
-                if (CoreStopWordDictionary.shouldInclude(term))
-                {
-                    wordList.add(term.word);
-                }
-            }
-            docs.add(wordList);
-        }
-
+        List<List<String>> docs = convertSentenceListToDocument(sentenceList);
         TextRankSentence textRank = new TextRankSentence(docs);
         int[] topSentence = textRank.getTopSentence(size);
         List<String> resultList = new LinkedList<String>();
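The change above extracts the segmentation loop duplicated in getTopSentenceList and getSummary into the private helper convertSentenceListToDocument, pre-sizes the docs list, and drops a commented-out debug println. For orientation, here is a minimal usage sketch of the two public entry points visible in this diff; the demo class name, the sample text, and the size arguments are illustrative assumptions, not part of the commit.

import com.hankcs.hanlp.summary.TextRankSentence;

import java.util.List;

public class TextRankSentenceDemo
{
    public static void main(String[] args)
    {
        // Illustrative input document (any Chinese prose with sentence punctuation).
        String document = "水资源短缺是当今世界面临的重大问题。"
                + "节约用水应当从每个人做起。"
                + "工业和农业的用水效率也有很大的提升空间。";

        // Top 2 key sentences, ranked by TextRank over the tokenized sentence list.
        List<String> topSentences = TextRankSentence.getTopSentenceList(document, 2);
        System.out.println(topSentences);

        // Summary built from roughly max_length / avg-sentence-length + 1 top sentences,
        // per the size computation shown in the diff.
        String summary = TextRankSentence.getSummary(document, 30);
        System.out.println(summary);
    }
}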

0 comments on commit f46b0b2
