Skip to content

Commit

Permalink
实现了用户自定义词性,同时支持代码动态增加和词典文件增加用户词性
Browse files Browse the repository at this point in the history
  • Loading branch information
hankcs committed Mar 26, 2016
1 parent c81495f commit a1ef70f
Show file tree
Hide file tree
Showing 13 changed files with 815 additions and 20 deletions.
9 changes: 4 additions & 5 deletions src/main/java/com/hankcs/hanlp/corpus/io/ByteArray.java
Original file line number Diff line number Diff line change
Expand Up @@ -105,13 +105,12 @@ public boolean hasMore()
*/
public String nextString()
{
// Reads one string: nextInt() supplies the character count, then that many
// nextChar() calls supply the characters.
// NOTE(review): this span is a rendered diff whose +/- markers were lost, so two
// variants of the body are interleaved and the text is not compilable as shown.
// Presumably the StringBuilder lines are the removed version and the char[] lines
// are the added version -- confirm against the repository.
// Variant A (StringBuilder): read the length, then append each char.
StringBuilder sb = new StringBuilder();
int length = nextInt();
for (int i = 0; i < length; ++i)
// Variant B (char[]): allocate a buffer of exactly the announced length and fill it.
char[] buffer = new char[nextInt()];
for (int i = 0; i < buffer.length; ++i)
{
// Variant A loop body.
sb.append(nextChar());
// Variant B loop body.
buffer[i] = nextChar();
}
// Variant A result.
return sb.toString();
// Variant B result.
return new String(buffer);
}

public float nextFloat()
Expand Down
10 changes: 10 additions & 0 deletions src/main/java/com/hankcs/hanlp/corpus/io/IOUtil.java
Original file line number Diff line number Diff line change
Expand Up @@ -477,4 +477,14 @@ public static void loadDictionary(BufferedReader br, TreeMap<String, CoreDiction
}
br.close();
}

/**
 * Writes the collected custom natures to the given stream.
 * <p>
 * Nothing is written when the collection is empty. Otherwise the negated element
 * count is written as an int, followed by the string form of every nature in
 * iteration order (each written through {@code TextUtility.writeString}).
 *
 * @param out                   stream that receives the data
 * @param customNatureCollector natures to write
 * @throws IOException if writing to {@code out} fails
 */
public static void writeCustomNature(DataOutputStream out, LinkedHashSet<Nature> customNatureCollector) throws IOException
{
    final int natureCount = customNatureCollector.size();
    if (natureCount != 0)
    {
        // The count is stored negated.
        out.writeInt(-natureCount);
        for (Nature customNature : customNatureCollector)
        {
            final String natureName = customNature.toString();
            TextUtility.writeString(natureName, out);
        }
    }
}
}
21 changes: 20 additions & 1 deletion src/main/java/com/hankcs/hanlp/corpus/tag/Nature.java
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
*/
package com.hankcs.hanlp.corpus.tag;

import com.hankcs.hanlp.corpus.util.CustomNatureUtility;

/**
* 词性
*
Expand Down Expand Up @@ -810,4 +812,21 @@ public static Nature fromString(String name)
return null;
}
}
}

/**
 * Creates a custom nature (part-of-speech tag). If a nature with the given name
 * already exists, that existing nature is returned instead.
 *
 * @param name the nature's name as a string
 * @return the matching enum constant, or the one obtained from
 *         {@code CustomNatureUtility.addNature(name)} when the lookup fails
 */
public static Nature create(String name)
{
    Nature nature;
    try
    {
        // Look the name up among the existing constants first.
        nature = Nature.valueOf(name);
    }
    catch (Exception lookupFailure)
    {
        // Lookup failed: delegate to the custom-nature utility.
        nature = CustomNatureUtility.addNature(name);
    }
    return nature;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* <summary></summary>
* <author>He Han</author>
* <email>[email protected]</email>
* <create-date>2016/1/4 16:02</create-date>
*
* <copyright file="CustomNatureUtility.java" company="码农场">
* Copyright (c) 2008-2016, 码农场. All Right Reserved, http://www.hankcs.com/
* This source is subject to Hankcs. Please contact Hankcs to get more information.
* </copyright>
*/
package com.hankcs.hanlp.corpus.util;

import com.hankcs.hanlp.corpus.tag.Nature;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.recognition.nr.PersonRecognition;
import com.hankcs.hanlp.recognition.nt.OrganizationRecognition;
import com.hankcs.hanlp.seg.common.Vertex;
import static com.hankcs.hanlp.utility.Predefine.logger;

/**
 * Utility for adding natures (part-of-speech tags) dynamically at runtime.
 *
 * @author hankcs
 */
public class CustomNatureUtility
{
    /**
     * Warning logged once when this class is initialized.
     */
    private static final String ACTIVATION_WARNING =
            "已激活自定义词性功能,由于采用了反射技术,用户需对本地环境的兼容性和稳定性负责!\n" +
            "如果用户代码X.java中有switch(nature)语句,需要调用CustomNatureUtility.registerSwitchClass(X.class)注册X这个类";

    static
    {
        logger.warning(ACTIVATION_WARNING);
    }

    /**
     * Helper that creates and registers new {@link Nature} constants; constructed with
     * {@code Nature.class} followed by CustomDictionary, Vertex, PersonRecognition and
     * OrganizationRecognition.
     */
    private static EnumBuster<Nature> enumBuster = new EnumBuster<Nature>(
            Nature.class,
            CustomDictionary.class,
            Vertex.class,
            PersonRecognition.class,
            OrganizationRecognition.class);

    /**
     * Adds a nature.
     *
     * @param name name of the nature
     * @return the newly made nature
     */
    public static Nature addNature(String name)
    {
        final Nature created = enumBuster.make(name);
        enumBuster.addByValue(created);
        return created;
    }

    /**
     * Registers classes that contain {@code switch(nature)} statements.
     *
     * @param switchUsers any classes that use a {@code switch(nature)} statement
     */
    public static void registerSwitchClass(Class... switchUsers)
    {
        enumBuster.registerSwitchClass(switchUsers);
    }

    /**
     * Reverts all modifications made to the natures.
     */
    public static void restore()
    {
        enumBuster.restore();
    }
}
Loading

0 comments on commit a1ef70f

Please sign in to comment.