Skip to content

Commit

Permalink
add srl training suite to rock, including samples, assets
Browse files Browse the repository at this point in the history
  • Loading branch information
jiangfeng1124 committed Jan 14, 2014
1 parent a55726e commit 29aebc7
Show file tree
Hide file tree
Showing 11 changed files with 427 additions and 28 deletions.
8 changes: 7 additions & 1 deletion test/multi_ltp_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -117,7 +117,13 @@ void multithreaded_ltp( void * args) {
}

string result;
xml4nlp.SaveDOM(result);
vector<string> words;
xml4nlp.GetWordsFromSentence(words, 0);
size_t ii = 0;
for (; ii < words.size() - 1; ++ii)
result += words[ii] + " ";
result += words[ii];
// xml4nlp.SaveDOM(result);
xml4nlp.ClearDOM();

dispatcher->output(ret, result);
Expand Down
116 changes: 116 additions & 0 deletions tools/train/conf/srl/assets/Chinese.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
<?xml version="1.0" encoding="UTF-8"?>
<head>
<language>
Chinese
</language>
<features_pred_rg>
DepRelation
HeadwordPOS
DepwordPOS
Headword
Depword
HeadwordLemma
DepwordLemma
FirstWord
LastWord
FirstPOS
LastPOS
FirstLemma
LastLemma
ConstituentPOSPattern
ChildrenPOS
ChildrenPOSNoDup
ChildrenREL
ChildrenRELNoDup
SiblingsPOS
SiblingsPOSNoDup
SiblingsREL
SiblingsRELNoDup
</features_pred_rg>
<features_pred_cl>
Predicate
PredicateLemma
PredicateBagOfWords
PredicateBagOfWordsOrdered
PredicateBagOfPOSOrdered
PredicateBagOfPOSNumbered
PredicateWindow5Bigram
PredicateChildrenPOS
PredicateChildrenPOSNoDup
PredicateChildrenREL
PredicateChildrenRELNoDup
PredicateSiblingsPOS
PredicateSiblingsPOSNoDup
PredicateSiblingsREL
PredicateSiblingsRELNoDup
HeadwordPOS
DepRelation
Headword
DepwordPOS
HeadwordLemma
PredicateWindow5BigramPOS
PredicateBagOfPOSWindow5
PredicateBagOfPOSorderedWindow5
PredicateBagOfPOSNumberedWindow5
PredicateBagOfWordsAndIsDesOfPRED
</features_pred_cl>
<features_role_cl>
DepRelation
HeadwordPOS
DepwordPOS
Headword
Depword
HeadwordLemma
DepwordLemma
FirstWord
LastWord
FirstPOS
LastPOS
FirstLemma
LastLemma
ConstituentPOSPattern
ChildrenPOS
ChildrenPOSNoDup
ChildrenREL
ChildrenRELNoDup
SiblingsPOS
SiblingsPOSNoDup
SiblingsREL
SiblingsRELNoDup
PredicateChildrenPOS
PredicateChildrenPOSNoDup
PredicateChildrenREL
PredicateChildrenRELNoDup
PredicateSiblingsPOS
PredicateSiblingsPOSNoDup
PredicateSiblingsREL
PredicateSiblingsRELNoDup
PredicateLemma
Predicate
PredicateSense
Path
UpPath
RelationPath
UpRelationPath
PathLength
UpPathLength
DownPathLength
DescendantOfPredicate
Position
PredicateFamilyship
PredicateWindow5Bigram
</features_role_cl>
<predicate>
<noun>
NN
NR
NT
</noun>
<verb>
VA
VC
VE
VV
</verb>
</predicate>
</head>
20 changes: 20 additions & 0 deletions tools/train/conf/srl/assets/srl.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
PredicateSiblingsRELNoDup
HeadwordLemma+RelationPath
UpRelationPath+HeadwordLemma
ConstituentPOSPattern+HeadwordLemma
Path+RelationPath
Predicate+PredicateFamilyship
DepwordLemma+RelationPath
DepRelation+HeadwordLemma+DepwordLemma
Path
PathLength
DepRelation
HeadwordLemma
DepwordLemma
Position
RelationPath
UpPath
PredicateLemma
LastLemma
FirstLemma
ConstituentPOSPattern
27 changes: 0 additions & 27 deletions tools/train/conf/srl/lgsrl.cfg

This file was deleted.

11 changes: 11 additions & 0 deletions tools/train/conf/srl/srl-prg.cnf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[train-prg]
# Settings for the [train-prg] run. tools/train/rock.sh hands this file
# ($CONF_DIR/srl/srl-prg.cnf) to ./lgsrl as its only argument in the SRL-PRG session.
# All paths are relative — presumably to the directory rock.sh is run from (TODO confirm).
prg-train-file = sample/srl/example-train.srl
core-config = conf/srl/assets/Chinese.xml
# rock.sh creates a prg-instances.train directory under its SRL build dir before training.
prg-instance-file = build/srl/prg-instances.train/train.inst
# rock.sh tests for a prg.model file under its SRL build dir after training.
prg-model-file = build/srl/prg.model
# One solver-type is active; the alternatives are kept below, commented out.
solver-type = 0 # L1-owlqn
#solver-type = 1 # L1-sgd
#solver-type = 2 # L2-lbfgs
#nheldout = 0


13 changes: 13 additions & 0 deletions tools/train/conf/srl/srl-srl.cnf
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
[train-srl]
# Settings for the [train-srl] run. tools/train/rock.sh hands this file
# ($CONF_DIR/srl/srl-srl.cnf) to ./lgsrl as its only argument in the SRL-SRL session.
# All paths are relative — presumably to the directory rock.sh is run from (TODO confirm).
srl-train-file = sample/srl/example-train.srl
core-config = conf/srl/assets/Chinese.xml
srl-config = conf/srl/assets/srl.cfg
# rock.sh creates srl-features.train and srl-instances.train directories under
# its SRL build dir before training.
srl-feature-dir = build/srl/srl-features.train
srl-instance-file = build/srl/srl-instances.train/train.inst
# rock.sh tests for an srl.model file under its SRL build dir after training.
srl-model-file = build/srl/srl.model
# One solver-type is active; the alternatives are kept below, commented out.
solver-type = 0 # L1-owlqn
#solver-type = 1 # L1-sgd
#solver-type = 2 # L2-lbfgs
#nheldout = 0


5 changes: 5 additions & 0 deletions tools/train/conf/srl/srl-test.cnf
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
[test]
# NOTE(review): the visible part of rock.sh never passes this file to a tool —
# confirm how (and from which directory) the [test] run is launched.
test-file = sample/srl/example-test.srl
# Same directory that srl-prg.cnf and srl-srl.cnf name for prg.model and srl.model.
config-dir = build/srl/
# The test-file path with a .predict suffix appended.
output-file = sample/srl/example-test.srl.predict

58 changes: 58 additions & 0 deletions tools/train/rock.sh
Original file line number Diff line number Diff line change
Expand Up @@ -179,3 +179,61 @@ else
echo "[4.3] TRACE: Parser-o2carreras train model test is passed."
fi

#################################################
# THE SRL-PRG SESSION #
#################################################

# Run lgsrl with the SRL-PRG training config, then report whether the model
# file exists. BUILD_DIR, CONF_DIR and LOG_DIR are not set in this section —
# presumably they are defined earlier in the script.
# Every expansion is quoted so paths containing spaces or glob characters
# are passed through intact.
SRL_PRG_MODEL_DIR="$BUILD_DIR/srl"
SRL_PRG_MODEL_PATH="$SRL_PRG_MODEL_DIR/prg.model"
SRL_PRG_INSTANCE_DIR="$SRL_PRG_MODEL_DIR/prg-instances.train"

SRL_PRG_CONF_DIR="$CONF_DIR/srl"
SRL_PRG_CONF_TRAIN_PATH="$SRL_PRG_CONF_DIR/srl-prg.cnf"

SRL_PRG_LOG_DIR="$LOG_DIR/srl"
SRL_PRG_LOG_TRAIN_PATH="$SRL_PRG_LOG_DIR/example-prg.train.log"

# Create the model, log and instance directories up front.
mkdir -p "$SRL_PRG_MODEL_DIR"
mkdir -p "$SRL_PRG_LOG_DIR"
mkdir -p "$SRL_PRG_INSTANCE_DIR"

SRL_PRG_EXE=./lgsrl

# Send both stdout and stderr to the training log. `> file 2>&1` is the
# portable spelling of `>& file`, which only bash/csh accept.
"$SRL_PRG_EXE" "$SRL_PRG_CONF_TRAIN_PATH" > "$SRL_PRG_LOG_TRAIN_PATH" 2>&1

# NOTE(review): a prg.model left over from an earlier run also satisfies this
# check; the test only proves the file exists, not that this run wrote it.
if [ ! -f "$SRL_PRG_MODEL_PATH" ]; then
    echo "[1] ERROR: SRL model is not detected!"
else
    echo "[1] TRACE: SRL train model test is passed."
fi

#################################################
# THE SRL-SRL SESSION #
#################################################

# Run lgsrl with the SRL-SRL training config, then report whether the model
# file exists. BUILD_DIR, CONF_DIR and LOG_DIR are not set in this section —
# presumably they are defined earlier in the script.
# Every expansion is quoted so paths containing spaces or glob characters
# are passed through intact.
SRL_SRL_MODEL_DIR="$BUILD_DIR/srl"
SRL_SRL_MODEL_PATH="$SRL_SRL_MODEL_DIR/srl.model"
SRL_SRL_FEATURES_DIR="$SRL_SRL_MODEL_DIR/srl-features.train"
SRL_SRL_INSTANCE_DIR="$SRL_SRL_MODEL_DIR/srl-instances.train"

SRL_SRL_CONF_DIR="$CONF_DIR/srl"
SRL_SRL_CONF_TRAIN_PATH="$SRL_SRL_CONF_DIR/srl-srl.cnf"

SRL_SRL_LOG_DIR="$LOG_DIR/srl"
SRL_SRL_LOG_TRAIN_PATH="$SRL_SRL_LOG_DIR/example-srl.train.log"

# Create the model, log, feature and instance directories up front.
mkdir -p "$SRL_SRL_MODEL_DIR"
mkdir -p "$SRL_SRL_LOG_DIR"
mkdir -p "$SRL_SRL_FEATURES_DIR"
mkdir -p "$SRL_SRL_INSTANCE_DIR"

SRL_SRL_EXE=./lgsrl

# Send both stdout and stderr to the training log. `> file 2>&1` is the
# portable spelling of `>& file`, which only bash/csh accept.
"$SRL_SRL_EXE" "$SRL_SRL_CONF_TRAIN_PATH" > "$SRL_SRL_LOG_TRAIN_PATH" 2>&1

# NOTE(review): an srl.model left over from an earlier run also satisfies this
# check; the test only proves the file exists, not that this run wrote it.
if [ ! -f "$SRL_SRL_MODEL_PATH" ]; then
    echo "[1] ERROR: SRL model is not detected!"
else
    echo "[1] TRACE: SRL train model test is passed."
fi

61 changes: 61 additions & 0 deletions tools/train/sample/srl/example-heldout.srl
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
1 中国 中国 中国 NR NR _ _ 5 5 NMOD NMOD _ _ _
2 最大 最大 最大 JJ JJ _ _ 5 5 AMOD AMOD _ _ _
3 氨纶丝 氨纶丝 氨纶丝 NN NN _ _ 5 5 NMOD NMOD _ _ _
4 生产 生产 生产 NN NN _ _ 5 5 NMOD NMOD _ _ _
5 基地 基地 基地 NN NN _ _ 8 8 SBJ SBJ _ _ A1
6 在 在 在 P P _ _ 8 8 LOC LOC _ _ LOC
7 连云港 连云港 连云港 NR NR _ _ 6 6 OBJ OBJ _ _ _
8 建成 建成 建成 VV VV _ _ 0 0 ROOT ROOT Y 建成.01 _

1 新华社 新华社 新华社 NN NR _ _ 5 5 UNK UNK _ _
2 南京 南京 南京 NR NR _ _ 5 5 UNK UNK _ _
3 十二月 十二月 十二月 NT NT _ _ 5 5 UNK UNK _ _
4 四日 四日 四日 NT NT _ _ 5 5 UNK UNK _ _
5 电 电 电 NN NN _ _ 0 0 ROOT ROOT _ _

1 中国 中国 中国 NR NR _ _ 3 3 LOC LOC _ _ LOC _ _
2 最 最 最 AD AD _ _ 3 3 ADV ADV _ _ ADV _ _
3 大 大 大 VA VA _ _ 4 4 COMP COMP Y 大.01 _ _ _
4 的 的 的 DEC DEC _ _ 7 7 RELC RELC _ _ _ _ _
5 氨伦丝 氨伦丝 氨伦丝 NN NN _ _ 7 7 NMOD NMOD _ _ _ _ _
6 生产 生产 生产 NN NN _ _ 7 7 NMOD NMOD _ _ _ _ _
7 基地 基地 基地 NN NN _ _ 18 18 SBJ SBJ _ _ A0 A1 A1
8 -- -- -- PU PU _ _ 12 12 UNK UNK _ _ _ _ _
9 钟山 钟山 钟山 NR NR _ _ 12 12 NMOD NMOD _ _ _ _ _
10 氨伦 氨伦 氨伦 NN NR _ _ 12 12 NMOD NMOD _ _ _ _ _
11 有限 有限 有限 JJ JJ _ _ 12 12 AMOD AMOD _ _ _ _ _
12 公司 公司 公司 NN NN _ _ 7 7 UNK UNK _ _ _ _ _
13 , , , PU PU _ _ 18 18 UNK UNK _ _ _ _ _
14 日前 日前 日前 NT NT _ _ 18 18 TMP TMP _ _ _ TMP TMP
15 在 在 在 P P _ _ 18 18 LOC LOC _ _ _ LOC LOC
16 连云港 连云港 连云港 NR NR _ _ 17 17 NMOD NMOD _ _ _ _ _
17 开发区 开发区 开发区 NN NN _ _ 15 15 OBJ OBJ _ _ _ _ _
18 建成 建成 建成 VV VV _ _ 0 0 ROOT ROOT Y 建成.01 _ _ _
19 并 并 并 CC CC _ _ 18 18 CJTN CJTN _ _ _ _ _
20 投产 投产 投产 VV VV _ _ 19 19 CJT CJT Y 投产.01 _ _ _
21 。 。 。 PU PU _ _ 18 18 UNK UNK _ _ _ _ _

1 这 这 这 DT DT _ _ 10 10 DMOD DMOD _ _ _ _ _
2 个 个 个 M M _ _ 1 1 COMP COMP _ _ _ _ _
3 采用 采用 采用 VV VV _ _ 8 8 ADV ADV Y 采用.01 _ _ _
4 差别化 差别化 差别化 NN VV _ _ 7 7 NMOD NMOD _ _ _ _ _
5 氨伦丝 氨伦丝 氨伦丝 NN JJ _ _ 7 7 NMOD NMOD _ _ _ _ _
6 生产 生产 生产 NN NN _ _ 7 7 NMOD NMOD _ _ _ _ _
7 技术 技术 技术 NN NN _ _ 3 3 COMP COMP _ _ A1 _ _
8 改造 改造 改造 VV NN _ _ 9 9 COMP COMP Y 改造.01 _ _ _
9 的 的 的 DEC DEC _ _ 10 10 RELC RELC _ _ _ _ _
10 项目 项目 项目 NN NN _ _ 14 14 TPC TPC _ _ A0 A1 _
11 , , , PU PU _ _ 14 14 UNK UNK _ _ _ _ _
12 总 总 总 JJ JJ _ _ 13 13 AMOD AMOD _ _ _ _ _
13 投资 投资 投资 NN NN _ _ 14 14 SBJ SBJ _ _ _ _ _
14 七千万 七千万 七千万 CD CD _ _ 0 0 ROOT ROOT _ _ _ _ _
15 元 元 元 M M _ _ 14 14 COMP COMP _ _ _ _ _
16 , , , PU PU _ _ 14 14 CJTN CJTN _ _ _ _ _
17 累计 累计 累计 AD AD _ _ 19 19 ADV ADV _ _ _ _ ADV
18 年 年 年 AD AD _ _ 19 19 ADV ADV _ _ _ _ ADV
19 产 产 产 VV VV _ _ 16 16 CJT CJT Y 产.01 _ _ _
20 氨纶丝 氨纶丝 氨纶丝 NN NN _ _ 19 19 COMP COMP _ _ _ _ A1
21 一千五百 一千五百 一千五百 CD CD _ _ 19 19 EXT EXT _ _ _ _ A2
22 吨 吨 吨 M M _ _ 21 21 COMP COMP _ _ _ _ _
23 。 。 。 PU PU _ _ 14 14 UNK UNK _ _ _ _ _

47 changes: 47 additions & 0 deletions tools/train/sample/srl/example-test.srl
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
1 �й� �й� �й� NR NR _ _ 2 2 SBJ SBJ _ _ A0 _
2 ���� ���� ���� VV VV _ _ 0 0 ROOT ROOT Y ����.02 _ _
3 ��Ӫ ��Ӫ ��Ӫ JJ JJ _ _ 4 4 AMOD AMOD _ _ _ _
4 ��ҵ�� ��ҵ�� ��ҵ�� NN NN _ _ 2 2 COMP COMP _ _ A1 _
5 Ͷ�� Ͷ�� Ͷ�� VV VV _ _ 2 2 COMP COMP Y Ͷ��.01 A2 _
6 ���� ���� ���� NN NN _ _ 8 8 NMOD NMOD _ _ _ _
7 ���� ���� ���� NN NN _ _ 8 8 NMOD NMOD _ _ _ _
8 ���� ���� ���� NN NN _ _ 5 5 COMP COMP _ _ _ A1

1 �»��� �»��� �»��� NN NR _ _ 12 13 UNK UNK _ _
2 �ϲ� �ϲ� �ϲ� NR NR _ _ 12 13 UNK UNK _ _
3 ʮ���� ʮ���� ʮ���� NT NT _ _ 12 13 UNK UNK _ _
4 ���� ���� ���� NT NT _ _ 12 13 UNK UNK _ _
5 �� �� �� NN NN _ _ 12 13 UNK UNK _ _
6 �� �� �� PU PU _ _ 12 13 UNK UNK _ _
7 ���� ���� ���� NN NN _ _ 12 13 UNK UNK _ _
8 ������ ������ ������ NR NR _ _ 12 13 UNK UNK _ _
9 �� �� �� PU PU _ _ 12 8 UNK CJTN _ _
10 ��ƽ ��ƽ ��ƽ NR NR _ _ 12 9 UNK CJT _ _
11 �� �� �� PU PU _ _ 12 8 UNK cCJTN _ _
12 ���� ���� ���� NR NR _ _ 0 11 ROOT CJT _ _
13 �� �� �� PU PU _ _ 12 0 UNK ROOT _ _

1 �ش� �ش� �ش� VV VV _ _ 7 7 COMP COMP Y �ش�.01 _ _ _
2 ���� ���� ���� NN NN _ _ 3 3 NMOD NMOD _ _ _ _ _
3 ��ɽ ��ɽ ��ɽ NN NN _ _ 4 4 NMOD NMOD _ _ _ _ _
4 ®ɽ ®ɽ ®ɽ NR NR _ _ 5 5 NMOD NMOD _ _ _ _ _
5 �� �� �� NN NN _ _ 6 6 COMP COMP _ _ _ _ _
6 �� �� �� LC LC _ _ 1 1 COMP COMP _ _ A1 _ _
7 �� �� �� DEC DEG _ _ 12 12 RELC RELC _ _ _ _ _
8 �Ž� �Ž� �Ž� NR NR _ _ 12 12 NMOD NMOD _ _ _ _ _
9 ���� ���� ���� NN NN _ _ 10 10 NMOD NMOD _ _ _ _ _
10 ���� ���� ���� NN NN _ _ 12 12 NMOD NMOD _ _ _ _ _
11 ���� ���� ���� JJ JJ _ _ 12 12 AMOD AMOD _ _ _ _ _
12 ��˾ ��˾ ��˾ NN NN _ _ 15 14 SBJ SBJ _ _ A0 _ A0
13 �� �� �� PU PU _ _ 15 14 UNK UNK _ _ _ _ _
14 �� �� �� VC VC _ _ 15 0 AUX ROOT Y ��.01 _ _ _
15 ���� ���� ���� VV VV _ _ 0 14 ROOT PRD Y ����.01 _ _ _
16 �й� �й� �й� NR NR _ _ 21 21 NMOD NMOD _ _ _ _ _
17 ��� ��� ��� CD CD _ _ 21 21 DMOD DMOD _ _ _ _ _
18 �� �� �� M M _ _ 17 17 COMP COMP _ _ _ _ _
19 ��� ��� ��� JJ JJ _ _ 21 21 AMOD AMOD _ _ _ _ _
20 ˽Ӫ ˽Ӫ ˽Ӫ JJ JJ _ _ 21 21 AMOD AMOD _ _ _ _ _
21 ��ҵ ��ҵ ��ҵ NN NN _ _ 15 15 COMP COMP _ _ _ _ A1
22 �� �� �� PU PU _ _ 15 14 UNK UNK _ _ _ _ _


Loading

0 comments on commit 29aebc7

Please sign in to comment.