Skip to content

Commit

Permalink
No commit message
Browse files Browse the repository at this point in the history
  • Loading branch information
johnmccrae committed Apr 21, 2009
1 parent d885190 commit 1aec1a2
Show file tree
Hide file tree
Showing 8 changed files with 37 additions and 24 deletions.
5 changes: 3 additions & 2 deletions SRLGUI/src/srl/corpus/pre/PreSplitter.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,9 @@ public List<SrlDocument> split(Collection<Token> doc, String docName) {
List<SrlDocument> rv = new LinkedList<SrlDocument>();

for(Token t : doc) {
if(t.termLength() == 1 && t.termBuffer()[0] == PreTokenizer.SPLITTER_CHAR) {
rv.add(srlDoc);
if(t.termLength() == 1 && (t.termBuffer()[0] == PreTokenizer.SPLITTER_CHAR1 || t.termBuffer()[0] == PreTokenizer.SPLITTER_CHAR2)) {
if(srlDoc.size() != 0)
rv.add(srlDoc);
srlDoc = new SrlDocument(docName + " " + ++docNumber);
} else {
srlDoc.add(t);
Expand Down
2 changes: 1 addition & 1 deletion SRLGUI/src/srl/corpus/pre/PreTokenizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
public class PreTokenizer extends org.apache.lucene.analysis.Tokenizer implements PreTokenizerConstants {
boolean finished = false;

public static char SPLITTER_CHAR = '\u00b6';
public static char SPLITTER_CHAR1 = '\n', SPLITTER_CHAR2 = '\r';

/** Constructs a tokenizer for this Reader. */
public PreTokenizer(Reader reader) {
Expand Down
2 changes: 1 addition & 1 deletion SRLGUI/src/srl/corpus/pre/PreTokenizerConstants.java
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ public interface PreTokenizerConstants {
"<EOF>",
"<BEGIN_TAG>",
"<END_TAG>",
"\"\\u00b6\"",
"<SPLITTER>",
"<ANYTHING>",
"<ALPHANUM>",
"\" \"",
Expand Down
41 changes: 24 additions & 17 deletions SRLGUI/src/srl/corpus/pre/PreTokenizerTokenManager.java
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,6 @@ private int jjMoveStringLiteralDfa0_0()
{
switch(curChar)
{
case 182:
return jjStopAtPos(0, 3);
default :
return jjMoveNfa_0(0, 0);
}
Expand All @@ -53,12 +51,12 @@ private int jjMoveStringLiteralDfa0_0()
0xfffffffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL
};
static final long[] jjbitVec2 = {
0x0L, 0x0L, 0xffbfffffffffffffL, 0xffffffffffffffffL
0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL
};
private int jjMoveNfa_0(int startState, int curPos)
{
int startsAt = 0;
jjnewStateCnt = 15;
jjnewStateCnt = 16;
int i = 1;
jjstateSet[0] = startState;
int kind = 0x7fffffff;
Expand All @@ -74,11 +72,16 @@ private int jjMoveNfa_0(int startState, int curPos)
switch(jjstateSet[--i])
{
case 0:
if ((0xfffffffeffffffffL & l) != 0L)
if ((0xfffffffeffffdbffL & l) != 0L)
{
if (kind > 4)
kind = 4;
jjCheckNAdd(14);
jjCheckNAdd(15);
}
else if ((0x2400L & l) != 0L)
{
if (kind > 3)
kind = 3;
}
if (curChar == 60)
jjstateSet[jjnewStateCnt++] = 10;
Expand Down Expand Up @@ -130,11 +133,15 @@ private int jjMoveNfa_0(int startState, int curPos)
jjstateSet[jjnewStateCnt++] = 10;
break;
case 14:
if ((0xfffffffeffffffffL & l) == 0L)
if ((0x2400L & l) != 0L)
kind = 3;
break;
case 15:
if ((0xfffffffeffffdbffL & l) == 0L)
break;
if (kind > 4)
kind = 4;
jjCheckNAdd(14);
jjCheckNAdd(15);
break;
default : break;
}
Expand All @@ -148,10 +155,10 @@ else if (curChar < 128)
switch(jjstateSet[--i])
{
case 0:
case 14:
case 15:
if (kind > 4)
kind = 4;
jjCheckNAdd(14);
jjCheckNAdd(15);
break;
case 1:
if ((0x7fffffe07fffffeL & l) != 0L)
Expand Down Expand Up @@ -189,12 +196,12 @@ else if (curChar < 128)
switch(jjstateSet[--i])
{
case 0:
case 14:
case 15:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 4)
kind = 4;
jjCheckNAdd(14);
jjCheckNAdd(15);
break;
default : break;
}
Expand All @@ -207,7 +214,7 @@ else if (curChar < 128)
kind = 0x7fffffff;
}
++curPos;
if ((i = jjnewStateCnt) == (startsAt = 15 - (jjnewStateCnt = startsAt)))
if ((i = jjnewStateCnt) == (startsAt = 16 - (jjnewStateCnt = startsAt)))
return curPos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return curPos; }
Expand All @@ -231,7 +238,7 @@ private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, lo

/** Token literal values. */
public static final String[] jjstrLiteralImages = {
"", null, null, "\266", null, null, null, };
"", null, null, null, null, null, null, };

/** Lexer state names. */
public static final String[] lexStateNames = {
Expand All @@ -244,8 +251,8 @@ private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, lo
0x40L,
};
protected CharStream input_stream;
private final int[] jjrounds = new int[15];
private final int[] jjstateSet = new int[30];
private final int[] jjrounds = new int[16];
private final int[] jjstateSet = new int[32];
protected char curChar;
/** Constructor. */
public PreTokenizerTokenManager(CharStream stream){
Expand All @@ -270,7 +277,7 @@ private void ReInitRounds()
{
int i;
jjround = 0x80000001;
for (i = 15; i-- > 0;)
for (i = 16; i-- > 0;)
jjrounds[i] = 0x80000000;
}

Expand Down
6 changes: 3 additions & 3 deletions SRLGUI/src/srl/corpus/pre/pre_tokenizer.jj
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import srl.corpus.*;
public class PreTokenizer extends org.apache.lucene.analysis.Tokenizer {
boolean finished = false;

public static char SPLITTER_CHAR = '\u00b6';
public static char SPLITTER_CHAR1 = '\n', SPLITTER_CHAR2 = '\r';

/** Constructs a tokenizer for this Reader. */
public PreTokenizer(Reader reader) {
Expand Down Expand Up @@ -60,8 +60,8 @@ PARSER_END(PreTokenizer)
TOKEN : { // token patterns
< BEGIN_TAG : "<" (<ALPHANUM>)+ " cl=\"" (<ALPHANUM>)+ "\">">
| < END_TAG : "</" (<ALPHANUM>)+ ">" >
| < SPLITTER: "\u00b6" >
| < ANYTHING: ~[ " ", "\u00b6" ] (~[ " ", "\u00b6" ])* >
| < SPLITTER: [ "\n", "\r" ] >
| < ANYTHING: ~[ " ", "\n", "\r" ] (~[ " ", "\n", "\r" ])* >
| < #ALPHANUM : [
"0"-"9",
"A"-"Z",
Expand Down
2 changes: 2 additions & 0 deletions SRLGUI/src/srl/rule/NegativeLiteral.java
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ public void skip(Token token) {
// Builds the textual form of this negated literal:
//   not("<literal>")      when a literal is present,
//   not(%<wordListName>)  when there is no literal and listMatcher.set is true,
//   not(@<wordListName>)  otherwise.
public String toString() {
    if (literal == null) {
        // No literal: refer to the word list by name; the marker character
        // depends solely on the listMatcher.set flag.
        final String opening = listMatcher.set ? "not(%" : "not(@";
        return opening + listMatcher.wordListName + ")";
    }
    return "not(\"" + literal + "\")";
}
Expand Down
2 changes: 2 additions & 0 deletions SRLGUI/src/srl/rule/OptionalLiteral.java
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ public void skip(Token token) {
// Builds the textual form of this optional literal:
//   optional("<literal>")      when a literal is present,
//   optional(%<wordListName>)  when there is no literal and listMatcher.set is true,
//   optional(@<wordListName>)  otherwise.
public String toString() {
    if (literal == null) {
        // No literal: refer to the word list by name; the marker character
        // depends solely on the listMatcher.set flag.
        final String opening = listMatcher.set ? "optional(%" : "optional(@";
        return opening + listMatcher.wordListName + ")";
    }
    return "optional(\"" + literal + "\")";
}
Expand Down
1 change: 1 addition & 0 deletions SRLGUI/src/srl/rule/Rule.java
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,7 @@ public List<HashMap<Entity, SrlMatchRegion>> getMatch(SrlDocument sentence, bool
lookBackStack.removeLast();
continue MAIN;
}
lookBackStack.add(tk2);
}
while(lookBackStack.size() > i && !lookBackStack.isEmpty())
lookBackStack.removeLast();
Expand Down

0 comments on commit 1aec1a2

Please sign in to comment.