Skip to content

Commit

Permalink
Sentence 21 works! ContentTypeSegmenter: if new segment type is align…
Browse files Browse the repository at this point in the history
…ed, overwrite to return replaced instead (alignments would have been caught in the first round). Segment: added copy constructor, with overridable type.
  • Loading branch information
rhdekker committed Mar 28, 2018
1 parent 3d74e98 commit 3be3509
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,10 @@ public List<Segment> calculateSegmentation(EditGraphTable editTable) {
Cell currentCell = iterateTable.next();
// we change state based on type, in case of mixed that we look at the movement through the table
if (editTable.determineUniqueCellType(currentCell) != editTable.determineUniqueCellType(lastCell)) {
Segment newSegment = editTable.createSegmentOfCellsWithType(currentCell, lastCell, Segment.Type.replacement);
Segment newSegment = editTable.createSegmentOfCells(currentCell, lastCell);
if (newSegment.type == Segment.Type.aligned) {
newSegment = new Segment(newSegment, Segment.Type.replacement);
}
superwitness.add(0, newSegment);
lastCell = currentCell;
}
Expand Down
12 changes: 3 additions & 9 deletions src/main/java/prioritised_xml_collation/EditGraphTable.java
Original file line number Diff line number Diff line change
Expand Up @@ -77,19 +77,13 @@ else if (segmentTokensA.isEmpty()) {
}
}

/**
 * Builds a segment with an explicitly given type from the token ranges
 * delimited by two cells.
 *
 * @param currentCell cell whose x and y are used as the start indices into tokensA and tokensB
 * @param lastCell    cell whose x and y are used as the end indices into tokensA and tokensB
 * @param type        type assigned to the new segment
 * @return a new Segment holding the two token ranges and the given type
 */
Segment createSegmentOfCellsWithType(Cell currentCell, Cell lastCell, Segment.Type type) {
// NOTE(review): assuming tokensA/tokensB are java.util.List, subList is start-inclusive,
// end-exclusive and returns a view backed by the original list rather than a copy — confirm.
List<XMLToken> segmentTokensA = tokensA.subList(currentCell.x, lastCell.x);
List<XMLToken> segmentTokensB = tokensB.subList(currentCell.y, lastCell.y);
return new Segment(segmentTokensA, segmentTokensB, type);
}

/**
 * Renders the pair of tokens a cell refers to as "tokenA : tokenB".
 *
 * @param cell cell whose x and y (each minus one) index into tokensA and tokensB
 * @return the string form of both tokens separated by " : "
 */
private String cellToString(Cell cell) {
// The token for a cell is looked up at coordinate - 1.
// NOTE(review): a cell with x == 0 or y == 0 would look up index -1 here — confirm callers never pass one.
XMLToken tokenA = tokensA.get(cell.x - 1);
XMLToken tokenB = tokensB.get(cell.y - 1);
return tokenA+" : "+tokenB;
}

public CellType establishTypeOfCell(Cell cell){
CellType establishTypeOfCell(Cell cell){
if (cell.x == 0 && cell.y == 0) {
return CellType.root;
}
Expand All @@ -110,7 +104,7 @@ else if (markupType) {
else return CellType.mix;
}

public CellType determineTypeOfToken(XMLToken tokenA) {
private CellType determineTypeOfToken(XMLToken tokenA) {
boolean punctuationType = (tokenA.content.matches("\\W+"));
boolean contentType = (tokenA.content.matches("\\w+") && tokenA instanceof TextToken);
boolean markupType = (tokenA instanceof ElementToken);
Expand All @@ -126,7 +120,7 @@ else if (markupType) {
else return CellType.mix;
}

public CellType determineUniqueCellType(Cell cell) {
CellType determineUniqueCellType(Cell cell) {
CellType type = establishTypeOfCell(cell);
if (type == CellType.mix) {
if (cell.movedVertical()) {
Expand Down
6 changes: 6 additions & 0 deletions src/main/java/prioritised_xml_collation/Segment.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@ public Segment(Type type) {
this.type = type;
}

/**
 * Copy constructor that takes the tokens of an existing segment but lets the
 * caller choose the type of the new segment.
 *
 * @param original segment whose tokensWa and tokensWb are reused
 * @param type     type of the new segment; the type of {@code original} is not consulted
 */
public Segment(Segment original, Type type) {
// The token fields are assigned by reference, not copied, so both segments share them.
this.tokensWa = original.tokensWa;
this.tokensWb = original.tokensWb;
this.type = type;
}

// Factory method
public static Segment s(Type type) {
return new Segment(type);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ public void testSentence21() throws Exception {
SegmentMatcher m4 = sM(addition).tokensWb("lb", "/lb");
SegmentMatcher m5 = sM(aligned).tokensWa("vrouw");
SegmentMatcher m6 = sM(replacement).tokensWa(",").tokensWb("!");
assertThat(segments, contains(m1, m2, m3, m4, m5, m6));
SegmentMatcher m7 = sM(addition).tokensWb("/s", "s");
SegmentMatcher m8 = sM(replacement).tokensWa("de", "ongewisheid").tokensWb("Die", "dagen", "van", "nerveuze","verwachting");
SegmentMatcher m9 = sM(aligned).tokensWa("voor", "de");
SegmentMatcher m10 = sM(addition).tokensWb("lb", "/lb");
SegmentMatcher m11 = sM(aligned).tokensWa("liefelijke", "toestemming");
SegmentMatcher m12 = sM(replacement).tokensWa("!", ".");
SegmentMatcher m13 = sM(aligned).tokensWa("/s", "/div", "/body", "/text");
assertThat(segments, contains(m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13));
}
}

0 comments on commit 3be3509

Please sign in to comment.