-
-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy pathSolution.js
99 lines (84 loc) · 3.03 KB
/
Solution.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
/**
 * A solution is a list of pairs, each pair associating a span of the input
 * with a classification, together with a score and a penalty.
 */
class Solution {
  /**
   * @param {Array<{span: Object, classification: Object}>} [pairs]
   *   pairs making up this solution; an empty list is used when omitted.
   */
  constructor (pairs) {
    this.pair = pairs || []
    this.score = 0.0 // absolute score, assigned by computeScore()
    this.penalty = 0.0 // fraction of the score removed by computeScore()
  }

  /**
   * Create a copy of this solution.
   *
   * The copy owns a new pair array, but the pair objects inside it are shared
   * with the original (the copy is shallow). `score` and `penalty` are not
   * carried over; the copy starts from the constructor defaults.
   *
   * @returns {Solution}
   */
  copy () {
    return new Solution(this.pair.slice(0))
  }

  /**
   * Return true if $this totally covers the target solution,
   * ie. every pair of the target has its span covered (as decided by
   * `span.covers()`) by the span of at least one pair of $this.
   *
   * @param {Solution} solution - the target solution
   * @returns {boolean}
   */
  covers (solution) {
    return solution.pair.every(p => this.pair.some(pp => pp.span.covers(p.span)))
  }

  /**
   * Same as covers() but the covering pair must also carry a classification
   * whose constructor name equals that of the covered pair.
   *
   * @param {Solution} solution - the target solution
   * @returns {boolean}
   */
  coversSameClassification (solution) {
    return solution.pair.every(p => this.pair.some(pp => {
      return (
        pp.classification.constructor.name === p.classification.constructor.name &&
        pp.span.covers(p.span)
      )
    }))
  }

  /**
   * Compute the absolute score of this solution and store it in `this.score`.
   *
   * The score is the coverage-weighted average confidence of the pairs,
   * multiplied by the share of `tokenizer.coverage` that the pairs cover,
   * then reduced by `this.penalty`.
   *
   * @param {{coverage: number}} tokenizer - supplies the total coverage of the input
   */
  computeScore (tokenizer) {
    // iterate pairs to accumulate characters covered and weighted confidence
    const totals = this.pair.reduce((memo, cur) => {
      // use the span range if it does not have children
      let range = cur.span.end - cur.span.start

      // if it does have children, sum their individual ranges so that
      // delimiters such as spaces are not counted in the range
      if (cur.span.graph.length('child')) {
        range = cur.span.graph.findAll('child').reduce((sum, child) => {
          return sum + (child.end - child.start)
        }, 0)
      }

      // total characters covered
      memo.coverage += range

      // confidence of match multiplied by characters covered
      memo.confidence += (cur.classification.confidence * range)

      return memo
    }, { coverage: 0, confidence: 0 })

    // nothing covered (eg. no pairs): avoid 0/0, which would yield NaN
    if (totals.coverage === 0 || tokenizer.coverage === 0) {
      this.score = 0.0
      return
    }

    // absolute score
    // the average character score covered multiplied by the share of the input covered
    this.score = (totals.confidence / totals.coverage) * (totals.coverage / tokenizer.coverage) * (1.0 - this.penalty)
  }

  /**
   * Return a mask of the input for this solution which shows the areas
   * covered by different types of classification.
   *
   * V = venue, N = housenumber, S = street, P = postcode,
   * U = unit / unit_type, A = any other label; uncovered characters are
   * rendered as a space.
   *
   * Spans are treated as half-open ranges [start, end).
   *
   * @param {{span: {body: string}}} tokenizer - supplies the original input text
   * @returns {string} a mask with one character per input character
   */
  mask (tokenizer) {
    // use the original input, mask should be the same length
    const body = tokenizer.span.body
    const mask = Array(body.length).fill(' ')
    const map = {
      'venue': 'V',
      'housenumber': 'N',
      'street': 'S',
      'postcode': 'P',
      'unit': 'U',
      'unit_type': 'U',
      'default': 'A'
    }

    // scan the input letter-by-letter from left-to-right
    for (let i = 0; i < body.length; i++) {
      // find the first pair covering this character (should only be covered by 0 or 1 field)
      // note: span.end is exclusive, so a span ending at $i does not cover it
      const covering = this.pair.find(p => p.span.start <= i && p.span.end > i)
      if (!covering) { continue }

      const label = covering.classification.label
      const code = Object.prototype.hasOwnProperty.call(map, label) ? map[label] : map.default

      for (let j = covering.span.start; j < covering.span.end; j++) {
        mask[j] = code
      }

      // skip forward to avoid scanning the same token again; the loop
      // increment then lands on span.end, the first character after the span
      i = covering.span.end - 1
    }

    return mask.join('')
  }

  // @todo implement this
  // equals(solution) {}
}
// expose the Solution class as the sole export of this module
module.exports = Solution