9
9
package aw
10
10
11
11
import (
12
+ "log"
12
13
"sort"
13
14
"strings"
15
+ "unicode"
16
+
17
+ "golang.org/x/text/transform"
18
+ "golang.org/x/text/unicode/norm"
14
19
)
15
20
16
21
// Default bonuses and penalties for fuzzy sorting. To customise
@@ -24,8 +29,15 @@ const (
24
29
DefaultLeadingLetterPenalty = - 3.0 // Penalty applied for every letter in string before first match
25
30
DefaultMaxLeadingLetterPenalty = - 9.0 // Maximum penalty for leading letters
26
31
DefaultUnmatchedLetterPenalty = - 1.0 // Penalty for every letter that doesn't match
32
+ DefaultStripDiacritics = true // Strip diacritics from sort keys if query is plain ASCII
27
33
)
28
34
35
+ var stripper transform.Transformer
36
+
37
+ func init () {
38
+ stripper = transform .Chain (norm .NFD , transform .RemoveFunc (isMn ), norm .NFC )
39
+ }
40
+
29
41
// Sortable makes the implementer fuzzy-sortable. It is a superset
30
42
// of sort.Interface (i.e. your struct must also implement sort.Interface).
31
43
type Sortable interface {
@@ -57,6 +69,7 @@ type SortOptions struct {
57
69
LeadingLetterPenalty float64 // Penalty applied for every letter in string before first match
58
70
MaxLeadingLetterPenalty float64 // Maximum penalty for leading letters
59
71
UnmatchedLetterPenalty float64 // Penalty for every letter that doesn't match
72
+ StripDiacritics bool // Strip diacritics from sort keys if query is plain ASCII
60
73
}
61
74
62
75
// NewSortOptions creates a SortOptions object with the default values.
@@ -68,6 +81,7 @@ func NewSortOptions() *SortOptions {
68
81
LeadingLetterPenalty : DefaultLeadingLetterPenalty ,
69
82
MaxLeadingLetterPenalty : DefaultMaxLeadingLetterPenalty ,
70
83
UnmatchedLetterPenalty : DefaultUnmatchedLetterPenalty ,
84
+ StripDiacritics : DefaultStripDiacritics ,
71
85
}
72
86
}
73
87
@@ -77,8 +91,10 @@ type Sorter struct {
77
91
Data Sortable
78
92
// Options contains the bonuses and penalties
79
93
Options * SortOptions
80
- // results stores the results of the fuzzy sort
81
- results []* Result
94
+
95
+ query string // Search query
96
+ stripDiacritics bool // Whether sort keys need folding
97
+ results []* Result // Results of the fuzzy sort
82
98
}
83
99
84
100
// NewSorter returns a new Sorter. If opts is nil, Sorter is initialised
@@ -116,57 +132,31 @@ func (s *Sorter) Swap(i, j int) {
116
132
117
133
// Sort sorts data against query.
118
134
func (s * Sorter ) Sort (query string ) []* Result {
119
- if s .results == nil {
120
- s .results = make ([]* Result , s .Data .Len ())
135
+ s .results = make ([]* Result , s .Data .Len ())
136
+ s .query = query
137
+ if isASCII (query ) && s .Options .StripDiacritics {
138
+ s .stripDiacritics = true
121
139
}
122
-
140
+ // Generate matches for Data, then call sort.Sort()
123
141
for i := 0 ; i < s .Data .Len (); i ++ {
124
142
key := s .Data .SortKey (i )
125
- // s.matches[i] = match
126
- // s.scores[i] = score
127
- s .results [i ] = Match (key , query , s .Options )
143
+ s .results [i ] = s .Match (key )
128
144
}
129
145
sort .Sort (s )
130
146
return s .results
131
147
}
132
148
133
- // Sort sorts data against query. Convenience that creates and
134
- // uses a Sorter with the default settings.
135
- func Sort (data Sortable , query string ) []* Result {
136
- s := NewSorter (data , nil )
137
- return s .Sort (query )
138
- }
139
-
140
- // stringSlice implements sort.Interface for []string.
141
- // It is a helper for SortStrings.
142
- type stringSlice struct {
143
- data []string
144
- }
145
-
146
- // Len etc. implement sort.Interface.
147
- func (s stringSlice ) Len () int { return len (s .data ) }
148
- func (s stringSlice ) Less (i , j int ) bool { return s .data [i ] < s .data [j ] }
149
- func (s stringSlice ) Swap (i , j int ) { s .data [i ], s .data [j ] = s .data [j ], s .data [i ] }
150
-
151
- // SortKey implements Sortable.
152
- func (s stringSlice ) SortKey (i int ) string { return s .data [i ] }
153
-
154
- // Sort is a convenience method.
155
- func (s stringSlice ) Sort (query string ) []* Result { return Sort (s , query ) }
156
-
157
- // SortStrings is a convenience function.
158
- func SortStrings (data []string , query string ) []* Result {
159
- s := stringSlice {data }
160
- return s .Sort (query )
161
- }
162
-
163
149
// Match scores str against query using fuzzy matching and the specified sort options.
164
- func Match (str , query string , o * SortOptions ) * Result {
150
+ func (s * Sorter ) Match (str string ) * Result {
151
+ if s .stripDiacritics {
152
+ str = stripDiacritics (str )
153
+ }
154
+
165
155
var (
166
156
match = false
167
157
score = 0.0
168
158
uStr = []rune (str )
169
- uQuery = []rune (query )
159
+ uQuery = []rune (s . query )
170
160
strLen = len (uStr )
171
161
queryLen = len (uQuery )
172
162
)
@@ -232,26 +222,26 @@ func Match(str, query string, o *SortOptions) *Result {
232
222
233
223
// Apply penalty for letters before first match
234
224
if queryIdx == 0 {
235
- penalty = float64 (strIdx ) * o .LeadingLetterPenalty
236
- if penalty <= o .MaxLeadingLetterPenalty {
237
- penalty = o .MaxLeadingLetterPenalty
225
+ penalty = float64 (strIdx ) * s . Options .LeadingLetterPenalty
226
+ if penalty <= s . Options .MaxLeadingLetterPenalty {
227
+ penalty = s . Options .MaxLeadingLetterPenalty
238
228
}
239
229
score += penalty
240
230
}
241
231
242
232
// Apply bonus for consecutive matches
243
233
if prevMatched {
244
- newScore += o .AdjacencyBonus
234
+ newScore += s . Options .AdjacencyBonus
245
235
}
246
236
247
237
// Apply bonus for match after separator
248
238
if prevSeparator {
249
- newScore += o .SeparatorBonus
239
+ newScore += s . Options .SeparatorBonus
250
240
}
251
241
252
242
// Apply bonus across camel case boundaries
253
243
if prevLower && strChar == strUpper && strLower != strUpper {
254
- newScore += o .CamelBonus
244
+ newScore += s . Options .CamelBonus
255
245
}
256
246
257
247
// Update query index if next query letter was matched
@@ -264,7 +254,7 @@ func Match(str, query string, o *SortOptions) *Result {
264
254
if newScore >= bestLetterScore {
265
255
266
256
if bestLetter != "" {
267
- score += o .UnmatchedLetterPenalty
257
+ score += s . Options .UnmatchedLetterPenalty
268
258
}
269
259
270
260
bestLetter = strChar
@@ -274,7 +264,7 @@ func Match(str, query string, o *SortOptions) *Result {
274
264
275
265
prevMatched = true
276
266
} else {
277
- score += o .UnmatchedLetterPenalty
267
+ score += s . Options .UnmatchedLetterPenalty
278
268
prevMatched = false
279
269
}
280
270
@@ -303,5 +293,61 @@ func Match(str, query string, o *SortOptions) *Result {
303
293
}
304
294
305
295
// log.Printf("query=%#v, str=%#v", match=%v, score=%v, query, str, match, score)
306
- return & Result {match , query , score , str }
296
+ return & Result {match , s .query , score , str }
297
+ }
298
+
299
+ // Sort sorts data against query. Convenience that creates and
300
+ // uses a Sorter with the default settings.
301
+ func Sort (data Sortable , query string ) []* Result {
302
+ s := NewSorter (data , nil )
303
+ return s .Sort (query )
304
+ }
305
+
306
// stringSlice wraps a []string so that it satisfies sort.Interface and
// provides SortKey. It is the helper type behind SortStrings.
type stringSlice struct {
	data []string
}

// Len returns the number of strings held.
func (s stringSlice) Len() int {
	return len(s.data)
}

// Less reports whether the string at index i sorts before the one at j.
func (s stringSlice) Less(i, j int) bool {
	return s.data[j] > s.data[i]
}

// Swap exchanges the strings at indices i and j.
func (s stringSlice) Swap(i, j int) {
	tmp := s.data[i]
	s.data[i] = s.data[j]
	s.data[j] = tmp
}

// SortKey implements Sortable: the sort key of an item is the string itself.
func (s stringSlice) SortKey(i int) string {
	return s.data[i]
}
319
+
320
+ // Sort is a convenience method.
321
+ func (s stringSlice ) Sort (query string ) []* Result { return Sort (s , query ) }
322
+
323
+ // SortStrings is a convenience function.
324
+ func SortStrings (data []string , query string ) []* Result {
325
+ s := stringSlice {data }
326
+ return s .Sort (query )
327
+ }
328
+
329
+ // Match scores str against query using fuzzy matching and the specified sort options.
330
+ // WARNING: Match creates a new Sorter for every call. Don't use this on
331
+ // large datasets.
332
+ func Match (str , query string , o * SortOptions ) * Result {
333
+ data := stringSlice {[]string {str }}
334
+ s := NewSorter (data , o )
335
+ return s .Sort (query )[0 ]
336
+ }
337
+
338
// isMn reports whether r belongs to Unicode category Mn (non-spacing
// mark), e.g. a combining accent.
func isMn(r rune) bool {
	return unicode.In(r, unicode.Mn)
}
341
+
342
+ func stripDiacritics (s string ) string {
343
+ stripped , _ , err := transform .String (stripper , s )
344
+ if err != nil {
345
+ log .Printf ("Couldn't strip diacritics from `%s`: %s" , s , err )
346
+ return s
347
+ }
348
+ return stripped
349
+ }
350
+
351
// isASCII reports whether s consists solely of ASCII characters, i.e.
// every byte is at most unicode.MaxASCII (0x7F). The empty string counts
// as ASCII.
//
// A direct byte check is used rather than comparing s with its
// diacritic-stripped form: that comparison is also true for non-ASCII
// text that simply carries no combining marks (e.g. "日本語" or "straße").
func isASCII(s string) bool {
	for i := 0; i < len(s); i++ {
		if s[i] > unicode.MaxASCII {
			return false
		}
	}
	return true
}
0 commit comments