forked from swiftlang/swift
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWordBreaking.swift
66 lines (53 loc) · 1.82 KB
/
WordBreaking.swift
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
//===----------------------------------------------------------------------===//
//
// This source file is part of the Swift.org open source project
//
// Copyright (c) 2022 Apple Inc. and the Swift project authors
// Licensed under Apache License v2.0 with Runtime Library Exception
//
// See https://swift.org/LICENSE.txt for license information
// See https://swift.org/CONTRIBUTORS.txt for the list of Swift project authors
//
//===----------------------------------------------------------------------===//
// Normalization tests are currently only available on Darwin, awaiting a
// sensible file API...
#if _runtime(_ObjC)
import Foundation
/// Parses the text of a `WordBreakTest.txt`-style file and appends one entry
/// per test line to `result`.
///
/// Only lines that begin with `÷` are treated as tests; every other line is
/// skipped. On a test line, everything from the first `#` onwards is ignored.
/// The remaining text is a whitespace-separated sequence that alternates
/// between break operators (`÷` marks a break, anything else is treated as
/// "no break") and hexadecimal scalar values.
///
/// Traps if a scalar field is not valid hexadecimal or does not name a valid
/// Unicode scalar value.
///
/// - Parameters:
///   - data: The full text of the test file.
///   - result: Receives `(string, words)` pairs. `string` is built from every
///     scalar on the line, in order; `words` is that same sequence of scalars
///     divided at each `÷` operator.
func parseWordBreakTests(
  _ data: String,
  into result: inout [(String, [String])]
) {
  // Split on any newline Character rather than just "\n": Swift treats "\r\n"
  // as a single Character, so a CRLF file would otherwise never be split.
  for line in data.split(whereSeparator: { $0.isNewline }) {
    // Only look at actual tests.
    guard line.hasPrefix("÷") else {
      continue
    }

    // Split on any whitespace, not only " ". If a tab (or other whitespace)
    // sits between the final operator and the `#` comment, a space-only split
    // would yield a token like "÷\t", which would not compare equal to "÷"
    // and the final word would then be discarded by `removeLast()` below.
    //
    // The force unwrap is safe: the line starts with "÷", so there is always
    // a non-empty piece before any "#".
    let components = line.split(separator: "#").first!
      .split(whereSeparator: { $0.isWhitespace })

    var string = ""
    var words: [String] = [""]

    // Index 0 is the leading break operator, which carries no information.
    for i in components.indices.dropFirst() {
      if !i.isMultiple(of: 2) {
        // Odd indices hold a scalar value written in hexadecimal.
        let scalar = Unicode.Scalar(UInt32(components[i], radix: 16)!)!
        string.unicodeScalars.append(scalar)
        words[words.count - 1].unicodeScalars.append(scalar)
      } else if components[i] == "÷" {
        // Even indices hold an operator. A break starts a new word; `×` is
        // not a break, so the current word keeps growing.
        words.append("")
      }
    }

    // The trailing `÷` on a line opens a word that never receives any
    // scalars; drop that placeholder.
    words.removeLast()

    result.append((string, words))
  }
}
/// The word-break test cases parsed from the `WordBreakTest.txt` input file.
///
/// Each element pairs a test string with the list of words that string is
/// expected to be divided into. The file is read and parsed once, when this
/// value is first initialized.
public let wordBreakTests: [(String, [String])] = {
  var parsedTests = [(String, [String])]()
  parseWordBreakTests(readInputFile("WordBreakTest.txt"), into: &parsedTests)
  return parsedTests
}()
#endif