forked from ccgus/fmdb
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFMDatabaseFTS3Tests.m
157 lines (118 loc) · 5.3 KB
/
FMDatabaseFTS3Tests.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
//
// FMDatabaseFTS3Tests.m
// fmdb
//
// Created by Seaview Software on 8/26/14.
//
//
#import "FMDBTempDBTests.h"
#import "FMDatabase+FTS3.h"
#import "FMTokenizers.h"
/**
 Test case for the FTS3 additions to FMDatabase: the offsets() helper on result sets and
 custom tokenizers registered through FMDatabase.
 Inherits its database fixture (self.db) from FMDBTempDBTests.
 */
@interface FMDatabaseFTS3Tests : FMDBTempDBTests
@end
/**
 A tokenizer used only by these tests. It wraps another tokenizer (the "base" tokenizer),
 forwards all cursor handling to it, and removes a trailing 's' from the tokens the base
 tokenizer produces. This gives the tests a tokenizer whose effect on search results is
 easy to observe.
 */
@interface FMDepluralizerTokenizer : NSObject <FMTokenizerDelegate>
/// Creates a depluralizing tokenizer that post-processes the tokens of `tokenizer`.
+ (instancetype)tokenizerWithBaseTokenizer:(id<FMTokenizerDelegate>)tokenizer;
/// The wrapped tokenizer that performs the actual word splitting.
@property (nonatomic, strong) id<FMTokenizerDelegate> m_baseTokenizer;
@end
// Tokenizers registered with FMDatabase in +populateDatabase:. They are held in file-scope
// variables so that they are not deallocated when that method returns.
// g_depluralizeTok is created on top of g_simpleTok.
static id<FMTokenizerDelegate> g_simpleTok = nil;
static id<FMTokenizerDelegate> g_depluralizeTok = nil;
@implementation FMDatabaseFTS3Tests

/**
 Builds the FTS3 fixture used by the tests and registers the custom tokenizers.

 Presumably invoked by the FMDBTempDBTests base class while it prepares the temporary
 database -- confirm against that class.

 @param db The database to populate.
 */
+ (void)populateDatabase:(FMDatabase *)db
{
    // Fixture for -testOffsets: only the first row contains the word "world".
    NSArray *fixtureStatements = @[
        @"CREATE VIRTUAL TABLE mail USING fts3(subject, body)",
        @"INSERT INTO mail VALUES('hello world', 'This message is a hello world message.')",
        @"INSERT INTO mail VALUES('urgent: serious', 'This mail is seen as a more serious mail')"
    ];

    for (NSString *sql in fixtureStatements) {
        // XCTest assertions are not usable from a class method, so surface fixture
        // failures in the log instead of silently dropping them.
        if (![db executeUpdate:sql]) {
            NSLog(@"FMDatabaseFTS3Tests: fixture statement failed (%@): %@", sql, [db lastErrorMessage]);
        }
    }

    // Create a tokenizer instance that will not be de-allocated when the method finishes.
    g_simpleTok = [[FMSimpleTokenizer alloc] initWithLocale:NULL];
    [FMDatabase registerTokenizer:g_simpleTok withKey:@"testTok"];

    // The depluralizer post-processes the tokens of the simple tokenizer.
    g_depluralizeTok = [FMDepluralizerTokenizer tokenizerWithBaseTokenizer:g_simpleTok];
    [FMDatabase registerTokenizer:g_depluralizeTok withKey:@"depluralize"];
}

- (void)setUp
{
    [super setUp];
    // No per-test setup beyond what the superclass does.
}

- (void)tearDown
{
    // No per-test teardown beyond what the superclass does.
    [super tearDown];
}

/**
 Checks that offsets() reports the position of 'world' in both columns of the matching row.
 */
- (void)testOffsets
{
    FMResultSet *results = [self.db executeQuery:@"SELECT offsets(mail) FROM mail WHERE mail MATCH 'world'"];
    XCTAssertNotNil(results, @"Offsets query failed: %@", [self.db lastErrorMessage]);

    // Without a row there is nothing to inspect; fail loudly instead of passing vacuously.
    if (![results next]) {
        XCTFail(@"Expected a row matching 'world'");
        [results close];
        return;
    }

    FMTextOffsets *offsets = [results offsetsForColumnIndex:0];
    XCTAssertNotNil(offsets, @"No offsets returned for the matching row");

    // Counts block invocations so the test fails if the enumeration never runs.
    __block NSUInteger matchCount = 0;

    [offsets enumerateWithBlock:^(NSInteger columnNumber, NSInteger termNumber, NSRange matchRange) {
        matchCount++;

        if (columnNumber == 0) {
            // subject: 'hello world'
            XCTAssertEqual(termNumber, 0L);
            XCTAssertEqual(matchRange.location, 6UL);
            XCTAssertEqual(matchRange.length, 5UL);
        } else if (columnNumber == 1) {
            // body: 'This message is a hello world message.'
            XCTAssertEqual(termNumber, 0L);
            XCTAssertEqual(matchRange.location, 24UL);
            XCTAssertEqual(matchRange.length, 5UL);
        } else {
            XCTFail(@"Unexpected column number %ld", (long)columnNumber);
        }
    }];

    // 'world' occurs once in the subject and once in the body of the matching row.
    XCTAssertEqual(matchCount, (NSUInteger)2, @"Expected one match per column");

    [results close];
}

/**
 Checks that tables can be created with the registered tokenizers and that the
 depluralizing tokenizer changes which queries match.
 */
- (void)testTokenizer
{
    [self.db installTokenizerModule];

    BOOL ok = [self.db executeUpdate:@"CREATE VIRTUAL TABLE simple USING fts3(tokenize=fmdb testTok)"];
    XCTAssertTrue(ok, @"Failed to create virtual table: %@", [self.db lastErrorMessage]);

    // The FMSimpleTokenizer handles non-ASCII characters well, since it's based on CFStringTokenizer.
    NSString *text = @"I like the band Queensrÿche. They are really great musicians.";

    ok = [self.db executeUpdate:@"INSERT INTO simple VALUES(?)", text];
    XCTAssertTrue(ok, @"Failed to insert data: %@", [self.db lastErrorMessage]);

    FMResultSet *results = [self.db executeQuery:@"SELECT * FROM simple WHERE simple MATCH ?", @"Queensrÿche"];
    XCTAssertTrue([results next], @"Failed to find result");
    [results close];

    ok = [self.db executeUpdate:@"CREATE VIRTUAL TABLE depluralize_t USING fts3(tokenize=fmdb depluralize)"];
    XCTAssertTrue(ok, @"Failed to create virtual table with depluralize tokenizer: %@", [self.db lastErrorMessage]);

    ok = [self.db executeUpdate:@"INSERT INTO depluralize_t VALUES(?)", text];
    XCTAssertTrue(ok, @"Failed to insert data: %@", [self.db lastErrorMessage]);

    // If depluralization is working, searching for 'bands' should still provide a match as 'band' is in the text.
    results = [self.db executeQuery:@"SELECT * FROM depluralize_t WHERE depluralize_t MATCH ?", @"bands"];
    XCTAssertTrue([results next], @"Failed to find result");
    [results close];

    // Demonstrate that depluralization mattered; we should NOT find any results when searching
    // the simple table as it does not use that tokenizer.
    results = [self.db executeQuery:@"SELECT * FROM simple WHERE simple MATCH ?", @"bands"];
    // A nil result set means the query itself failed; without this check the negative
    // assertion below would pass for the wrong reason.
    XCTAssertNotNil(results, @"Query against simple table failed: %@", [self.db lastErrorMessage]);
    XCTAssertFalse([results next], @"Found a result where none should be found");
    [results close];
}

@end
#pragma mark -
@implementation FMDepluralizerTokenizer

/// Convenience constructor; see -initWithBaseTokenizer:.
+ (instancetype)tokenizerWithBaseTokenizer:(id<FMTokenizerDelegate>)tokenizer
{
    return [[self alloc] initWithBaseTokenizer:tokenizer];
}

/**
 Initializes the tokenizer with the tokenizer whose output it post-processes.

 @param tokenizer The base tokenizer. Must not be nil (checked with NSParameterAssert).
 */
- (instancetype)initWithBaseTokenizer:(id<FMTokenizerDelegate>)tokenizer
{
    NSParameterAssert(tokenizer);

    if ((self = [super init])) {
        // Assign the synthesized ivar directly rather than messaging self during init.
        _m_baseTokenizer = tokenizer;
    }
    return self;
}

/// Forwards cursor setup to the base tokenizer.
- (void)openTokenizerCursor:(FMTokenizerCursor *)cursor
{
    [self.m_baseTokenizer openTokenizerCursor:cursor];
}

/**
 Asks the base tokenizer for the next token and, when one was produced, removes a
 trailing 's' from it in place.

 @param cursor The tokenizer cursor; its tokenString is edited in place.
 @return The base tokenizer's return value, unchanged. A NO result is treated as
         "a token was produced".
 */
- (BOOL)nextTokenForCursor:(FMTokenizerCursor *)cursor
{
    BOOL done = [self.m_baseTokenizer nextTokenForCursor:cursor];

    if (!done) {
        // The token is edited in place, so this relies on the base tokenizer storing a
        // mutable string in the cursor.
        NSMutableString *token = (__bridge NSMutableString *)(cursor->tokenString);

        // Leave a lone "s" untouched: stripping it would yield an empty token, which is
        // not a usable index term (FTS3 is expected to reject zero-length tokens).
        if (token.length > 1 && [token hasSuffix:@"s"]) {
            [token deleteCharactersInRange:NSMakeRange(token.length - 1, 1)];
        }
    }

    return done;
}

/// Forwards cursor cleanup to the base tokenizer.
- (void)closeTokenizerCursor:(FMTokenizerCursor *)cursor
{
    [self.m_baseTokenizer closeTokenizerCursor:cursor];
}

@end