forked from torvalds/linux
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[LIB]: Naive finite state machine based textsearch
A finite state machine consists of n states (struct ts_fsm_token) representing the pattern as a finite automaton. The data is read sequentially on an octet basis. Every state token specifies the number of recurrences and the type of value accepted, which can be either a specific character or a ctype-based set of characters. The available types of recurrence include 1, (0|1), [0 n], and [1 n]. The algorithm distinguishes between strict and non-strict mode, specifying whether the pattern has to start at the first octet. Strict mode is enabled by default and can be disabled by inserting TS_FSM_HEAD_IGNORE as the first token in the chain. The runtime performance of the algorithm should be around O(n); however, while in strict mode the average runtime can be better. Signed-off-by: Thomas Graf <[email protected]> Signed-off-by: David S. Miller <[email protected]>
- Loading branch information
Showing
4 changed files
with
398 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
#ifndef __LINUX_TEXTSEARCH_FSM_H | ||
#define __LINUX_TEXTSEARCH_FSM_H | ||
|
||
#include <linux/types.h> | ||
|
||
enum { /* token types: the kind of value a state accepts */ | ||
TS_FSM_SPECIFIC, /* one specific character (see ts_fsm_token.value) */ | ||
TS_FSM_WILDCARD, /* any character */ | ||
TS_FSM_DIGIT, /* character class: isdigit() */ | ||
TS_FSM_XDIGIT, /* character class: isxdigit() */ | ||
TS_FSM_PRINT, /* character class: isprint() */ | ||
TS_FSM_ALPHA, /* character class: isalpha() */ | ||
TS_FSM_ALNUM, /* character class: isalnum() */ | ||
TS_FSM_ASCII, /* character class: isascii() */ | ||
TS_FSM_CNTRL, /* character class: iscntrl() */ | ||
TS_FSM_GRAPH, /* character class: isgraph() */ | ||
TS_FSM_LOWER, /* character class: islower() */ | ||
TS_FSM_UPPER, /* character class: isupper() */ | ||
TS_FSM_PUNCT, /* character class: ispunct() */ | ||
TS_FSM_SPACE, /* character class: isspace() */ | ||
__TS_FSM_TYPE_MAX, /* sentinel: count of token types, must stay last */ | ||
}; | ||
#define TS_FSM_TYPE_MAX (__TS_FSM_TYPE_MAX - 1) /* highest valid token type (TS_FSM_SPACE) */ | ||
|
||
enum { /* recurrence kinds: how often a token may match */ | ||
TS_FSM_SINGLE, /* exactly 1 occurrence */ | ||
TS_FSM_PERHAPS, /* 0 or 1 occurrences */ | ||
TS_FSM_ANY, /* 0..n occurrences */ | ||
TS_FSM_MULTI, /* 1..n occurrences */ | ||
TS_FSM_HEAD_IGNORE, /* 0..n ignored occurrences at head; per the commit message, inserted as the first token to disable strict mode */ | ||
__TS_FSM_RECUR_MAX, /* sentinel: count of recurrence kinds, must stay last */ | ||
}; | ||
#define TS_FSM_RECUR_MAX (__TS_FSM_RECUR_MAX - 1) /* highest valid recurrence kind (TS_FSM_HEAD_IGNORE) */ | ||
|
||
/** | ||
* struct ts_fsm_token - state machine token (state) | ||
* @type: type of token; presumably one of the TS_FSM_SPECIFIC .. | ||
*        TS_FSM_SPACE constants (at most TS_FSM_TYPE_MAX) | ||
* @recur: number of recurrences; presumably one of the TS_FSM_SINGLE .. | ||
*         TS_FSM_HEAD_IGNORE constants (at most TS_FSM_RECUR_MAX) | ||
* @value: character value for TS_FSM_SPECIFIC | ||
* | ||
* One token describes one state of the pattern; a pattern is a chain | ||
* of such tokens. | ||
*/ | ||
struct ts_fsm_token | ||
{ | ||
__u16 type; | ||
__u8 recur; | ||
__u8 value; | ||
}; | ||
|
||
#endif |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.