From cef4da6d1b39440f9c889d8d28fbb1448906bc29 Mon Sep 17 00:00:00 2001
From: Ivan Shalganov
Date: Fri, 5 Apr 2024 17:46:55 +0200
Subject: [PATCH] Wording (#17)

* issue 11: fix number parser
---
 parser.go    |  2 +-
 readme.md    | 12 ++++++------
 stream.go    | 43 ++++++++++++++++++++++---------------------
 tokenizer.go | 22 +++++++++++-----------
 4 files changed, 40 insertions(+), 39 deletions(-)

diff --git a/parser.go b/parser.go
index fe81844..3b6fef7 100644
--- a/parser.go
+++ b/parser.go
@@ -151,7 +151,7 @@ func (p *parsing) checkPoint() bool {
 // parse bytes (p.str) to tokens and append them to the end of the stream of tokens.
 func (p *parsing) parse() {
 	if len(p.str) == 0 {
-		if p.reader == nil || p.loadChunk() == 0 { // if it's not infinite stream or this is the end of stream
+		if p.reader == nil || p.loadChunk() == 0 { // if it's not an infinite stream, or this is the end of the stream
 			return
 		}
 	}
diff --git a/readme.md b/readme.md
index 69f8adb..b4d9c76 100644
--- a/readme.md
+++ b/readme.md
@@ -20,7 +20,7 @@ Main features:
 * Autodetect white space symbols.
 * Parse any data syntax (xml, [json](https://github.com/bzick/tokenizer/blob/master/example_test.go), yaml), any programming language.
 * Single pass through the data.
-* Parses [infinite incoming data](#parse-buffer) and don't panic.
+* Parses infinite incoming data and doesn't panic.
 
 Use cases:
 - Parsing html, xml, [json](./example_test.go), yaml and other text formats.
@@ -46,7 +46,7 @@ parser.DefineTokens(TDot, []string{"."})
 parser.DefineTokens(TMath, []string{"+", "-", "/", "*", "%"})
 parser.DefineStringToken(`"`, `"`).SetEscapeSymbol(tokenizer.BackSlash)
 
-// create tokens stream
+// create a token stream
 stream := parser.ParseString(`user_id = 119 and modified > "2020-01-01 00:00:00" or amount >= 122.34`)
 defer stream.Close()
 
@@ -96,7 +96,7 @@ parser.AllowKeywordUnderscore()
 // ... and other configuration code
 ```
 
-There is two ways to **parse string or slice**:
+There are two ways to **parse a string or a slice**:
 - `parser.ParseString(str)`
 - `parser.ParseBytes(slice)`
@@ -192,8 +192,8 @@ stream: [
 
 Keyword may be modified with `tokenizer.AllowKeywordSymbols(majorSymbols, minorSymbols)`
 
-- Major symbols (any quantity in the keyword) can be at the beginning, in the middle and in the end of the keyword.
-- Minor symbols (any quantity in the keyword) can be in the middle and in the end of the keyword.
+- Major symbols (any quantity in the keyword) can appear at the beginning, in the middle, and at the end of the keyword.
+- Minor symbols (any quantity in the keyword) can appear in the middle and at the end of the keyword.
 
 ```go
 parser.AllowKeywordSymbols(tokenizer.Underscore, tokenizer.Numbers)
@@ -260,7 +260,7 @@ fmt.Print("Token is %d", stream.CurrentToken().GetFloat()) // Token is 130
 
 ### Framed string
 
 Strings that are framed with tokens are called framed strings. An obvious example is a quoted string like `"one two"`.
-There quotes — edge tokens.
+Here the quotes are the edge tokens.
 
 You can create and customize framed strings through `tokenizer.DefineStringToken()`:
diff --git a/stream.go b/stream.go
index 2c391c4..6bc55c9 100644
--- a/stream.go
+++ b/stream.go
@@ -6,21 +6,21 @@ import (
 )
 
 // Stream iterator via parsed tokens.
-// If data reads from an infinite buffer then the iterator will be read data from reader chunk-by-chunk.
+// If data is read from an infinite buffer, the iterator reads it from the reader chunk-by-chunk.
 type Stream struct {
 	t *Tokenizer
 	// count of tokens in the stream
 	len int
-	// pointer to the node of double-linked list of tokens
+	// pointer to the current node of the doubly linked list of tokens
 	current *Token
-	// pointer of valid token if current moved to out of bounds (out of end list)
+	// pointer to the last valid token when current moves out of bounds (past the end of the list)
 	prev *Token
-	// pointer of valid token if current moved to out of bounds (out of begin list)
+	// pointer to the first valid token when current moves out of bounds (before the beginning of the list)
 	next *Token
-	// pointer to head of list
+	// pointer to the head of the list
 	head *Token
-	// last whitespaces before end of source
+	// last whitespace before the end of the source
 	wsTail []byte
 	// count of parsed bytes
 	parsed int
@@ -92,7 +92,7 @@ func (s *Stream) GetParsedLength() int {
 	}
 }
 
-// GoNext moves stream pointer to the next token.
+// GoNext moves the stream pointer to the next token.
 // If there is no token, it initiates the parsing of the next chunk of data.
 // If there is no data, the pointer will point to the TokenUndef token.
 func (s *Stream) GoNext() *Stream {
@@ -119,7 +119,7 @@ func (s *Stream) GoNext() *Stream {
 	return s
 }
 
-// GoPrev moves pointer of stream to the next token.
+// GoPrev moves the stream pointer to the previous token.
 // The number of possible calls is limited if you specified SetHistorySize.
 // If the beginning of the stream or the end of the history is reached, the pointer will point to the TokenUndef token.
 func (s *Stream) GoPrev() *Stream {
@@ -135,8 +135,7 @@ func (s *Stream) GoPrev() *Stream {
 	return s
 }
 
-// GoTo moves pointer of stream to specific token.
-// The search is done by token ID.
+// GoTo moves the stream pointer to the token with the given ID.
 func (s *Stream) GoTo(id int) *Stream {
 	if s.current == undefToken {
 		if s.prev != nil && id <= s.prev.id { // we are at the end of the stream
@@ -229,22 +228,22 @@ func (s *Stream) IsAnyNextSequence(keys ...[]TokenKey) bool {
 	return result
 }
 
-// HeadToken returns pointer to head-token.
-// Head-token may be changed by parser if history size is enabled.
+// HeadToken returns the pointer to the head token.
+// The parser may change the head token if history size is enabled.
 func (s *Stream) HeadToken() *Token {
 	return s.head
 }
 
 // CurrentToken always returns the token.
-// If the pointer is not valid (see IsValid) CurrentToken will be returns TokenUndef token.
+// If the pointer is not valid (see IsValid), CurrentToken returns the TokenUndef token.
 // Do not save result (Token) into variables — current token may be changed at any time.
 func (s *Stream) CurrentToken() *Token {
 	return s.current
 }
 
 // PrevToken returns previous token from the stream.
-// If previous token doesn't exist method return TypeUndef token.
-// Do not save result (Token) into variables — previous token may be changed at any time.
+// If the previous token doesn't exist, the method returns the TokenUndef token.
+// Do not save the result (Token) into variables; the previous token may be changed at any time.
 func (s *Stream) PrevToken() *Token {
 	if s.current.prev != nil {
 		return s.current.prev
@@ -253,8 +252,8 @@
 }
 
 // NextToken returns next token from the stream.
-// If next token doesn't exist method return TypeUndef token.
-// Do not save result (Token) into variables — next token may be changed at any time.
+// If the next token doesn't exist, the method returns the TokenUndef token.
+// Do not save the result (Token) into variables; the next token may be changed at any time.
 func (s *Stream) NextToken() *Token {
 	if s.current.next != nil {
 		return s.current.next
@@ -262,8 +261,9 @@
 	return undefToken
 }
 
-// GoNextIfNextIs moves stream pointer to the next token if the next token has specific token keys.
-// If keys matched pointer will be updated and method returned true. Otherwise, returned false.
+// GoNextIfNextIs moves the stream pointer to the next token if the next token has one of the specified keys.
+// If the keys match, the pointer is updated and the method returns true.
+// Otherwise, it returns false.
 func (s *Stream) GoNextIfNextIs(key TokenKey, otherKeys ...TokenKey) bool {
 	if s.NextToken().Is(key, otherKeys...) {
 		s.GoNext()
@@ -329,8 +329,9 @@ func (s *Stream) GetSnippet(before, after int) []Token {
 }
 
 // GetSnippetAsString returns the tokens before and after the current token as a string.
-// `maxStringLength` specify max length of each token string. Zero — unlimited token string length.
-// If string greater than maxLength method removes some runes in the middle of the string.
+// `maxStringLength` specifies the max length of each token string.
+// Zero means unlimited token string length.
+// If a string is longer than maxStringLength, the method removes some runes from the middle of the string.
 func (s *Stream) GetSnippetAsString(before, after, maxStringLength int) string {
 	segments := s.GetSnippet(before, after)
 	str := make([]string, len(segments))
diff --git a/tokenizer.go b/tokenizer.go
index 9e1be74..2ed7a91 100644
--- a/tokenizer.go
+++ b/tokenizer.go
@@ -14,23 +14,23 @@ type TokenKey int
 const (
 	// TokenUnknown means that this token is neither an embedded token nor a user-defined one.
 	TokenUnknown TokenKey = -6
-	// TokenStringFragment means that this is only fragment of quoted string with injections
+	// TokenStringFragment means that this token is a fragment of a quoted string with injections.
 	// For example, "one {{ two }} three", where "one " and " three" — TokenStringFragment
 	TokenStringFragment TokenKey = -5
-	// TokenString means than this token is quoted string.
+	// TokenString means that this token is a quoted string.
 	// For example, "one two"
 	TokenString TokenKey = -4
-	// TokenFloat means that this token is float number with point and/or exponent.
+	// TokenFloat means that this token is a float number with a point and/or an exponent.
 	// For example, 1.2, 1e6, 1E-6
 	TokenFloat TokenKey = -3
-	// TokenInteger means that this token is integer number.
+	// TokenInteger means that this token is an integer number.
 	// For example, 3, 49983
 	TokenInteger TokenKey = -2
 	// TokenKeyword means that this token is a word.
 	// For example, one, two, три
 	TokenKeyword TokenKey = -1
 	// TokenUndef means that the token doesn't exist.
-	// Then stream out of range of token list any getter or checker will return TokenUndef token.
+	// When the stream is out of the range of the token list, any getter or checker will return the TokenUndef token.
 	TokenUndef TokenKey = 0
 )
@@ -87,7 +87,7 @@ type StringSettings struct {
 
 // AddInjection configures an injection into a string.
 // An injection is a parsable fragment of a framed (quoted) string.
-// Often used for parsing of placeholders or template's expressions in the framed string.
+// Often used to parse placeholders or template expressions in a framed string.
 func (q *StringSettings) AddInjection(startTokenKey, endTokenKey TokenKey) *StringSettings {
 	q.Injects = append(q.Injects, QuoteInjectSettings{StartKey: startTokenKey, EndKey: endTokenKey})
 	return q
@@ -95,7 +95,7 @@ func (q *StringSettings) AddInjection(startTokenKey, endTokenKey TokenKey) *Stri
 
 // SetEscapeSymbol sets the escape symbol for a framed (quoted) string.
 // The escape symbol allows ignoring the close token of a framed string.
-// Also escape symbol allows using special symbols in the frame strings, like \n, \t.
+// Also, the escape symbol allows using special symbols in framed strings, like \n and \t.
 func (q *StringSettings) SetEscapeSymbol(symbol byte) *StringSettings {
 	q.EscapeSymbol = symbol
 	return q
@@ -110,7 +110,7 @@ func (q *StringSettings) SetSpecialSymbols(special map[byte]byte) *StringSetting
 	return q
 }
 
-// AddSpecialStrings set mapping of all escapable string for escape symbol, like \n, \t, \r.
+// AddSpecialStrings sets the mapping of all escapable strings for the escape symbol, like \n, \t, \r.
 func (q *StringSettings) AddSpecialStrings(special []string) *StringSettings {
 	for _, s := range special {
 		q.SpecSymbols = append(q.SpecSymbols, []byte(s))
 	}
 	return q
 }
 
-// Tokenizer stores all tokens configuration and behaviors.
+// Tokenizer stores all token configuration and behavior.
 type Tokenizer struct {
 	stopOnUnknown         bool
 	allowNumberUnderscore bool
@@ -199,7 +199,7 @@ func (t *Tokenizer) AllowNumberUnderscore() *Tokenizer {
 }
 
 // DefineTokens adds custom tokens.
-// There `key` unique is identifier of `tokens`, `tokens` — slice of string of tokens.
+// The `key` is the unique identifier of `tokens`; `tokens` is a slice of token strings.
 // If a key already exists, tokens will be rewritten.
 func (t *Tokenizer) DefineTokens(key TokenKey, tokens []string) *Tokenizer {
 	var tks []*tokenRef
@@ -227,7 +227,7 @@ func (t *Tokenizer) DefineTokens(key TokenKey, tokens []string) *Tokenizer {
 }
 
 // DefineStringToken defines a token string.
-// For example, a piece of data surrounded by quotes: "string in quotes" or 'string on sigle quotes'.
+// For example, a piece of data surrounded by quotes: "string in quotes" or 'string in single quotes'.
 // Arguments startToken and endToken define the open and close "quotes".
 //
 // - `t.DefineStringToken("`", "`")` - parse string "one `two three`" will be parsed as
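The stream.go comment in this patch describes reading from an infinite buffer chunk-by-chunk. A minimal sketch of that mode, assuming a `ParseStream(io.Reader, bufferSize)` constructor and a `ValueString()` token getter from the library's public API; neither appears in this patch:

```go
package main

import (
	"fmt"
	"os"

	"github.com/bzick/tokenizer"
)

func main() {
	parser := tokenizer.New()

	// Read stdin chunk-by-chunk (4 KB buffer, an assumed argument) instead of
	// loading the whole input into memory; the stream requests the next chunk
	// from the reader only when the already-parsed tokens run out.
	stream := parser.ParseStream(os.Stdin, 4096)
	defer stream.Close()

	for stream.IsValid() {
		fmt.Println(stream.CurrentToken().ValueString()) // ValueString is an assumed getter
		stream.GoNext() // may trigger parsing of the next chunk
	}
}
```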
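The CurrentToken, PrevToken, and NextToken comments in stream.go all warn against saving the returned *Token, because the parser may recycle those nodes as the stream advances. A sketch of the safe pattern, copying values before moving on (`ValueString` is an assumed getter name):

```go
parser := tokenizer.New()
stream := parser.ParseString(`user_id = 119`)
defer stream.Close()

var values []string
for stream.IsValid() {
	// Copy the value immediately; the *Token node may be reused after GoNext.
	values = append(values, stream.CurrentToken().ValueString())
	stream.GoNext()
}
```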
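GetSnippetAsString, whose doc comment this patch reworks, is convenient for error reporting. A sketch using the signature shown in the hunk (`reportError` is a hypothetical helper):

```go
// reportError prints two tokens of context on each side of the current
// position, truncating each token's text to at most 10 runes.
func reportError(stream *tokenizer.Stream) {
	fmt.Printf("unexpected token near: %s\n", stream.GetSnippetAsString(2, 2, 10))
}
```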
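AddInjection, documented in the tokenizer.go hunk, wires user-defined open and close tokens into a framed string so that placeholders such as `{{ ... }}` are tokenized instead of remaining raw TokenStringFragment text. A sketch with hypothetical token keys:

```go
// Hypothetical user-defined keys for the {{ and }} markers.
const (
	TokenCurlyOpen tokenizer.TokenKey = iota + 1
	TokenCurlyClose
)

func newTemplateParser() *tokenizer.Tokenizer {
	parser := tokenizer.New()
	parser.DefineTokens(TokenCurlyOpen, []string{"{{"})
	parser.DefineTokens(TokenCurlyClose, []string{"}}"})
	// Everything between {{ and }} inside double-quoted strings is parsed
	// as regular tokens; the pieces around it become TokenStringFragment.
	parser.DefineStringToken(`"`, `"`).
		SetEscapeSymbol(tokenizer.BackSlash).
		AddInjection(TokenCurlyOpen, TokenCurlyClose)
	return parser
}
```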
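Putting DefineTokens, DefineStringToken, and SetEscapeSymbol together, a minimal end-to-end sketch in the spirit of the readme example (the `TDot` and `TMath` key values, plus the `Key()` and `ValueString()` getters, are assumptions):

```go
package main

import (
	"fmt"

	"github.com/bzick/tokenizer"
)

// Token keys as in the readme excerpt; the numeric values are illustrative.
const (
	TDot tokenizer.TokenKey = iota + 1
	TMath
)

func main() {
	parser := tokenizer.New()
	parser.DefineTokens(TDot, []string{"."})
	parser.DefineTokens(TMath, []string{"+", "-", "/", "*", "%"})
	parser.DefineStringToken(`"`, `"`).SetEscapeSymbol(tokenizer.BackSlash)

	stream := parser.ParseString(`price * 2 + 10.5`)
	defer stream.Close()
	for stream.IsValid() {
		// Key reports the kind: TokenKeyword, TMath, TokenInteger, TokenFloat, ...
		fmt.Printf("%d: %s\n", stream.CurrentToken().Key(), stream.CurrentToken().ValueString())
		stream.GoNext()
	}
}
```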