-
Notifications
You must be signed in to change notification settings - Fork 95
/
Copy pathstring.go
769 lines (672 loc) · 20.9 KB
/
string.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
// Copyright 2018 The go-python Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// String objects
//
// Note that string objects in Python are arrays of unicode
// characters. However we are using the native Go string which is
// UTF-8 encoded. This makes very little difference most of the time,
// but care is needed when indexing, slicing or iterating through
// strings.
package py
import (
"bytes"
"fmt"
"strconv"
"strings"
"unicode"
"unicode/utf8"
)
// String is the Python str object. It is stored as a native Go string,
// i.e. UTF-8 encoded bytes rather than an array of unicode characters.
type String string
// StringType is the type object for str. It is created from ObjectType
// with the name "str", the docstring below, and StrNew.
var StringType = ObjectType.NewType("str",
`str(object='') -> str
str(bytes_or_buffer[, encoding[, errors]]) -> str
Create a new string object from the given object. If encoding or
errors is specified, then the object must expose a data buffer
that will be decoded using the given encoding and error handler.
Otherwise, returns the result of object.__str__() (if defined)
or repr(object).
encoding defaults to sys.getdefaultencoding().
errors defaults to 'strict'.`, StrNew, nil)
// StringEscape returns an escaped rendering of a.
//
// When ascii is false the result is wrapped in quotes: single quotes by
// default, double quotes when the text contains a single quote but no
// double quote. Backslashes and the chosen quote character are
// backslash-escaped, and printable characters are emitted unchanged.
//
// When ascii is true no quotes are added, characters in the range
// 0x20..0xFF are copied through as-is, and everything above 0xFF is
// written as a \u or \U escape.
//
// In both modes control characters below 0x20 become \t, \n, \r or \xNN.
func StringEscape(a String, ascii bool) string {
	src := string(a)
	var buf bytes.Buffer

	delim := '\''
	if strings.ContainsRune(src, '\'') && !strings.ContainsRune(src, '"') {
		delim = '"'
	}
	if !ascii {
		buf.WriteRune(delim)
	}

	for _, r := range src {
		// Control characters: short escapes where they exist, hex otherwise.
		if r < 0x20 {
			switch r {
			case '\t':
				buf.WriteString(`\t`)
			case '\n':
				buf.WriteString(`\n`)
			case '\r':
				buf.WriteString(`\r`)
			default:
				fmt.Fprintf(&buf, `\x%02x`, r)
			}
			continue
		}

		// Plain ASCII in quoted mode: only backslash and the active
		// quote character need a leading backslash.
		if !ascii && r < 0x7F {
			if r == '\\' || r == delim {
				buf.WriteRune('\\')
			}
			buf.WriteRune(r)
			continue
		}

		// Everything else is grouped by the width of its escape sequence.
		switch {
		case r < 0x100:
			if !ascii && !strconv.IsPrint(r) {
				fmt.Fprintf(&buf, "\\x%02x", r)
			} else {
				buf.WriteRune(r)
			}
		case r < 0x10000:
			if ascii || !strconv.IsPrint(r) {
				fmt.Fprintf(&buf, "\\u%04x", r)
			} else {
				buf.WriteRune(r)
			}
		default:
			if ascii || !strconv.IsPrint(r) {
				fmt.Fprintf(&buf, "\\U%08x", r)
			} else {
				buf.WriteRune(r)
			}
		}
	}

	if !ascii {
		buf.WriteRune(delim)
	}
	return buf.String()
}
// fieldsN splits s on runs of whitespace like strings.Fields, but stops
// splitting after the first n fields. Whatever follows those n fields
// (minus the whitespace separating it from them) is returned verbatim as
// one final element, including any interior or trailing whitespace.
//
// A negative n means no limit. The result is never nil.
func fieldsN(s string, n int) []string {
	fields := []string{}
	var word []rune
	for _, r := range s {
		space := unicode.IsSpace(r)
		switch {
		case n < 0 || len(fields) < n:
			// Still splitting: whitespace terminates the current word and
			// consecutive whitespace collapses.
			if !space {
				word = append(word, r)
			} else if len(word) > 0 {
				fields = append(fields, string(word))
				word = word[:0]
			}
		default:
			// Limit reached: skip the whitespace that precedes the
			// remainder, then copy everything, whitespace included.
			if len(word) > 0 || !space {
				word = append(word, r)
			}
		}
	}
	if len(word) > 0 {
		fields = append(fields, string(word))
	}
	return fields
}
// init installs the str methods into StringType.Dict.
//
// endswith and startswith are implemented inline; the remaining entries
// delegate to the corresponding String methods.
func init() {
	// endswith accepts a str or a tuple of str as its first argument.
	// NOTE: the optional start/end arguments advertised in the docstring are
	// not applied here, and non-str tuple members are skipped.
	StringType.Dict["endswith"] = MustNewMethod("endswith", func(self Object, args Tuple) (Object, error) {
		selfStr := string(self.(String))
		suffix := []string{}
		if len(args) > 0 {
			if s, ok := args[0].(String); ok {
				suffix = append(suffix, string(s))
			} else if s, ok := args[0].(Tuple); ok {
				for _, t := range s {
					if v, ok := t.(String); ok {
						suffix = append(suffix, string(v))
					}
				}
			} else {
				return nil, ExceptionNewf(TypeError, "endswith first arg must be str, unicode, or tuple, not %s", args[0].Type())
			}
		} else {
			return nil, ExceptionNewf(TypeError, "endswith() takes at least 1 argument (0 given)")
		}
		for _, s := range suffix {
			if strings.HasSuffix(selfStr, s) {
				return Bool(true), nil
			}
		}
		return Bool(false), nil
	}, 0, "endswith(suffix[, start[, end]]) -> bool")
	StringType.Dict["find"] = MustNewMethod("find", func(self Object, args Tuple) (Object, error) {
		return self.(String).find(args)
	}, 0, `find(...)
S.find(sub[, start[, end]]) -> int
Return the lowest index in S where substring sub is found,
such that sub is contained within S[start:end]. Optional
arguments start and end are interpreted as in slice notation.
Return -1 on failure.`)
	StringType.Dict["replace"] = MustNewMethod("replace", func(self Object, args Tuple) (Object, error) {
		return self.(String).Replace(args)
	}, 0, `replace(self, old, new, count=-1) -> return a copy with all occurrences of substring old replaced by new.
count
Maximum number of occurrences to replace.
-1 (the default value) means replace all occurrences.
If the optional argument count is given, only the first count occurrences are
replaced.`)
	StringType.Dict["split"] = MustNewMethod("split", func(self Object, args Tuple, kwargs StringDict) (Object, error) {
		return self.(String).Split(args, kwargs)
	}, 0, "split(sub) -> split string with sub.")
	// startswith accepts a str or a tuple of str, plus an optional integer
	// start offset. The end argument is not applied, and non-str tuple
	// members are skipped.
	StringType.Dict["startswith"] = MustNewMethod("startswith", func(self Object, args Tuple) (Object, error) {
		selfStr := string(self.(String))
		prefix := []string{}
		if len(args) > 0 {
			if s, ok := args[0].(String); ok {
				prefix = append(prefix, string(s))
			} else if s, ok := args[0].(Tuple); ok {
				for _, t := range s {
					if v, ok := t.(String); ok {
						prefix = append(prefix, string(v))
					}
				}
			} else {
				return nil, ExceptionNewf(TypeError, "startswith first arg must be str, unicode, or tuple, not %s", args[0].Type())
			}
		} else {
			return nil, ExceptionNewf(TypeError, "startswith() takes at least 1 argument (0 given)")
		}
		if len(args) > 1 {
			if start, ok := args[1].(Int); ok {
				// Treat start as a character offset with Python slice
				// semantics. Slicing by the raw value as a byte index
				// panics for negative or out-of-range offsets and lands
				// mid-character on non-ASCII text.
				size := Int(utf8.RuneCountInString(selfStr))
				if start < 0 {
					start += size
					if start < 0 {
						start = 0
					}
				}
				if start > size {
					// Nothing can match past the end, not even "".
					return Bool(false), nil
				}
				off := 0
				for i := Int(0); i < start; i++ {
					_, width := utf8.DecodeRuneInString(selfStr[off:])
					off += width
				}
				selfStr = selfStr[off:]
			}
		}
		for _, s := range prefix {
			if strings.HasPrefix(selfStr, s) {
				return Bool(true), nil
			}
		}
		return Bool(false), nil
	}, 0, "startswith(prefix[, start[, end]]) -> bool")
	StringType.Dict["strip"] = MustNewMethod("strip", func(self Object, args Tuple, kwargs StringDict) (Object, error) {
		return self.(String).Strip(args)
	}, 0, "strip(chars) -> replace chars from begining and end of string")
	StringType.Dict["rstrip"] = MustNewMethod("rstrip", func(self Object, args Tuple, kwargs StringDict) (Object, error) {
		return self.(String).RStrip(args)
	}, 0, "rstrip(chars) -> replace chars from end of string")
	StringType.Dict["lstrip"] = MustNewMethod("lstrip", func(self Object, args Tuple, kwargs StringDict) (Object, error) {
		return self.(String).LStrip(args)
	}, 0, "lstrip(chars) -> replace chars from begining of string")
	StringType.Dict["upper"] = MustNewMethod("upper", func(self Object, args Tuple, kwargs StringDict) (Object, error) {
		return self.(String).Upper()
	}, 0, "upper() -> a copy of the string converted to uppercase")
	StringType.Dict["lower"] = MustNewMethod("lower", func(self Object, args Tuple, kwargs StringDict) (Object, error) {
		return self.(String).Lower()
	}, 0, "lower() -> a copy of the string converted to lowercase")
}
// Type returns the type object for String, which is always StringType.
func (s String) Type() *Type {
return StringType
}
// StrNew implements the str(...) constructor.
//
// It accepts up to three optional arguments (bytes_or_buffer, encoding,
// errors) positionally or by keyword. Only the first one is used: the
// result is Str() of that object, or of the empty string when it is
// omitted.
//
// FIXME encoding, errors and the buffer protocol are ignored.
func StrNew(metatype *Type, args Tuple, kwargs StringDict) (Object, error) {
	var sObj Object = String("")
	var encoding, errors Object
	kwlist := []string{"bytes_or_buffer", "encoding", "errors"}
	if err := ParseTupleAndKeywords(args, kwargs, "|OOO:str", kwlist, &sObj, &encoding, &errors); err != nil {
		return nil, err
	}
	return Str(sObj)
}
// Intern s possibly returning a reference to an already interned string
//
// Interning is not implemented yet: s is returned unchanged.
func (s String) Intern() String {
// fmt.Printf("FIXME interning %q\n", s)
return s
}
// M__str__ implements str(a); a string is its own string form.
func (a String) M__str__() (Object, error) {
return a, nil
}
// M__repr__ implements repr(a): the quoted, escaped form produced by
// StringEscape in non-ascii mode.
func (a String) M__repr__() (Object, error) {
	return String(StringEscape(a, false)), nil
}
// M__bool__ implements bool(s): true for every string except the empty one.
func (s String) M__bool__() (Object, error) {
	nonEmpty := s != ""
	return NewBool(nonEmpty), nil
}
// len returns length of the string in unicode characters
//
// This is the UTF-8 rune count, which differs from the built-in len(s)
// (a byte count) whenever s contains multi-byte characters.
func (s String) len() int {
return utf8.RuneCountInString(string(s))
}
// M__len__ implements len(s), returning the number of unicode characters
// (not bytes) as an Int.
func (s String) M__len__() (Object, error) {
return Int(s.len()), nil
}
// M__add__ implements a + other. Concatenation is only defined for
// another String; anything else yields NotImplemented.
func (a String) M__add__(other Object) (Object, error) {
	rhs, isString := other.(String)
	if !isString {
		return NotImplemented, nil
	}
	return a + rhs, nil
}
// M__radd__ implements other + a (reflected addition). Only defined when
// other is a String; anything else yields NotImplemented.
func (a String) M__radd__(other Object) (Object, error) {
	lhs, isString := other.(String)
	if !isString {
		return NotImplemented, nil
	}
	return lhs + a, nil
}
// M__iadd__ implements a += other by delegating to M__add__.
func (a String) M__iadd__(other Object) (Object, error) {
return a.M__add__(other)
}
// M__mul__ implements a * other: a repeated other times.
//
// other must be convertible by convertToInt, otherwise NotImplemented is
// returned. Negative counts are treated as zero and give the empty string.
func (a String) M__mul__(other Object) (Object, error) {
	count, ok := convertToInt(other)
	if !ok {
		return NotImplemented, nil
	}
	if count < 0 {
		count = 0
	}
	var buf bytes.Buffer
	for left := int(count); left > 0; left-- {
		buf.WriteString(string(a))
	}
	return String(buf.String()), nil
}
// M__rmul__ implements other * a; repetition is commutative so it
// delegates to M__mul__.
func (a String) M__rmul__(other Object) (Object, error) {
return a.M__mul__(other)
}
// M__imul__ implements a *= other by delegating to M__mul__.
func (a String) M__imul__(other Object) (Object, error) {
return a.M__mul__(other)
}
// convertToString converts an Object to a String.
//
// The boolean result reports whether the conversion worked; currently only
// values that already are a String convert. On failure the returned
// String is empty.
func convertToString(other Object) (String, bool) {
	str, ok := other.(String)
	if !ok {
		return "", false
	}
	return str, true
}
// Rich comparison
//
// Each comparison method compares the raw Go strings when other converts
// via convertToString, and returns NotImplemented otherwise.

// M__lt__ implements a < other.
func (a String) M__lt__(other Object) (Object, error) {
	b, ok := convertToString(other)
	if !ok {
		return NotImplemented, nil
	}
	return NewBool(a < b), nil
}
// M__le__ implements a <= other; NotImplemented unless other is a String.
func (a String) M__le__(other Object) (Object, error) {
	b, ok := convertToString(other)
	if !ok {
		return NotImplemented, nil
	}
	return NewBool(a <= b), nil
}
// M__eq__ implements a == other; NotImplemented unless other is a String.
func (a String) M__eq__(other Object) (Object, error) {
	b, ok := convertToString(other)
	if !ok {
		return NotImplemented, nil
	}
	return NewBool(a == b), nil
}
// M__ne__ implements a != other; NotImplemented unless other is a String.
func (a String) M__ne__(other Object) (Object, error) {
	b, ok := convertToString(other)
	if !ok {
		return NotImplemented, nil
	}
	return NewBool(a != b), nil
}
// M__gt__ implements a > other; NotImplemented unless other is a String.
func (a String) M__gt__(other Object) (Object, error) {
	b, ok := convertToString(other)
	if !ok {
		return NotImplemented, nil
	}
	return NewBool(a > b), nil
}
// M__ge__ implements a >= other; NotImplemented unless other is a String.
func (a String) M__ge__(other Object) (Object, error) {
	b, ok := convertToString(other)
	if !ok {
		return NotImplemented, nil
	}
	return NewBool(a >= b), nil
}
// % operator
/*
4.7.2. printf-style String Formatting
Note The formatting operations described here exhibit a variety of
quirks that lead to a number of common errors (such as failing to
display tuples and dictionaries correctly). Using the newer
str.format() interface helps avoid these errors, and also provides a
generally more powerful, flexible and extensible approach to
formatting text.
String objects have one unique built-in operation: the % operator
(modulo). This is also known as the string formatting or interpolation
operator. Given format % values (where format is a string), %
conversion specifications in format are replaced with zero or more
elements of values. The effect is similar to using the sprintf() in
the C language.
If format requires a single argument, values may be a single non-tuple
object. [5] Otherwise, values must be a tuple with exactly the number
of items specified by the format string, or a single mapping object
(for example, a dictionary).
A conversion specifier contains two or more characters and has the
following components, which must occur in this order:
The '%' character, which marks the start of the specifier.
Mapping key (optional), consisting of a parenthesised sequence of
characters (for example, (somename)).
Conversion flags (optional), which affect the result of some
conversion types.
Minimum field width (optional). If specified as an '*' (asterisk), the
actual width is read from the next element of the tuple in values, and
the object to convert comes after the minimum field width and optional
precision.
Precision (optional), given as a '.' (dot) followed by the
precision. If specified as '*' (an asterisk), the actual precision is
read from the next element of the tuple in values, and the value to
convert comes after the precision.
Length modifier (optional).
Conversion type.
When the right argument is a dictionary (or other mapping type), then
the formats in the string must include a parenthesised mapping key
into that dictionary inserted immediately after the '%' character. The
mapping key selects the value to be formatted from the mapping. For
example:
>>>
>>> print('%(language)s has %(number)03d quote types.' %
... {'language': "Python", "number": 2})
Python has 002 quote types.
In this case no * specifiers may occur in a format (since they require
a sequential parameter list).
The conversion flag characters are:
Flag Meaning
'#' The value conversion will use the “alternate form” (where defined below).
'0' The conversion will be zero padded for numeric values.
'-' The converted value is left adjusted (overrides the '0' conversion if both are given).
' ' (a space) A blank should be left before a positive number (or empty string) produced by a signed conversion.
'+' A sign character ('+' or '-') will precede the conversion (overrides a “space” flag).
A length modifier (h, l, or L) may be present, but is ignored as it is
not necessary for Python – so e.g. %ld is identical to %d.
The conversion types are:
Conversion Meaning Notes
'd' Signed integer decimal.
'i' Signed integer decimal.
'o' Signed octal value. (1)
'u' Obsolete type – it is identical to 'd'. (7)
'x' Signed hexadecimal (lowercase). (2)
'X' Signed hexadecimal (uppercase). (2)
'e' Floating point exponential format (lowercase). (3)
'E' Floating point exponential format (uppercase). (3)
'f' Floating point decimal format. (3)
'F' Floating point decimal format. (3)
'g' Floating point format. Uses lowercase exponential format if exponent is less than -4 or not less than precision, decimal format otherwise. (4)
'G' Floating point format. Uses uppercase exponential format if exponent is less than -4 or not less than precision, decimal format otherwise. (4)
'c' Single character (accepts integer or single character string).
'r' String (converts any Python object using repr()). (5)
's' String (converts any Python object using str()). (5)
'a' String (converts any Python object using ascii()). (5)
'%' No argument is converted, results in a '%' character in the result.
Notes:
(1) The alternate form causes a leading zero ('0') to be inserted between
left-hand padding and the formatting of the number if the leading
character of the result is not already a zero.
(2) The alternate form causes a leading '0x' or '0X' (depending on whether
the 'x' or 'X' format was used) to be inserted between left-hand
padding and the formatting of the number if the leading character of
the result is not already a zero.
(3) The alternate form causes the result to always contain a decimal
point, even if no digits follow it.
The precision determines the number of digits after the decimal point
and defaults to 6.
(4) The alternate form causes the result to always contain a decimal
point, and trailing zeroes are not removed as they would otherwise be.
The precision determines the number of significant digits before and
after the decimal point and defaults to 6.
(5) If precision is N, the output is truncated to N characters.
(7) See PEP 237.
Since Python strings have an explicit length, %s
conversions do not assume that '\0' is the end of the string.
Changed in version 3.1: %f conversions for numbers whose absolute
value is over 1e50 are no longer replaced by %g conversions.
*/
// M__mod__ implements the printf-style a % other operator.
//
// other supplies the values: a Tuple is used element by element, any other
// object is treated as a single value. The format is handed to
// fmt.Sprintf after rewriting %s to %v and %r to %#v.
//
// FIXME not a full implementation: all other verbs and flags are passed
// to fmt.Sprintf unchanged.
func (a String) M__mod__(other Object) (Object, error) {
	values, isTuple := other.(Tuple)
	if !isTuple {
		values = Tuple{other}
	}

	// fmt.Sprintf wants []interface{}, so copy the values over.
	params := make([]interface{}, 0, len(values))
	for _, v := range values {
		params = append(params, v)
	}

	format := strings.Replace(string(a), "%s", "%v", -1)
	format = strings.Replace(format, "%r", "%#v", -1)
	return String(fmt.Sprintf(format, params...)), nil
}
// M__rmod__ implements other % a (reflected form): when other is a String
// it is used as the format with a as the value; otherwise NotImplemented
// is returned.
func (a String) M__rmod__(other Object) (Object, error) {
	if format, ok := other.(String); ok {
		return format.M__mod__(a)
	}
	return NotImplemented, nil
}
// M__imod__ implements a %= other by delegating to M__mod__.
func (a String) M__imod__(other Object) (Object, error) {
return a.M__mod__(other)
}
// pos returns the byte offset in s at which the n-th character (counting
// from zero) starts.
//
// If s has n or fewer characters, or n is negative, it returns len(s),
// i.e. the end of the string.
func (s String) pos(n int) int {
	remaining := n
	for byteOff := range s {
		if remaining == 0 {
			return byteOff
		}
		remaining--
	}
	return len(s)
}
// slice returns the characters of s in [start, stop), where start and stop
// are character positions rather than byte offsets.
//
// length must be the length of s in unicode characters; it is used to
// detect pure single-byte strings (where character and byte offsets
// coincide) and requests that cover the whole string. An empty String is
// returned when start >= stop.
func (s String) slice(start, stop, length int) String {
	switch {
	case start >= stop:
		return String("")
	case length == len(s):
		// One byte per character: slice bytes directly.
		return s[start:stop]
	case start <= 0 && stop >= length:
		return s
	}
	// Translate character positions to byte offsets, scanning for the end
	// from the start offset rather than from the beginning again.
	from := s.pos(start)
	to := from + s[from:].pos(stop-start)
	return s[from:to]
}
// M__getitem__ implements s[key] for an integer index or a *Slice.
//
// Indices are character positions. Strings that contain only single-byte
// characters are indexed directly by byte; otherwise positions are
// translated through pos or a []rune copy. Errors from
// Slice.GetIndices and IndexIntCheck are returned unchanged.
func (s String) M__getitem__(key Object) (Object, error) {
	nChars := s.len()
	singleByte := nChars == len(s)

	sl, isSlice := key.(*Slice)
	if !isSlice {
		idx, err := IndexIntCheck(key, nChars)
		if err != nil {
			return nil, err
		}
		if singleByte {
			return s[idx : idx+1], nil
		}
		// Skip to the requested character and cut exactly one rune.
		rest := s[s.pos(idx):]
		_, width := utf8.DecodeRuneInString(string(rest))
		return rest[:width], nil
	}

	start, stop, step, count, err := sl.GetIndices(nChars)
	if err != nil {
		return nil, err
	}
	switch {
	case step == 1:
		// Return a subslice since strings are immutable
		return s.slice(start, stop, nChars), nil
	case singleByte:
		picked := make([]byte, count)
		for dst, src := 0, start; dst < count; dst, src = dst+1, src+step {
			picked[dst] = s[src]
		}
		return String(picked), nil
	}

	// Unpack the string into a []rune so stepping is by character.
	all := []rune(string(s))
	picked := make([]rune, count)
	for dst, src := 0, start; dst < count; dst, src = dst+1, src+step {
		picked[dst] = all[src]
	}
	return String(picked), nil
}
// M__contains__ implements item in s as a substring test.
//
// A TypeError is returned when item is not a String.
func (s String) M__contains__(item Object) (Object, error) {
	if needle, ok := item.(String); ok {
		found := strings.Contains(string(s), string(needle))
		return NewBool(found), nil
	}
	return nil, ExceptionNewf(TypeError, "'in <string>' requires string as left operand, not %s", item.Type().Name)
}
// find implements S.find(sub[, start[, end]]).
//
// It returns the lowest character index at which sub occurs within
// S[start:end], or -1 when it does not occur. start and end are
// character (not byte) positions and follow Python slice conventions:
// negative values count from the end of the string and out-of-range
// values are clamped. Argument parsing errors from ParseTuple are
// returned unchanged.
func (s String) find(args Tuple) (Object, error) {
	var (
		pysub Object
		pybeg Object = Int(0)
		pyend Object = Int(s.len())
		pyfmt        = "s|ii:find"
	)
	err := ParseTuple(args, pyfmt, &pysub, &pybeg, &pyend)
	if err != nil {
		return nil, err
	}
	var (
		beg  = int(pybeg.(Int))
		end  = int(pyend.(Int))
		size = s.len()
	)
	// Normalise the bounds the way slicing does. Passing a negative start
	// straight through to slice would panic on single-byte strings.
	if end > size {
		end = size
	} else if end < 0 {
		end += size
		if end < 0 {
			end = 0
		}
	}
	if beg < 0 {
		beg += size
		if beg < 0 {
			beg = 0
		}
	}
	if beg > end {
		// Empty search window: nothing matches, not even "".
		return Int(-1), nil
	}
	var (
		str = string(s.slice(beg, end, size))
		sub = string(pysub.(String))
		idx = strings.Index(str, sub)
	)
	if idx < 0 {
		return Int(idx), nil
	}
	// idx is a byte offset into the window; convert it to a character
	// count and shift by the window's starting character.
	return Int(beg + String(str[:idx]).len()), nil
}
// Split implements S.split(sep=None, maxsplit=-1) and returns a *List of
// String.
//
// With sep None the string is split on runs of whitespace (see fieldsN);
// with a String sep it is split on that exact separator. maxsplit limits
// the number of splits performed; any negative value means no limit.
// A TypeError is returned for any other sep type, and argument parsing
// errors from ParseTupleAndKeywords are returned unchanged.
func (s String) Split(args Tuple, kwargs StringDict) (Object, error) {
	var (
		pyval Object = None
		pymax Object = Int(-1)
		pyfmt        = "|Oi:split"
		kwlst        = []string{"sep", "maxsplit"}
	)
	err := ParseTupleAndKeywords(args, kwargs, pyfmt, kwlst, &pyval, &pymax)
	if err != nil {
		return nil, err
	}
	var (
		max = pymax.(Int)
		vs  []string
	)
	switch v := pyval.(type) {
	case String:
		// strings.SplitN counts resulting pieces, not splits, and returns
		// nil for a count of 0. Map every negative maxsplit to "no limit"
		// so that an explicit maxsplit=-1 does not become SplitN(..., 0).
		pieces := -1
		if max >= 0 {
			pieces = int(max) + 1
		}
		vs = strings.SplitN(string(s), string(v), pieces)
	case NoneType:
		vs = fieldsN(string(s), int(max))
	default:
		return nil, ExceptionNewf(TypeError, "Can't convert '%s' object to str implicitly", pyval.Type())
	}
	o := List{}
	for _, j := range vs {
		o.Items = append(o.Items, String(j))
	}
	return &o, nil
}
// Replace implements S.replace(old, new[, count]).
//
// It returns a copy of s in which the first count occurrences of old are
// replaced by new; count defaults to -1, which strings.Replace treats as
// no limit. Argument parsing errors from ParseTuple are returned
// unchanged.
func (s String) Replace(args Tuple) (Object, error) {
	var pyold, pynew Object = None, None
	var pycnt Object = Int(-1)
	if err := ParseTuple(args, "ss|i:replace", &pyold, &pynew, &pycnt); err != nil {
		return nil, err
	}
	from := string(pyold.(String))
	to := string(pynew.(String))
	limit := int(pycnt.(Int))
	return String(strings.Replace(string(s), from, to, limit)), nil
}
// stripFunc builds the rune predicate shared by Strip, LStrip and RStrip.
//
// args may hold one optional string of characters to strip. When it is
// given the predicate matches exactly those characters; otherwise the
// predicate is unicode.IsSpace. Argument parsing errors from ParseTuple
// are returned unchanged.
func stripFunc(args Tuple) (func(rune) bool, error) {
	var pyval Object = None
	if err := ParseTuple(args, "|s", &pyval); err != nil {
		return nil, err
	}
	cutset, given := pyval.(String)
	if !given {
		return unicode.IsSpace, nil
	}
	chars := []rune(string(cutset))
	inCutset := func(r rune) bool {
		for _, c := range chars {
			if c == r {
				return true
			}
		}
		return false
	}
	return inCutset, nil
}
// Strip implements S.strip([chars]): it returns s with leading and
// trailing characters removed, using the predicate chosen by stripFunc
// (the given chars, or whitespace when none are given).
func (s String) Strip(args Tuple) (Object, error) {
f, err := stripFunc(args)
if err != nil {
return nil, err
}
return String(strings.TrimFunc(string(s), f)), nil
}
// LStrip implements S.lstrip([chars]): it returns s with leading
// characters removed, using the predicate chosen by stripFunc (the given
// chars, or whitespace when none are given).
func (s String) LStrip(args Tuple) (Object, error) {
f, err := stripFunc(args)
if err != nil {
return nil, err
}
return String(strings.TrimLeftFunc(string(s), f)), nil
}
// RStrip implements S.rstrip([chars]): it returns s with trailing
// characters removed, using the predicate chosen by stripFunc (the given
// chars, or whitespace when none are given).
func (s String) RStrip(args Tuple) (Object, error) {
f, err := stripFunc(args)
if err != nil {
return nil, err
}
return String(strings.TrimRightFunc(string(s), f)), nil
}
// Upper implements S.upper(): a copy of s converted to upper case via
// strings.ToUpper. The error result is always nil.
func (s String) Upper() (Object, error) {
return String(strings.ToUpper(string(s))), nil
}
// Lower implements S.lower(): a copy of s converted to lower case via
// strings.ToLower. The error result is always nil.
func (s String) Lower() (Object, error) {
return String(strings.ToLower(string(s))), nil
}
// Compile-time checks that String satisfies the interfaces listed below.
var (
_ richComparison = String("")
_ sequenceArithmetic = String("")
_ I__mod__ = String("")
_ I__rmod__ = String("")
_ I__imod__ = String("")
_ I__len__ = String("")
_ I__bool__ = String("")
_ I__getitem__ = String("")
_ I__contains__ = String("")
)