-
-
Notifications
You must be signed in to change notification settings - Fork 169
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
150 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -89,3 +89,7 @@ ENV/ | |
.ropeproject | ||
|
||
coverage | ||
|
||
|
||
yarl/_quoting.c | ||
yarl/_quoting.html |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
"""Micro-benchmark: Cython vs. pure-Python quote/unquote helpers.

Run as a script; prints one timing line per (implementation, function)
pair using ``timeit`` with its default number of repetitions.
"""
import timeit


# Setup snippets handed to timeit.  Each one binds the names ``quote`` and
# ``unquote`` to one implementation, so the timed statement is the same
# for both implementations.
cython_setup = """\
from yarl.quoting import _quote as quote
from yarl.quoting import _unquote as unquote
"""

python_setup = """\
from yarl.quoting import _py_quote as quote
from yarl.quoting import _py_unquote as unquote
"""

# Appended to every setup snippet: defines the sample input ``s``.
_SAMPLE_SETUP = "s='/path/to'"


def main():
    """Time each function for each implementation and print the results.

    Output order is: Cython quote, Python quote, Cython unquote,
    Python unquote.
    """
    implementations = (("Cython", cython_setup), ("Python", python_setup))
    for func in ("quote", "unquote"):
        statement = "{}(s)".format(func)
        for label, setup in implementations:
            elapsed = timeit.timeit(statement, setup + _SAMPLE_SETUP)
            print("{} {}: {:.3f} sec".format(label, func, elapsed))


# Guarded so that importing this module does not start a benchmark run.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
[aliases] | ||
test=pytest |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,116 @@ | ||
# cython: language_level=3 | ||
|
||
from string import ascii_letters, ascii_lowercase, digits | ||
|
||
# Character classes used by the quoting routines.  GEN_DELIMS, SUB_DELIMS
# and UNRESERVED hold the same characters as the gen-delims, sub-delims and
# unreserved sets of RFC 3986; RESERVED is the union of the two delimiter
# sets.
cdef str ASCII_LOWERCASE = ascii_lowercase
cdef str GEN_DELIMS = ":/?#[]@"
cdef str SUB_DELIMS = "!$&'()*+,;="
cdef str RESERVED = GEN_DELIMS + SUB_DELIMS
cdef str UNRESERVED = ascii_letters + digits + '-._~'

# Every well-formed escape, '%00' through '%FF', with upper-case hex digits.
cdef set PCT_ALLOWED = {'%%%02X' % code for code in range(256)}

# Maps the escape of each unreserved character back to that character,
# e.g. '%41' -> 'A'.
cdef dict UNRESERVED_QUOTED = {'%%%02X' % ord(char): char
                               for char in UNRESERVED}
|
||
|
||
cdef Py_UCS4 _hex(unsigned long v):
    """Return the upper-case hexadecimal digit character for *v*.

    Values below 10 map to '0'..'9'; larger values are offset from 'A',
    so 10..15 map to 'A'..'F'.  The range of *v* is not checked; the
    callers in this module pass a 4-bit value.
    """
    cdef unsigned long offset
    if v >= 10:
        offset = 0x41 - 10  # ord('A') == 0x41, so 10 -> 'A'
    else:
        offset = 0x30       # ord('0') == 0x30
    return <Py_UCS4>(v + offset)
|
||
|
||
def _quote(val, *, str safe='', bint plus=False):
    """Percent-encode *val*, leaving existing ``%XX`` escapes intact.

    Escapes already present in *val* are not double-encoded: their hex
    digits are upper-cased, an escape that stands for an unreserved
    character is replaced by that character, and any other well-formed
    escape is copied through.  Outside of escapes, characters found in
    UNRESERVED or in *safe* are copied unchanged, and every other character
    is written as the ``%XX`` escapes of its UTF-8 bytes.

    Args:
        val: text to quote, or None.
        safe: additional characters to copy through unencoded.
        plus: when true, a space is written as ``+``.

    Returns:
        None if *val* is None, ``''`` if it is empty, otherwise the quoted
        string.

    Raises:
        TypeError: *val* is neither None nor a str.
        ValueError: *val* contains a malformed escape, i.e. ``%`` followed
            by something other than two hex digits, or the input ends
            before the escape is complete.
    """
    if val is None:
        return None
    if not isinstance(val, str):
        raise TypeError("Argument should be str")
    if not val:
        return ''
    cdef str _val = <str>val
    cdef list ret = []
    cdef list pct = []  # characters of the '%XX' escape currently being read
    cdef unsigned char b
    cdef Py_UCS4 ch
    cdef str tmp
    for ch in _val:
        if pct:
            # Inside an escape: normalise ASCII a-z to A-Z so the lookups
            # below only need the upper-case spellings.
            if u'a' <= ch <= u'z':
                ch = <Py_UCS4>(<unsigned long>ch - 32)
            pct.append(ch)
            if len(pct) == 3:
                tmp = "".join(pct)
                unquoted = UNRESERVED_QUOTED.get(tmp)
                if unquoted:
                    # Escape of an unreserved character: emit it literally.
                    ret.append(unquoted)
                elif tmp not in PCT_ALLOWED:
                    raise ValueError("Unallowed PCT {}".format(pct))
                else:
                    ret.append(tmp)
                del pct[:]
            continue
        elif ch == u'%':
            pct = [ch]
            continue

        if plus:
            if ch == u' ':
                ret.append(u'+')
                continue
        if ch in UNRESERVED:
            ret.append(ch)
            continue
        if ch in safe:
            ret.append(ch)
            continue

        # Anything else: one '%XX' escape per UTF-8 byte.
        for b in <bytes>ch.encode('utf8'):
            ret.append('%')
            ret.append(_hex(<unsigned char>b >> 4))
            ret.append(_hex(<unsigned char>b & 0x0f))

    if pct:
        # The input ended in the middle of an escape ('%' or '%X').
        # Report it like any other malformed escape instead of silently
        # dropping those characters from the result.
        raise ValueError("Unallowed PCT {}".format(pct))
    return ''.join(ret)
|
||
|
||
def _unquote(val, *, unsafe='', plus=False):
    """Decode the ``%XX`` escapes in *val* as UTF-8 text.

    Consecutive escapes are collected as bytes and decoded together, so a
    multi-byte UTF-8 character spread over several escapes is restored.
    A decoded character that appears in *unsafe* is passed back through
    ``_quote`` instead of being emitted decoded.

    Escapes that cannot be decoded are kept exactly as written and in
    their original position: this covers a run of escapes that is not
    valid UTF-8 (``'%F8'``) and an escape cut short by the end of the
    input (``'abc%'``, ``'abc%2'``).

    Args:
        val: text to unquote, or None.
        unsafe: characters that must stay quoted in the result.
        plus: accepted but never read by this implementation; ``+`` is
            always copied through unchanged.

    Returns:
        None if *val* is None, ``''`` if it is empty, otherwise the
        unquoted string.

    Raises:
        TypeError: *val* is neither None nor a str.
        ValueError: the two characters after a ``%`` are not accepted by
            ``int(..., base=16)`` or do not yield a byte value.
    """
    if val is None:
        return None
    if not isinstance(val, str):
        raise TypeError("Argument should be str")
    if not val:
        return ''
    pct = ''            # escape being read: '', '%' or '%X'
    raw = []            # source text of each byte currently held in pcts
    pcts = bytearray()  # bytes of the pending run of escapes
    ret = []
    for ch in val:
        if pct:
            pct += ch
            if len(pct) == 3:  # pragma: no branch   # peephole optimizer
                pcts.append(int(pct[1:], base=16))
                raw.append(pct)
                pct = ''
            continue
        if pcts:
            try:
                unquoted = pcts.decode('utf8')
            except UnicodeDecodeError:
                # Possibly an incomplete multi-byte character.  Keep
                # waiting only if another escape follows immediately; a
                # literal character ends the run, so emit the escapes
                # unchanged rather than letting them merge with later
                # bytes or fail at the end of the input.
                if ch != '%':
                    ret.extend(raw)
                    del pcts[:]
                    del raw[:]
            else:
                if unquoted in unsafe:
                    ret.append(_quote(unquoted))
                else:
                    ret.append(unquoted)
                del pcts[:]
                del raw[:]

        if ch == '%':
            pct = ch
            continue

        ret.append(ch)

    if pcts:
        try:
            unquoted = pcts.decode('utf8')
        except UnicodeDecodeError:
            # Trailing escapes that are not valid UTF-8: keep as written.
            ret.extend(raw)
        else:
            if unquoted in unsafe:
                ret.append(_quote(unquoted))
            else:
                ret.append(unquoted)
    if pct:
        # Input ended inside an escape; keep the fragment instead of
        # dropping it.
        ret.append(pct)
    return ''.join(ret)