Skip to content

Commit

Permalink
6x boost for quoting
Browse files Browse the repository at this point in the history
  • Loading branch information
asvetlov committed Nov 2, 2016
1 parent 9da189a commit 5c59e0f
Show file tree
Hide file tree
Showing 4 changed files with 150 additions and 0 deletions.
4 changes: 4 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -89,3 +89,7 @@ ENV/
.ropeproject

coverage


yarl/_quoting.c
yarl/_quoting.html
28 changes: 28 additions & 0 deletions benchmark.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import timeit


# Setup snippets that timeit executes once before the timed statement.
# Each one binds the names ``quote`` and ``unquote`` to one implementation
# so the timed statement is the same for both.
cython_setup = """\
from yarl.quoting import _quote as quote
from yarl.quoting import _unquote as unquote
"""

python_setup = """\
from yarl.quoting import _py_quote as quote
from yarl.quoting import _py_unquote as unquote
"""


# (report template, timed statement, setup snippet), in reporting order.
# NOTE(review): the sample string contains no '%' escapes, so the unquote
# timings presumably cover only the pass-through path -- confirm whether
# that is intended.
_runs = (
    ("Cython quote: {:.3f} sec", "quote(s)", cython_setup),
    ("Python quote: {:.3f} sec", "quote(s)", python_setup),
    ("Cython unquote: {:.3f} sec", "unquote(s)", cython_setup),
    ("Python unquote: {:.3f} sec", "unquote(s)", python_setup),
)

for _template, _stmt, _setup in _runs:
    # The sample argument ``s`` is defined inside timeit's namespace by
    # appending its assignment to the setup code.
    _elapsed = timeit.timeit(_stmt, _setup+"s='/path/to'")
    print(_template.format(_elapsed))
2 changes: 2 additions & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[aliases]
test=pytest
116 changes: 116 additions & 0 deletions yarl/_quoting.pyx
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
# cython: language_level=3

from string import ascii_letters, ascii_lowercase, digits

# Character classes used by the quoting routines below.  The delimiter sets
# match the gen-delims / sub-delims / unreserved groups of RFC 3986.

# Typed copy of string.ascii_lowercase (not referenced by the code visible
# in this file).
cdef str ASCII_LOWERCASE = ascii_lowercase
cdef str GEN_DELIMS = ":/?#[]@"
cdef str SUB_DELIMS = "!$&'()*+,;="
# All reserved characters: both delimiter groups concatenated.
cdef str RESERVED = GEN_DELIMS + SUB_DELIMS
# Characters that _quote copies through without escaping.
cdef str UNRESERVED = ascii_letters + digits + '-._~'

# Every well-formed escape "%00".."%FF", written with upper-case hex digits;
# _quote uses it to validate escapes already present in its input.
cdef set PCT_ALLOWED = {'%{:02X}'.format(i) for i in range(256)}
# Maps the escape of each unreserved character to the character itself
# (e.g. "%41" -> "A"), so _quote can replace such escapes by the plain
# character.
cdef dict UNRESERVED_QUOTED = {'%{:02X}'.format(ord(ch)): ch
for ch in UNRESERVED}


cdef Py_UCS4 _hex(unsigned long v):
    # Return the upper-case hexadecimal digit for a 4-bit value.
    # Callers in this file pass a single nibble (``b >> 4`` or ``b & 0x0f``),
    # i.e. 0..15; other values are not range-checked.
    if v >= 10:
        # 10..15 -> 'A'..'F'  (ord('A') == 0x41)
        return <Py_UCS4>(v - 10 + 0x41)
    # 0..9 -> '0'..'9'  (ord('0') == 0x30)
    return <Py_UCS4>(v + 0x30)


def _quote(val, *, str safe='', bint plus=False):
    """Percent-encode *val*, normalising escapes it already contains.

    Characters found in ``UNRESERVED`` or in *safe* are copied unchanged.
    Any other character is replaced by the ``%XX`` escapes of its UTF-8
    bytes.  ``%XX`` escapes already present in *val* are validated and
    upper-cased; an escape that encodes an unreserved character is replaced
    by that character.

    :param val: string to quote; ``None`` is passed through as ``None``.
    :param safe: additional characters to leave unescaped.
    :param plus: when true, a space is emitted as ``+``.
    :returns: the quoted string (``''`` for empty input) or ``None``.
    :raises TypeError: if *val* is neither ``None`` nor a ``str``.
    :raises ValueError: if *val* contains a malformed percent escape,
        including one that is cut short by the end of the string.
    """
    if val is None:
        return None
    if not isinstance(val, str):
        raise TypeError("Argument should be str")
    if not val:
        return ''
    cdef str _val = <str>val
    cdef list ret = []   # output fragments, joined once at the end
    cdef list pct = []   # characters of the escape currently being read
    cdef unsigned char b
    cdef Py_UCS4 ch
    cdef str tmp
    for ch in _val:
        if pct:
            # Inside an escape: upper-case ASCII letters (distance 32) so
            # the lookup tables only need upper-case hex digits.
            if u'a' <= ch <= u'z':
                ch = <Py_UCS4>(<unsigned long>ch - 32)
            pct.append(ch)
            if len(pct) == 3:
                tmp = "".join(pct)
                unquoted = UNRESERVED_QUOTED.get(tmp)
                if unquoted:
                    # Escape of an unreserved character: emit it plainly.
                    ret.append(unquoted)
                elif tmp not in PCT_ALLOWED:
                    raise ValueError("Unallowed PCT {}".format(pct))
                else:
                    # Valid escape of some other byte: keep it, upper-cased.
                    ret.append(tmp)
                del pct[:]
            continue
        elif ch == u'%':
            # Start of an escape; the next two characters belong to it.
            pct = [ch]
            continue

        if plus:
            if ch == u' ':
                ret.append(u'+')
                continue
        if ch in UNRESERVED:
            ret.append(ch)
            continue
        if ch in safe:
            ret.append(ch)
            continue

        # Everything else: one %XX escape per UTF-8 byte, high nibble first.
        for b in <bytes>ch.encode('utf8'):
            ret.append('%')
            ret.append(_hex(<unsigned char>b >> 4))
            ret.append(_hex(<unsigned char>b & 0x0f))

    if pct:
        # The input ended in the middle of an escape ("%" or "%X").
        # Previously these characters were silently dropped from the result;
        # reject them like any other malformed escape instead.
        # NOTE(review): benchmark.py imports a _py_quote from yarl.quoting;
        # if it is meant to behave identically, apply the same check there.
        raise ValueError("Unallowed PCT {}".format(pct))

    return ''.join(ret)


def _unquote(val, *, unsafe='', plus=False):
    """Decode the ``%XX`` escapes in *val*.

    Consecutive escapes are collected as bytes and decoded together as
    UTF-8, so multi-byte characters spread over several escapes are
    restored.  A decoded character that appears in *unsafe* is passed back
    through ``_quote`` instead of being emitted as-is.

    :param val: string to unquote; ``None`` is passed through as ``None``.
    :param unsafe: characters that must not be emitted in decoded form.
    :param plus: currently ignored -- the body never reads it.
    :returns: the unquoted string (``''`` for empty input) or ``None``.
    :raises TypeError: if *val* is neither ``None`` nor a ``str``.
    :raises ValueError: if a complete escape has non-hexadecimal digits
        (raised by ``int``), or if the escaped bytes still pending at the
        end of the input are not valid UTF-8 (``UnicodeDecodeError``).
    """
    if val is None:
        return None
    if not isinstance(val, str):
        raise TypeError("Argument should be str")
    if not val:
        return ''
    pct = ''              # text of the escape currently being read
    pcts = bytearray()    # bytes of the escapes decoded so far in this run
    ret = []
    for ch in val:
        if pct:
            pct += ch
            if len(pct) == 3:  # pragma: no branch   # peephole optimizer
                pcts.append(int(pct[1:], base=16))
                pct = ''
            continue
        if pcts:
            try:
                unquoted = pcts.decode('utf8')
            except UnicodeDecodeError:
                # Not (yet) a complete UTF-8 sequence: keep the bytes so a
                # following escape can complete it.
                pass
            else:
                # Check every decoded character on its own.  Testing the
                # whole run with ``in`` was a substring test, which let
                # unsafe characters through whenever several escapes were
                # adjacent (e.g. '%2F%2F' with unsafe='/').
                for decoded in unquoted:
                    if decoded in unsafe:
                        ret.append(_quote(decoded))
                    else:
                        ret.append(decoded)
                del pcts[:]

        if ch == '%':
            pct = ch
            continue

        ret.append(ch)

    if pcts:
        unquoted = pcts.decode('utf8')
        for decoded in unquoted:
            if decoded in unsafe:
                ret.append(_quote(decoded))
            else:
                ret.append(decoded)
    if pct:
        # The input ended inside an escape ("%" or "%X").  Keep those
        # characters verbatim rather than silently dropping them.
        # NOTE(review): benchmark.py imports a _py_unquote from
        # yarl.quoting; if it is meant to behave identically, mirror both
        # changes there.
        ret.append(pct)
    return ''.join(ret)

0 comments on commit 5c59e0f

Please sign in to comment.