Skip to content

Commit

Permalink
Add API methods to serialize form-related elements according to spec
Browse files Browse the repository at this point in the history
Fix newlines in form element values
  • Loading branch information
whybin committed Aug 15, 2018
1 parent fe9ec57 commit 63d1715
Show file tree
Hide file tree
Showing 2 changed files with 280 additions and 1 deletion.
135 changes: 134 additions & 1 deletion pyquery/pyquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
#
# Distributed under the BSD license, see LICENSE.txt
from .cssselectpatch import JQueryTranslator
from collections import OrderedDict
from .openers import url_opener
from .text import extract_text
from copy import deepcopy
Expand Down Expand Up @@ -1039,7 +1040,11 @@ def _get_value(tag):
return 'on'
else:
return val
# <input> and everything else.
# <input>
elif tag.tag == 'input':
val = self._copy(tag).attr('value')
return val.replace('\n', '') if val else ''
# everything else.
return self._copy(tag).attr('value') or ''

def _set_value(pq, value):
Expand Down Expand Up @@ -1518,10 +1523,138 @@ def fn(self, *args, **kwargs):
setattr(PyQuery, name, fn)
fn = Fn()


########
# AJAX #
########

@with_camel_case_alias
def serialize_array(self):
    """Return the serialized form controls as a list of dictionaries, each
    holding a ``name`` and a ``value`` key, following the layout produced by
    the jQuery API. The deprecated `keygen` form element is not handled.

    >>> d = PyQuery('<form><input name="order" value="spam"></form>')
    >>> d.serialize_array() == [{'name': 'order', 'value': 'spam'}]
    True
    >>> d.serializeArray() == [{'name': 'order', 'value': 'spam'}]
    True
    """
    # Every (name, value) pair becomes one small dictionary, in order.
    return [
        {'name': name, 'value': value}
        for name, value in self.serialize_pairs()
    ]

def serialize(self):
    """Return the serialized form controls as one URL-encoded string.

    >>> h = (
    ... '<form><input name="order" value="spam">'
    ... '<input name="order2" value="baked beans"></form>'
    ... )
    >>> d = PyQuery(h)
    >>> d.serialize()
    'order=spam&order2=baked%20beans'
    """
    pairs = self.serialize_pairs()
    encoded = urlencode(pairs)
    # urlencode writes spaces as '+'; emit them as '%20' instead.
    return encoded.replace('+', '%20')


#####################################################
# Additional methods that are not in the jQuery API #
#####################################################

@with_camel_case_alias
def serialize_pairs(self):
    """Serialize form controls as a list of ``(name, value)`` 2-tuples,
    the shape conventionally used by URL-handling code in Python.

    Forms and fieldsets in the selection are expanded to the controls they
    own; any other selected element is treated as a control itself.
    Controls without a name, disabled controls, buttons, submitter-like
    input types and unchecked checkboxes/radios are dropped. Line breaks in
    the remaining values are normalized to CRLF, and a control with several
    values contributes one pair per value.

    >>> d = PyQuery('<form><input name="order" value="spam"></form>')
    >>> d.serialize_pairs()
    [('order', 'spam')]
    >>> d.serializePairs()
    [('order', 'spam')]
    """
    # https://github.com/jquery/jquery/blob
    # /2d4f53416e5f74fa98e0c1d66b6f3c285a12f0ce/src/serialize.js#L14
    _submitter_types = ['submit', 'button', 'image', 'reset', 'file']

    def _to_crlf(value):
        # Collapse existing CRLF first so it is not expanded twice; a bare
        # LF -> CRLF replace would turn CRLF into CR CR LF.
        return value.replace('\r\n', '\n').replace('\n', '\r\n')

    controls = self._copy([])
    # Expand list of form controls
    for el in self.items():
        tag_name = el[0].tag
        if tag_name == 'form':
            form_id = el.attr('id')
            if form_id:
                # Include inputs outside of their form owner
                root = self._copy(el.root.getroot())
                controls.extend(root(
                    '#%s :not([form]):input, [form="%s"]:input'
                    % (form_id, form_id)))
            else:
                controls.extend(el(':not([form]):input'))
        elif tag_name == 'fieldset':
            controls.extend(el(':input'))
        else:
            controls.extend(el)

    # Filter controls
    selector = '[name]:enabled:not(button)'  # Not serializing image button
    selector += ''.join(
        ':not([type="%s"])' % s for s in _submitter_types)
    controls = controls.filter(selector)

    def _filter_out_unchecked(_, el):
        el = controls._copy(el)
        return not el.is_(':checkbox:not(:checked)') \
            and not el.is_(':radio:not(:checked)')
    controls = controls.filter(_filter_out_unchecked)

    # jQuery serializes inputs with the datalist element as an ancestor
    # contrary to WHATWG spec as of August 2018
    #
    # xpath = 'self::*[not(ancestor::datalist)]'
    # results = []
    # for tag in controls:
    #     results.extend(tag.xpath(xpath, namespaces=controls.namespaces))
    # controls = controls._copy(results)

    # Serialize values
    ret = []
    for field in controls:
        name = field.attrib['name']
        val = self._copy(field).val()
        # A list value yields one pair per entry; treat a scalar the same
        # way so both cases share the normalization below.
        values = val if isinstance(val, list) else [val]
        # NOTE(review): val() is assumed to be able to return None when a
        # control has no value available; such entries are skipped (jQuery
        # also drops null values) rather than failing on None.replace.
        ret.extend([(name, _to_crlf(v)) for v in values if v is not None])
    return ret

@with_camel_case_alias
def serialize_dict(self):
    """Return the serialized form controls as an ordered dictionary keyed
    by control name. A name seen once maps to its single value; a name seen
    several times maps to a list of its values in order of appearance.

    >>> d = PyQuery('''<form>
    ... <input name="order" value="spam">
    ... <input name="order" value="eggs">
    ... <input name="order2" value="ham">
    ... </form>''')
    >>> d.serialize_dict()
    OrderedDict([('order', ['spam', 'eggs']), ('order2', 'ham')])
    >>> d.serializeDict()
    OrderedDict([('order', ['spam', 'eggs']), ('order2', 'ham')])
    """
    grouped = OrderedDict()
    for key, value in self.serialize_pairs():
        if key in grouped:
            current = grouped[key]
            if isinstance(current, list):
                # Third and later occurrences extend the existing list.
                current.append(value)
            else:
                # Second occurrence: promote the scalar to a list.
                grouped[key] = [current, value]
        else:
            # First occurrence is stored as a bare value.
            grouped[key] = value
    return grouped

@property
def base_url(self):
"""Return the url of current html document or None if not available.
Expand Down
146 changes: 146 additions & 0 deletions tests/test_pyquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,13 @@ class TestManipulating(TestCase):
<input type="radio" value="Ham">
'''

# Fixture: a text input and a radio input whose ``value`` attribute
# contains a line break; used by test_val_for_inputs_with_newline.
html2_newline = '''
<input id="newline-text" type="text" name="order" value="S
pam">
<input id="newline-radio" type="radio" name="order" value="S
pam">
'''

html3 = '''
<textarea id="textarea-single">Spam</textarea>
<textarea id="textarea-multi">Spam
Expand Down Expand Up @@ -475,6 +482,11 @@ def test_val_for_inputs(self):
self.assertEqual(d('input:checkbox').val(), '44')
self.assertEqual(d('input:radio').val(), '45')

def test_val_for_inputs_with_newline(self):
    # The text input is expected to lose the line break in its value,
    # while the radio input is expected to keep it.
    doc = pq(self.html2_newline)
    text_input = doc('#newline-text')
    self.assertEqual(text_input.val(), 'Spam')
    radio_input = doc('#newline-radio')
    self.assertEqual(radio_input.val(), 'S\npam')

def test_val_for_textarea(self):
d = pq(self.html3)
self.assertEqual(d('#textarea-single').val(), 'Spam')
Expand Down Expand Up @@ -577,6 +589,140 @@ def test_html_replacement(self):
self.assertIn(replacement, new_html)


class TestAjax(TestCase):
    # Tests for the form-serialization helpers: serialize_pairs,
    # serialize_array, serialize and serialize_dict.

    # Controls tied to forms through the ``form`` attribute, some of them
    # placed outside the form they name.
    html = '''
<div id="div">
<input form="dispersed" name="order" value="spam">
</div>
<form id="dispersed">
<div><input name="order" value="eggs"></div>
<input form="dispersed" name="order" value="ham">
<input form="other-form" name="order" value="nothing">
<input form="" name="order" value="nothing">
</form>
<form id="other-form">
<input form="dispersed" name="order" value="tomato">
</form>
<form class="no-id">
<input form="dispersed" name="order" value="baked beans">
<input name="spam" value="Spam">
</form>
'''

    # A fieldset nested in a form, plus a form holding a datalist and
    # checked checkbox/radio inputs without a value attribute.
    html2 = '''
<form id="first">
<input name="order" value="spam">
<fieldset>
<input name="fieldset" value="eggs">
<input id="input" name="fieldset" value="ham">
</fieldset>
</form>
<form id="datalist">
<datalist><div><input name="datalist" value="eggs"></div></datalist>
<input type="checkbox" name="checkbox" checked>
<input type="radio" name="radio" checked>
</form>
'''

    # Controls that the tests expect to be filtered out, next to a single
    # plain named input.
    html3 = '''
<form>
<input name="order" value="spam">
<input id="noname" value="sausage">
<fieldset disabled>
<input name="order" value="sausage">
</fieldset>
<input name="disabled" value="ham" disabled>
<input type="submit" name="submit" value="Submit">
<input type="button" name="button" value="">
<input type="image" name="image" value="">
<input type="reset" name="reset" value="Reset">
<input type="file" name="file" value="">
<button type="submit" name="submit" value="submit"></button>
<input type="checkbox" name="spam">
<input type="radio" name="eggs">
</form>
'''

    # Values containing line breaks in an input, a multi-select and a
    # textarea.
    html4 = '''
<form>
<input name="spam" value="Spam/
spam">
<select name="order" multiple>
<option value="baked
beans" selected>
<option value="tomato" selected>
<option value="spam">
</select>
<textarea name="multiline">multiple
lines
of text</textarea>
</form>
'''

    def test_serialize_pairs_form_id(self):
        doc = pq(self.html)
        # A plain container is not expanded to the controls inside it.
        self.assertEqual(doc('#div').serialize_pairs(), [])
        dispersed_expected = [
            ('order', 'spam'),
            ('order', 'eggs'),
            ('order', 'ham'),
            ('order', 'tomato'),
            ('order', 'baked beans'),
        ]
        self.assertEqual(
            doc('#dispersed').serialize_pairs(), dispersed_expected)
        no_id_expected = [('spam', 'Spam')]
        self.assertEqual(doc('.no-id').serialize_pairs(), no_id_expected)

    def test_serialize_pairs_form_controls(self):
        doc = pq(self.html2)
        fieldset_expected = [('fieldset', 'eggs'), ('fieldset', 'ham')]
        self.assertEqual(
            doc('fieldset').serialize_pairs(), fieldset_expected)
        # Selecting the form, the fieldset and a single input together
        # yields the overlapping controls once per selected element.
        combined_expected = [
            ('order', 'spam'),
            ('fieldset', 'eggs'),
            ('fieldset', 'ham'),
            ('fieldset', 'eggs'),
            ('fieldset', 'ham'),
            ('fieldset', 'ham'),
        ]
        self.assertEqual(
            doc('#input, fieldset, #first').serialize_pairs(),
            combined_expected)
        datalist_expected = [
            ('datalist', 'eggs'),
            ('checkbox', 'on'),
            ('radio', 'on'),
        ]
        self.assertEqual(
            doc('#datalist').serialize_pairs(), datalist_expected)

    def test_serialize_pairs_filter_controls(self):
        doc = pq(self.html3)
        # Only the plain named, enabled input is expected to remain.
        expected = [('order', 'spam')]
        self.assertEqual(doc('form').serialize_pairs(), expected)

    def test_serialize_pairs_form_values(self):
        doc = pq(self.html4)
        expected = [
            ('spam', 'Spam/spam'),
            ('order', 'baked\r\nbeans'),
            ('order', 'tomato'),
            ('multiline', 'multiple\r\nlines\r\nof text'),
        ]
        self.assertEqual(doc('form').serialize_pairs(), expected)

    def test_serialize_array(self):
        doc = pq(self.html4)
        expected = [
            {'name': 'spam', 'value': 'Spam/spam'},
            {'name': 'order', 'value': 'baked\r\nbeans'},
            {'name': 'order', 'value': 'tomato'},
            {'name': 'multiline', 'value': 'multiple\r\nlines\r\nof text'},
        ]
        self.assertEqual(doc('form').serialize_array(), expected)

    def test_serialize(self):
        doc = pq(self.html4)
        expected = (
            'spam=Spam%2Fspam&order=baked%0D%0Abeans&order=tomato&'
            'multiline=multiple%0D%0Alines%0D%0Aof%20text'
        )
        self.assertEqual(doc('form').serialize(), expected)

    def test_serialize_dict(self):
        doc = pq(self.html4)
        expected = {
            'spam': 'Spam/spam',
            'order': ['baked\r\nbeans', 'tomato'],
            'multiline': 'multiple\r\nlines\r\nof text',
        }
        self.assertEqual(doc('form').serialize_dict(), expected)


class TestMakeLinks(TestCase):

html = '''
Expand Down

0 comments on commit 63d1715

Please sign in to comment.