From 63d1715dff44720d6d9c282b681fc9b244b0cb16 Mon Sep 17 00:00:00 2001 From: whybin <31753349+whybin@users.noreply.github.com> Date: Tue, 29 May 2018 20:06:53 -0700 Subject: [PATCH] Add API methods to serialize form-related elements according to spec Fix newlines in form element values --- pyquery/pyquery.py | 135 +++++++++++++++++++++++++++++++++++++- tests/test_pyquery.py | 146 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 280 insertions(+), 1 deletion(-) diff --git a/pyquery/pyquery.py b/pyquery/pyquery.py index 8d64857..8ffe7c8 100644 --- a/pyquery/pyquery.py +++ b/pyquery/pyquery.py @@ -4,6 +4,7 @@ # # Distributed under the BSD license, see LICENSE.txt from .cssselectpatch import JQueryTranslator +from collections import OrderedDict from .openers import url_opener from .text import extract_text from copy import deepcopy @@ -1039,7 +1040,11 @@ def _get_value(tag): return 'on' else: return val - # and everything else. + # + elif tag.tag == 'input': + val = self._copy(tag).attr('value') + return val.replace('\n', '') if val else '' + # everything else. return self._copy(tag).attr('value') or '' def _set_value(pq, value): @@ -1518,10 +1523,138 @@ def fn(self, *args, **kwargs): setattr(PyQuery, name, fn) fn = Fn() + + ######## + # AJAX # + ######## + + @with_camel_case_alias + def serialize_array(self): + """Serialize form elements as an array of dictionaries, whose structure + mirrors that produced by the jQuery API. Notably, it does not handle the + deprecated `keygen` form element. + + >>> d = PyQuery('
') + >>> d.serialize_array() == [{'name': 'order', 'value': 'spam'}] + True + >>> d.serializeArray() == [{'name': 'order', 'value': 'spam'}] + True + """ + return list(map( + lambda p: {'name': p[0], 'value': p[1]}, + self.serialize_pairs() + )) + + def serialize(self): + """Serialize form elements as a URL-encoded string. + + >>> h = ( + ... '
' + ... '
' + ... ) + >>> d = PyQuery(h) + >>> d.serialize() + 'order=spam&order2=baked%20beans' + """ + return urlencode(self.serialize_pairs()).replace('+', '%20') + + ##################################################### # Additional methods that are not in the jQuery API # ##################################################### + @with_camel_case_alias + def serialize_pairs(self): + """Serialize form elements as an array of 2-tuples conventional for + typical URL-parsing operations in Python. + + >>> d = PyQuery('
') + >>> d.serialize_pairs() + [('order', 'spam')] + >>> d.serializePairs() + [('order', 'spam')] + """ + # https://github.com/jquery/jquery/blob + # /2d4f53416e5f74fa98e0c1d66b6f3c285a12f0ce/src/serialize.js#L14 + _submitter_types = ['submit', 'button', 'image', 'reset', 'file'] + + controls = self._copy([]) + # Expand list of form controls + for el in self.items(): + if el[0].tag == 'form': + form_id = el.attr('id') + if form_id: + # Include inputs outside of their form owner + root = self._copy(el.root.getroot()) + controls.extend(root( + '#%s :not([form]):input, [form="%s"]:input' + % (form_id, form_id))) + else: + controls.extend(el(':not([form]):input')) + elif el[0].tag == 'fieldset': + controls.extend(el(':input')) + else: + controls.extend(el) + # Filter controls + selector = '[name]:enabled:not(button)' # Not serializing image button + selector += ''.join(map( + lambda s: ':not([type="%s"])' % s, + _submitter_types)) + controls = controls.filter(selector) + + def _filter_out_unchecked(_, el): + el = controls._copy(el) + return not el.is_(':checkbox:not(:checked)') \ + and not el.is_(':radio:not(:checked)') + controls = controls.filter(_filter_out_unchecked) + + # jQuery serializes inputs with the datalist element as an ancestor + # contrary to WHATWG spec as of August 2018 + # + # xpath = 'self::*[not(ancestor::datalist)]' + # results = [] + # for tag in controls: + # results.extend(tag.xpath(xpath, namespaces=controls.namespaces)) + # controls = controls._copy(results) + + # Serialize values + ret = [] + for field in controls: + val = self._copy(field).val() + if isinstance(val, list): + ret.extend(map( + lambda v: (field.attrib['name'], v.replace('\n', '\r\n')), + val + )) + else: + ret.append((field.attrib['name'], val.replace('\n', '\r\n'))) + return ret + + @with_camel_case_alias + def serialize_dict(self): + """Serialize form elements as an ordered dictionary. Multiple values + corresponding to the same input name are concatenated into one list. + + >>> d = PyQuery('''
+ ... + ... + ... + ...
''') + >>> d.serialize_dict() + OrderedDict([('order', ['spam', 'eggs']), ('order2', 'ham')]) + >>> d.serializeDict() + OrderedDict([('order', ['spam', 'eggs']), ('order2', 'ham')]) + """ + ret = OrderedDict() + for name, val in self.serialize_pairs(): + if name not in ret: + ret[name] = val + elif not isinstance(ret[name], list): + ret[name] = [ret[name], val] + else: + ret[name].append(val) + return ret + @property def base_url(self): """Return the url of current html document or None if not available. diff --git a/tests/test_pyquery.py b/tests/test_pyquery.py index 26da186..414eb9a 100644 --- a/tests/test_pyquery.py +++ b/tests/test_pyquery.py @@ -388,6 +388,13 @@ class TestManipulating(TestCase): ''' + html2_newline = ''' + + + ''' + html3 = ''' + + ''' + + def test_serialize_pairs_form_id(self): + d = pq(self.html) + self.assertEqual(d('#div').serialize_pairs(), []) + self.assertEqual(d('#dispersed').serialize_pairs(), [ + ('order', 'spam'), ('order', 'eggs'), ('order', 'ham'), + ('order', 'tomato'), ('order', 'baked beans'), + ]) + self.assertEqual(d('.no-id').serialize_pairs(), [ + ('spam', 'Spam'), + ]) + + def test_serialize_pairs_form_controls(self): + d = pq(self.html2) + self.assertEqual(d('fieldset').serialize_pairs(), [ + ('fieldset', 'eggs'), ('fieldset', 'ham'), + ]) + self.assertEqual(d('#input, fieldset, #first').serialize_pairs(), [ + ('order', 'spam'), ('fieldset', 'eggs'), ('fieldset', 'ham'), + ('fieldset', 'eggs'), ('fieldset', 'ham'), ('fieldset', 'ham'), + ]) + self.assertEqual(d('#datalist').serialize_pairs(), [ + ('datalist', 'eggs'), ('checkbox', 'on'), ('radio', 'on'), + ]) + + def test_serialize_pairs_filter_controls(self): + d = pq(self.html3) + self.assertEqual(d('form').serialize_pairs(), [ + ('order', 'spam') + ]) + + def test_serialize_pairs_form_values(self): + d = pq(self.html4) + self.assertEqual(d('form').serialize_pairs(), [ + ('spam', 'Spam/spam'), ('order', 'baked\r\nbeans'), + ('order', 'tomato'), ('multiline', 'multiple\r\nlines\r\nof text'), + ]) + + def test_serialize_array(self): + d = pq(self.html4) + self.assertEqual(d('form').serialize_array(), [ + {'name': 'spam', 'value': 'Spam/spam'}, + {'name': 'order', 'value': 'baked\r\nbeans'}, + {'name': 'order', 'value': 'tomato'}, + {'name': 'multiline', 'value': 'multiple\r\nlines\r\nof text'}, + ]) + + def test_serialize(self): + d = pq(self.html4) + self.assertEqual( + d('form').serialize(), + 'spam=Spam%2Fspam&order=baked%0D%0Abeans&order=tomato&' + 'multiline=multiple%0D%0Alines%0D%0Aof%20text' + ) + + def test_serialize_dict(self): + d = pq(self.html4) + self.assertEqual(d('form').serialize_dict(), { + 'spam': 'Spam/spam', + 'order': ['baked\r\nbeans', 'tomato'], + 'multiline': 'multiple\r\nlines\r\nof text', + }) + + class TestMakeLinks(TestCase): html = '''