Skip to content

Commit

Permalink
First pass at a FormElement
Browse files Browse the repository at this point in the history
The FormElement extends Element to provide ready access to a form's
controls, and to allow the form to be submitted. It also connects forms
to their controls in situations where the created DOM tree does not have
the form element as a parent of the control, such as when the form tag is
in a TR but the control is in a TD. In that case the form tag gets
reparented.
  • Loading branch information
jhy committed Feb 8, 2013
1 parent ea41269 commit c5792eb
Show file tree
Hide file tree
Showing 8 changed files with 329 additions and 9 deletions.
7 changes: 7 additions & 0 deletions src/main/java/org/jsoup/Connection.java
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,13 @@ public enum Method {
*/
public Connection data(String key, String value);

/**
 * Adds all of the supplied data to the request data parameters. Each key/value pair in the collection is added
 * to the request individually.
 * @param data collection of data parameters (the HttpConnection implementation rejects a null collection)
 * @return this Connection, for chaining
 */
public Connection data(Collection<KeyVal> data);

/**
* Adds all of the supplied data to the request data parameters
* @param data map of data parameters
Expand Down
8 changes: 8 additions & 0 deletions src/main/java/org/jsoup/helper/HttpConnection.java
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,14 @@ public Connection data(String... keyvals) {
return this;
}

/**
 * Adds every key/value pair in the supplied collection to the request's data parameters.
 * @param data collection of data parameters; must not be null
 * @return this Connection, for chaining
 */
public Connection data(Collection<Connection.KeyVal> data) {
    Validate.notNull(data, "Data collection must not be null");
    // hand each pair to the request one at a time
    for (Connection.KeyVal keyVal : data) {
        req.data(keyVal);
    }
    return this;
}

public Connection header(String name, String value) {
req.header(name, value);
return this;
Expand Down
94 changes: 94 additions & 0 deletions src/main/java/org/jsoup/nodes/FormElement.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package org.jsoup.nodes;

import org.jsoup.Connection;
import org.jsoup.Jsoup;
import org.jsoup.helper.HttpConnection;
import org.jsoup.helper.Validate;
import org.jsoup.parser.Tag;
import org.jsoup.select.Elements;

import java.util.ArrayList;
import java.util.List;

/**
* A HTML Form Element provides ready access to the form fields/controls that are associated with it. It also allows a
* form to easily be submitted.
*/
public class FormElement extends Element {
    // the controls associated with this form, in the order they were added via addElement
    private final Elements elements = new Elements();

    /**
     * Create a new, standalone form element.
     *
     * @param tag        tag of this element
     * @param baseUri    the base URI
     * @param attributes initial attributes
     */
    public FormElement(Tag tag, String baseUri, Attributes attributes) {
        super(tag, baseUri, attributes);
    }

    /**
     * Get the list of form control elements associated with this form.
     * @return form controls associated with this element.
     */
    public Elements elements() {
        return elements;
    }

    /**
     * Add a form control element to this form.
     * @param element form control to add
     * @return this form element, for chaining
     */
    public FormElement addElement(Element element) {
        elements.add(element);
        return this;
    }

    /**
     * Prepare to submit this form. A Connection object is created with the request set up from the form values. You
     * can then set up other options (like user-agent, timeout, cookies), then execute it.
     * <p>
     * The request URL is the form's absolute <code>action</code> URL, or the base URI if the form has no
     * <code>action</code> attribute. The request method is POST if the form's <code>method</code> attribute is
     * "post" (in any case), and GET otherwise.
     * @return a connection prepared from the values of this form.
     * @throws IllegalArgumentException if the form's absolute action URL cannot be determined. Make sure you pass the
     * document's base URI when parsing.
     */
    public Connection submit() {
        String action = hasAttr("action") ? absUrl("action") : baseUri();
        Validate.notEmpty(action, "Could not determine a form action URL for submit. Ensure you set a base URI when parsing.");
        // equalsIgnoreCase keeps the comparison independent of the JVM's default locale
        Connection.Method method = attr("method").equalsIgnoreCase("POST") ?
            Connection.Method.POST : Connection.Method.GET;

        return Jsoup.connect(action)
            .data(formData())
            .method(method);
    }

    /**
     * Get the data that this form submits. The returned list is a copy of the data, and changes to the contents of the
     * list will not be reflected in the DOM.
     * <p>
     * Controls are skipped if their tag is not form submittable, if they are disabled, or if they have no name. A
     * select contributes one entry per selected option. A checkbox or radio input contributes an entry only when it is
     * checked, using "on" as the value if it has no value attribute. Every other control contributes its value.
     * @return a list of key vals
     */
    public List<Connection.KeyVal> formData() {
        List<Connection.KeyVal> data = new ArrayList<Connection.KeyVal>();

        // iterate the form control elements and accumulate their values
        for (Element el : elements) {
            if (!el.tag().isFormSubmittable()) continue; // contents are form listable, a superset of submittable
            if (el.hasAttr("disabled")) continue; // disabled controls are never part of a submission
            String name = el.attr("name");
            if (name.length() == 0) continue;

            if ("select".equals(el.tagName())) {
                for (Element option : el.select("option[selected]")) {
                    data.add(HttpConnection.KeyVal.create(name, option.val()));
                }
            } else if (isCheckable(el)) {
                // unchecked checkboxes and radio buttons contribute nothing to the submission
                if (el.hasAttr("checked")) {
                    // with no value attribute at all, a checked control submits "on"
                    String value = el.hasAttr("value") ? el.val() : "on";
                    data.add(HttpConnection.KeyVal.create(name, value));
                }
            } else {
                data.add(HttpConnection.KeyVal.create(name, el.val()));
            }
        }
        return data;
    }

    // Tests if the control is an input of type checkbox or radio, which only submits a value when checked.
    private static boolean isCheckable(Element el) {
        if (!"input".equals(el.tagName())) return false;
        String type = el.attr("type");
        return "checkbox".equalsIgnoreCase(type) || "radio".equalsIgnoreCase(type);
    }
}
35 changes: 31 additions & 4 deletions src/main/java/org/jsoup/parser/HtmlTreeBuilder.java
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import org.jsoup.helper.StringUtil;
import org.jsoup.helper.Validate;
import org.jsoup.nodes.*;
import org.jsoup.select.Elements;

import java.util.ArrayList;
import java.util.Iterator;
Expand All @@ -20,7 +21,7 @@ class HtmlTreeBuilder extends TreeBuilder {

private boolean baseUriSetFromDoc = false;
private Element headElement; // the current head element
private Element formElement; // the current form element
private FormElement formElement; // the current form element
private Element contextElement; // fragment parse context -- could be null even if fragment parsing
private DescendableLinkedList<Element> formattingElements = new DescendableLinkedList<Element>(); // active (open) formatting elements
private List<Token.Character> pendingTableCharacters = new ArrayList<Token.Character>(); // chars in table to be shifted out
Expand Down Expand Up @@ -68,7 +69,17 @@ else if (contextTag.equals("plaintext"))
doc.appendChild(root);
stack.push(root);
resetInsertionMode();
// todo: setup form element to nearest form on context (up ancestor chain)

// setup form element to nearest form on context (up ancestor chain). ensures form controls are associated
// with form correctly
Elements contextChain = context.parents();
contextChain.add(0, context);
for (Element parent: contextChain) {
if (parent instanceof FormElement) {
formElement = (FormElement) parent;
break;
}
}
}

runParser();
Expand Down Expand Up @@ -184,6 +195,16 @@ Element insertEmpty(Token.StartTag startTag) {
return el;
}

/**
 * Creates a FormElement from the given start tag, makes it the builder's current form element, and inserts it
 * into the tree.
 * @param startTag the start tag token to build the form from
 * @param onStack if true, the new form is also added to the builder's stack
 * @return the newly created form element
 */
FormElement insertForm(Token.StartTag startTag, boolean onStack) {
    FormElement form = new FormElement(Tag.valueOf(startTag.name()), baseUri, startTag.attributes);
    setFormElement(form);
    insertNode(form);
    if (onStack) {
        stack.add(form);
    }
    return form;
}

void insert(Token.Comment commentToken) {
Comment comment = new Comment(commentToken.getData(), baseUri);
insertNode(comment);
Expand All @@ -207,6 +228,12 @@ else if (isFosterInserts())
insertInFosterParent(node);
else
currentElement().appendChild(node);

// connect form controls to their form element
if (node instanceof Element && ((Element) node).tag().isFormListed()) {
if (formElement != null)
formElement.addElement((Element) node);
}
}

Element pop() {
Expand Down Expand Up @@ -481,11 +508,11 @@ void setFosterInserts(boolean fosterInserts) {
this.fosterInserts = fosterInserts;
}

Element getFormElement() {
FormElement getFormElement() {
return formElement;
}

void setFormElement(Element formElement) {
void setFormElement(FormElement formElement) {
this.formElement = formElement;
}

Expand Down
8 changes: 4 additions & 4 deletions src/main/java/org/jsoup/parser/HtmlTreeBuilderState.java
Original file line number Diff line number Diff line change
Expand Up @@ -346,8 +346,7 @@ boolean process(Token t, HtmlTreeBuilder tb) {
if (tb.inButtonScope("p")) {
tb.process(new Token.EndTag("p"));
}
Element form = tb.insert(startTag);
tb.setFormElement(form);
tb.insertForm(startTag, true);
} else if (name.equals("li")) {
tb.framesetOk(false);
LinkedList<Element> stack = tb.getStack();
Expand Down Expand Up @@ -856,12 +855,12 @@ boolean process(Token t, HtmlTreeBuilder tb) {
if (tb.getFormElement() != null)
return false;
else {
Element form = tb.insertEmpty(startTag);
tb.setFormElement(form);
tb.insertForm(startTag, false);
}
} else {
return anythingElse(t, tb);
}
return true; // todo: check if should return processed http://www.whatwg.org/specs/web-apps/current-work/multipage/tree-construction.html#parsing-main-intable
} else if (t.isEndTag()) {
Token.EndTag endTag = t.asEndTag();
String name = endTag.name();
Expand All @@ -881,6 +880,7 @@ boolean process(Token t, HtmlTreeBuilder tb) {
} else {
return anythingElse(t, tb);
}
return true; // todo: as above todo
} else if (t.isEOF()) {
if (tb.currentElement().nodeName().equals("html"))
tb.error(this);
Expand Down
45 changes: 44 additions & 1 deletion src/main/java/org/jsoup/parser/Tag.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ public class Tag {
private boolean empty = false; // can hold nothing; e.g. img
private boolean selfClosing = false; // can self close (<foo />). used for unknown tags that self close, without forcing them as empty.
private boolean preserveWhitespace = false; // for pre, textarea, script etc
private boolean formList = false; // a control that appears in forms: input, textarea, output etc
private boolean formSubmit = false; // a control that can be submitted in a form: input etc

private Tag(String tagName) {
this.tagName = tagName.toLowerCase();
Expand Down Expand Up @@ -153,6 +155,22 @@ public boolean preserveWhitespace() {
return preserveWhitespace;
}

/**
 * Get if this tag represents a control associated with a form. E.g. input, textarea, output
 * @return true if this tag is one of the form listed tags, and so is associated with a form
 */
public boolean isFormListed() {
return formList;
}

/**
 * Get if this tag represents an element that should be submitted with a form. E.g. input, select, textarea
 * @return true if this tag is one of the form submit tags, and so is submittable with a form
 */
public boolean isFormSubmittable() {
return formSubmit;
}

Tag setSelfClosing() {
selfClosing = true;
return this;
Expand All @@ -172,6 +190,8 @@ public boolean equals(Object o) {
if (isBlock != tag.isBlock) return false;
if (preserveWhitespace != tag.preserveWhitespace) return false;
if (selfClosing != tag.selfClosing) return false;
if (formList != tag.formList) return false;
if (formSubmit != tag.formSubmit) return false;
if (!tagName.equals(tag.tagName)) return false;

return true;
Expand All @@ -187,6 +207,8 @@ public int hashCode() {
result = 31 * result + (empty ? 1 : 0);
result = 31 * result + (selfClosing ? 1 : 0);
result = 31 * result + (preserveWhitespace ? 1 : 0);
result = 31 * result + (formList ? 1 : 0);
result = 31 * result + (formSubmit ? 1 : 0);
return result;
}

Expand Down Expand Up @@ -218,7 +240,16 @@ public String toString() {
"title", "a", "p", "h1", "h2", "h3", "h4", "h5", "h6", "pre", "address", "li", "th", "td", "script", "style",
"ins", "del", "s"
};
private static final String[] preserveWhitespaceTags = {"pre", "plaintext", "title", "textarea"};
private static final String[] preserveWhitespaceTags = {
"pre", "plaintext", "title", "textarea"
};
// todo: I think we just need submit tags, and can scrub listed
// tags of controls that are associated with (listed in) a form; these tags get their formList flag set
private static final String[] formListedTags = {
"button", "fieldset", "input", "keygen", "object", "output", "select", "textarea"
};
// tags of controls that can be submitted with a form; these tags get their formSubmit flag set. This is the
// listed set minus fieldset and output
private static final String[] formSubmitTags = {
"button", "input", "keygen", "object", "select", "textarea"
};

static {
// creates
Expand Down Expand Up @@ -254,6 +285,18 @@ public String toString() {
Validate.notNull(tag);
tag.preserveWhitespace = true;
}

for (String tagName : formListedTags) {
Tag tag = tags.get(tagName);
Validate.notNull(tag);
tag.formList = true;
}

for (String tagName : formSubmitTags) {
Tag tag = tags.get(tagName);
Validate.notNull(tag);
tag.formSubmit = true;
}
}

private static void register(Tag tag) {
Expand Down
Loading

0 comments on commit c5792eb

Please sign in to comment.