forked from yasserg/crawler4j
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Parse CSS for links matching url(*) in @import and @font-face rules
- Loading branch information
Federico Tolomei
committed
Dec 15, 2018
1 parent
d7366fa
commit 8043a98
Showing
5 changed files
with
136 additions
and
20 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
74 changes: 74 additions & 0 deletions
74
crawler4j/src/test/groovy/edu/uci/ics/crawler4j/parser/CssParseDataTest.groovy
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
package edu.uci.ics.crawler4j.parser | ||
|
||
import edu.uci.ics.crawler4j.url.WebURL | ||
import spock.lang.Specification | ||
|
||
/** | ||
* Test the CssParseData class. | ||
* | ||
* Spock specification. Each feature method loads a CSS fixture from the test | ||
* classpath, passes it to CssParseData as text content, calls | ||
* setOutgoingUrls(WebURL) with the URL of the stylesheet, and then checks the | ||
* resulting outgoingUrls set. | ||
* | ||
* @author Federico Tolomei <[email protected]> | ||
*/ | ||
class CssParseDataTest extends Specification { | ||
|
||
// Loads /css/quotes.css and expects exactly three outgoing URLs. | ||
// NOTE(review): the fixture presumably holds one double-quoted, one single-quoted | ||
// and one unquoted url(...) value -- confirm against the resource file. | ||
def "CSS urls parsing quotes"() { | ||
given: "css parser" | ||
CssParseData parseData = new CssParseData() | ||
parseData.setTextContent(this.getClass().getResource( '/css/quotes.css' ).text) | ||
|
||
and: "configure css parser" | ||
// URL of the stylesheet itself; handed to setOutgoingUrls below. | ||
WebURL webUrl = new WebURL() | ||
webUrl.setURL("http://example.com/css.css") | ||
|
||
when: "parse css" | ||
parseData.setOutgoingUrls(webUrl) | ||
Set<WebURL> urls = parseData.outgoingUrls | ||
|
||
then: "urls from css" | ||
// Only the number of extracted links is checked here, not their values. | ||
assert urls.size() == 3 | ||
} | ||
|
||
// Loads /css/absolute.css and expects exactly three outgoing URLs, all on | ||
// http://example.com/css/. | ||
// NOTE(review): the fixture appears to also contain a data: URI; the count of 3 | ||
// only holds if that entry is skipped -- confirm against the resource file. | ||
def "CSS absolute urls paths"() { | ||
given: "css parser" | ||
CssParseData parseData = new CssParseData() | ||
parseData.setTextContent(this.getClass().getResource( '/css/absolute.css' ).text) | ||
|
||
and: "configure css parser" | ||
// URL of the stylesheet itself; handed to setOutgoingUrls below. | ||
WebURL webUrl = new WebURL() | ||
webUrl.setURL("http://example.com/css.css") | ||
|
||
when: "parse css" | ||
parseData.setOutgoingUrls(webUrl) | ||
Set<WebURL> urls = parseData.outgoingUrls | ||
|
||
then: "urls from css" | ||
assert urls.size() == 3 | ||
|
||
and: | ||
// Compare on the string form of each WebURL so that contains() matches plain strings. | ||
List<String> mapped = urls.collect { x -> x.getURL() } | ||
assert mapped.contains("http://example.com/css/absolute_no_proto.png") | ||
assert mapped.contains("http://example.com/css/absolute_path.png") | ||
assert mapped.contains("http://example.com/css/absolute_with_domain.png") | ||
} | ||
|
||
// Loads /css/relative.css with the stylesheet located at /asset/css/css.css and | ||
// expects exactly two outgoing URLs: one under /asset/images/ and one at the site root. | ||
// NOTE(review): presumably these come from "../" and "../../" references in the | ||
// fixture -- confirm against the resource file. | ||
def "CSS relative urls paths"() { | ||
given: "css parser" | ||
CssParseData parseData = new CssParseData() | ||
parseData.setTextContent(this.getClass().getResource( '/css/relative.css' ).text) | ||
|
||
and: "configure css parser" | ||
// The stylesheet sits two directories deep, unlike the other feature methods. | ||
WebURL webUrl = new WebURL() | ||
webUrl.setURL("http://example.com/asset/css/css.css") | ||
|
||
when: "parse css" | ||
parseData.setOutgoingUrls(webUrl) | ||
Set<WebURL> urls = parseData.outgoingUrls | ||
|
||
then: "urls from css" | ||
assert urls.size() == 2 | ||
|
||
and: | ||
// Compare on the string form of each WebURL so that contains() matches plain strings. | ||
// The spelling "backgound" is part of the expected URL strings; do not "correct" it here alone. | ||
List<String> mapped = urls.collect { x -> x.getURL() } | ||
assert mapped.contains("http://example.com/asset/images/backgound_one.jpg") | ||
assert mapped.contains("http://example.com/backgound_two.jpg") | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
.absolute_with_domain { | ||
background-image: url("http://example.com/css/absolute_with_domain.png"); | ||
} | ||
|
||
.absolute_no_proto { | ||
background-image: url("//example.com/css/absolute_no_proto.png"); | ||
} | ||
|
||
|
||
.absolute_path { | ||
background-image: url("/css/absolute_path.png"); | ||
} | ||
|
||
|
||
.data { | ||
background: url(data:image/gif;base64,IGNORETHISURL) no-repeat left center; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
.double { | ||
background-image: url("//css/quotes/double.jpg"); | ||
} | ||
|
||
.single { | ||
background-image: url('//css/quotes/single.jpg'); | ||
} | ||
|
||
.noquote { | ||
background-image: url(//pix/quotes/none.jpg); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
.relative_one { | ||
background-image: url("../images/backgound_one.jpg"); | ||
} | ||
|
||
.relative_two { | ||
background-image: url("../../backgound_two.jpg"); | ||
} | ||
|