Skip to content

Commit

Permalink
[omnibox] Updates to handling of trivial subdomains.
Browse files Browse the repository at this point in the history
1. Revert the change where we decided to ignore eTLDs
(https://crrev.com/c/644491). We figured that none of these would allow
"www" or "m" as a subdomain but the m.tumblr.com case proves this
wrong.
2. Only strip leading trivial subdomains.

Bug: 881694
Change-Id: Ia8b7bb02611edc4d6c6c2c634f428dee0bff11e2
Reviewed-on: https://chromium-review.googlesource.com/1215420
Reviewed-by: Tommy Li <[email protected]>
Reviewed-by: Justin Donnelly <[email protected]>
Reviewed-by: Carlos IL <[email protected]>
Commit-Queue: Tommy Li <[email protected]>
Cr-Commit-Position: refs/heads/master@{#589985}
  • Loading branch information
Tommy C. Li authored and Commit Bot committed Sep 10, 2018
1 parent 04d3c8a commit 1883bad
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 23 deletions.
13 changes: 9 additions & 4 deletions components/url_formatter/url_formatter.cc
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ class HostComponentTransform : public AppendComponentTransform {
std::string domain_and_registry =
net::registry_controlled_domains::GetDomainAndRegistry(
component_text,
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES);

// If there is no domain and registry, we may be looking at an intranet
// or otherwise non-standard host. Leave those alone.
Expand All @@ -107,11 +107,16 @@ class HostComponentTransform : public AppendComponentTransform {
component_text.end() - domain_and_registry.length(), ".");
tokenizer.set_options(base::StringTokenizer::RETURN_DELIMS);

bool reached_non_trivial_subdomain = false;
std::string transformed_subdomain;
while (tokenizer.GetNext()) {
// Append delimiters and non-trivial subdomains to the new subdomain part.
if (tokenizer.token_is_delim() ||
!IsTrivialSubdomain(tokenizer.token_piece())) {
// Record whether a trivial subdomain has been encountered.
if (!IsTrivialSubdomain(tokenizer.token_piece()))
reached_non_trivial_subdomain = true;

// Append delimiters and everything encountered at or after the first
// non-trivial subdomain.
if (tokenizer.token_is_delim() || reached_non_trivial_subdomain) {
transformed_subdomain += tokenizer.token();
continue;
}
Expand Down
40 changes: 21 additions & 19 deletions components/url_formatter/url_formatter_unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1223,29 +1223,27 @@ TEST(UrlFormatterTest, FormatUrl) {

// -------- omit trivial subdomains --------
#if defined(OS_ANDROID) || defined(OS_IOS)
{"omit trivial subdomains - trim m", "http://m.google.com/",
{"omit trivial subdomains - trim leading m", "http://m.google.com/",
kFormatUrlOmitTrivialSubdomains, net::UnescapeRule::NORMAL,
L"http://google.com/", 7},
{"omit trivial subdomains - trim m and www", "http://m.www.google.com/",
{"omit trivial subdomains - trim leading m and www",
"http://m.www.google.com/",
kFormatUrlOmitTrivialSubdomains, net::UnescapeRule::NORMAL,
L"http://google.com/", 7},
{"omit trivial subdomains - trim m from middle",
"http://en.m.wikipedia.org/", kFormatUrlOmitTrivialSubdomains,
net::UnescapeRule::NORMAL, L"http://en.wikipedia.org/", 7},
#else // !(defined(OS_ANDROID) || defined(OS_IOS))
{"omit trivial subdomains - don't trim m on desktop",
{"omit trivial subdomains - don't trim leading m on desktop",
"http://m.google.com/", kFormatUrlOmitTrivialSubdomains,
net::UnescapeRule::NORMAL, L"http://m.google.com/", 7},
{"omit trivial subdomains - trim www but leave m on desktop",
{"omit trivial subdomains - don't trim www after a leading m on desktop",
"http://m.www.google.com/", kFormatUrlOmitTrivialSubdomains,
net::UnescapeRule::NORMAL, L"http://m.google.com/", 7},
{"omit trivial subdomains - don't trim m from middle on desktop",
"http://en.m.wikipedia.org/", kFormatUrlOmitTrivialSubdomains,
net::UnescapeRule::NORMAL, L"http://en.m.wikipedia.org/", 7},
net::UnescapeRule::NORMAL, L"http://m.www.google.com/", 7},
#endif
{"omit trivial subdomains - don't do blind substring matches for m",
"http://foom.google.com/", kFormatUrlOmitTrivialSubdomains,
net::UnescapeRule::NORMAL, L"http://foom.google.com/", 7},
{"omit trivial subdomains - don't trim www from middle",
"http://en.www.wikipedia.org/", kFormatUrlOmitTrivialSubdomains,
net::UnescapeRule::NORMAL, L"http://en.www.wikipedia.org/", 7},
{"omit trivial subdomains - don't do blind substring matches for www",
"http://foowww.google.com/", kFormatUrlOmitTrivialSubdomains,
net::UnescapeRule::NORMAL, L"http://foowww.google.com/", 7},
{"omit trivial subdomains - don't crash on multiple delimiters",
"http://www....foobar...google.com/", kFormatUrlOmitTrivialSubdomains,
net::UnescapeRule::NORMAL, L"http://...foobar...google.com/", 7},
Expand All @@ -1262,7 +1260,7 @@ TEST(UrlFormatterTest, FormatUrl) {
{"omit trivial subdomains - sanity check for IDN",
"http://www.xn--cy2a840a.www.xn--cy2a840a.com",
kFormatUrlOmitTrivialSubdomains, net::UnescapeRule::NORMAL,
L"http://\x89c6\x9891.\x89c6\x9891.com/", 7},
L"http://\x89c6\x9891.www.\x89c6\x9891.com/", 7},

{"omit trivial subdomains but leave registry and domain alone - trivial",
"http://google.com/", kFormatUrlOmitTrivialSubdomains,
Expand All @@ -1273,6 +1271,10 @@ TEST(UrlFormatterTest, FormatUrl) {
{"omit trivial subdomains but leave registry and domain alone - co.uk",
"http://m.co.uk/", kFormatUrlOmitTrivialSubdomains,
net::UnescapeRule::NORMAL, L"http://m.co.uk/", 7},
{"omit trivial subdomains but leave eTLD (effective TLD) alone",
"http://www.appspot.com/", kFormatUrlOmitTrivialSubdomains,
net::UnescapeRule::NORMAL, L"http://www.appspot.com/", 7},


{"omit trivial subdomains but leave intranet hostnames alone",
"http://router/", kFormatUrlOmitTrivialSubdomains,
Expand Down Expand Up @@ -1742,15 +1744,15 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) {

#if defined(OS_ANDROID) || defined(OS_IOS)
const size_t strip_trivial_subdomains_offsets_2[] = {
0, 1, 2, 3, 4, 5, 6, 7, kNpos, 7, 8, 9,
10, kNpos, kNpos, kNpos, 10, 11, 12, 13, 14, 15, 16, 17};
0, 1, 2, 3, 4, 5, 6, 7, kNpos, 7, 8, 9,
10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21};
CheckAdjustedOffsets(
"http://m.en.www.foo.com/", kFormatUrlOmitTrivialSubdomains,
net::UnescapeRule::NORMAL, strip_trivial_subdomains_offsets_2);
#else // !(defined(OS_ANDROID) || defined(OS_IOS))
const size_t strip_trivial_subdomains_offsets_2[] = {
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, kNpos, kNpos, kNpos, 12, 13, 14, 15, 16, 17, 18, 19};
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23};
CheckAdjustedOffsets(
"http://m.en.www.foo.com/", kFormatUrlOmitTrivialSubdomains,
net::UnescapeRule::NORMAL, strip_trivial_subdomains_offsets_2);
Expand Down

0 comments on commit 1883bad

Please sign in to comment.