From 1883bad43889c9f0a5d7f6ca3dabff648c73bb90 Mon Sep 17 00:00:00 2001 From: "Tommy C. Li" Date: Mon, 10 Sep 2018 18:20:46 +0000 Subject: [PATCH] [omnibox] Updates to handling of trivial subdomains. 1. Revert the change where we decided to ignore eTLDs (https://crrev.com/c/644491). We figured that none of these would allow "www" or "m" as a subdomain but the m.tumblr.com case proves this wrong. 2. Only strip leading trivial subdomains. Bug: 881694 Change-Id: Ia8b7bb02611edc4d6c6c2c634f428dee0bff11e2 Reviewed-on: https://chromium-review.googlesource.com/1215420 Reviewed-by: Tommy Li Reviewed-by: Justin Donnelly Reviewed-by: Carlos IL Commit-Queue: Tommy Li Cr-Commit-Position: refs/heads/master@{#589985} --- components/url_formatter/url_formatter.cc | 13 ++++-- .../url_formatter/url_formatter_unittest.cc | 40 ++++++++++--------- 2 files changed, 30 insertions(+), 23 deletions(-) diff --git a/components/url_formatter/url_formatter.cc b/components/url_formatter/url_formatter.cc index 726ffec12cd18d..1c122348ca9b46 100644 --- a/components/url_formatter/url_formatter.cc +++ b/components/url_formatter/url_formatter.cc @@ -94,7 +94,7 @@ class HostComponentTransform : public AppendComponentTransform { std::string domain_and_registry = net::registry_controlled_domains::GetDomainAndRegistry( component_text, - net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); + net::registry_controlled_domains::INCLUDE_PRIVATE_REGISTRIES); // If there is no domain and registry, we may be looking at an intranet // or otherwise non-standard host. Leave those alone. @@ -107,11 +107,16 @@ class HostComponentTransform : public AppendComponentTransform { component_text.end() - domain_and_registry.length(), "."); tokenizer.set_options(base::StringTokenizer::RETURN_DELIMS); + bool reached_non_trivial_subdomain = false; std::string transformed_subdomain; while (tokenizer.GetNext()) { - // Append delimiters and non-trivial subdomains to the new subdomain part. - if (tokenizer.token_is_delim() || - !IsTrivialSubdomain(tokenizer.token_piece())) { + // Record whether a trivial subdomain has been encountered. + if (!IsTrivialSubdomain(tokenizer.token_piece())) + reached_non_trivial_subdomain = true; + + // Append delimiters and everything encountered at or after the first + // non-trivial subdomain. + if (tokenizer.token_is_delim() || reached_non_trivial_subdomain) { transformed_subdomain += tokenizer.token(); continue; } diff --git a/components/url_formatter/url_formatter_unittest.cc b/components/url_formatter/url_formatter_unittest.cc index e34c824e318fa5..319db5b8c4865e 100644 --- a/components/url_formatter/url_formatter_unittest.cc +++ b/components/url_formatter/url_formatter_unittest.cc @@ -1223,29 +1223,27 @@ TEST(UrlFormatterTest, FormatUrl) { // -------- omit trivial subdomains -------- #if defined(OS_ANDROID) || defined(OS_IOS) - {"omit trivial subdomains - trim m", "http://m.google.com/", + {"omit trivial subdomains - trim leading m", "http://m.google.com/", kFormatUrlOmitTrivialSubdomains, net::UnescapeRule::NORMAL, L"http://google.com/", 7}, - {"omit trivial subdomains - trim m and www", "http://m.www.google.com/", + {"omit trivial subdomains - trim leading m and www", + "http://m.www.google.com/", kFormatUrlOmitTrivialSubdomains, net::UnescapeRule::NORMAL, L"http://google.com/", 7}, - {"omit trivial subdomains - trim m from middle", - "http://en.m.wikipedia.org/", kFormatUrlOmitTrivialSubdomains, - net::UnescapeRule::NORMAL, L"http://en.wikipedia.org/", 7}, #else // !(defined(OS_ANDROID) || defined(OS_IOS)) - {"omit trivial subdomains - don't trim m on desktop", + {"omit trivial subdomains - don't trim leading m on desktop", "http://m.google.com/", kFormatUrlOmitTrivialSubdomains, net::UnescapeRule::NORMAL, L"http://m.google.com/", 7}, - {"omit trivial subdomains - trim www but leave m on desktop", + {"omit trivial subdomains - don't trim www after a leading m on desktop", "http://m.www.google.com/", kFormatUrlOmitTrivialSubdomains, - net::UnescapeRule::NORMAL, L"http://m.google.com/", 7}, - {"omit trivial subdomains - don't trim m from middle on desktop", - "http://en.m.wikipedia.org/", kFormatUrlOmitTrivialSubdomains, - net::UnescapeRule::NORMAL, L"http://en.m.wikipedia.org/", 7}, + net::UnescapeRule::NORMAL, L"http://m.www.google.com/", 7}, #endif - {"omit trivial subdomains - don't do blind substring matches for m", - "http://foom.google.com/", kFormatUrlOmitTrivialSubdomains, - net::UnescapeRule::NORMAL, L"http://foom.google.com/", 7}, + {"omit trivial subdomains - don't trim www from middle", + "http://en.www.wikipedia.org/", kFormatUrlOmitTrivialSubdomains, + net::UnescapeRule::NORMAL, L"http://en.www.wikipedia.org/", 7}, + {"omit trivial subdomains - don't do blind substring matches for www", + "http://foowww.google.com/", kFormatUrlOmitTrivialSubdomains, + net::UnescapeRule::NORMAL, L"http://foowww.google.com/", 7}, {"omit trivial subdomains - don't crash on multiple delimiters", "http://www....foobar...google.com/", kFormatUrlOmitTrivialSubdomains, net::UnescapeRule::NORMAL, L"http://...foobar...google.com/", 7}, @@ -1262,7 +1260,7 @@ TEST(UrlFormatterTest, FormatUrl) { {"omit trivial subdomains - sanity check for IDN", "http://www.xn--cy2a840a.www.xn--cy2a840a.com", kFormatUrlOmitTrivialSubdomains, net::UnescapeRule::NORMAL, - L"http://\x89c6\x9891.\x89c6\x9891.com/", 7}, + L"http://\x89c6\x9891.www.\x89c6\x9891.com/", 7}, {"omit trivial subdomains but leave registry and domain alone - trivial", "http://google.com/", kFormatUrlOmitTrivialSubdomains, @@ -1273,6 +1271,10 @@ TEST(UrlFormatterTest, FormatUrl) { {"omit trivial subdomains but leave registry and domain alone - co.uk", "http://m.co.uk/", kFormatUrlOmitTrivialSubdomains, net::UnescapeRule::NORMAL, L"http://m.co.uk/", 7}, + {"omit trivial subdomains but leave eTLD (effective TLD) alone", + "http://www.appspot.com/", kFormatUrlOmitTrivialSubdomains, + net::UnescapeRule::NORMAL, L"http://www.appspot.com/", 7}, + {"omit trivial subdomains but leave intranet hostnames alone", "http://router/", kFormatUrlOmitTrivialSubdomains, @@ -1742,15 +1744,15 @@ TEST(UrlFormatterTest, FormatUrlWithOffsets) { #if defined(OS_ANDROID) || defined(OS_IOS) const size_t strip_trivial_subdomains_offsets_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, kNpos, 7, 8, 9, - 10, kNpos, kNpos, kNpos, 10, 11, 12, 13, 14, 15, 16, 17}; + 0, 1, 2, 3, 4, 5, 6, 7, kNpos, 7, 8, 9, + 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21}; CheckAdjustedOffsets( "http://m.en.www.foo.com/", kFormatUrlOmitTrivialSubdomains, net::UnescapeRule::NORMAL, strip_trivial_subdomains_offsets_2); #else // !(defined(OS_ANDROID) || defined(OS_IOS)) const size_t strip_trivial_subdomains_offsets_2[] = { - 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, - 12, kNpos, kNpos, kNpos, 12, 13, 14, 15, 16, 17, 18, 19}; + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23}; CheckAdjustedOffsets( "http://m.en.www.foo.com/", kFormatUrlOmitTrivialSubdomains, net::UnescapeRule::NORMAL, strip_trivial_subdomains_offsets_2);