Skip to content

Commit

Permalink
Lookalike URLs: Ignore URLs that are not HTTP or HTTPS
Browse files Browse the repository at this point in the history
This CL fixes the case where the lookalike URL heuristics are triggered for non-web URLs such as chrome:// schemes. It also converts test cases to use non-private domains.

Bug: 843361
Change-Id: I2cb7cdc975ba330d547b604ba4fc9fe8c45969a6
Reviewed-on: https://chromium-review.googlesource.com/c/1388562
Reviewed-by: Tommy Li <[email protected]>
Commit-Queue: Mustafa Emre Acer <[email protected]>
Cr-Commit-Position: refs/heads/master@{#618611}
  • Loading branch information
meacer authored and Commit Bot committed Dec 21, 2018
1 parent 7c1e916 commit 8ffb442
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,9 @@ std::string GetMatchingSiteEngagementDomain(
blink::mojom::EngagementLevel::MEDIUM))
continue;

// Site engagement service should only return HTTP or HTTPS domains.
DCHECK(detail.origin.SchemeIsHTTPOrHTTPS());

// If the user has engaged with eTLD+1 of this site, don't show any
// lookalike navigation suggestions.
const std::string engaged_domain_and_registry =
Expand Down Expand Up @@ -147,6 +150,8 @@ void LookalikeUrlNavigationObserver::DidFinishNavigation(
return;

const GURL url = navigation_handle->GetURL();
if (!url.SchemeIsHTTPOrHTTPS())
return;

// If the user has engaged with this site, don't show any lookalike
// navigation suggestions.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,22 +39,25 @@ const int kHighEngagement = 20;
// An engagement score below MEDIUM.
const int kLowEngagement = 1;

// The domains here should not private domains (e.g. site.test), otherwise they
// might test the wrong thing. Also note that site5.com is in the top domain
// list, so it shouldn't be used here.
struct SiteEngagementTestCase {
const char* const navigated;
const char* const suggested;
} kSiteEngagementTestCases[] = {
{"sité1.test", "site1.test"},
{"mail.www.sité1.test", "site1.test"},
{"sité1.com", "site1.com"},
{"mail.www.sité1.com", "site1.com"},

// These should match since the comparison uses eTLD+1s.
{"sité2.test", "www.site2.test"},
{"mail.sité2.test", "www.site2.test"},
{"sité2.com", "www.site2.com"},
{"mail.sité2.com", "www.site2.com"},

{"síté3.test", "sité3.test"},
{"mail.síté3.test", "sité3.test"},
{"síté3.com", "sité3.com"},
{"mail.síté3.com", "sité3.com"},

{"síté4.test", "www.sité4.test"},
{"mail.síté4.test", "www.sité4.test"},
{"síté4.com", "www.sité4.com"},
{"mail.síté4.com", "www.sité4.com"},
};

} // namespace
Expand Down Expand Up @@ -276,6 +279,13 @@ IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationObserverBrowserTest,
CheckNoUkm();
}

// Schemes other than HTTP and HTTPS should be ignored.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationObserverBrowserTest,
TopDomainChromeUrl_NoInfobar) {
TestInfobarNotShown(GURL("chrome://googlé.com"));
CheckNoUkm();
}

// Navigate to a domain within an edit distance of 1 to a top domain.
// This should record metrics. It should also show a "Did you mean to go to ..."
// infobar if configured via a feature param.
Expand Down Expand Up @@ -333,10 +343,10 @@ IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationObserverBrowserTest,
// a feature param.
IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationObserverBrowserTest,
Idn_SiteEngagement_Match) {
SetSiteEngagementScore(GURL("http://site1.test"), kHighEngagement);
SetSiteEngagementScore(GURL("http://www.site2.test"), kHighEngagement);
SetSiteEngagementScore(GURL("http://sité3.test"), kHighEngagement);
SetSiteEngagementScore(GURL("http://www.sité4.test"), kHighEngagement);
SetSiteEngagementScore(GURL("http://site1.com"), kHighEngagement);
SetSiteEngagementScore(GURL("http://www.site2.com"), kHighEngagement);
SetSiteEngagementScore(GURL("http://sité3.com"), kHighEngagement);
SetSiteEngagementScore(GURL("http://www.sité4.com"), kHighEngagement);

std::vector<GURL> ukm_urls;
for (const auto& test_case : kSiteEngagementTestCases) {
Expand Down Expand Up @@ -391,15 +401,30 @@ IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationObserverBrowserTest,
// Test that navigations to a site with a high engagement score shouldn't
// record metrics or show infobar.
base::HistogramTester histograms;
SetSiteEngagementScore(GURL("http://site5.test"), kHighEngagement);
const GURL high_engagement_url =
embedded_test_server()->GetURL("síte5.test", "/title1.html");
SetSiteEngagementScore(GURL("http://site-not-in-top-domain-list.com"),
kHighEngagement);
const GURL high_engagement_url = embedded_test_server()->GetURL(
"síte-not-ín-top-domaín-líst.com", "/title1.html");
SetSiteEngagementScore(high_engagement_url, kHighEngagement);
TestInfobarNotShown(high_engagement_url);
histograms.ExpectTotalCount(LookalikeUrlNavigationObserver::kHistogramName,
0);
}

IN_PROC_BROWSER_TEST_P(LookalikeUrlNavigationObserverBrowserTest,
Idn_SiteEngagement_ChromeUrl_Ignored) {
// Test that an engaged site with a scheme other than HTTP or HTTPS should be
// ignored.
base::HistogramTester histograms;
SetSiteEngagementScore(GURL("chrome://site-not-in-top-domain-list.com"),
kHighEngagement);
const GURL low_engagement_url("http://síte-not-ín-top-domaín-líst.com");
SetSiteEngagementScore(low_engagement_url, kLowEngagement);
TestInfobarNotShown(low_engagement_url);
histograms.ExpectTotalCount(LookalikeUrlNavigationObserver::kHistogramName,
0);
}

// IDNs with a single label should be properly handled. There are two cases
// where this might occur:
// 1. The navigated URL is an IDN with a single label.
Expand Down

0 comments on commit 8ffb442

Please sign in to comment.