Skip to content

Commit

Permalink
All pages include a canonical link.
Browse files Browse the repository at this point in the history
The main points we hit on for canonicalization are that:

1) All pages should be https
2) All pages should end in a '\'
3) All language specific subdomains (de, pt, etc ...) are canonicalized to 'www'.

For comment pages we also do extra logic to ensure that pages of the form:

https://www.reddit.com/r/hiphopheads/comments/4nc0ln/

are canonicalized to:

https://www.reddit.com/r/hiphopheads/comments/4nc0ln/fresh_clams_casino_all_nite_feat_vince_staples/
  • Loading branch information
telaviv committed Jun 20, 2016
1 parent dc50148 commit c5c1c90
Show file tree
Hide file tree
Showing 7 changed files with 73 additions and 20 deletions.
15 changes: 9 additions & 6 deletions r2/r2/config/middleware.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@
from r2.config import hooks
from r2.config.environment import load_environment
from r2.config.extensions import extension_mapping, set_extension
from r2.lib.utils import is_subdomain
from r2.lib.utils import is_subdomain, is_language_subdomain
from r2.lib import csrf, filters


Expand Down Expand Up @@ -106,7 +106,7 @@ def error_mapper(code, message, environ, global_conf=None, **kw):
error_data = getattr(exception, 'error_data', None)
if error_data:
environ['extra_error_data'] = error_data

if environ.get('REDDIT_NAME'):
d['srname'] = environ.get('REDDIT_NAME')
if environ.get('REDDIT_TAKEDOWN'):
Expand Down Expand Up @@ -161,7 +161,6 @@ def __call__(self, environ, start_response):


class DomainMiddleware(object):
lang_re = re.compile(r"\A\w\w(-\w\w)?\Z")

def __init__(self, app, config):
self.app = app
Expand All @@ -180,7 +179,9 @@ def __call__(self, environ, start_response):

# localhost is exempt so paster run/shell will work
# media_domain doesn't need special processing since it's just ads
if domain == "localhost" or is_subdomain(domain, g.media_domain):
is_media_only_domain = (is_subdomain(domain, g.media_domain) and
g.domain != g.media_domain)
if domain == "localhost" or is_media_only_domain:
return self.app(environ, start_response)

# tell reddit_base to redirect to the appropriate subreddit for
Expand Down Expand Up @@ -211,7 +212,7 @@ def __call__(self, environ, start_response):
prefix_parts.append(subdomain)
elif extension:
environ['reddit-domain-extension'] = extension
elif self.lang_re.match(subdomain):
elif is_language_subdomain(subdomain):
environ['reddit-prefer-lang'] = subdomain
else:
sr_redirect = subdomain
Expand Down Expand Up @@ -251,6 +252,7 @@ def __call__(self, environ, start_response):
environ['PATH_INFO'] = self.sr_pattern.sub('', path) or '/'
return self.app(environ, start_response)


class DomainListingMiddleware(object):
def __init__(self, app):
self.app = app
Expand All @@ -265,9 +267,10 @@ def __call__(self, environ, start_response):
environ['PATH_INFO'] = rest or '/'
return self.app(environ, start_response)


class ExtensionMiddleware(object):
ext_pattern = re.compile(r'\.([^/]+)\Z')

def __init__(self, app):
self.app = app

Expand Down
3 changes: 3 additions & 0 deletions r2/r2/controllers/front.py
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,8 @@ def GET_comments(
if article.is_nsfw and not c.over18 and c.render_style == 'html':
return self.intermediate_redirect("/over18", sr_path=False)

canonical_link = article.make_canonical_link(sr)

# Determine if we should show the embed link for comments
c.can_embed = bool(comment) and article.is_embeddable

Expand Down Expand Up @@ -523,6 +525,7 @@ def GET_comments(
sr_detail=sr_detail,
campaign_fullname=campaign_fullname,
click_url=click_url,
canonical_link=canonical_link,
extra_js_config=extra_js_config,
)

Expand Down
21 changes: 21 additions & 0 deletions r2/r2/controllers/listingcontroller.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,25 @@ def _build_og_title(self, max_length=256):

return u"%s • %s" % (_force_unicode(title), sr_fragment)

def canonical_link(self):
"""Return the canonical link of the subreddit.
Ordinarily canonical links are created using request.url.
In the case of subreddits, we perform a bit of magic to strip the
subreddit path from the url. This means that a path like:
https:///www.reddit.com/r/hiphopheads/
will instead show:
https://www.reddit.com/
See SubredditMiddleware for more information.
This method constructs our url from scratch given other information.
"""
return add_sr('/', force_https=True)

def _build_og_description(self):
description = c.site.public_description.strip()
if not description:
Expand Down Expand Up @@ -336,6 +355,8 @@ def render_params(self):
event_target['target_after'] = self.after._fullname
render_params['extra_js_config'] = {'event_target': event_target}

render_params['canonical_link'] = self.canonical_link()

return render_params


Expand Down
30 changes: 17 additions & 13 deletions r2/r2/lib/pages/pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,7 +254,8 @@ def __init__(self, space_compress=None, nav_menus=None, loginbox=True,
header=True, srbar=True, page_classes=None, short_title=None,
show_wiki_actions=False, extra_js_config=None,
show_locationbar=False, auction_announcement=False,
show_newsletterbar=False, **context):
show_newsletterbar=False, canonical_link=None,
**context):
Templated.__init__(self, **context)
self.title = title
self.short_title = short_title
Expand Down Expand Up @@ -287,7 +288,9 @@ def __init__(self, space_compress=None, nav_menus=None, loginbox=True,
self.show_timeout_modal = False

# generate a canonical link for google
self.canonical_link = request.fullpath
canonical_url = UrlParser(canonical_link or request.url)
canonical_url.canonicalize()
self.canonical_link = canonical_url.unparse()
if c.render_style != "html":
u = UrlParser(request.fullpath)
u.set_extension("")
Expand All @@ -296,6 +299,7 @@ def __init__(self, space_compress=None, nav_menus=None, loginbox=True,
if g.domain_prefix:
u.hostname = "%s.%s" % (g.domain_prefix, u.hostname)
self.canonical_link = u.unparse()

# Generate a mobile link for Google.
u = UrlParser(request.fullpath)
u.switch_subdomain_by_extension('mobile')
Expand Down Expand Up @@ -351,7 +355,7 @@ def __init__(self, space_compress=None, nav_menus=None, loginbox=True,
if self.show_newsletterbar:
self.newsletterbar = NewsletterBar()

if (c.render_style == "compact" and
if (c.render_style == "compact" and
getattr(self, "show_mobilewebredirectbar", True)):
self.mobilewebredirectbar = MobileWebRedirectBar()

Expand Down Expand Up @@ -1003,7 +1007,7 @@ def content(self):

def build_popup_panes(self):
panes = []

panes.append(Popup('archived-popup', ArchivedInterstitial()))

if self.show_timeout_modal:
Expand All @@ -1012,7 +1016,7 @@ def build_popup_panes(self):
hide_message=True,
)
panes.append(Popup('access-popup', popup_content))

return HtmlPaneStack(panes)

def is_gold_page(self):
Expand Down Expand Up @@ -1334,7 +1338,7 @@ def render_editable_developer(self, app, dev):
res = editable_developer_fn.render(app, dev)
return spaceCompress(res)


class PrefDeactivate(Templated):
"""Preference form for deactivating a user's own account."""
def __init__(self):
Expand Down Expand Up @@ -3172,7 +3176,7 @@ def __init__(self, goldtype, period, months, signed,
user_creddits = 50
else:
user_creddits = c.user.gold_creddits

if (goldtype in ("gift", "code", "onetime") and
months <= user_creddits):
can_use_creddits = True
Expand Down Expand Up @@ -4459,7 +4463,7 @@ def __init__(self, **kw):
self.mobile_targeting_enabled = feature.is_enabled("mobile_targeting")
Templated.__init__(self, **kw)

def get_locations(self):
def get_locations(self):
# geotargeting
def location_sort(location_tuple):
code, name, default = location_tuple
Expand Down Expand Up @@ -4566,7 +4570,7 @@ def setup(self, link, listing):
self.max_start = max_start.strftime("%m/%d/%Y")
self.max_end = max_end.strftime("%m/%d/%Y")
self.default_start = default_start.strftime("%m/%d/%Y")
self.default_end = default_end.strftime("%m/%d/%Y")
self.default_end = default_end.strftime("%m/%d/%Y")

self.link = link
self.listing = listing
Expand Down Expand Up @@ -4649,7 +4653,7 @@ def __init__(self, link, campaign, transaction, is_pending, is_live,
self.total_budget_dollars = campaign.total_budget_pennies / 100.

if full_details:
if not self.campaign.is_house and not self.campaign.is_auction:
if not self.campaign.is_house and not self.campaign.is_auction:
self.spent = promote.get_spent_amount(campaign)
else:
self.spent = campaign.adserver_spent_pennies / 100.
Expand Down Expand Up @@ -4944,7 +4948,7 @@ def __init__(self,

if text is None:
text = ''

# set the attribute for admin takedowns
if getattr(item, 'admin_takedown', False):
admin_takedown = True
Expand Down Expand Up @@ -5367,7 +5371,7 @@ def group_and_combine(items_by_key, group_on=None):
})
crt = self.campaign_report_totals
crt['total_clicks'] = crt['sr_clicks'] + crt['fp_clicks']
crt['total_imps'] = crt['sr_imps'] + crt['fp_imps']
crt['total_imps'] = crt['sr_imps'] + crt['fp_imps']
crt['bid'] = format_currency(crt['bid'], 'USD', locale=c.locale)
# make the link report
traffic_by_key = group_and_combine(
Expand Down Expand Up @@ -5768,7 +5772,7 @@ def __init__(self, item_type, rec_src, sr, link, comment=None):
useful for comparing performance of data sources or algorithms
sr and link are required
comment is optional
See r2.lib.recommender for valid values of item_type and rec_src.
"""
Expand Down
18 changes: 17 additions & 1 deletion r2/r2/lib/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,15 @@ def strip_www(domain):

def is_subdomain(subdomain, base):
"""Check if a domain is equal to or a subdomain of a base domain."""
return subdomain == base or (subdomain is not None and subdomain.endswith('.' + base))
return subdomain == base or (
subdomain is not None and subdomain.endswith('.' + base))


lang_re = re.compile(r"\A\w\w(-\w\w)?\Z")


def is_language_subdomain(subdomain):
return lang_re.match(subdomain)


def base_url(url):
Expand Down Expand Up @@ -590,6 +598,14 @@ def set_extension(self, extension):
self.path = '/'.join(dirs)
return self

def canonicalize(self):
subdomain = extract_subdomain(self.hostname)
if subdomain == '' or is_language_subdomain(subdomain):
self.hostname = 'www.{0}'.format(g.domain)
if not self.path.endswith('/'):
self.path += '/'
self.scheme = 'https'

def switch_subdomain_by_extension(self, extension=None):
"""Change the subdomain to the one that fits an extension.
Expand Down
5 changes: 5 additions & 0 deletions r2/r2/models/link.py
Original file line number Diff line number Diff line change
Expand Up @@ -480,6 +480,11 @@ def make_permalink(self, sr, force_domain=False):

return res

def make_canonical_link(self, sr, subdomain='www'):
domain = '%s.%s' % (subdomain, g.domain)
path = 'comments/%s/%s/' % (self._id36, title_to_url(self.title))
return '%s://%s/r/%s/%s' % (g.default_scheme, domain, sr.name, path)

def make_permalink_slow(self, force_domain=False):
return self.make_permalink(self.subreddit_slow,
force_domain=force_domain)
Expand Down
1 change: 1 addition & 0 deletions r2/r2/templates/base.html
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
<meta name="description" content="${getattr(thing, 'short_description', None) or g.short_description}" />
<meta name="referrer" content="${c.referrer_policy}">
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<link rel="canonical" href="${thing.canonical_link}" />
%if hasattr(thing, 'mobile_link'):
<link rel="alternate" media="only screen and (max-width: 640px)" href="${thing.mobile_link}" />
%endif
Expand Down

0 comments on commit c5c1c90

Please sign in to comment.