Skip to content

Commit

Permalink
Session variable support for A/B testing (hedyorg#444)
Browse files Browse the repository at this point in the history
* Session variable support for A/B testing.

* Passing env variables between main & test, WIP.

* Add e2e tests for session variable support in a/b testing; fix cookie parsing from test to main.

* Instead of doing our own decoding, switch to using `flask`/`itsdangerous` cookie parsing

* Move code to a separate file and fix some tests

* Also factor out CDN logic to its own module

* Update documentation

* Move function that's only used for A/B testing into A/B module

* Rename ab test script; move script to tests folder.

Co-authored-by: Rico Huijbers <[email protected]>
  • Loading branch information
fpereiro and rix0rrr authored May 31, 2021
1 parent acf020d commit b048fec
Show file tree
Hide file tree
Showing 11 changed files with 306 additions and 100 deletions.
114 changes: 114 additions & 0 deletions ab_proxying.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
from http.cookies import SimpleCookie
import hashlib
import re
import os
import logging

from flask import request, session, make_response
import requests
import flask.sessions
import itsdangerous

from auth import current_user
import utils

class ABProxying:
"""Proxy some requests to another server."""
def __init__(self, app, target_host, secret_key):
self.target_host = target_host
self.secret_key = secret_key

app.before_request(self.before_request_proxy)

def before_request_proxy(self):
# If it is an auth route, we do not reverse proxy it to the PROXY_TO_TEST_HOST environment, with the exception of /auth/texts
# We want to keep all cookie setting in the main environment, not the test one.
if re.match ('.*/auth/.*', request.url) and not re.match ('.*/auth/texts', request.url):
pass
# This route is meant to return the session from the main environment, for testing purposes.
elif re.match ('.*/session_main', request.url):
pass
# If we enter this block, we will reverse proxy the request to the PROXY_TO_TEST_HOST environment.
# /session_test is meant to return the session from the test environment, for testing purposes.
elif re.match ('.*/session_test', request.url) or redirect_ab (request):
url = self.target_host + request.full_path
logging.debug('Proxying %s %s %s to %s', request.method, request.url, dict (session), url)

request_headers = {}
for header in request.headers:
if (header [0].lower () in ['host']):
continue
request_headers [header [0]] = header [1]
# In case the session_id is not yet set in the cookie, pass it in a special header
request_headers ['X-session_id'] = session ['session_id']

r = getattr (requests, request.method.lower ()) (url, headers=request_headers, data=request.data)

response = make_response (r.content)
for header in r.headers:
# With great help from https://medium.com/customorchestrator/simple-reverse-proxy-server-using-flask-936087ce0afb
if header.lower () in ['content-encoding', 'content-length', 'transfer-encoding', 'connection']:
continue
# Setting the session cookie returned by the test environment into the response won't work because it will be overwritten by Flask, so we need to read the cookie into the session so that then the session cookie can be updated by Flask
if header.lower () == 'set-cookie':
proxied_session = extract_session_from_cookie (r.headers [header], self.secret_key)
for key in proxied_session:
session [key] = proxied_session [key]
continue
response.headers [header] = r.headers [header]

return response, r.status_code


def redirect_ab (request):
# If this is a testing request, we return True
if utils.is_testing_request (request):
return True
# If the user is logged in, we use their username as identifier, otherwise we use the session id
user_identifier = current_user(request) ['username'] or str (session['session_id'])

# This will send either % PROXY_TO_TEST_PROPORTION of the requests into redirect, or 50% if that variable is not specified.
redirect_proportion = int (os.getenv ('PROXY_TO_TEST_PROPORTION', '50'))
redirect_flag = (hash_user_or_session (user_identifier) % 100) < redirect_proportion
return redirect_flag


def hash_user_or_session (string):
hash = hashlib.md5 (string.encode ('utf-8')).hexdigest ()
return int (hash, 16)


# Used by A/B testing to extract a session from a set-cookie header.
# The signature is ignored. The source of the session should be trusted.
def extract_session_from_cookie (cookie_header, secret_key):
parsed_cookie = SimpleCookie (cookie_header)
if not 'session' in parsed_cookie:
return {}

cookie_interface = flask.sessions.SecureCookieSessionInterface()

# This code matches what Flask does for encoding
serializer = itsdangerous.URLSafeTimedSerializer(
secret_key,
salt=cookie_interface.salt,
serializer=cookie_interface.serializer,
signer_kwargs=dict(
key_derivation=cookie_interface.key_derivation,
digest_method=cookie_interface.digest_method
))

try:
cookie_value = parsed_cookie['session'].value
return serializer.loads(cookie_value)
except itsdangerous.exc.BadSignature as e:
# If the signature is wrong, we can still decode the cookie.
# We try to do it properly with the actual key though, because exception
# handling is slightly slow in Python and doing it successfully is therefore
# faster.
if e.payload is not None:
try:
return serializer.load_payload(e.payload)
except itsdangerous.exc.BadData:
pass
return {}

122 changes: 35 additions & 87 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
import os
from os import path
import re
import requests
import traceback
import uuid
from ruamel import yaml
Expand All @@ -23,10 +22,9 @@
from auth import auth_templates, current_user, requires_login, is_admin, is_teacher
from utils import db_get, db_get_many, db_set, timems, type_check, object_check, db_del, load_yaml, load_yaml_rt, dump_yaml_rt
import utils
import hashlib

# app.py
from flask import Flask, request, jsonify, session, abort, g, redirect, make_response, Response
from flask import Flask, request, jsonify, session, abort, g, redirect, Response
from flask_helpers import render_template
from flask_compress import Compress

Expand All @@ -36,6 +34,8 @@
import translating
import querylog
import aws_helpers
import ab_proxying
import cdn

# Define and load all available language data
ALL_LANGUAGES = {
Expand Down Expand Up @@ -136,90 +136,26 @@ def load_adventure_assignments_per_level(lang, level):
utils.set_debug_mode_based_on_flask(app)


CDN_PREFIX = os.getenv('CDN_PREFIX', None)
STATIC_PREFIX = '/'
if CDN_PREFIX:
# If we are using a CDN, also host static resources under a URL that includes
# the version number (so the CDN can aggressively cache the static assets and we
# still can invalidate them whenever necessary).
#
# The function {{static('/js/bla.js')}} can be used to retrieve the URL of static
# assets, either from the CDN if configured or just the normal URL we would use
# without a CDN.
#
# We still keep on hosting static assets in the "old" location as well for images in
# emails and content we forgot to replace or are unable to replace (like in MarkDowns).
STATIC_PREFIX = '/static-' + os.getenv('HEROKU_SLUG_COMMIT', 'dev')
app.add_url_rule(STATIC_PREFIX + '/<path:filename>',
endpoint="static",
view_func=app.send_static_file)


@app.context_processor
def inject_static():
"""Add the 'static' function to the Template context, to return links to static resources."""
def static(url):
return utils.slash_join(CDN_PREFIX, STATIC_PREFIX, url)
return dict(static=static)


def hash_user_or_session (string):
hash = hashlib.md5 (string.encode ('utf-8')).hexdigest ()
return int (hash, 16)

def redirect_ab (request, session):
# If the user is logged in, we use their username as identifier, otherwise we use the session id
user_identifier = current_user(request) ['username'] or str (session_id ())

# This will send 50% of the requests into redirect.
redirect_proportion = 50
redirect_flag = (hash_user_or_session (user_identifier) % 100) < redirect_proportion
return redirect_flag

cdn.Cdn(app, os.getenv('CDN_PREFIX'), os.getenv('HEROKU_SLUG_COMMIT', 'dev'))

# Set session id if not already set. This must be done as one of the first things,
# so the function should be defined high up.
@app.before_request
def set_session_cookie():
session_id()

if os.getenv('IS_PRODUCTION'):
@app.before_request
def reject_e2e_requests():
if utils.is_testing_request (request):
return 'No E2E tests are allowed in production', 400

# If present, PROXY_TO_TEST_ENV should be the name of the target environment
if os.getenv ('PROXY_TO_TEST_ENV') and not os.getenv ('IS_TEST_ENV'):
@app.before_request
def before_request_proxy():
# If it is an auth route, we do not reverse proxy it to the PROXY_TO_TEST_ENV environment, with the exception of /auth/texts
# We want to keep all cookie setting in the main environment, not the test one.
if (re.match ('.*/auth/.*', request.url) and not re.match ('.*/auth/texts', request.url)):
pass
# If we enter this block, we will reverse proxy the request to the PROXY_TO_TEST_ENV environment.
elif (redirect_ab (request, session)):

print ('DEBUG TEST - REVERSE PROXYING REQUEST', request.method, request.url, session_id ())

url = request.url.replace (os.getenv ('HEROKU_APP_NAME'), os.getenv ('PROXY_TO_TEST_ENV'))

request_headers = {}
for header in request.headers:
if (header [0].lower () in ['host']):
continue
request_headers [header [0]] = header [1]

# Send the session_id to the test environment for logging purposes
request_headers ['x-session_id'] = session_id ()
r = getattr (requests, request.method.lower ()) (url, headers=request_headers, data=request.data)

response = make_response (r.content)
for header in r.headers:
# With great help from https://medium.com/customorchestrator/simple-reverse-proxy-server-using-flask-936087ce0afb
# We ignore the set-cookie header
if (header.lower () in ['content-encoding', 'content-length', 'transfer-encoding', 'connection', 'set-cookie']):
continue
response.headers [header] = r.headers [header]
return response, r.status_code

if os.getenv ('IS_TEST_ENV'):
@app.before_request
def before_request_receive_proxy():
print ('DEBUG TEST - RECEIVE PROXIED REQUEST', request.method, request.url, session_id ())
@app.before_request
def before_request_proxy_testing():
if utils.is_testing_request (request):
if os.getenv ('IS_TEST_ENV'):
session ['test_session'] = 'test'

# HTTP -> HTTPS redirect
# https://stackoverflow.com/questions/32237379/python-flask-redirect-to-https-from-http/32238093
Expand Down Expand Up @@ -271,6 +207,21 @@ def after_request_log_status(response):
def teardown_request_finish_logging(exc):
querylog.finish_global_log_record(exc)

# If present, PROXY_TO_TEST_HOST should be the 'http[s]://hostname[:port]' of the target environment
if os.getenv ('PROXY_TO_TEST_HOST') and not os.getenv ('IS_TEST_ENV'):
ab_proxying.ABProxying(app, os.getenv ('PROXY_TO_TEST_HOST'), app.config['SECRET_KEY'])

@app.route('/session_test', methods=['GET'])
def echo_session_vars_test():
if not utils.is_testing_request (request):
return 'This endpoint is only meant for E2E tests', 400
return jsonify({'session': dict(session)})

@app.route('/session_main', methods=['GET'])
def echo_session_vars_main():
if not utils.is_testing_request (request):
return 'This endpoint is only meant for E2E tests', 400
return jsonify({'session': dict(session), 'proxy_enabled': bool (os.getenv ('PROXY_TO_TEST_HOST'))})

@app.route('/parse', methods=['POST'])
def parse():
Expand Down Expand Up @@ -479,7 +430,6 @@ def adventure_page(adventure_name, level):
@app.route('/hedy/<level>', methods=['GET'], defaults={'step': 1})
@app.route('/hedy/<level>/<step>', methods=['GET'])
def index(level, step):
session_id() # Run this for the side effect of generating a session ID
try:
g.level = level = int(level)
except:
Expand Down Expand Up @@ -526,7 +476,6 @@ def index(level, step):
@app.route('/onlinemasters/<level>', methods=['GET'], defaults={'step': 1})
@app.route('/onlinemasters/<level>/<step>', methods=['GET'])
def onlinemasters(level, step):
session_id() # Run this for the side effect of generating a session ID
g.level = level = int(level)
g.lang = lang = requested_lang()
g.prefix = '/onlinemasters'
Expand All @@ -550,7 +499,6 @@ def onlinemasters(level, step):
@app.route('/space_eu/<level>', methods=['GET'], defaults={'step': 1})
@app.route('/space_eu/<level>/<step>', methods=['GET'])
def space_eu(level, step):
session_id() # Run this for the side effect of generating a session ID
g.level = level = int(level)
g.lang = requested_lang()
g.prefix = '/space_eu'
Expand Down Expand Up @@ -621,13 +569,13 @@ def main_page(page):

def session_id():
"""Returns or sets the current session ID."""
if os.getenv('IS_TEST_ENV') and 'x-session_id' in request.headers:
return request.headers['x-session_id']
if 'session_id' not in session:
session['session_id'] = uuid.uuid4().hex
if os.getenv ('IS_TEST_ENV') and 'X-session_id' in request.headers:
session['session_id'] = request.headers ['X-session_id']
else:
session['session_id'] = uuid.uuid4().hex
return session['session_id']


def requested_lang():
"""Return the user's requested language code.
Expand Down
52 changes: 52 additions & 0 deletions cdn.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
import utils

class Cdn:
"""Set up CDN configuration.
Add a global to the template (called `static()`) to return references to
static resources. All static resources should be referenced using this
function.
If CDN is not enabled:
static('hello.jpg') -> '/hello.jpg' (fetch from the current server)
If enabled (when a CDN prefix is given), add an additional
route with a unique name containing the server commit to return
static resources.
If CDN is enabled:
static('hello.jpg') -> 'https://1235.cdn.com/s-830s8a2fa/hello.jpg' (fetch from CDN)
Because the commit number is in the URL, it can be extremely aggressively
cached by the CDN.
"""
def __init__(self, app, cdn_prefix, commit):
self.cdn_prefix = cdn_prefix or ''
self.commit = commit
self.static_prefix = '/'

if self.cdn_prefix:
# If we are using a CDN, also host static resources under a URL that includes
# the version number (so the CDN can aggressively cache the static assets and we
# still can invalidate them whenever necessary).
#
# The function {{static('/js/bla.js')}} can be used to retrieve the URL of static
# assets, either from the CDN if configured or just the normal URL we would use
# without a CDN.
#
# We still keep on hosting static assets in the "old" location as well for images in
# emails and content we forgot to replace or are unable to replace (like in MarkDowns).
self.static_prefix = '/static-' + commit
app.add_url_rule(self.static_prefix + '/<path:filename>',
endpoint="static",
view_func=app.send_static_file)

app.add_template_global(self.static, name='static')

def static(self, url):
"""Return cacheable links to static resources."""
return utils.slash_join(self.cdn_prefix, self.static_prefix, url)


2 changes: 1 addition & 1 deletion config.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
dyno = os.getenv('DYNO')

config = {
'port': 5000,
'port': os.getenv ('PORT') or 5000,
'session': {
'cookie_name': 'hedy',
# in minutes
Expand Down
9 changes: 8 additions & 1 deletion CONFIGURATION.md → doc/CONFIGURATION.md
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ BASE_URL
A/B testing:

```
PROXY_TO_TEST_ENV
PROXY_TO_TEST_HOST
PROXY_TO_TEST_PROPORTION
IS_TEST_ENV
```

Expand All @@ -56,6 +57,12 @@ To determine if this is the production environment (to avoid requests from e2e t
IS_PRODUCTION
```

To set up the port of the app through an env variable (helpful to start multiple instances of the app locally

```
PORT
```

## Heroku Metadata

This app depends on some environment variables that require Heroku dyno metadata.
Expand Down
Loading

0 comments on commit b048fec

Please sign in to comment.