-
Notifications
You must be signed in to change notification settings - Fork 6.4k
/
Copy pathex_chisq.py
42 lines (33 loc) · 1.15 KB
/
ex_chisq.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# From the course: Bayesian Machine Learning in Python: A/B Testing
# https://deeplearningcourses.com/c/bayesian-machine-learning-in-python-ab-testing
# https://www.udemy.com/bayesian-machine-learning-in-python-ab-testing
from __future__ import print_function, division
from builtins import range
# Note: you may need to update your version of future
# sudo pip install -U future
import numpy as np
import pandas as pd
from scipy.stats import chi2, chi2_contingency
# contingency table
#           click    no click
# ------------------------------
# ad A  |     a         b
# ad B  |     c         d
def get_p_value(T):
    """Return the p-value of Pearson's chi-square test for a 2x2 table.

    Same result as ``scipy.stats.chi2_contingency(T, correction=False)``
    (its p-value), computed with the closed form for a 2x2 table::

        chi2 = (a*d - b*c)**2 * N / (row0 * row1 * col0 * col1)

    where ``N`` is the grand total and ``row*`` / ``col*`` are the marginal
    sums.

    Parameters
    ----------
    T : array-like of shape (2, 2)
        Contingency table of counts, e.g. ``[[a, b], [c, d]]``.

    Returns
    -------
    float
        Upper-tail probability of a chi-square distribution with one degree
        of freedom evaluated at the test statistic. The statistic is
        undefined (NaN) when a row or column of non-negative counts sums
        to zero.
    """
    # Work in floating point: this accepts nested lists as well as arrays
    # and keeps the products below from overflowing a fixed-width integer.
    T = np.asarray(T, dtype=float)

    det = T[0, 0] * T[1, 1] - T[0, 1] * T[1, 0]

    # Alternate multiplications and divisions so that the intermediate
    # value never grows to the size of det**2 * N.
    c2 = det / T[0].sum() * det / T[1].sum() * T.sum() / T[:, 0].sum() / T[:, 1].sum()

    # Use the survival function rather than 1 - cdf: for a large statistic
    # the cdf rounds to 1.0 and the subtraction would return exactly 0.
    return chi2.sf(c2, df=1)
# load the click log and pull out the 'action' column for each advertisement
df = pd.read_csv('advertisement_clicks.csv')
a = df.loc[df['advertisement_id'] == 'A', 'action']
b = df.loc[df['advertisement_id'] == 'B', 'action']

# summing 'action' gives the click count (assumes action is 1 for a click
# and 0 otherwise -- confirm against the CSV); the rest of each group's
# rows are counted as "no click"
A_clk, B_clk = a.sum(), b.sum()
A_noclk = a.size - A_clk
B_noclk = b.size - B_clk

# rows: ad A, ad B; columns: click, no click
T = np.array([
    [A_clk, A_noclk],
    [B_clk, B_noclk],
])
print(get_p_value(T))