Skip to content

Commit

Permalink
Added code for linear regression and the augmented Dickey-Fuller test.
Browse files Browse the repository at this point in the history
  • Loading branch information
IanLKaplan committed Aug 19, 2022
1 parent 152d401 commit ff03394
Show file tree
Hide file tree
Showing 504 changed files with 592 additions and 29 deletions.
118 changes: 89 additions & 29 deletions pairs_trading.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from statsmodels.compat import scipy
from tabulate import tabulate
from scipy import stats
import statsmodels.api as sm
from statsmodels.tsa.stattools import adfuller

s_and_p_file = 's_and_p_sector_components/sp_stocks.csv'
s_and_p_data = 's_and_p_data'
Expand Down Expand Up @@ -252,26 +254,56 @@ def display_histogram(data_v: np.array, x_label: str, y_label: str) -> None:


pairs_list = get_pairs(sectors)


# yearly_cor_a = calc_yearly_correlation(close_prices_df, pairs_list)


# display_histogram(yearly_cor_a, 'Correlation between pairs', 'Count')

class PairStats:
    """
    Result record for one stock pair: the regression fit between the two
    log-price series and the augmented Dickey-Fuller statistics computed on
    the regression residuals.
    """

    def __init__(self, stock_a: str,
                 stock_b: str,
                 slope: float,
                 intercept: float,
                 residuals: pd.Series,
                 adf_stat: float,
                 p_value: float,
                 one_percent: float,
                 five_percent: float) -> None:
        """
        Store the values exactly as passed in; nothing is computed or validated here.

        :param stock_a: symbol of the first stock in the pair
        :param stock_b: symbol of the second stock in the pair
        :param slope: slope of the fitted regression line
        :param intercept: intercept of the fitted regression line
        :param residuals: residuals of the fitted regression
        :param adf_stat: augmented Dickey-Fuller test statistic
        :param p_value: p-value of the augmented Dickey-Fuller test
        :param one_percent: ADF critical value at the 1% level
        :param five_percent: ADF critical value at the 5% level
        """
        self.stock_a = stock_a
        self.stock_b = stock_b
        self.slope = slope
        self.intercept = intercept
        self.residuals = residuals
        self.adf_stat = adf_stat
        self.p_value = p_value
        self.one_percent = one_percent
        self.five_percent = five_percent

    def __str__(self) -> str:
        """Return a two-line summary: the fit on the first line, the ADF figures on the second."""
        # Two adjacent f-string literals are concatenated by the parser, so the
        # text (including the space before the newline) is unchanged.
        return (
            f'({self.stock_a},{self.stock_b}) slope: {self.slope} intercept: {self.intercept} \n'
            f'adf: {self.adf_stat} p-value: {self.p_value} 5%: {self.five_percent} 1%: {self.one_percent}'
        )


class PairsSelection:
decimals = 4

def __init__(self, correlation_cutoff: float):
def __init__(self,
             close_prices: pd.DataFrame,
             correlation_cutoff: float):
    """
    Remember the price table and the correlation threshold for later use.

    :param close_prices: close prices, one column per stock symbol
    :param correlation_cutoff: minimum correlation a pair must reach to be selected
    """
    self.close_prices = close_prices
    self.correlation_cutoff = correlation_cutoff

def pairs_yearly_correlation(self,
close_prices: pd.DataFrame,
start_ix: int,
end_ix: int,
pairs_list: List[Tuple],
cutoff: float) -> List[Tuple]:
def pairs_correlation(self,
start_ix: int,
end_ix: int,
pairs_list: List[Tuple]) -> List[Tuple]:

"""
Find the pairs with a log(price) correlation greater than or equal to cutoff
Find the pairs with a log(price) correlation greater than or equal to cutoff within a close price window
from start_ix to end_ix
:param stock_close_df: the stock close prices for the entire backtest period
:param start_ix: the start index in stock_close_df
:param end_ix: the end index in stock_close_df
Expand All @@ -283,39 +315,67 @@ def pairs_yearly_correlation(self,
for pair in pairs_list:
stock_a: str = pair[0]
stock_b: str = pair[1]
log_close_a = log(close_prices[stock_a][start_ix:end_ix+1])
log_close_b = log(close_prices[stock_b][start_ix:end_ix+1])
log_close_a = log(close_prices[stock_a][start_ix:end_ix + 1])
log_close_b = log(close_prices[stock_b][start_ix:end_ix + 1])
c = np.corrcoef(log_close_a, log_close_b)
cor_v = round(c[0, 1], 2)
if cor_v >= cutoff:
if cor_v >= self.correlation_cutoff:
sector = pair[2]
selected_pairs_l.append((stock_a, stock_b, sector, cor_v))
return selected_pairs_l

def regression(self, close_prices: pd.DataFrame, start_ix: int, end_ix: int, pair: Tuple):
def predict(intercept: float, slope: float, X: pd.Series) -> pd.Series:
y_hat = intercept + (slope * X)
return y_hat

# https://www.reneshbedre.com/blog/learn-to-calculate-residuals-regression.html
def stationary_analysis(self, start_ix: int, end_ix: int, pair: Tuple) -> PairStats:
    """
    Fit a linear regression between the log close prices of a pair and run an
    augmented Dickey-Fuller test on the regression residuals.

    The regression is fitted in both directions (a on b, and b on a), each with
    an intercept term; the fit with the larger slope is kept. Slope, intercept
    and the ADF figures are rounded to ``self.decimals`` places.

    :param start_ix: first row of the close price window
    :param end_ix: end of the close price window (exclusive, as sliced below)
    :param pair: tuple whose first two elements are the two stock symbols
    :return: a PairStats holding the chosen fit, its residuals and the ADF results
    """
    stock_a: str = pair[0]
    stock_b: str = pair[1]
    # Read prices from the instance: this method no longer receives a
    # close_prices argument, so the bare name would be undefined here.
    # NOTE(review): pairs_correlation slices start_ix:end_ix + 1 while this
    # window stops at end_ix - confirm which bound is intended.
    log_close_a = log(self.close_prices[stock_a][start_ix:end_ix])
    log_close_b = log(self.close_prices[stock_b][start_ix:end_ix])
    # add_constant supplies the 'const' column that gives OLS an intercept.
    log_close_b_const = sm.add_constant(log_close_b)
    result_ab = sm.OLS(log_close_a, log_close_b_const).fit()
    log_close_a_const = sm.add_constant(log_close_a)
    result_ba = sm.OLS(log_close_b, log_close_a_const).fit()
    # Fitted parameters are looked up by the regressor's symbol.
    slope_ab = result_ab.params[stock_b]
    slope_ba = result_ba.params[stock_a]
    # Keep whichever direction produced the larger slope.
    result = result_ab
    slope = slope_ab
    if slope_ab < slope_ba:
        result = result_ba
        slope = slope_ba
    intercept = round(result.params['const'], self.decimals)
    slope = round(slope, self.decimals)
    residuals = result.resid
    # References
    # https://machinelearningmastery.com/time-series-data-stationary-python/
    # https://www.quantstart.com/articles/Basics-of-Statistical-Mean-Reversion-Testing-Part-II/
    # A p-value <= 0.05 indicates a stationary, mean-reverting time series.
    # A more negative ADF statistic means a stronger mean-reverting process.
    adf_result = adfuller(residuals)
    adf_stat = round(adf_result[0], self.decimals)
    p_value = round(adf_result[1], self.decimals)
    # Element 4 of the adfuller result is the dict of critical values.
    critical_values = adf_result[4]
    one_percent = round(critical_values['1%'], self.decimals)
    five_percent = round(critical_values['5%'], self.decimals)
    return PairStats(stock_a=stock_a,
                     stock_b=stock_b,
                     slope=slope,
                     intercept=intercept,
                     residuals=residuals,
                     adf_stat=adf_stat,
                     p_value=p_value,
                     one_percent=one_percent,
                     five_percent=five_percent)

def find_pairs(self, start_ix: int, end_ix: int, pairs_list=pairs_list) -> List[PairStats]:
    """
    Select the pairs that pass the correlation filter over the given window and
    compute the regression / stationarity statistics for each of them.

    :param start_ix: start index of the close price window
    :param end_ix: end index of the close price window
    :param pairs_list: candidate pairs; the default is the module-level
                       pairs_list as it exists when the class is defined
    :return: one PairStats per pair returned by pairs_correlation
    """
    # Neither helper takes a close_prices argument; they are called with the
    # window bounds only.
    selected_pairs = self.pairs_correlation(start_ix=start_ix, end_ix=end_ix, pairs_list=pairs_list)
    # Built as a comprehension, so no local named `stats` hides the scipy.stats import.
    return [self.stationary_analysis(start_ix=start_ix, end_ix=end_ix, pair=pair)
            for pair in selected_pairs]


correlation_cutoff = 0.75
pairs_selection = PairsSelection(correlation_cutoff)

test_pair = ('WAT', 'XRAY', 'health-care', 0.9)
pairs_selection.regression(close_prices=close_prices_df, start_ix=0, end_ix=trading_days, pair=test_pair)
# Build the selector over the full close price table, then analyse every
# candidate pair over the window from row 0 to trading_days.
pairs_selection = PairsSelection(close_prices=close_prices_df, correlation_cutoff=correlation_cutoff)
# find_pairs names its first window argument start_ix.
stats_l = pairs_selection.find_pairs(start_ix=0, end_ix=trading_days, pairs_list=pairs_list)

pass
1 change: 1 addition & 0 deletions s_and_p_data/A.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,133.9
2022-08-16,132.77
2022-08-17,142.29
2022-08-18,139.97
1 change: 1 addition & 0 deletions s_and_p_data/AAL.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,15.33
2022-08-16,15.5
2022-08-17,14.96
2022-08-18,14.84
1 change: 1 addition & 0 deletions s_and_p_data/AAP.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,205.33
2022-08-16,209.6
2022-08-17,209.98
2022-08-18,207.49
1 change: 1 addition & 0 deletions s_and_p_data/AAPL.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,173.19
2022-08-16,173.03
2022-08-17,174.55
2022-08-18,174.15
1 change: 1 addition & 0 deletions s_and_p_data/ABBV.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2423,3 +2423,4 @@ Date,Close
2022-08-15,142.29
2022-08-16,142.55
2022-08-17,141.44
2022-08-18,141.29
1 change: 1 addition & 0 deletions s_and_p_data/ABC.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,150.82
2022-08-16,152.66
2022-08-17,149.53
2022-08-18,149.91
1 change: 1 addition & 0 deletions s_and_p_data/ABMD.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,295.06
2022-08-16,291.12
2022-08-17,281.82
2022-08-18,278.18
1 change: 1 addition & 0 deletions s_and_p_data/ABT.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,111.9
2022-08-16,112.03
2022-08-17,110.55
2022-08-18,109.96
1 change: 1 addition & 0 deletions s_and_p_data/ACN.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,320.33
2022-08-16,320.78
2022-08-17,318.45
2022-08-18,319.46
1 change: 1 addition & 0 deletions s_and_p_data/ADBE.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,451.02
2022-08-16,447.56
2022-08-17,437.82
2022-08-18,439.03
1 change: 1 addition & 0 deletions s_and_p_data/ADI.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,179.89
2022-08-16,179.04
2022-08-17,170.13
2022-08-18,170.13
1 change: 1 addition & 0 deletions s_and_p_data/ADM.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,86.13
2022-08-16,
2022-08-17,86.35
2022-08-18,87.62
1 change: 1 addition & 0 deletions s_and_p_data/ADP.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,259.71
2022-08-16,260.04
2022-08-17,260.69
2022-08-18,260.94
1 change: 1 addition & 0 deletions s_and_p_data/ADSK.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,234.04
2022-08-16,233.64
2022-08-17,228.2
2022-08-18,228.86
1 change: 1 addition & 0 deletions s_and_p_data/AEE.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,95.99
2022-08-16,96.02
2022-08-17,96.34
2022-08-18,96.7
1 change: 1 addition & 0 deletions s_and_p_data/AEP.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,104.56
2022-08-16,104.46
2022-08-17,104.33
2022-08-18,104.37
1 change: 1 addition & 0 deletions s_and_p_data/AES.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,25.18
2022-08-16,25.06
2022-08-17,25.01
2022-08-18,26.02
1 change: 1 addition & 0 deletions s_and_p_data/AFL.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,64.07
2022-08-16,63.99
2022-08-17,63.9
2022-08-18,64.55
1 change: 1 addition & 0 deletions s_and_p_data/AIG.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,56.76
2022-08-16,57.29
2022-08-17,56.69
2022-08-18,57.33
1 change: 1 addition & 0 deletions s_and_p_data/AIZ.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,167.26
2022-08-16,170.23
2022-08-17,171.54
2022-08-18,171.78
1 change: 1 addition & 0 deletions s_and_p_data/AJG.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,190.15
2022-08-16,190.66
2022-08-17,190.85
2022-08-18,191.52
1 change: 1 addition & 0 deletions s_and_p_data/AKAM.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,97.08
2022-08-16,98.08
2022-08-17,95.72
2022-08-18,96.8
1 change: 1 addition & 0 deletions s_and_p_data/ALB.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,284.08
2022-08-16,280.81
2022-08-17,277.62
2022-08-18,279.51
1 change: 1 addition & 0 deletions s_and_p_data/ALGN.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,286.19
2022-08-16,289.04
2022-08-17,277.66
2022-08-18,281.66
1 change: 1 addition & 0 deletions s_and_p_data/ALK.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,48.45
2022-08-16,48.24
2022-08-17,46.64
2022-08-18,46.69
1 change: 1 addition & 0 deletions s_and_p_data/ALL.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,127.76
2022-08-16,129.73
2022-08-17,132.04
2022-08-18,133.02
1 change: 1 addition & 0 deletions s_and_p_data/ALLE.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2201,3 +2201,4 @@ Date,Close
2022-08-15,107.2
2022-08-16,108.46
2022-08-17,108.57
2022-08-18,109.11
1 change: 1 addition & 0 deletions s_and_p_data/AMAT.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,109.78
2022-08-16,108.49
2022-08-17,106.0
2022-08-18,108.27
1 change: 1 addition & 0 deletions s_and_p_data/AMCR.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2581,3 +2581,4 @@ Date,Close
2022-08-15,13.03
2022-08-16,13.29
2022-08-17,13.01
2022-08-18,12.72
1 change: 1 addition & 0 deletions s_and_p_data/AMD.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,101.01
2022-08-16,100.2
2022-08-17,98.27
2022-08-18,100.44
1 change: 1 addition & 0 deletions s_and_p_data/AME.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,129.55
2022-08-16,130.31
2022-08-17,129.64
2022-08-18,130.13
1 change: 1 addition & 0 deletions s_and_p_data/AMGN.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,251.08
2022-08-16,253.15
2022-08-17,
2022-08-18,249.7
1 change: 1 addition & 0 deletions s_and_p_data/AMP.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,286.34
2022-08-16,289.06
2022-08-17,288.15
2022-08-18,291.83
1 change: 1 addition & 0 deletions s_and_p_data/AMT.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,281.52
2022-08-16,278.45
2022-08-17,277.46
2022-08-18,275.49
1 change: 1 addition & 0 deletions s_and_p_data/AMZN.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,143.18
2022-08-16,144.78
2022-08-17,142.1
2022-08-18,142.3
1 change: 1 addition & 0 deletions s_and_p_data/ANET.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2064,3 +2064,4 @@ Date,Close
2022-08-15,128.74
2022-08-16,129.91
2022-08-17,129.46
2022-08-18,132.74
1 change: 1 addition & 0 deletions s_and_p_data/ANSS.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,286.36
2022-08-16,283.72
2022-08-17,279.99
2022-08-18,279.88
1 change: 1 addition & 0 deletions s_and_p_data/AON.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,300.68
2022-08-16,301.0
2022-08-17,300.4
2022-08-18,302.57
1 change: 1 addition & 0 deletions s_and_p_data/AOS.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,64.08
2022-08-16,64.33
2022-08-17,64.21
2022-08-18,64.11
1 change: 1 addition & 0 deletions s_and_p_data/APA.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,33.57
2022-08-16,33.22
2022-08-17,33.63
2022-08-18,36.5
1 change: 1 addition & 0 deletions s_and_p_data/APD.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,274.11
2022-08-16,273.54
2022-08-17,268.0
2022-08-18,267.6
1 change: 1 addition & 0 deletions s_and_p_data/APH.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,79.63
2022-08-16,79.57
2022-08-17,79.64
2022-08-18,80.14
1 change: 1 addition & 0 deletions s_and_p_data/APTV.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2703,3 +2703,4 @@ Date,Close
2022-08-15,108.79
2022-08-16,108.07
2022-08-17,106.1
2022-08-18,106.75
1 change: 1 addition & 0 deletions s_and_p_data/ARE.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,170.04
2022-08-16,171.58
2022-08-17,170.73
2022-08-18,165.86
1 change: 1 addition & 0 deletions s_and_p_data/ATO.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,117.39
2022-08-16,117.58
2022-08-17,117.66
2022-08-18,118.08
1 change: 1 addition & 0 deletions s_and_p_data/ATVI.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,80.73
2022-08-16,80.92
2022-08-17,80.53
2022-08-18,80.44
1 change: 1 addition & 0 deletions s_and_p_data/AVB.csv
Original file line number Diff line number Diff line change
Expand Up @@ -3933,3 +3933,4 @@ Date,Close
2022-08-15,218.38
2022-08-16,219.24
2022-08-17,220.22
2022-08-18,216.52
Loading

0 comments on commit ff03394

Please sign in to comment.