Skip to content

Commit

Permalink
Add notebook for HRP basic and advanced
Browse files Browse the repository at this point in the history
  • Loading branch information
WanABRO committed Apr 21, 2020
1 parent 4a42d6a commit f2775de
Show file tree
Hide file tree
Showing 12 changed files with 4,525 additions and 0 deletions.
699 changes: 699 additions & 0 deletions HRP/.ipynb_checkpoints/HRP use example-checkpoint.ipynb

Large diffs are not rendered by default.

699 changes: 699 additions & 0 deletions HRP/HRP use example.ipynb

Large diffs are not rendered by default.

13 changes: 13 additions & 0 deletions HRP/mlfinlab/portfolio_optimization/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
"""
Classes derived from Portfolio Optimisation module
"""

from mlfinlab.portfolio_optimization.cla import CLA
from mlfinlab.portfolio_optimization.hrp import HierarchicalRiskParity
from mlfinlab.portfolio_optimization.mean_variance import MeanVarianceOptimisation
from mlfinlab.portfolio_optimization.hcaa import HierarchicalClusteringAssetAllocation
from mlfinlab.portfolio_optimization.risk_metrics import RiskMetrics
from mlfinlab.portfolio_optimization.returns_estimators import ReturnsEstimation
from mlfinlab.portfolio_optimization.nco import NCO
from mlfinlab.portfolio_optimization.risk_estimators import RiskEstimators
from mlfinlab.portfolio_optimization.tic import TIC
638 changes: 638 additions & 0 deletions HRP/mlfinlab/portfolio_optimization/cla.py

Large diffs are not rendered by default.

442 changes: 442 additions & 0 deletions HRP/mlfinlab/portfolio_optimization/hcaa.py

Large diffs are not rendered by default.

268 changes: 268 additions & 0 deletions HRP/mlfinlab/portfolio_optimization/hrp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,268 @@
# pylint: disable=missing-module-docstring
import numpy as np
import pandas as pd
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import squareform
from sklearn.covariance import OAS
from mlfinlab.portfolio_optimization.returns_estimators import ReturnsEstimation
from mlfinlab.portfolio_optimization.risk_metrics import RiskMetrics


class HierarchicalRiskParity:
    """
    This class implements the Hierarchical Risk Parity algorithm mentioned in the following paper: `López de Prado, Marcos,
    Building Diversified Portfolios that Outperform Out-of-Sample (May 23, 2016). Journal of Portfolio Management,
    2016 <https://papers.ssrn.com/sol3/papers.cfm?abstract_id=2708678>`_; The code is reproduced with modification from his book:
    Advances in Financial Machine Learning, Chp-16

    By removing the exact analytical approach to the calculation of weights and instead relying on an approximate
    machine learning based approach (hierarchical tree-clustering), Hierarchical Risk Parity produces weights which are
    stable to random shocks in the stock-market. Moreover, previous algorithms like CLA involve the inversion of the
    covariance matrix, which is a highly unstable operation and tends to have major impacts on the performance due to
    slight changes in the covariance matrix. By removing the dependence on the inversion of the covariance matrix
    completely, the Hierarchical Risk Parity algorithm is fast, robust and flexible.

    After a call to ``allocate`` the results are available on the instance:

    - ``weights``: (pd.DataFrame) a single-row dataframe with one column per asset, in clustered order
    - ``seriated_distances``: (pd.DataFrame) the distance matrix re-ordered by cluster
    - ``ordered_indices``: (list) positional asset indices in clustered order
    - ``clusters``: (np.array) the linkage matrix returned by scipy
    """

    def __init__(self):
        self.weights = list()
        self.seriated_distances = None
        self.ordered_indices = None
        self.clusters = None
        self.returns_estimator = ReturnsEstimation()
        self.risk_metrics = RiskMetrics()

    def allocate(self,
                 asset_names=None,
                 asset_prices=None,
                 asset_returns=None,
                 covariance_matrix=None,
                 distance_matrix=None,
                 side_weights=None,
                 linkage='single',
                 resample_by=None,
                 use_shrinkage=False):
        # pylint: disable=invalid-name, too-many-branches, redefined-outer-name
        """
        Calculate asset allocations using HRP algorithm.

        The resulting weights are stored in ``self.weights``; nothing is returned.

        :param asset_names: (list) a list of strings containing the asset names
        :param asset_prices: (pd.Dataframe) a dataframe of historical asset prices (daily close)
                                            indexed by date
        :param asset_returns: (pd.Dataframe/numpy matrix) user supplied matrix of asset returns
        :param covariance_matrix: (pd.Dataframe/numpy matrix) user supplied covariance matrix of asset returns
        :param distance_matrix: (pd.Dataframe/numpy matrix) user supplied distance matrix
        :param side_weights: (pd.Series/numpy matrix) with asset_names in index and value 1 for Buy, -1 for Sell
                                                      (default 1 for all)
        :param linkage: (string) type of linkage used for Hierarchical Clustering ex: single, average, complete...
        :param resample_by: (str) specifies how to resample the prices - weekly, daily, monthly etc.. Defaults to
                                  None for no resampling
        :param use_shrinkage: (Boolean) specifies whether to shrink the covariances
        :raises ValueError: if none of prices, returns or covariance is supplied, if the prices are not a
                            date-indexed dataframe, or if the asset names cannot be determined
        """

        if asset_prices is None and asset_returns is None and covariance_matrix is None:
            raise ValueError(
                "You need to supply either raw prices or returns or a covariance matrix of asset returns")

        if asset_prices is not None:
            if not isinstance(asset_prices, pd.DataFrame):
                raise ValueError("Asset prices matrix must be a dataframe")
            if not isinstance(asset_prices.index, pd.DatetimeIndex):
                raise ValueError("Asset prices dataframe must be indexed by date.")

        if asset_names is None:
            if asset_prices is not None:
                asset_names = asset_prices.columns
            elif asset_returns is not None and isinstance(asset_returns, pd.DataFrame):
                asset_names = asset_returns.columns
            elif isinstance(covariance_matrix, pd.DataFrame):
                # A labelled covariance matrix already carries the asset names
                asset_names = covariance_matrix.columns
            else:
                raise ValueError("Please provide a list of asset names")

        # A plain python list cannot be indexed with a list of positions, which the
        # seriation and bisection steps rely on, so normalise the names to an Index.
        asset_names = pd.Index(asset_names)

        # Calculate the returns if the user does not supply a returns dataframe
        if asset_returns is None and covariance_matrix is None:
            asset_returns = self.returns_estimator.calculate_returns(asset_prices=asset_prices, resample_by=resample_by)
        if asset_returns is not None:
            asset_returns = pd.DataFrame(asset_returns, columns=asset_names)

        # Calculate covariance of returns or use the user specified covariance matrix
        if covariance_matrix is None:
            if use_shrinkage:
                covariance_matrix = self._shrink_covariance(asset_returns=asset_returns)
            else:
                covariance_matrix = asset_returns.cov()
        covariance_matrix = pd.DataFrame(covariance_matrix, index=asset_names, columns=asset_names)

        # Calculate correlation and distance from covariance matrix
        if distance_matrix is None:
            correlation_matrix = self._cov2corr(covariance=covariance_matrix)
            # Rounding absorbs floating point noise that would otherwise make 1 - corr slightly negative
            distance_matrix = np.sqrt((1 - correlation_matrix).round(5) / 2)
        # Always label the matrix so that user supplied numpy input works with .loc based seriation
        distance_matrix = pd.DataFrame(distance_matrix, index=asset_names, columns=asset_names)

        # Step-1: Tree Clustering
        self.clusters = self._tree_clustering(distance=distance_matrix, method=linkage)

        # Step-2: Quasi Diagnalization
        num_assets = len(asset_names)
        # The root of the linkage tree always has id 2 * num_assets - 2
        self.ordered_indices = self._quasi_diagnalization(num_assets, 2 * num_assets - 2)
        self.seriated_distances = self._get_seriated_matrix(assets=asset_names, distance=distance_matrix)

        if side_weights is None:
            side_weights = pd.Series([1] * num_assets, index=asset_names)
        side_weights = pd.Series(side_weights, index=asset_names)

        # Step-3: Recursive Bisection
        self._recursive_bisection(covariance=covariance_matrix, assets=asset_names, side_weights=side_weights)

    @staticmethod
    def _tree_clustering(distance, method='single'):
        """
        Perform the traditional hierarchical tree clustering.

        :param distance: (pd.Dataframe/np.array) square, symmetric distance matrix of the assets with a zero diagonal
        :param method: (str) the type of clustering to be done
        :return: (np.array) linkage matrix describing the clusters
        """

        # scipy's linkage expects the condensed (upper-triangular) form of the distance matrix
        clusters = linkage(squareform(np.asarray(distance)), method=method)
        return clusters

    def _quasi_diagnalization(self, num_assets, curr_index):
        """
        Rearrange the assets to reorder them according to hierarchical tree clustering order.

        The linkage tree stored in ``self.clusters`` is walked depth-first (left child before right child) with an
        explicit stack, so that deep, chain-shaped trees do not hit python's recursion limit.

        :param num_assets: (int) the total number of assets
        :param curr_index: (int) id of the node to expand; ids below num_assets are individual assets
        :return: (list) the assets rearranged according to hierarchical clustering
        """

        ordered = []
        pending = [curr_index]
        while pending:
            node = pending.pop()
            if node < num_assets:
                # Leaf node - an original asset
                ordered.append(node)
                continue

            # Cluster ids are offset by num_assets into the rows of the linkage matrix
            left = int(self.clusters[node - num_assets, 0])
            right = int(self.clusters[node - num_assets, 1])

            # Push right first so the left branch is expanded first
            pending.append(right)
            pending.append(left)

        return ordered

    def _get_seriated_matrix(self, assets, distance):
        """
        Based on the quasi-diagnalization, reorder the original distance matrix, so that assets within
        the same cluster are grouped together.

        :param assets: (list) list of asset names in the portfolio
        :param distance: (pd.Dataframe) distance values between asset returns
        :return: (pd.Dataframe) re-arranged distance matrix based on tree clusters
        """

        # pd.Index supports positional fancy indexing, which a plain list does not
        ordering = pd.Index(assets)[self.ordered_indices]
        seriated_distances = distance.loc[ordering, ordering]
        return seriated_distances

    @staticmethod
    def _get_inverse_variance_weights(covariance):
        """
        Calculate the inverse variance weight allocations.

        :param covariance: (pd.Dataframe) covariance matrix of assets
        :return: (np.array) inverse variance weight values, summing to one
        """

        inv_diag = 1 / np.diag(covariance.values)
        parity_w = inv_diag * (1 / np.sum(inv_diag))
        return parity_w

    def _get_cluster_variance(self, covariance, cluster_indices):
        """
        Calculate cluster variance.

        The cluster is weighted by inverse variance and the weighted covariance is delegated to
        ``self.risk_metrics.calculate_variance``.

        :param covariance: (pd.Dataframe) covariance matrix of assets
        :param cluster_indices: (list) list of positional asset indices for the cluster
        :return: (float) variance of the cluster
        """

        cluster_covariance = covariance.iloc[cluster_indices, cluster_indices]
        parity_w = self._get_inverse_variance_weights(cluster_covariance)
        cluster_variance = self.risk_metrics.calculate_variance(covariance=cluster_covariance, weights=parity_w)
        return cluster_variance

    def _recursive_bisection(self, covariance, assets, side_weights):
        """
        Recursively assign weights to the clusters - ultimately assigning weights to the individual assets.

        The result is stored in ``self.weights`` as a single-row dataframe with one column per asset.

        :param covariance: (pd.Dataframe) the covariance matrix
        :param assets: (list) list of asset names in the portfolio
        :param side_weights: (pd.Series) indexed by asset name, 1 for assets to buy and -1 for assets to sell
        """

        # Start from float weights: they are scaled in place by float allocation factors below
        self.weights = pd.Series(1.0, index=self.ordered_indices)
        clustered_alphas = [self.ordered_indices]

        while clustered_alphas:
            # Split every cluster with more than one asset into a left and a right half
            clustered_alphas = [cluster[start:end]
                                for cluster in clustered_alphas
                                for start, end in ((0, len(cluster) // 2), (len(cluster) // 2, len(cluster)))
                                if len(cluster) > 1]

            # The halves are stored consecutively, so step through them in pairs
            for subcluster in range(0, len(clustered_alphas), 2):
                left_cluster = clustered_alphas[subcluster]
                right_cluster = clustered_alphas[subcluster + 1]

                # Get left and right cluster variances and calculate allocation factor
                left_cluster_variance = self._get_cluster_variance(covariance, left_cluster)
                right_cluster_variance = self._get_cluster_variance(covariance, right_cluster)
                alloc_factor = 1 - left_cluster_variance / (left_cluster_variance + right_cluster_variance)

                # Assign weights to each sub-cluster (label based: the index holds the asset positions)
                self.weights.loc[left_cluster] *= alloc_factor
                self.weights.loc[right_cluster] *= 1 - alloc_factor

        # Assign actual asset values to weight index
        self.weights.index = pd.Index(assets)[self.ordered_indices]
        self.weights = pd.DataFrame(self.weights)

        # Build Long/Short portfolio if needed
        short_ptf = side_weights[side_weights == -1].index
        buy_ptf = side_weights[side_weights == 1].index
        if not short_ptf.empty:
            # Short half size
            self.weights.loc[short_ptf] /= self.weights.loc[short_ptf].sum().item()
            self.weights.loc[short_ptf] *= -0.5
            # Buy other half
            self.weights.loc[buy_ptf] /= self.weights.loc[buy_ptf].sum().item()
            self.weights.loc[buy_ptf] *= 0.5
        self.weights = self.weights.T

    def plot_clusters(self, assets):
        """
        Plot a dendrogram of the hierarchical clusters.

        :param assets: (list) list of asset names in the portfolio
        :return: (dict) the dendrogram structure returned by scipy
        """

        dendrogram_plot = dendrogram(self.clusters, labels=assets)
        return dendrogram_plot

    @staticmethod
    def _shrink_covariance(asset_returns):
        """
        Regularise/Shrink the asset covariances.

        :param asset_returns: (pd.Dataframe) asset returns
        :return: (np.array) shrunk asset returns covariances, as estimated by sklearn's OAS
        """

        oas = OAS()
        oas.fit(asset_returns)
        shrinked_covariance = oas.covariance_
        return shrinked_covariance

    @staticmethod
    def _cov2corr(covariance):
        """
        Calculate the correlations from asset returns covariance matrix.

        :param covariance: (pd.Dataframe) asset returns covariances
        :return: (pd.Dataframe) correlations between asset returns
        :raises np.linalg.LinAlgError: if any variance on the diagonal is not strictly positive
        """

        # Work in floats throughout so integer valued input is not truncated
        std = np.sqrt(np.diag(np.asarray(covariance, dtype=float)))
        if not np.all(std > 0):
            raise np.linalg.LinAlgError("Covariance matrix must have strictly positive variances")

        # corr[i, j] = cov[i, j] / (std[i] * std[j]); the outer product keeps the result exactly symmetric
        corr = np.asarray(covariance, dtype=float) / np.outer(std, std)
        corr = pd.DataFrame(corr, index=covariance.columns, columns=covariance.columns)
        return corr
Loading

0 comments on commit f2775de

Please sign in to comment.