add process discriminator

imanolperez · Apr 12, 2019 · 78055c8 · 78055c8
1 parent 5c5f748
commit 78055c8
Show file tree

Hide file tree

Showing 9 changed files with 498 additions and 2 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+    "python.pythonPath": "/usr/bin/python3"
+}
diff --git a/README.md b/README.md
@@ -1,2 +1,2 @@
-# market_simulator
-Market simulator
+# Market simulator
+Project with Blanka.
diff --git a/notebooks/.ipynb_checkpoints/process_discriminator.py-checkpoint.ipynb b/notebooks/.ipynb_checkpoints/process_discriminator.py-checkpoint.ipynb
diff --git a/notebooks/__pycache__/base.cpython-36.pyc b/notebooks/__pycache__/base.cpython-36.pyc
diff --git a/notebooks/base.py b/notebooks/base.py
@@ -0,0 +1,3 @@
+import sys
+
+# Put "../src" at the front of the module search path. The path is relative,
+# so it resolves against the current working directory, not this file.
+# NOTE(review): presumably this lets the notebooks import modules from src/ --
+# confirm the notebooks are always started from the notebooks/ directory.
+sys.path.insert(0, "../src")
diff --git a/notebooks/process_discriminator.py.ipynb b/notebooks/process_discriminator.py.ipynb
diff --git a/src/__init__.py b/src/__init__.py
diff --git a/src/__pycache__/process_discriminator.cpython-36.pyc b/src/__pycache__/process_discriminator.cpython-36.pyc
diff --git a/src/process_discriminator.py b/src/process_discriminator.py
@@ -0,0 +1,116 @@
+import numpy as np
+from esig import tosig
+from tqdm import tqdm
+from scipy.optimize import brentq
+from joblib import Parallel, delayed
+import ast
+
+def psi(x, M=4, a=1):
+    """Squared norm that is smoothly capped once it exceeds ``M``.
+
+    With ``y = x ** 2`` the function returns ``y`` when ``y <= M`` and
+    ``M + M ** (1 + a) * (M ** (-a) - y ** (-a)) / a`` otherwise. Both
+    branches agree at ``y == M`` and, for ``a > 0``, the result stays below
+    ``M + M / a``.
+
+    Parameters
+    ----------
+    x : float
+        Norm to be transformed (it is squared internally).
+    M : float, optional
+        Threshold on the squared norm above which the cap applies.
+        Default is 4.
+    a : float, optional
+        Exponent controlling how fast the cap saturates. Default is 1.
+
+    Returns
+    -------
+    float
+        The transformed squared norm.
+
+    """
+    x = x ** 2
+
+    if x <= M:
+        return x
+
+    # The ``- x ** (-a)`` term is what makes the function continuous at
+    # x == M; without it the value jumps straight to the constant M + M / a.
+    return M + M ** (1 + a) * (M ** (-a) - x ** (-a)) / a
+
+def norm(x):
+    """Return the norm of ``x`` as computed by ``np.linalg.norm`` with its defaults."""
+    magnitude = np.linalg.norm(x)
+    return magnitude
+
+def _level_sizes(n_terms, order):
+    """Return how many signature coordinates sit at each level 0..order.
+
+    Level ``k`` of a signature of a ``dim``-dimensional path has ``dim ** k``
+    coordinates; ``dim`` is recovered by matching the total length.
+    """
+    dim = 1
+
+    while True:
+        sizes = [dim ** level for level in range(order + 1)]
+        total = sum(sizes)
+
+        if total == n_terms:
+            return sizes
+
+        # For order < 1 the total does not grow with dim, so give up at once.
+        if total > n_terms or order < 1:
+            raise ValueError(
+                "A signature with %d terms is not a truncation at order %d."
+                % (n_terms, order)
+            )
+
+        dim += 1
+
+
+def phi(x, order):
+    """Find the scaling factor that normalises a truncated signature.
+
+    Solves ``sum_k ||x_k|| ** 2 * lam ** (2 * k) == psi(||x||)`` for ``lam``,
+    where ``x_k`` is the block of coordinates at tensor level ``k``. This is
+    the factor that `Phi` raises to the power ``len(key)``, i.e. to the level
+    of each coordinate.
+
+    Parameters
+    ----------
+    x : sequence of float
+        Flattened truncated signature, including the level-0 term.
+        Assumes coordinates are ordered by level, as `Phi` pairs them with
+        the keys of ``tosig.sigkeys`` -- TODO confirm against esig.
+    order : int
+        Truncation order of the signature.
+
+    Returns
+    -------
+    float
+        Root of the polynomial found by ``brentq`` in ``[0, 5]``.
+
+    Raises
+    ------
+    ValueError
+        If ``len(x)`` does not match a signature truncated at ``order``, or
+        if ``brentq`` cannot bracket a root in ``[0, 5]``.
+
+    """
+    # Float dtype so that the in-place subtraction below also works when the
+    # caller hands in integers.
+    x = np.asarray(x, dtype=float)
+
+    # Collapse the squared coordinates into one squared norm per level: the
+    # exponent of lam must be the level, not the flat coordinate index.
+    sizes = _level_sizes(len(x), order)
+    boundaries = np.cumsum(sizes)[:-1]
+    level_sq = np.array([block.sum() for block in np.split(x ** 2, boundaries)])
+    level_sq[0] -= psi(norm(x))
+
+    powers = 2 * np.arange(len(level_sq))
+
+    def residual(lam):
+        return np.dot(level_sq, lam ** powers)
+
+    return brentq(residual, 0, 5)
+
+def get_keys(dim, order):
+    """List the signature keys for the given dimension and order as tuples.
+
+    ``tosig.sigkeys(dim, order)`` is split on whitespace and every token is
+    parsed into a Python tuple.
+
+    Parameters
+    ----------
+    dim : int
+        Dimension passed to ``tosig.sigkeys``.
+    order : int
+        Order passed to ``tosig.sigkeys``.
+
+    Returns
+    -------
+    list of tuple
+        One tuple per key, in the order ``tosig.sigkeys`` lists them.
+
+    """
+    def parse(token):
+        # Tokens longer than "()" get a trailing comma so that a
+        # single-index key such as "(1)" evaluates to a tuple, not an int.
+        if len(token) > 2:
+            token = token.replace(")", ",)")
+
+        return ast.literal_eval(token)
+
+    return [parse(token) for token in tosig.sigkeys(dim, order).split()]
+
+def Phi(X, order, normalise=True):
+    """Compute the (optionally normalised) truncated signature of a path.
+
+    Parameters
+    ----------
+    X : array-like
+        Path handed to ``tosig.stream2sig``; its second axis length is used
+        as the dimension when building the signature keys.
+    order : int
+        Signature order.
+    normalise : bool, optional
+        If True (default), each coordinate is multiplied by
+        ``phi(...) ** len(key)``; if False the raw signature is returned.
+
+    Returns
+    -------
+    array
+        The signature, rescaled when ``normalise`` is True.
+
+    """
+    signature = tosig.stream2sig(np.array(X), order)
+
+    if normalise:
+        words = get_keys(np.shape(X)[1], order)
+        scale = phi(tuple(signature), order)
+
+        # One weight per coordinate: the scale raised to the key's length.
+        weights = np.array([scale ** len(word) for word in words])
+
+        return weights * signature
+
+    return signature
+
+def k(X, Y):
+    """Linear kernel: return ``np.dot(X, Y)``."""
+    inner_product = np.dot(X, Y)
+    return inner_product
+
+
+def T(set1, set2, order, verbose=True, normalise=True):
+    """Two-sample statistic built from the signature features of two path sets.
+
+    Every path is mapped through `Phi` (in parallel), then the statistic is
+    ``sum(XX) / m**2 + sum(YY) / n**2 - 2 * sum(XY) / (m * n)``, where
+    ``XX``, ``YY`` and ``XY`` are the matrices of pairwise dot products.
+
+    Parameters
+    ----------
+    set1 : list of array
+        First set of paths.
+    set2 : list of array
+        Second set of paths.
+    order : int
+        Signature order.
+    verbose : bool, optional
+        If True (default), show a progress bar for each population.
+    normalise : bool, optional
+        Forwarded to `Phi`. Default is True.
+
+    Returns
+    -------
+    float
+        Value of the statistic.
+
+    """
+    def features(paths, description):
+        # Signatures of all paths, computed on every available core.
+        progress = tqdm(paths, desc=description, disable=(not verbose))
+        return Parallel(n_jobs=-1)(delayed(Phi)(path, order, normalise) for path in progress)
+
+    m, n = len(set1), len(set2)
+
+    X = features(set1, "Computing signatures of population 1")
+    Y = features(set2, "Computing signatures of population 2")
+
+    gram_xx = np.dot(X, np.transpose(X))
+    gram_yy = np.dot(Y, np.transpose(Y))
+    gram_xy = np.dot(X, np.transpose(Y))
+
+    within = gram_xx.sum() / (m * m) + gram_yy.sum() / (n * n)
+    between = 2 * gram_xy.sum() / (m * n)
+
+    return within - between
+
+def c_alpha(m, alpha):
+    """Return ``(2 * K / m) * (1 + sqrt(-2 * log(alpha))) ** 2`` with ``K = 1``.
+
+    Parameters
+    ----------
+    m : int
+        Sample size.
+    alpha : float
+        Level plugged into the logarithm.
+
+    Returns
+    -------
+    float
+        The threshold value.
+
+    """
+    K = 1.
+    scale = 2 * K / m
+    root = 1 + np.sqrt(-2 * np.log(alpha))
+
+    return scale * root ** 2
+
+def test(set1, set2, order, confidence_level=0.99):
+    """Statistical test to determine if two sets of paths come
+    from the same distribution.
+
+    The statistic `T` of the two sets is compared with the threshold
+    ``c_alpha(m, 1 - confidence_level)``.
+
+    Parameters
+    ----------
+    set1 : list of array
+        Set of paths.
+    set2 : list of array
+        Set of paths. Must have the same length as `set1`.
+    order : int
+        Signature order.
+    confidence_level : float, optional
+        Confidence level of the statistical test. Must be in [0, 1].
+        Default is 0.99, i.e. 99%. A level of exactly 1 yields an infinite
+        threshold, so the hypothesis is then never rejected.
+
+    Returns
+    -------
+    bool
+        True if the hypothesis is rejected and hence the sets come from
+        different distributions, False otherwise.
+
+    Raises
+    ------
+    AssertionError
+        If the sets differ in size or `confidence_level` is outside [0, 1].
+
+    """
+
+    assert len(set1) == len(set2), "Same size samples accepted for now."
+
+    assert 0. <= confidence_level <= 1., "Confidence level must be in [0, 1]."
+
+    m = len(set1)
+
+    TU = T(set1, set2, order)
+
+    # c_alpha expects the significance level (type I error bound), which is
+    # the complement of the confidence level. Passing the confidence level
+    # itself would make the test reject *more* easily as confidence grows.
+    c = c_alpha(m, 1. - confidence_level)
+
+    return TU > c