Skip to content

Commit

Permalink
Finalizing the Jupyter notebook for demonstrating CSSR-via-transCSSR.
Browse files Browse the repository at this point in the history
  • Loading branch information
David Darmon authored and David Darmon committed Apr 23, 2018

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature.
1 parent 64e61df commit 4713abd
Showing 10 changed files with 4,061 additions and 283 deletions.
1,991 changes: 1,850 additions & 141 deletions .ipynb_checkpoints/demo_CSSR-checkpoint.ipynb

Large diffs are not rendered by default.

9 changes: 4 additions & 5 deletions compute_mixed_matrix.py
Original file line number Diff line number Diff line change
@@ -3,16 +3,14 @@
import numpy
import scipy

import ipdb

import matplotlib.pyplot as plt

# machine_fname = 'transCSSR_results/+even-exact.dot'
# machine_fname = 'transCSSR_results/+golden-mean.dot'
# machine_fname = 'transCSSR_results/+barnettX.dot'
# machine_fname = 'transCSSR_results/+RnC.dot'
# machine_fname = 'transCSSR_results/+RIP-exact.dot'
machine_fname = 'transCSSR_results/+RIP.dot'
machine_fname = 'transCSSR_results/+RIP-exact.dot'
# machine_fname = 'transCSSR_results/+RIP.dot'
# machine_fname = 'transCSSR_results/+complex-csm.dot'
# machine_fname = 'transCSSR_results/+renewal-process.dot'

@@ -30,9 +28,10 @@ def Hp(p):
return -numpy.sum(x*numpy.log2(x))

p = 0.5
q = 0.5

Hp(1/(2 - p)) - Hp(p)/(2 - p) # E for Golden Mean process

numpy.log2(p + 2) - p*numpy.log2(p)/(p + 2) - (1 - p*(1-p))/(p+2)*Hp((1 - p)/(1 - p*(1 - p))) # E for RIP
numpy.log2(p + 2) - p*numpy.log2(p)/(p + 2) - (1 - p*q)/(p+2)*Hp((1 - p)/(1 - p*q)) # E for RIP

plt.show()
1,901 changes: 1,832 additions & 69 deletions demo_CSSR.ipynb

Large diffs are not rendered by default.

80 changes: 72 additions & 8 deletions transCSSR_bc.py
Original file line number Diff line number Diff line change
@@ -413,7 +413,7 @@ def draw_dot(fname, epsilon, invepsilon, morph_by_state, axs, ays, L_max):

wfile.write('{} -> {} [label = \"{}|{}:{:.3}\"];\n'.format(numeric_to_alpha(printing_lookup[state]), numeric_to_alpha(printing_lookup[to_state]), ay, ax, prob_by_state[state][len(ays)*input_lookup[ax] + output_lookup[ay]]))
wfile.write('}')
def draw_dot_singlearrows(fname, epsilon, invepsilon, morph_by_state, axs, ays, L_max):
def draw_dot_singlearrows(fname, epsilon, invepsilon, morph_by_state, axs, ays, L_max, all_digits = False):
"""
This function draws the .dot file associated with the
epsilon-transducer stored in epsilon+invepsilon.
@@ -524,9 +524,10 @@ def draw_dot_singlearrows(fname, epsilon, invepsilon, morph_by_state, axs, ays,

exists_transition[(state, to_state)] = True

W[(state, to_state)] += '{}|{}:{:.3}\\l'.format(ay, ax, prob_by_state[state][len(ays)*input_lookup[ax] + output_lookup[ay]])

# wfile.write('{} -> {} [label = \"({}, {})\"];\n'.format(numeric_to_alpha(printing_lookup[state]), numeric_to_alpha(printing_lookup[to_state]), ax, ay))
if all_digits:
W[(state, to_state)] += '{}|{}:{}\\l'.format(ay, ax, prob_by_state[state][len(ays)*input_lookup[ax] + output_lookup[ay]])
else:
W[(state, to_state)] += '{}|{}:{:.3}\\l'.format(ay, ax, prob_by_state[state][len(ays)*input_lookup[ax] + output_lookup[ay]])
else:
pass
else:
@@ -544,7 +545,10 @@ def draw_dot_singlearrows(fname, epsilon, invepsilon, morph_by_state, axs, ays,

exists_transition[(state, to_state)] = True

W[(state, to_state)] += '{}|{}:{:.3}\\l'.format(ay, ax, prob_by_state[state][len(ays)*input_lookup[ax] + output_lookup[ay]])
if all_digits:
W[(state, to_state)] += '{}|{}:{:}\\l'.format(ay, ax, prob_by_state[state][len(ays)*input_lookup[ax] + output_lookup[ay]])
else:
W[(state, to_state)] += '{}|{}:{:.3}\\l'.format(ay, ax, prob_by_state[state][len(ays)*input_lookup[ax] + output_lookup[ay]])

for from_state in invepsilon.keys():
for to_state in invepsilon.keys():
@@ -1203,7 +1207,7 @@ def estimate_predictive_distributions(stringX, stringY, L_max, is_multiline = Fa
word_lookup_fut[(trunc_stringX, trunc_stringY)] += 1

return word_lookup_marg, word_lookup_fut
def run_transCSSR(word_lookup_marg, word_lookup_fut, L_max, axs, ays, e_symbols, Xt_name, Yt_name, alpha = 0.001, test_type = 'chi2', fname = None, verbose = False):
def run_transCSSR(word_lookup_marg, word_lookup_fut, L_max, axs, ays, e_symbols, Xt_name, Yt_name, alpha = 0.001, test_type = 'chi2', fname = None, verbose = False, all_digits = False):
"""
run_transCSSR performs the CSSR algorithm, adapted for
epsilon-transducers, to estimate the Shalizi-style
@@ -1665,10 +1669,10 @@ def run_transCSSR(word_lookup_marg, word_lookup_fut, L_max, axs, ays, e_symbols,
# save_states('transCSSR_results/mydot-det_recurrent', epsilon, invepsilon, morph_by_state, axs, ays, L_max)

if fname == None:
draw_dot_singlearrows('transCSSR_results/{}+{}'.format(Xt_name, Yt_name), epsilon, invepsilon, morph_by_state, axs, ays, L_max)
draw_dot_singlearrows('transCSSR_results/{}+{}'.format(Xt_name, Yt_name), epsilon, invepsilon, morph_by_state, axs, ays, L_max, all_digits)
save_states('transCSSR_results/{}+{}'.format(Xt_name, Yt_name), epsilon, invepsilon, morph_by_state, axs, ays, L_max)
else:
draw_dot_singlearrows('transCSSR_results/{}'.format(fname), epsilon, invepsilon, morph_by_state, axs, ays, L_max)
draw_dot_singlearrows('transCSSR_results/{}'.format(fname), epsilon, invepsilon, morph_by_state, axs, ays, L_max, all_digits)
save_states('transCSSR_results/{}'.format(fname), epsilon, invepsilon, morph_by_state, axs, ays, L_max)

return epsilon, invepsilon, morph_by_state
@@ -3779,6 +3783,66 @@ def filter_and_pred_probs(stringX, stringY, machine_fname, transducer_fname, axs
return pred_probs_by_time, cur_states_by_time

def compute_ict_measures(machine_fname, axs, inf_alg, L_max, to_plot = False, M_states_to_index = None, M_trans = None, stationary_dist_eM = None):
"""
Compute i(nformation- and) c(omputation-) t(heoretic) measures from an $\epsilon$-machine stored in dot format.
We use the spectral representation of the process via its mixed
state presentation, as described in
J. P. Crutchfield, C. J. Ellison, and P. M. Riechers, "Exact complexity: The spectral decomposition of intrinsic computation," Physics Letters A, vol. 380, no. 9, pp. 998-1002, Mar. 2016. [arXiv](https://arxiv.org/abs/1309.3792).
Parameters
----------
machine_fname : string
The path to the epsilon-machine in dot format.
axs : list
The process alphabet.
inf_alg : string
The inference algorithm used to estimate the machine.
One of {'CSSR', 'transCSSR'}
L_max : int
How far out to compute the finite-L entropy rate
and excess entropy.
to_plot : boolean
Whether or not to plot the intermediate results.
Returns
-------
HLs : numpy.array
The entropies over words of length L,
starting at HLs[0] = H[X_{0}] and going
up to HLs[L_max] = H[X_{0}^{L_{max}}].
hLs : numpy.array
The conditional entropies, conditioning
on pasts of length L, starting at
hLs[0] = H[X_{0} | *] = H[X_{0}] and
going up to
hLs[L_max] = H[X_{0} | X_{-L_max}^{-1}].
hmu : float
The asymptotic entropy rate.
ELs : numpy.array
The finite-L excess entropies, i.e.
the mutual information between past
and future blocks, each of length L,
starting at ELs[0] = I[X_{-1}; X_{0}]
and going up to
ELs[L_max] = I[X_{-L_max}^{-1}; X_{0}^{L_max - 1}].
Cmu : float
The statistical complexity.
etas_matrix : numpy.matrix
The states associated with the mixed
state presentation of the process.
Notes
-----
Any notes go here.
Examples
--------
>>> import module_name
>>> # Demonstrate code here.
"""

if stationary_dist_eM == None:
P, M_states_to_index, M_trans = compute_eM_transition_matrix(machine_fname, axs, inf_alg = inf_alg)
52 changes: 26 additions & 26 deletions transCSSR_results/+complex-csm.dot
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
digraph {
digraph\l{
size = "6,8.5";
ratio = "fill";
node
@@ -9,29 +9,29 @@ edge [fontsize = 24];
node [fontname = "CMU Serif Roman"];
graph [fontname = "CMU Serif Roman"];
edge [fontname = "CMU Serif Roman"];
A -> N [label = "0|0:0.01205 "];
A -> A [label = "1|0:0.9879 "];
B -> K [label = "1|0:1 "];
C -> N [label = "0|0:0.04651 "];
C -> A [label = "1|0:0.9535 "];
D -> E [label = "0|0:0.7094 "];
D -> M [label = "1|0:0.2906 "];
E -> E [label = "0|0:0.9458 "];
E -> H [label = "1|0:0.05416 "];
F -> L [label = "0|0:0.06691 "];
F -> G [label = "1|0:0.9331 "];
G -> B [label = "0|0:0.04377 "];
G -> C [label = "1|0:0.9562 "];
H -> L [label = "0|0:0.4615 "];
H -> J [label = "1|0:0.5385 "];
I -> F [label = "1|0:1 "];
J -> L [label = "0|0:0.1429 "];
J -> C [label = "1|0:0.8571 "];
K -> G [label = "1|0:1 "];
L -> D [label = "0|0:0.9079 "];
L -> F [label = "1|0:0.09211 "];
M -> I [label = "0|0:0.2353 "];
M -> G [label = "1|0:0.7647 "];
N -> D [label = "0|0:0.1633 "];
N -> F [label = "1|0:0.8367 "];
A -> N [label = "0|0:0.01205\l"];
A -> A [label = "1|0:0.9879\l"];
B -> K [label = "1|0:1\l"];
C -> N [label = "0|0:0.04651\l"];
C -> A [label = "1|0:0.9535\l"];
D -> E [label = "0|0:0.7094\l"];
D -> M [label = "1|0:0.2906\l"];
E -> E [label = "0|0:0.9458\l"];
E -> H [label = "1|0:0.05416\l"];
F -> L [label = "0|0:0.06691\l"];
F -> G [label = "1|0:0.9331\l"];
G -> B [label = "0|0:0.04377\l"];
G -> C [label = "1|0:0.9562\l"];
H -> L [label = "0|0:0.4615\l"];
H -> J [label = "1|0:0.5385\l"];
I -> F [label = "1|0:1\l"];
J -> L [label = "0|0:0.1429\l"];
J -> C [label = "1|0:0.8571\l"];
K -> G [label = "1|0:1\l"];
L -> D [label = "0|0:0.9079\l"];
L -> F [label = "1|0:0.09211\l"];
M -> I [label = "0|0:0.2353\l"];
M -> G [label = "1|0:0.7647\l"];
N -> D [label = "0|0:0.1633\l"];
N -> F [label = "1|0:0.8367\l"];
}
29 changes: 21 additions & 8 deletions transCSSR_results/+even.dat_results
Original file line number Diff line number Diff line change
@@ -1,28 +1,41 @@
State number: 0
000, 000
000, 011
000, 100
000, 110
0000, 0000
0000, 0011
0000, 0110
0000, 1000
0000, 1011
0000, 1100
0000, 1110
00000, 00000
00000, 00011
00000, 00110
00000, 01100
00000, 01111
00000, 10000
00000, 10011
00000, 10110
00000, 11000
00000, 11011
00000, 11100
00000, 11110
distribution:
P(0|0,state) = 0.503494374981
P(1|0,state) = 0.496505625019
P(0|0,state) = 0.503304344749
P(1|0,state) = 0.496695655251
transitions: T((0, 0)) = 0 T((0, 1)) = 1
P(State) = ...

State number: 1
000, 001
000, 101
0000, 0001
0000, 0111
0000, 1001
0000, 1101
00000, 00001
00000, 00111
00000, 01101
00000, 10001
00000, 10111
00000, 11001
00000, 11101
distribution:
P(0|0,state) = 0.0
P(1|0,state) = 1.0
4 changes: 2 additions & 2 deletions transCSSR_results/+even.dot
Original file line number Diff line number Diff line change
@@ -9,7 +9,7 @@ edge [fontsize = 24];
node [fontname = "CMU Serif Roman"];
graph [fontname = "CMU Serif Roman"];
edge [fontname = "CMU Serif Roman"];
A -> A [label = "0|0:0.503\l"];
A -> B [label = "1|0:0.497\l"];
A -> A [label = "0|0:0.503304344749\l"];
A -> B [label = "1|0:0.496695655251\l"];
B -> A [label = "1|0:1.0\l"];
}
Loading

0 comments on commit 4713abd

Please sign in to comment.