Finalizing the Jupyter notebook for demonstrating CSSR-via-transCSSR.

RafaelMri · Apr 23, 2018 · 4713abd · 4713abd
1 parent 64e61df
commit 4713abd
Showing 10 changed files with 4,061 additions and 283 deletions.
diff --git a/.ipynb_checkpoints/demo_CSSR-checkpoint.ipynb b/.ipynb_checkpoints/demo_CSSR-checkpoint.ipynb
diff --git a/compute_mixed_matrix.py b/compute_mixed_matrix.py
@@ -3,16 +3,14 @@
 import numpy
 import scipy
 
-import ipdb
-
 import matplotlib.pyplot as plt
 
 # machine_fname = 'transCSSR_results/+even-exact.dot'
 # machine_fname = 'transCSSR_results/+golden-mean.dot'
 # machine_fname = 'transCSSR_results/+barnettX.dot'
 # machine_fname = 'transCSSR_results/+RnC.dot'
-# machine_fname = 'transCSSR_results/+RIP-exact.dot'
-machine_fname = 'transCSSR_results/+RIP.dot'
+machine_fname = 'transCSSR_results/+RIP-exact.dot'
+# machine_fname = 'transCSSR_results/+RIP.dot'
 # machine_fname = 'transCSSR_results/+complex-csm.dot'
 # machine_fname = 'transCSSR_results/+renewal-process.dot'
 
@@ -30,9 +28,10 @@ def Hp(p):
 	return -numpy.sum(x*numpy.log2(x))
 
 p = 0.5
+q = 0.5
 
 Hp(1/(2 - p)) - Hp(p)/(2 - p) # E for Golden Mean process
 
-numpy.log2(p + 2) - p*numpy.log2(p)/(p + 2) - (1 - p*(1-p))/(p+2)*Hp((1 - p)/(1 - p*(1 - p))) # E for RIP
+numpy.log2(p + 2) - p*numpy.log2(p)/(p + 2) - (1 - p*q)/(p+2)*Hp((1 - p)/(1 - p*q)) # E for RIP
 
 plt.show()
diff --git a/demo_CSSR.ipynb b/demo_CSSR.ipynb
diff --git a/transCSSR_bc.py b/transCSSR_bc.py
@@ -413,7 +413,7 @@ def draw_dot(fname, epsilon, invepsilon, morph_by_state, axs, ays, L_max):
 
 									wfile.write('{} -> {} [label = \"{}|{}:{:.3}\"];\n'.format(numeric_to_alpha(printing_lookup[state]), numeric_to_alpha(printing_lookup[to_state]), ay, ax, prob_by_state[state][len(ays)*input_lookup[ax] + output_lookup[ay]]))
 		wfile.write('}')
-def draw_dot_singlearrows(fname, epsilon, invepsilon, morph_by_state, axs, ays, L_max):
+def draw_dot_singlearrows(fname, epsilon, invepsilon, morph_by_state, axs, ays, L_max, all_digits = False):
 	"""
 	This function draws the .dot file associated with the 
 	epsilon-transducer stored in epsilon+invepsilon.
@@ -524,9 +524,10 @@ def draw_dot_singlearrows(fname, epsilon, invepsilon, morph_by_state, axs, ays,
 
 										exists_transition[(state, to_state)] = True
 
-										W[(state, to_state)] += '{}|{}:{:.3}\\l'.format(ay, ax, prob_by_state[state][len(ays)*input_lookup[ax] + output_lookup[ay]])
-
-										# wfile.write('{} -> {} [label = \"({}, {})\"];\n'.format(numeric_to_alpha(printing_lookup[state]), numeric_to_alpha(printing_lookup[to_state]), ax, ay))
+										if all_digits:
+											W[(state, to_state)] += '{}|{}:{}\\l'.format(ay, ax, prob_by_state[state][len(ays)*input_lookup[ax] + output_lookup[ay]])
+										else:
+											W[(state, to_state)] += '{}|{}:{:.3}\\l'.format(ay, ax, prob_by_state[state][len(ays)*input_lookup[ax] + output_lookup[ay]])
 					else:
 						pass
 				else:
@@ -544,7 +545,10 @@ def draw_dot_singlearrows(fname, epsilon, invepsilon, morph_by_state, axs, ays,
 
 									exists_transition[(state, to_state)] = True
 
-									W[(state, to_state)] += '{}|{}:{:.3}\\l'.format(ay, ax, prob_by_state[state][len(ays)*input_lookup[ax] + output_lookup[ay]])
+									if all_digits:
+										W[(state, to_state)] += '{}|{}:{:}\\l'.format(ay, ax, prob_by_state[state][len(ays)*input_lookup[ax] + output_lookup[ay]])
+									else:
+										W[(state, to_state)] += '{}|{}:{:.3}\\l'.format(ay, ax, prob_by_state[state][len(ays)*input_lookup[ax] + output_lookup[ay]])
 
 		for from_state in invepsilon.keys():
 			for to_state in invepsilon.keys():
@@ -1203,7 +1207,7 @@ def estimate_predictive_distributions(stringX, stringY, L_max, is_multiline = Fa
 				word_lookup_fut[(trunc_stringX, trunc_stringY)] += 1
 
 	return word_lookup_marg, word_lookup_fut
-def run_transCSSR(word_lookup_marg, word_lookup_fut, L_max, axs, ays, e_symbols, Xt_name, Yt_name, alpha = 0.001, test_type = 'chi2', fname = None, verbose = False):
+def run_transCSSR(word_lookup_marg, word_lookup_fut, L_max, axs, ays, e_symbols, Xt_name, Yt_name, alpha = 0.001, test_type = 'chi2', fname = None, verbose = False, all_digits = False):
 	"""
 	run_transCSSR performs the CSSR algorithm, adapted for
 	epsilon-transducers, to estimate the Shalizi-style
@@ -1665,10 +1669,10 @@ def run_transCSSR(word_lookup_marg, word_lookup_fut, L_max, axs, ays, e_symbols,
 	# save_states('transCSSR_results/mydot-det_recurrent', epsilon, invepsilon, morph_by_state, axs, ays, L_max)
 
 	if fname == None:
-		draw_dot_singlearrows('transCSSR_results/{}+{}'.format(Xt_name, Yt_name), epsilon, invepsilon, morph_by_state, axs, ays, L_max)
+		draw_dot_singlearrows('transCSSR_results/{}+{}'.format(Xt_name, Yt_name), epsilon, invepsilon, morph_by_state, axs, ays, L_max, all_digits)
 		save_states('transCSSR_results/{}+{}'.format(Xt_name, Yt_name), epsilon, invepsilon, morph_by_state, axs, ays, L_max)
 	else:
-		draw_dot_singlearrows('transCSSR_results/{}'.format(fname), epsilon, invepsilon, morph_by_state, axs, ays, L_max)
+		draw_dot_singlearrows('transCSSR_results/{}'.format(fname), epsilon, invepsilon, morph_by_state, axs, ays, L_max, all_digits)
 		save_states('transCSSR_results/{}'.format(fname), epsilon, invepsilon, morph_by_state, axs, ays, L_max)
 
 	return epsilon, invepsilon, morph_by_state
@@ -3779,6 +3783,66 @@ def filter_and_pred_probs(stringX, stringY, machine_fname, transducer_fname, axs
 	return pred_probs_by_time, cur_states_by_time
 
 def compute_ict_measures(machine_fname, axs, inf_alg, L_max, to_plot = False, M_states_to_index = None, M_trans = None, stationary_dist_eM = None):
+	"""
+	Compute information- and computation-theoretic (ICT) measures from an epsilon-machine stored in dot format.
+
+	We use the spectral representation of the process via its mixed
+	state presentation, as described in
+
+	J. P. Crutchfield, C. J. Ellison, and P. M. Riechers, "Exact complexity: The spectral decomposition of intrinsic computation," Physics Letters A, vol. 380, no. 9, pp. 998-1002, Mar. 2016. [arXiv](https://arxiv.org/abs/1309.3792).
+
+	Parameters
+	----------
+	machine_fname : string
+			The path to the epsilon-machine in dot format.
+	axs : list
+			The process alphabet.
+	inf_alg : string
+			The inference algorithm used to estimate the machine.
+			One of {'CSSR', 'transCSSR'}
+	L_max : int
+			How far out to compute the finite-L entropy rate
+			and excess entropy.
+	to_plot : boolean
+			Whether or not to plot the intermediate results.
+
+	Returns
+	-------
+	HLs : numpy.array
+			The entropies over words of length L,
+			starting at HLs[0] = H[X_{0}] and going
+			up to HLs[L_max] = H[X_{0}^{L_{max}}].
+	hLs : numpy.array
+			The conditional entropies, conditioning
+			on pasts of length L, starting at
+			hLs[0] = H[X_{0} | *] = H[X_{0}] and
+			going up to 
+			hLs[L_max] = H[X_{0} | X_{-L_{max}}^{-1}].
+	hmu : float
+			The asymptotic entropy rate.
+	ELs : numpy.array
+			The finite-L excess entropies, i.e.
+			the mutual information between past
+			and future blocks, each of length L,
+			starting at ELs[0] = I[X_{-1}; X_{0}]
+			and going up to 
+			ELs[L_max] = I[X_{-L_{max}}^{-1}; X_{0}^{L_{max} - 1}].
+	Cmu : float
+			The statistical complexity.
+	etas_matrix : numpy.matrix
+			The states associated with the mixed
+			state presentation of the process.
+
+	Notes
+	-----
+	Any notes go here.
+
+	Examples
+	--------
+	>>> import module_name
+	>>> # Demonstrate code here.
+
+	"""
 
 	if stationary_dist_eM == None:
 		P, M_states_to_index, M_trans = compute_eM_transition_matrix(machine_fname, axs, inf_alg = inf_alg)

diff --git a/transCSSR_results/+complex-csm.dot b/transCSSR_results/+complex-csm.dot
@@ -1,4 +1,4 @@
-digraph  {
+digraph\l{
 size = "6,8.5";
 ratio = "fill";
 node
@@ -9,29 +9,29 @@ edge [fontsize = 24];
 node [fontname = "CMU Serif Roman"];
 graph [fontname = "CMU Serif Roman"];
 edge [fontname = "CMU Serif Roman"];
-A -> N [label = "0|0:0.01205  "];
-A -> A [label = "1|0:0.9879   "];
-B -> K [label = "1|0:1        "];
-C -> N [label = "0|0:0.04651  "];
-C -> A [label = "1|0:0.9535   "];
-D -> E [label = "0|0:0.7094   "];
-D -> M [label = "1|0:0.2906   "];
-E -> E [label = "0|0:0.9458   "];
-E -> H [label = "1|0:0.05416  "];
-F -> L [label = "0|0:0.06691  "];
-F -> G [label = "1|0:0.9331   "];
-G -> B [label = "0|0:0.04377  "];
-G -> C [label = "1|0:0.9562   "];
-H -> L [label = "0|0:0.4615   "];
-H -> J [label = "1|0:0.5385   "];
-I -> F [label = "1|0:1        "];
-J -> L [label = "0|0:0.1429   "];
-J -> C [label = "1|0:0.8571   "];
-K -> G [label = "1|0:1        "];
-L -> D [label = "0|0:0.9079   "];
-L -> F [label = "1|0:0.09211  "];
-M -> I [label = "0|0:0.2353   "];
-M -> G [label = "1|0:0.7647   "];
-N -> D [label = "0|0:0.1633   "];
-N -> F [label = "1|0:0.8367   "];
+A -> N [label = "0|0:0.01205\l"];
+A -> A [label = "1|0:0.9879\l"];
+B -> K [label = "1|0:1\l"];
+C -> N [label = "0|0:0.04651\l"];
+C -> A [label = "1|0:0.9535\l"];
+D -> E [label = "0|0:0.7094\l"];
+D -> M [label = "1|0:0.2906\l"];
+E -> E [label = "0|0:0.9458\l"];
+E -> H [label = "1|0:0.05416\l"];
+F -> L [label = "0|0:0.06691\l"];
+F -> G [label = "1|0:0.9331\l"];
+G -> B [label = "0|0:0.04377\l"];
+G -> C [label = "1|0:0.9562\l"];
+H -> L [label = "0|0:0.4615\l"];
+H -> J [label = "1|0:0.5385\l"];
+I -> F [label = "1|0:1\l"];
+J -> L [label = "0|0:0.1429\l"];
+J -> C [label = "1|0:0.8571\l"];
+K -> G [label = "1|0:1\l"];
+L -> D [label = "0|0:0.9079\l"];
+L -> F [label = "1|0:0.09211\l"];
+M -> I [label = "0|0:0.2353\l"];
+M -> G [label = "1|0:0.7647\l"];
+N -> D [label = "0|0:0.1633\l"];
+N -> F [label = "1|0:0.8367\l"];
 }
diff --git a/transCSSR_results/+even.dat_results b/transCSSR_results/+even.dat_results
@@ -1,28 +1,41 @@
 State number: 0
-000, 000
-000, 011
-000, 100
-000, 110
 0000, 0000
 0000, 0011
 0000, 0110
 0000, 1000
 0000, 1011
 0000, 1100
 0000, 1110
+00000, 00000
+00000, 00011
+00000, 00110
+00000, 01100
+00000, 01111
+00000, 10000
+00000, 10011
+00000, 10110
+00000, 11000
+00000, 11011
+00000, 11100
+00000, 11110
 distribution: 
-P(0|0,state) = 0.503494374981	
-P(1|0,state) = 0.496505625019	
+P(0|0,state) = 0.503304344749	
+P(1|0,state) = 0.496695655251	
 transitions: T((0, 0)) = 0	T((0, 1)) = 1	
 P(State) = ...
 
 State number: 1
-000, 001
-000, 101
 0000, 0001
 0000, 0111
 0000, 1001
 0000, 1101
+00000, 00001
+00000, 00111
+00000, 01101
+00000, 10001
+00000, 10111
+00000, 11001
+00000, 11101
 distribution: 
 P(0|0,state) = 0.0	
 P(1|0,state) = 1.0	

diff --git a/transCSSR_results/+even.dot b/transCSSR_results/+even.dot
@@ -9,7 +9,7 @@ edge [fontsize = 24];
 node [fontname = "CMU Serif Roman"];
 graph [fontname = "CMU Serif Roman"];
 edge [fontname = "CMU Serif Roman"];
-A -> A [label = "0|0:0.503\l"];
-A -> B [label = "1|0:0.497\l"];
+A -> A [label = "0|0:0.503304344749\l"];
+A -> B [label = "1|0:0.496695655251\l"];
 B -> A [label = "1|0:1.0\l"];
 }