-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathoptimizeParam.R
executable file
·353 lines (270 loc) · 11.5 KB
/
optimizeParam.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
## GNU > 2 License:
## written by Moreno I. Coco ([email protected]) and James Dixon
## Iterative procedure exploring a combination of parameter values
## to obtain maximal recurrence between two time-series
## The procedure should be applied on several trials to find the
## "optimal" setting for the parameters...
## it should be applied on time-series with continuous data
## TODO: probably, for windows the selection of size/embedding
## has to be done at the same time.
## some tips
## Recurrence measures change with changing parameters:
## Explore parameters for both signals same way as for
## phase space reconstruction / auto recurrence.
## If you find the same parameters for both signals: Great!
## If you find different parameters for each signal
## (which is more common):
## Run Cross RQA once for each set of parameters
## and make sure the results will pattern the same way
## across both parameter settings
## If so, you can be more confident that any results are
## not due to parameter differences.
## maxlag = 20 ## maximum lag to check for average mutual information
## radiusspan = 2 # increasing radius step means smaller steps
# #radiussample = 10 ## number of radius points within the steps
## to be sampled
## min.rec = minimal recurrence value
## max.rec = maximal recurrence value
#ts1 = runif(100)
#ts2 = runif(100)
.packageName <- 'crqa'
optimizeParam <- function(ts1, ts2, par, min.rec = 2, max.rec = 5){
## Initialize attached variables
method = metric = maxlag = radiusspan =
normalize = rescale = mindiagline = minvertline = tw =
whiteline = recpt = side = datatype = fnnpercent =
typeami = nbins = criterion = threshold =
maxEmb = numSamples = Rtol = Atol = NULL
## Assign parameters
for (v in 1:length(par)) assign(names(par)[v], par[[v]])
if (method == 'rqa' | method == 'crqa'){
## we estimate the paramaters for radius and embedding considering
## two one-dimensional time-series
if (is.vector(ts1) != TRUE){ ts1 = as.numeric(as.matrix(ts1) ) }
if (is.vector(ts2) != TRUE){ ts2 = as.numeric(as.matrix(ts2) ) }
## A: Choose a delay that will accommodate both ts.
## For example if one has a considerably longer delay
## indicated than another, you may need to select the longer
## delay of the two because that ensures that new information
## is gained for both by using the higher delay.
## If delays are close to each other, you may want to use a
## delay somewhere in between the two
mi1 = as.numeric(mutual(ts1, lag.max = maxlag, plot = FALSE))
mi2 = as.numeric(mutual(ts2, lag.max = maxlag, plot = FALSE))
## average mutual information
mi = ami(ts1, ts2, 1:maxlag)
## global minimum for each series and ami, given lag max
m1 = min(mi1); m2 = min(mi2); m12 = min(mi)
mis = c(m1, m2, m12)
## find the minimum dip across all (mi1, mi2, ami(1/2))
if (typeami == "mindip"){
minmi = which(mis == min(mis))
if (minmi == 1) lag = which(mi1 == m1)
if (minmi == 2) lag = which(mi2 == m2)
if (minmi == 3) lag = which(mi == m12)
maxlag = lag
}
## just double check that the maximum lag is the same
## with the minimum dip, if not just take the longest lag
if (typeami == "maxlag"){
lg1 = which(mi1 == m1)
lg2 = which(mi2 == m2)
lg12 = which(mi == m12)
lags = c(lg1, lg2, lg12)
maxlag = lags[which(lags == max(lags))]
}
if (length(maxlag) > 1){
## just to address those cases more than a lag has the same max/min
maxlag = maxlag[sample(1:length(maxlag),1)]
}
del = maxlag - 1
## another solution would be to use the global minimum
## but it could result into longer delays being selected
## B: Determine embedding dimension: FNN function bottoms out
## (it buys nothing to add more dimensions).
## If the embedding dimension indicated for the two files is
## different, you want to go with the higher embedding
## dimension of the two to make sure that you have
## sufficiently unfolded both time series.
## It is generally safe to overestimate embedding dimensions.
## Though this can amplify the influence of noise for
## high embedding dimensions.
## TODO: bring out this parameters to optimize them also
## m maximum embedding dimension = 20
## d delay parameter = estimate at 1
## t Theiler window = 0
## in cross-recurrence LOC is important
## rt escape factor = leave it default
## eps neighborhood diameter = leave default
embdts1 = false.nearest(ts1, m = maxEmb, d = del, t = 0, rt = 10,
eps = sd(ts1)/10)
## get a percentage of reduction of false neighbours
## based on the first dimension
fnnfraction1 = embdts1[1,]
fnnfraction1 = fnnfraction1[which(is.na(fnnfraction1) == FALSE)]
emdthd1 = fnnfraction1[1]/fnnpercent
emdix1 = which(diff(fnnfraction1) < -emdthd1)
if (length(emdix1) == 1){
emdmints1 = as.numeric(emdix1) + 1
} else if (length(emdix1) > 1){
## there is no gain when embedding
emdmints1 = as.numeric(tail(emdix1,1) + 1)
} else {
emdmints1 = 1
}
## to adjust for the diff
## alternative method, just get the minimum
## at the moment the method is commented.
# embmints1 = as.numeric( which(fnnfraction1 == min(fnnfraction1)))
embdts2 = false.nearest(ts2, m = maxEmb, d = del, t = 0, rt=10,
eps=sd(ts2)/10)
fnnfraction2 = embdts2[1,]
fnnfraction2 = fnnfraction2[which(is.na(fnnfraction2) == FALSE)]
emdthd2 = fnnfraction2[1]/fnnpercent
emdix2 = which(diff(fnnfraction2) < -emdthd2)
if (length(emdix2) == 1){
emdmints2 = as.numeric(emdix2) + 1
} else if (length(emdix2) > 1){
emdmints2 = as.numeric(tail(emdix2,1) + 1)
} else { ## there is no gain when embedding
emdmints2 = 1
}
# embmints2 = as.numeric( which(fnnfraction2 == min(fnnfraction2)))
if ( length(emdmints1) > 1){ emdmints1 = emdmints1[1] }
if ( length(emdmints2) > 1){ emdmints2 = emdmints2[1] }
embdim = max( c(emdmints1, emdmints2) )
## the radius could be theoretically estimated also for
## the multidimensional case,
## but only if the variables are z-scored or are coming from the same measuremnt
## C: Use the optimal parameters of embedding and delay
## from above and then explore an optimal radius
## first we need to see in the rescaled matrix what is
## the range of values, so that the radius interval
## can be consciously estimated.
}
if (method == 'mdcrqa'){
## we use the functions for multidimensional estimation
## do it separately for ts1 and ts2, then get the largest
## value between the two
## estimate delay
tryCatch( { delay_ts1 = mdDelay(ts1, nbins, maxlag, criterion, threshold)}
, error = function(e) {
stop("Delay not found, try increase the threshold,
or explore more lags(maxlag) on a larger number of bins (nbins)")})
tryCatch( { delay_ts2 = mdDelay(ts2, nbins, maxlag, criterion, threshold)}
, error = function(e) {
stop("Delay not found, try increase the threshold,
or explore more lags (maxlag) on a larger number of bins (nbins)")})
del = max(c(delay_ts1, delay_ts2))
## get a percentage of reduction of false neighbours
## based on the first dimension
fnn_ts1 = mdFnn(ts1, delay_ts1, maxEmb, numSamples, Rtol, Atol)
fnn_ts2 = mdFnn(ts2, delay_ts2, maxEmb, numSamples, Rtol, Atol)
fnnfraction1 = fnn_ts1[[1]]
ixi_min = which(fnnfraction1 == min(fnnfraction1))
if (length(ixi_min) > 1){
embmints1 = ixi_min[1]
} else {
embmints1 = ixi_min
}
fnnfraction2 = fnn_ts2[[1]]
ixi_min = which(fnnfraction2 == min(fnnfraction2))
if (length(ixi_min) > 1){
embmints2 = ixi_min[1]
} else {
embmints2 = ixi_min
}
embdim = max( c(embmints1, embmints1) )
}
if (radiusspan <= 1){
stop("Radius span too small, please choose a larger value")
}
dm <- as.matrix(cdist(ts1, ts2, metric = metric))
## Find indeces of the distance matrix that fall
## within prescribed radius.
if (rescale > 0){
switch(rescale,
{1 ## Create a distance matrix that is re-scaled
## to the mean distance
rescaledist = mean(dm, na.rm = T)
dmrescale = dm/rescaledist},
{2 ## Create a distance matrix that is re-scaled
## to the max distance
rescaledist = max(dm, na.rm = T);
dmrescale = dm/rescaledist},
{3 ## Create a distance matrix that is rescaled
## to the min distance
rescaledist = min(dm, na.rm = T);
dmrescale = dm/rescaledist},
{ 4 ## Create a distance matrix that is rescaled
## to the euclidean distance
dmrescale <- dm/abs(sum(dm)/(nrow(dm)^2-nrow(dm)))}
)
} else { dmrescale = dm }
sdun = sd(dmrescale)
mnun = median(dmrescale)*3
## Multiplier to increase range of candidate radii
## generate a sequence of radius
radi = seq(mnun, 0, -(sdun/radiusspan))
## radi = radi[(length(radi)/2):length(radi)]
## kpt = ceiling(length(radi)/radiussample)
## rsamples = sample(1:kpt, 1)
## syssamp = seq(rsamples, rsamples + kpt * (radiussample - 1), kpt)
## syssamp = syssamp[syssamp <= length(radi)]
## radi = radi[syssamp]
delay = del
embed = embdim
optrad = vector()
end.flag <- 0
while(end.flag == 0){
## location with largest radius
hi.loc <- 1
## location with smallest radius
lo.loc <- length(radi)
#print(c("hi.loc",hi.loc))
#print(c("lo.loc",lo.loc))
## take middle location as the current radius to be tested
curr.loc <- round(length(radi)/2)
radi[curr.loc] -> r
# print(radi)
#print(c("r",r))
#print(c("curr.loc",curr.loc))
#print(c("embed",embed))
#print(c("delay",delay))
res = crqa(ts1, ts2, delay, embed, rescale, r, normalize,
mindiagline, minvertline, tw, whiteline, recpt, side,
method, metric, datatype)
# print(c("recurr",res$RR))
## print("#######")
## print("#######")
if (res$RR >= min.rec & res$RR <= max.rec) {
optrad = r
## print(c("radius",optrad))
## print(c("embed",embed))
## print(c("delay",delay))
end.flag<-1
} else {
if (res$RR < min.rec){
## if rec less than min rec, curr.loc becomes new lo
curr.loc -> lo.loc
}
if (res$RR > max.rec){
## if rec greater than max rec, curr.loc becomes new hi
curr.loc -> hi.loc
}
if((lo.loc-hi.loc) < 2){
## if less than 2 radi locs remaining, no optimal radius
end.flag<-1
warning("Optimal Radius Not found: try again choosing a wider radius span and larger sample size")
}
}
## replace radi vector with remaining unsearched vector
radi <- radi[hi.loc:lo.loc]
} ## end while
if (length(optrad) == 0) {
optrad = NA
}
if (!is.na(optrad)){
return(list(radius = optrad, emddim = embdim, delay = del))
}
}