-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
5df1adc
commit f9fa5cd
Showing
6 changed files
with
223 additions
and
29 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
Missing Value Test, Repeat Value Test, Outlier, Spatial Inconsistency, Logical Inconsistency, Spike Test | ||
0.04310178756713867, 0.9675548076629639, 0.5453481674194336, 2.464052200317383, 0.9644520282745361, 1.6780734062194824 | ||
0.045937538146972656, 0.6281876564025879, 0.5570821762084961, 2.3783037662506104, 0.6530992984771729, 1.685499668121338 | ||
0.029288053512573242, 0.6271235942840576, 0.5480241775512695, 2.353997230529785, 0.6515278816223145, 1.6660075187683105 | ||
0.030195236206054688, 0.626065731048584, 0.5490946769714355, 2.4371140003204346, 0.6704967021942139, 1.6959545612335205 | ||
0.029053211212158203, 0.6258211135864258, 0.5637242794036865, 2.3955459594726562, 0.6635918617248535, 1.6819257736206055 | ||
0.02913188934326172, 0.623931884765625, 0.5477449893951416, 2.3734071254730225, 0.6677625179290771, 1.699209213256836 | ||
0.03390693664550781, 0.6345832347869873, 0.5562515258789062, 2.4408748149871826, 0.6964719295501709, 1.7642099857330322 | ||
0.02899956703186035, 0.6489384174346924, 0.6134247779846191, 2.4096388816833496, 0.6748590469360352, 1.7084054946899414 | ||
0.03548884391784668, 0.6452851295471191, 0.5539658069610596, 2.4438226222991943, 0.6763548851013184, 1.7239494323730469 | ||
0.028683900833129883, 0.6429176330566406, 0.5655772686004639, 2.450761318206787, 0.6836907863616943, 1.7262461185455322 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,172 @@ | ||
from Materia import * | ||
import time | ||
|
||
# Load the datasets used by the benchmark. The rockland file is read with
# 9 header lines and the bigelow file with 2 (see numHeaderLines).
DS = Dataset("data/rockland.csv", numHeaderLines=9)
DS2 = Dataset("data/evergreen.csv")
DS3 = Dataset("data/bigelow_soilMTP_2017.csv", numHeaderLines=2)

# print(DS)
print(DS2)
print(DS3)

# NOTE(review): presumably derives the column headers of DS from metadata
# rows 1 through 6 -- confirm against the Materia API.
DS.genHeadersFromMetadataRows((1, 6))

# Map each flag name used by the tests below to the code reported for it.
DS.flagcodes().are({
    "None": "OK",
    "Repeat Value": "Repeat Value",
    "Missing Value": "Missing",
    "Outlier": "Exceeds Range",
    "Spatial Inconsistency": "Inconsistent (Spatial)",
    "Logical Inconsistency": "Inconsistent (Logical)",
    "Spike": "Spike",
    "Hardware Range": "Exceeds Hardware Range",
})

# Air-temperature series from the rockland dataset, selected by header.
series_max = DS['Air temperature (2-meter) monitor_Maximum']
series_min = DS['Air temperature (2-meter) monitor_Minimum']
series_max_10 = DS['Air temperature (10-meter) monitor_Maximum']
series_avg_2 = DS['Air temperature (2-meter) monitor_Average']
series_avg_10 = DS['Air temperature (10-meter) monitor_Average']

# Each series derives its timestep from its own first two positions.
# NOTE(review): assumes beginning(+1) is the entry right after beginning()
# -- confirm against the Materia API.
series_max.timestep(series_max.beginning(+1) - series_max.beginning())
series_min.timestep(series_min.beginning(+1) - series_min.beginning())
series_avg_2.timestep(series_avg_2.beginning(+1) - series_avg_2.beginning())
|
||
def rv_test(value):
    """Repeat-value predicate: compare `value` with its prior values.

    Args:
        value: datapoint exposing ``isnan()`` and ``prior(n)``.

    Returns:
        ``False`` when the datapoint is NaN; otherwise the result of
        ``value == value.prior(3)``.

    NOTE(review): the exact meaning of comparing a datapoint with
    ``prior(3)`` is defined by the Materia types -- confirm there.
    """
    n = 3  # look-back distance handed to value.prior()

    # NaN datapoints are never reported as repeats; return an explicit
    # False so the predicate always yields a result.
    if value.isnan():
        return False

    return value == value.prior(n)
|
||
def range_test(value):
    """Outlier predicate: true when `value` lies outside [-20, 20].

    Both bounds are inclusive, so -20 and 20 themselves are in range.
    """
    lower_bound, upper_bound = -20, 20
    too_low = value < lower_bound
    return too_low or value > upper_bound
|
||
def hardware_range_test(value):
    """Hardware-range predicate: true when `value` lies outside [-100, 100].

    Both bounds are inclusive, so -100 and 100 themselves are in range.
    """
    floor, ceiling = -100, 100
    under_floor = value < floor
    return under_floor or value > ceiling
|
||
def spatial_inconsistency(value, i):
    """Spatial check of a reading against the 10-meter maximum series.

    The reading from ``series_max_10`` must stay within ``abs(value * 2)``
    of `value`; anything outside that band is reported as inconsistent.

    Args:
        value: datapoint under test.
        i: accepted but not used by this check.

    Returns:
        True when the comparison reading falls outside the band,
        False otherwise.

    NOTE(review): ``series_max_10.value().at(value)`` presumably yields
    the 10-meter reading aligned with `value` -- confirm.
    """
    reference = series_max_10.value().at(value)
    tolerance = abs(value * 2)

    return bool(
        reference > (value + tolerance) or reference < (value - tolerance)
    )
|
||
def avg_spatial_inconsistency(value, i):
    """Spatial check of a reading against the 10-meter average series.

    The reading is compared with the one taken from ``series_avg_10`` by
    their percent difference relative to their mean. The datapoint is
    reported as inconsistent when that percent difference exceeds
    ``threshold`` (2 percent).

    Args:
        value: datapoint under test.
        i: accepted but not used by this check.

    Returns:
        True when the two readings disagree by more than the threshold,
        False otherwise.

    NOTE(review): ``series_avg_10.value().at(value)`` presumably yields
    the 10-meter reading aligned with `value` -- confirm.
    """
    threshold = 2  # maximum tolerated percent difference

    comp_val = series_avg_10.value().at(value)
    diff = value - comp_val
    avg = (value + comp_val) / 2

    if avg == 0:
        # The percent difference is undefined for a zero mean; the readings
        # only agree here when they are identical (both zero).
        return bool(diff != 0)

    # Keep the tolerance and the measured difference in separate names so
    # the threshold is never replaced by a comparison result.
    percent_diff = abs(diff / avg) * 100.0
    return bool(percent_diff > threshold)
|
||
def logical_inconsistency_min(min_value):
    """Logical check for a minimum reading: it must not exceed the maximum.

    Args:
        min_value: datapoint from the minimum series.

    Returns:
        True when `min_value` is greater than the reading taken from
        ``series_max``, False otherwise.

    NOTE(review): ``series_max.value().at(min_value)`` presumably yields
    the maximum reading aligned with `min_value` -- confirm.
    """
    paired_max = series_max.value().at(min_value)
    return bool(min_value > paired_max)
|
||
def logical_inconsistency(max_value):
    """Logical check for a maximum reading: it must not be below the minimum.

    Example of an inconsistency: the maximum value is less than the
    minimum value.

    Args:
        max_value: datapoint from the maximum series.

    Returns:
        True when the reading taken from ``series_min`` is greater than
        `max_value`, False otherwise.

    NOTE(review): ``series_min.value().at(max_value)`` presumably yields
    the minimum reading aligned with `max_value` -- confirm.
    """
    paired_min = series_min.value().at(max_value)
    return bool(paired_min > max_value)
|
||
def slope_test(value, i):
    """Spike predicate: compare the slope into `value` with the prior slope.

    Slopes are computed as (change in integer index) / (change in reading),
    so a SMALL magnitude means the reading moved a lot in one step. A spike
    is reported when the slope into `value` is below 0.1 in magnitude and
    also below 1% of the magnitude of the preceding slope.

    Args:
        value: datapoint under test; must expose ``prior(2)`` and
            ``intIndex()``, and its two predecessors ``isScalar()``.
        i: accepted but not used by this check.

    Returns:
        True when the datapoint looks like a spike. False otherwise,
        including when either predecessor is not scalar or when two
        consecutive readings are equal (no finite slope exists).
    """
    # Fetch the two preceding datapoints once instead of once per element.
    earlier = value.prior(2)
    p_a, p_b, p_c = earlier[0], earlier[1], value

    # Without two scalar predecessors there is no prior slope to compare.
    if not (p_a.isScalar() and p_b.isScalar()):
        return False

    # Change in reading across each of the two consecutive steps.
    prior_delta = p_b - p_a
    next_delta = p_c - p_b

    # Equal consecutive readings would divide by zero; such flat segments
    # were already excluded by the infinity checks below, so reject early.
    if prior_delta == 0 or next_delta == 0:
        return False

    priorslp = (p_b.intIndex() - p_a.intIndex()) / prior_delta
    nextslp = (p_c.intIndex() - p_b.intIndex()) / next_delta

    return bool(
        abs(nextslp) < .1                            # very sharp slope
        and abs(nextslp) < abs(priorslp * .01)       # big difference between the two slopes
        and abs(nextslp) != float("inf")             # slope is not a flat line
        and abs(priorslp) != float("inf")            # slope is not a flat line
    )
|
||
def _timed(label, run):
    """Print the banner for `label`, call `run`, and return elapsed seconds.

    Args:
        label: name shown in the banner printed before the measurement.
        run: zero-argument callable whose execution is measured.

    Returns:
        Elapsed time of ``run()`` in seconds (also printed).
    """
    print("\n " + label + " ---------------------")
    # perf_counter is the clock intended for measuring short intervals.
    start = time.perf_counter()
    run()
    elapsed = time.perf_counter() - start
    print(elapsed)
    return elapsed


# (banner label, flagging operation) pairs, in the order their timings are
# written to each row of the results file.
_BENCHMARKS = [
    ("Missing Value Test",
     lambda: series_avg_2.datapoint().flag('Missing Value').missingValueTest(-9999)),
    ("Repeat Value Test",
     lambda: series_avg_2.datapoint().flag("Repeat Value").when(rv_test)),
    ("Outlier Inconsistency",
     lambda: series_avg_2.datapoint().flag("Outlier").when(range_test)),
    ("Spatial Inconsistency",
     lambda: series_avg_2.datapoint().flag("Spatial Inconsistency").when(avg_spatial_inconsistency)),
    ("Logical Inconsistency",
     lambda: series_max.datapoint().flag("Logical Inconsistency").when(logical_inconsistency)),
    ("Spike Test",
     lambda: series_avg_2.datapoint().flag("Spike").when(slope_test)),
]

# Repeat the whole suite 10 times, appending one row of timings per pass.
for _ in range(10):
    timedeltas = [_timed(label, run) for label, run in _BENCHMARKS]

    # Each row starts with a newline and holds the timings separated by
    # ", " with no trailing separator.
    with open('experiments_edsl.csv', 'a') as f:
        f.write('\n' + ', '.join(str(d) for d in timedeltas))