Skip to content

Commit

Permalink
patterns now sort of working
Browse files Browse the repository at this point in the history
  • Loading branch information
Julian Pattie committed May 16, 2017
1 parent 1327822 commit e729790
Show file tree
Hide file tree
Showing 4 changed files with 68 additions and 42 deletions.
2 changes: 1 addition & 1 deletion build_training_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def get_files_in_directory(directory, filter):
for file in files:
if filter in file:
yield os.path.join(dir_name, file)

# main
def main():

if (len(sys.argv) < 2):
Expand Down
96 changes: 61 additions & 35 deletions pattern.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,29 +17,6 @@

example_len = 4*8

# build_pattern_set
def build_pattern_set(tick_path, pattern_output_path):
    """Build overlapping fractional-change patterns from tick CSV files.

    Each CSV found under ``tick_path`` is expected to have ``RateBid`` and
    ``RateAsk`` columns. The mid price is the mean of the two, and the
    series that gets windowed is the fractional change of the mid price
    relative to the previous row.

    Args:
        tick_path: Directory searched for files whose name contains '.csv'.
        pattern_output_path: Currently unused; kept so existing callers
            keep working.

    Returns:
        A list of 1-D numpy arrays, each ``example_len`` values long.
        Consecutive windows start ``example_len // 2`` rows apart.
    """
    tick_files = list(ohlc_file_helper.get_files_in_directory(tick_path, '.csv'))
    patterns = []

    # An integer step is required by range(); '/' would produce a float on
    # Python 3 and raise TypeError.
    step = max(1, example_len // 2)

    # NOTE(review): only the first file is processed -- this looks like a
    # debugging limit; confirm before lifting it.
    for tick_file in tick_files[:1]:
        print(tick_file)

        df = pd.read_csv(tick_file)

        # Work on plain arrays so the division is strictly positional:
        # (mid[i + 1] - mid[i]) / mid[i]. Dividing two Series whose index
        # labels are offset by one can align on labels instead.
        mid = ((df['RateBid'] + df['RateAsk']) / 2.0).values
        mid_pct = np.diff(mid) / mid[:-1]
        total = len(mid_pct)

        # Same bound as the original check (n + example_len) < total.
        # NOTE(review): a window ending exactly on the last row is
        # therefore dropped -- confirm whether '<=' was intended.
        for n in range(0, total - example_len, step):
            patterns.append(mid_pct[n:n + example_len])

    return patterns

# main
def main():

Expand All @@ -48,29 +25,78 @@ def main():
print('Quitting...')
quit()

training_set = sys.argv[1]
training_set_path = sys.argv[1]
ohlc_path = sys.argv[2]

if os.path.exists(ticks_path) == False:
print ticks_path + ' does not exist.'
if os.path.exists(training_set_path) == False:
print(training_set + ' does not exist.')
quit()

index = 0
current_index = 0

# read training set
training_set = pd.read_csv(training_set_path)
patterns = training_set.iloc[:, 1:(example_len + 2)].values

# find all the ohlc files as well
ohlc_files = ohlc_file_helper.get_files_in_directory(ohlc_path, '.csv')
ohlc_date_map = list(ohlc_file_helper.build_ohlc_date_map(ohlc_files))

smallest_dist = 1000000
smallest_index = (0, 0)

index = 0
for pattern in patterns:
for curr_pattern in patterns:

diff = np.subtract(curr_pattern, pattern)
pct_diff = np.absolute(np.divide(diff, pattern))

time_str = str(training_set['time_stamp'].loc[index])
time_dt = datetime.datetime.strptime(time_str, '%Y-%m-%d %H:%M:%S')
time_dt_close = time_dt + datetime.timedelta(minutes=example_len/4)

ohlc_path = ohlc_file_helper.find_ohlc_path_from_date(ohlc_date_map, time_str)
plot_ohlc.plot_ohlc_range(ohlc_path, time_dt, time_dt_close)

average = pct_diff.average()
current_index = 0
for curr_pattern in patterns:
if index == current_index:
current_index += 1
continue

diff = np.subtract(curr_pattern, pattern)
sqr_diff = np.power(diff, 2)
sqr = math.sqrt(np.sum(sqr_diff))

# a distance of exactly zero means the two patterns are identical; assume bad or duplicated data and skip it
if sqr == 0.0:
current_index += 1
continue

if (sqr < smallest_dist):
smallest_dist = sqr
smallest_index = (current_index, index)

print(smallest_dist)
print(str(index) + ', ' + str(current_index))

if current_index >= len(training_set.index):
break

curr_time_str = str(training_set['time_stamp'].loc[current_index])
curr_time_dt = datetime.datetime.strptime(curr_time_str, '%Y-%m-%d %H:%M:%S')
curr_time_dt_close = curr_time_dt + datetime.timedelta(minutes=example_len/4)

print(time_str + ', ' + curr_time_str)

curr_ohlc_path = ohlc_file_helper.find_ohlc_path_from_date(ohlc_date_map, curr_time_str)
plot_ohlc.plot_ohlc_range(curr_ohlc_path, curr_time_dt, curr_time_dt_close)

current_index += 1

index += 1

# reset these when we start a new pattern
smallest_dist = 1000000
smallest_index = (0, 0)

print(smallest_dist)
print(smallest_index)

if __name__ == "__main__":
# stuff only to run when not called via 'import' here
Expand Down
4 changes: 2 additions & 2 deletions plot_ohlc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
def bytespdate2num(fmt, encoding='utf-8'):
    """Return a converter that turns a date string into a date number.

    The returned callable accepts either ``bytes`` or ``str``. Bytes are
    decoded with ``encoding`` first; text is passed through unchanged.
    The result is whatever ``mdates.strpdate2num(fmt)`` returns for the
    text (presumably a matplotlib date number -- confirm against the
    ``mdates`` import at the top of the file).

    Args:
        fmt: Date format string handed to ``mdates.strpdate2num``.
        encoding: Codec used to decode ``bytes`` input.

    Returns:
        A one-argument function performing the conversion.
    """
    strconverter = mdates.strpdate2num(fmt)

    def bytesconverter(b):
        # Earlier revisions handled only one input type each: calling
        # .decode() on a str fails on Python 3, while passing raw bytes
        # straight through skips decoding. Accept both.
        s = b.decode(encoding) if isinstance(b, bytes) else b
        return strconverter(s)

    return bytesconverter

# plot_ohlc_range
Expand Down
8 changes: 4 additions & 4 deletions train_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,8 @@

from collections import defaultdict

example_len = 30
example_step = 15
example_len = 32
example_step = 16

# main
def main():
Expand Down Expand Up @@ -95,10 +95,10 @@ def main():

#
fst_open_datetime = datetime.datetime.strptime(fst_open_time, '%Y-%m-%d %H:%M:%S')
fst_close_datetime = fst_open_datetime + datetime.timedelta(minutes=example_len)
fst_close_datetime = fst_open_datetime + datetime.timedelta(minutes=example_len/4)

snd_open_datetime = datetime.datetime.strptime(snd_open_time, '%Y-%m-%d %H:%M:%S')
snd_close_datetime = snd_open_datetime + datetime.timedelta(minutes=example_len)
snd_close_datetime = snd_open_datetime + datetime.timedelta(minutes=example_len/4)

plot_ohlc.plot_ohlc_range(fst_ohlc_path, fst_open_datetime, fst_close_datetime)
plot_ohlc.plot_ohlc_range(snd_ohlc_path, snd_open_datetime, snd_close_datetime)
Expand Down

0 comments on commit e729790

Please sign in to comment.