Skip to content

Commit

Permalink
Merge pull request AI4Finance-Foundation#94 from AI4Finance-LLC/sr_re…
Browse files Browse the repository at this point in the history
…vert_env

reverting changes in env.
  • Loading branch information
spencerR1992 authored Jan 30, 2021
2 parents 5be2a4c + e8ff8f7 commit c11cf8b
Showing 1 changed file with 25 additions and 70 deletions.
95 changes: 25 additions & 70 deletions finrl/env/env_stocktrading_cashpenalty.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,19 @@ class StockTradingEnvCashpenalty(gym.Env):
This enables the model to manage cash reserves in addition to performing trading procedures.
Reward at any step is given as follows
r_i = (sum(cash, asset_value) - initial_cash - max(0, sum(cash, asset_value)*cash_penalty_proportion-cash))/(days_elapsed)
r_i = (sum(cash + asset_value) - initial_cash - max(0, sum(cash, asset_value)*cash_penalty_proportion-cash))/(days_elapsed)
This reward function takes into account a liquidity requirement, as well as long-term accrued rewards.
Parameters:
state space: {start_cash, <owned_shares>, for s in stocks{<stock.values>}, }
df (pandas.DataFrame): Dataframe containing data
buy_cost_pct (float): cost for buying shares
sell_cost_pct (float): cost for selling shares
transaction_cost (float): cost for buying or selling shares
hmax (int): max number of share purchases allowed per asset
min_shares (int): minimum number of shares to be bought in each trade. Has to be >= 1
turbulence_threshold (float): Maximum turbulence allowed in market for purchases to occur. If exceeded, positions are liquidated
print_verbosity(int): When iterating (step), how often to print stats about state of env
initial_amount: (int, float): Amount of cash initially available
daily_information_columns (list(str)): Columns to use when building state space from the dataframe. It could be OHLC columns or any other variables such as technical indicators and turbulence index
daily_information_columns (list(str)): Columns to use when building state space from the dataframe.
out_of_cash_penalty (int, float): Penalty to apply if the algorithm runs out of cash
discrete_actions (bool): option to choose whether to perform discretization on the action space or not
action space: <share_dollar_purchases>
Expand All @@ -55,37 +52,28 @@ class StockTradingEnvCashpenalty(gym.Env):
def __init__(
self,
df,
buy_cost_pct=3e-3,
sell_cost_pct=3e-3,
transaction_cost_pct=3e-3,
date_col_name="date",
hmax=10,
min_shares=1,
turbulence_threshold=None,
print_verbosity=10,
initial_amount=1e6,
daily_information_cols=["open", "close", "high", "low", "volume"],
cache_indicator_data=True,
cash_penalty_proportion=0.1,
random_start=True,
discrete_actions=False,
currency="$",
):
self.df = df
self.stock_col = "tic"
self.assets = df[self.stock_col].unique()
self.dates = df[date_col_name].sort_values().unique()
self.random_start = random_start
self.discrete_actions = discrete_actions
self.currency = currency

self.df = self.df.set_index(date_col_name
self.min_shares = min_shares
self.hmax = hmax / min_shares
self.initial_amount = initial_amount / min_shares
self.df = self.df.set_index(date_col_name)
self.hmax = hmax
self.initial_amount = initial_amount
self.print_verbosity = print_verbosity
self.buy_cost_pct = buy_cost_pct
self.sell_cost_pct = sell_cost_pct
self.turbulence_threshold = turbulence_threshold
self.transaction_cost_pct = transaction_cost_pct
self.daily_information_cols = daily_information_cols
self.state_space = (
1 + len(self.assets) + len(self.assets) * len(self.daily_information_cols)
Expand All @@ -94,7 +82,6 @@ def __init__(
self.observation_space = spaces.Box(
low=-np.inf, high=np.inf, shape=(self.state_space,)
)
self.turbulence = 0
self.episode = -1 # initialize so we can call reset
self.episode_history = []
self.printed_header = False
Expand Down Expand Up @@ -126,7 +113,6 @@ def reset(self):
else:
self.starting_point = 0
self.date_index = self.starting_point
self.turbulence = 0
self.episode += 1
self.actions_memory = []
self.transaction_memory = []
Expand Down Expand Up @@ -165,16 +151,10 @@ def return_terminal(self, reason="Last Date", reward=0):
state = self.state_memory[-1]
self.log_step(reason=reason, terminal_reward=reward)
# Add outputs to logger interface
logger.record("environment/total_assets", int(self.min_shares*self.account_information['total_assets'][-1]))
reward_pct = self.account_information["total_assets"][-1] / self.initial_amount
logger.record("environment/total_reward_pct", (reward_pct - 1) * 100)
logger.record("environment/total_trades", self.sum_trades)
logger.record(
"environment/avg_daily_trades",
self.sum_trades / (self.current_step),
)
logger.record(
"environment/avg_daily_trades_per_asset",
"environment/daily_trades",
self.sum_trades / (self.current_step) / len(self.assets),
)
logger.record("environment/completed_steps", self.current_step)
Expand All @@ -195,33 +175,28 @@ def log_step(self, reason, terminal_reward=None):
self.account_information["cash"][-1]
/ self.account_information["total_assets"][-1]
)
gl_pct = self.account_information["total_assets"][-1] / self.initial_amount
rec = [
self.episode,
self.date_index - self.starting_point,
reason,
f"{self.currency}{'{:0,.0f}'.format(float(self.min_shares*self.account_information['cash'][-1]))}",
f"{self.currency}{'{:0,.0f}'.format(float(self.min_shares*self.account_information['total_assets'][-1]))}",
f"${int(self.account_information['total_assets'][-1])}",
f"{terminal_reward*100:0.5f}%",
f"{(gl_pct - 1)*100:0.5f}%",
f"{cash_pct*100:0.2f}%",
]

self.episode_history.append(rec)
print(self.template.format(*rec))

def log_header(self):
self.template = "{0:4}|{1:4}|{2:15}|{3:15}|{4:15}|{5:10}|{6:10}|{7:10}" # column widths: 8, 10, 15, 7, 10
self.template = "{0:4}|{1:4}|{2:15}|{3:10}|{4:10}|{5:10}" # column widths: 8, 10, 15, 7, 10
print(
self.template.format(
"EPISODE",
"STEPS",
"TERMINAL_REASON",
"CASH",
"TOT_ASSETS",
"TERMINAL_REWARD_unsc",
"GAINLOSS_PCT",
"CASH_PROPORTION",
"CASH_PCT",
)
)
self.printed_header = True
Expand Down Expand Up @@ -266,47 +241,33 @@ def step(self, actions):
reward = self.get_reward()

# log the values of cash, assets, and total assets
self.account_information["cash"].append(self.min_shares*begin_cash)
self.account_information["asset_value"].append(self.min_shares*asset_value)
self.account_information["total_assets"].append(self.min_shares*(begin_cash + asset_value))
self.account_information["cash"].append(begin_cash)
self.account_information["asset_value"].append(asset_value)
self.account_information["total_assets"].append(begin_cash + asset_value)
self.account_information["reward"].append(reward)

# multiply action values by our scalar multiplier and save
actions = actions * self.hmax
self.actions_memory.append(actions*closings) #capture what the model's trying to do

#buy/sell only if the price is > 0 (no missing data in this particular date)
actions = np.where(closings>0,actions,0)

if self.turbulence_threshold is not None:
# if turbulence goes over threshold, just clear out all positions
if self.turbulence>=self.turbulence_threshold:
actions = -(np.array(holdings) * closings)
self.log_step(reason="TURBULENCE")

# scale cash purchases to asset
if self.discrete_actions:
#convert into integer because we can't buy fraction of shares
actions = np.where(closings>0,actions//closings,0)
actions = (actions.astype(int))
else:
actions = np.where(closings>0,actions/closings,0)
actions = actions * self.hmax
self.actions_memory.append(actions)

# scale cash purchases to asset # changes
actions = actions / closings


# clip actions so we can't sell more assets than we hold
actions = np.maximum(actions, -np.array(holdings))
self.transaction_memory.append(actions)

self.transaction_memory.append(actions) #capture what the model's could do

# compute our proceeds from sells, and add to cash
# compute our proceeds from sales, and add to cash
sells = -np.clip(actions, -np.inf, 0)
proceeds = np.dot(sells, closings)
costs = proceeds * self.sell_cost_pct
costs = proceeds * self.transaction_cost_pct
coh = begin_cash + proceeds

# compute the cost of our buys
buys = np.clip(actions, 0, np.inf)
spend = np.dot(buys, closings)
costs += spend * self.buy_cost_pct
costs += spend * self.transaction_cost_pct

# if we run out of cash, end the cycle and penalize
if (spend + costs) > coh:
Expand All @@ -321,17 +282,11 @@ def step(self, actions):
# update our holdings
coh = coh - spend - costs
holdings_updated = holdings + actions

self.date_index += 1
if self.turbulence_threshold is not None:
self.turbulence = self.get_date_vector(self.date_index,cols=["turbulence"])[0]

#Update State
state = (
[coh] + list(holdings_updated) + self.get_date_vector(self.date_index)
)
self.state_memory.append(state)

return state, reward, False, {}

def get_sb_env(self):
Expand Down

0 comments on commit c11cf8b

Please sign in to comment.