Skip to content

Commit

Permalink
Merge pull request AI4Finance-Foundation#94 from AI4Finance-LLC/sr_re…
Browse files Browse the repository at this point in the history
…vert_env

reverting changes in env.
  • Loading branch information
spencerR1992 authored Jan 30, 2021
2 parents 5be2a4c + e8ff8f7 commit c11cf8b
Showing 1 changed file with 25 additions and 70 deletions.
95 changes: 25 additions & 70 deletions finrl/env/env_stocktrading_cashpenalty.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,19 @@ class StockTradingEnvCashpenalty(gym.Env):
This enables the model to manage cash reserves in addition to performing trading procedures.
Reward at any step is given as follows
r_i = (sum(cash, asset_value) - initial_cash - max(0, sum(cash, asset_value)*cash_penalty_proportion-cash))/(days_elapsed)
r_i = (sum(cash + asset_value) - initial_cash - max(0, sum(cash, asset_value)*cash_penalty_proportion-cash))/(days_elapsed)
This reward function takes into account a liquidity requirement, as well as long-term accrued rewards.
Parameters:
state space: {start_cash, <owned_shares>, for s in stocks{<stock.values>}, }
df (pandas.DataFrame): Dataframe containing data
buy_cost_pct (float): cost for buying shares
sell_cost_pct (float): cost for selling shares
transaction_cost (float): cost for buying or selling shares
hmax (int): max number of share purchases allowed per asset
min_shares (int): minimum number of shares to be bought in each trade. Has to be >= 1
turbulence_threshold (float): Maximum turbulence allowed in market for purchases to occur. If exceeded, positions are liquidated
print_verbosity(int): When iterating (step), how often to print stats about state of env
initial_amount: (int, float): Amount of cash initially available
daily_information_columns (list(str)): Columns to use when building state space from the dataframe. It could be OHLC columns or any other variables such as technical indicators and turbulence index
daily_information_columns (list(str)): Columns to use when building state space from the dataframe.
out_of_cash_penalty (int, float): Penalty to apply if the algorithm runs out of cash
discrete_actions (bool): option to choose whether to perform discretization on the action space or not
action space: <share_dollar_purchases>
Expand All @@ -55,37 +52,28 @@ class StockTradingEnvCashpenalty(gym.Env):
def __init__(
self,
df,
buy_cost_pct=3e-3,
sell_cost_pct=3e-3,
transaction_cost_pct=3e-3,
date_col_name="date",
hmax=10,
min_shares=1,
turbulence_threshold=None,
print_verbosity=10,
initial_amount=1e6,
daily_information_cols=["open", "close", "high", "low", "volume"],
cache_indicator_data=True,
cash_penalty_proportion=0.1,
random_start=True,
discrete_actions=False,
currency="$",
):
self.df = df
self.stock_col = "tic"
self.assets = df[self.stock_col].unique()
self.dates = df[date_col_name].sort_values().unique()
self.random_start = random_start
self.discrete_actions = discrete_actions
self.currency = currency

self.df = self.df.set_index(date_col_name
self.min_shares = min_shares
self.hmax = hmax / min_shares
self.initial_amount = initial_amount / min_shares
self.df = self.df.set_index(date_col_name)
self.hmax = hmax
self.initial_amount = initial_amount
self.print_verbosity = print_verbosity
self.buy_cost_pct = buy_cost_pct
self.sell_cost_pct = sell_cost_pct
self.turbulence_threshold = turbulence_threshold
self.transaction_cost_pct = transaction_cost_pct
self.daily_information_cols = daily_information_cols
self.state_space = (
1 + len(self.assets) + len(self.assets) * len(self.daily_information_cols)
Expand All @@ -94,7 +82,6 @@ def __init__(
self.observation_space = spaces.Box(
low=-np.inf, high=np.inf, shape=(self.state_space,)
)
self.turbulence = 0
self.episode = -1 # initialize so we can call reset
self.episode_history = []
self.printed_header = False
Expand Down Expand Up @@ -126,7 +113,6 @@ def reset(self):
else:
self.starting_point = 0
self.date_index = self.starting_point
self.turbulence = 0
self.episode += 1
self.actions_memory = []
self.transaction_memory = []
Expand Down Expand Up @@ -165,16 +151,10 @@ def return_terminal(self, reason="Last Date", reward=0):
state = self.state_memory[-1]
self.log_step(reason=reason, terminal_reward=reward)
# Add outputs to logger interface
logger.record("environment/total_assets", int(self.min_shares*self.account_information['total_assets'][-1]))
reward_pct = self.account_information["total_assets"][-1] / self.initial_amount
logger.record("environment/total_reward_pct", (reward_pct - 1) * 100)
logger.record("environment/total_trades", self.sum_trades)
logger.record(
"environment/avg_daily_trades",
self.sum_trades / (self.current_step),
)
logger.record(
"environment/avg_daily_trades_per_asset",
"environment/daily_trades",
self.sum_trades / (self.current_step) / len(self.assets),
)
logger.record("environment/completed_steps", self.current_step)
Expand All @@ -195,33 +175,28 @@ def log_step(self, reason, terminal_reward=None):
self.account_information["cash"][-1]
/ self.account_information["total_assets"][-1]
)
gl_pct = self.account_information["total_assets"][-1] / self.initial_amount
rec = [
self.episode,
self.date_index - self.starting_point,
reason,
f"{self.currency}{'{:0,.0f}'.format(float(self.min_shares*self.account_information['cash'][-1]))}",
f"{self.currency}{'{:0,.0f}'.format(float(self.min_shares*self.account_information['total_assets'][-1]))}",
f"${int(self.account_information['total_assets'][-1])}",
f"{terminal_reward*100:0.5f}%",
f"{(gl_pct - 1)*100:0.5f}%",
f"{cash_pct*100:0.2f}%",
]

self.episode_history.append(rec)
print(self.template.format(*rec))

def log_header(self):
self.template = "{0:4}|{1:4}|{2:15}|{3:15}|{4:15}|{5:10}|{6:10}|{7:10}" # column widths: 8, 10, 15, 7, 10
self.template = "{0:4}|{1:4}|{2:15}|{3:10}|{4:10}|{5:10}" # column widths: 8, 10, 15, 7, 10
print(
self.template.format(
"EPISODE",
"STEPS",
"TERMINAL_REASON",
"CASH",
"TOT_ASSETS",
"TERMINAL_REWARD_unsc",
"GAINLOSS_PCT",
"CASH_PROPORTION",
"CASH_PCT",
)
)
self.printed_header = True
Expand Down Expand Up @@ -266,47 +241,33 @@ def step(self, actions):
reward = self.get_reward()

# log the values of cash, assets, and total assets
self.account_information["cash"].append(self.min_shares*begin_cash)
self.account_information["asset_value"].append(self.min_shares*asset_value)
self.account_information["total_assets"].append(self.min_shares*(begin_cash + asset_value))
self.account_information["cash"].append(begin_cash)
self.account_information["asset_value"].append(asset_value)
self.account_information["total_assets"].append(begin_cash + asset_value)
self.account_information["reward"].append(reward)

# multiply action values by our scalar multiplier and save
actions = actions * self.hmax
self.actions_memory.append(actions*closings) #capture what the model's trying to do

#buy/sell only if the price is > 0 (no missing data in this particular date)
actions = np.where(closings>0,actions,0)

if self.turbulence_threshold is not None:
# if turbulence goes over threshold, just clear out all positions
if self.turbulence>=self.turbulence_threshold:
actions = -(np.array(holdings) * closings)
self.log_step(reason="TURBULENCE")

# scale cash purchases to asset
if self.discrete_actions:
#convert into integer because we can't buy fraction of shares
actions = np.where(closings>0,actions//closings,0)
actions = (actions.astype(int))
else:
actions = np.where(closings>0,actions/closings,0)
actions = actions * self.hmax
self.actions_memory.append(actions)

# scale cash purchases to asset # changes
actions = actions / closings


# clip actions so we can't sell more assets than we hold
actions = np.maximum(actions, -np.array(holdings))
self.transaction_memory.append(actions)

self.transaction_memory.append(actions) #capture what the model's could do

# compute our proceeds from sells, and add to cash
# compute our proceeds from sales, and add to cash
sells = -np.clip(actions, -np.inf, 0)
proceeds = np.dot(sells, closings)
costs = proceeds * self.sell_cost_pct
costs = proceeds * self.transaction_cost_pct
coh = begin_cash + proceeds

# compute the cost of our buys
buys = np.clip(actions, 0, np.inf)
spend = np.dot(buys, closings)
costs += spend * self.buy_cost_pct
costs += spend * self.transaction_cost_pct

# if we run out of cash, end the cycle and penalize
if (spend + costs) > coh:
Expand All @@ -321,17 +282,11 @@ def step(self, actions):
# update our holdings
coh = coh - spend - costs
holdings_updated = holdings + actions

self.date_index += 1
if self.turbulence_threshold is not None:
self.turbulence = self.get_date_vector(self.date_index,cols=["turbulence"])[0]

#Update State
state = (
[coh] + list(holdings_updated) + self.get_date_vector(self.date_index)
)
self.state_memory.append(state)

return state, reward, False, {}

def get_sb_env(self):
Expand Down

0 comments on commit c11cf8b

Please sign in to comment.