with obstacles, obstacles changed, update only upon finding a closer point
jangirrishabh · Jul 3, 2016 · 9fbdefe · 9fbdefe
1 parent 8eb5f4d
commit 9fbdefe
Show file tree

Hide file tree

Showing 56 changed files with 29,129 additions and 24,322 deletions.
diff --git a/__pycache__/learning.cpython-34.pyc b/__pycache__/learning.cpython-34.pyc
diff --git a/__pycache__/nn.cpython-34.pyc b/__pycache__/nn.cpython-34.pyc
diff --git a/__pycache__/playing.cpython-34.pyc b/__pycache__/playing.cpython-34.pyc
diff --git a/flat_game/__pycache__/carmunk.cpython-34.pyc b/flat_game/__pycache__/carmunk.cpython-34.pyc
diff --git a/flat_game/carmunk.py b/flat_game/carmunk.py
@@ -20,8 +20,9 @@
 screen.set_alpha(None)
 
 # Showing sensors and redrawing slows things down.
-show_sensors = 1
-draw_screen = 1
+flag = 1
+show_sensors = flag
+draw_screen = flag
 
 
 class GameState:
@@ -35,7 +36,7 @@ def __init__(self, weights):
         self.W = weights #weights for the reward function
 
         # Create the car.
-        self.create_car(100, 100, 0.5)
+        self.create_car(150, 20, 15)
 
         # Record steps.
         self.num_steps = 0
@@ -65,22 +66,49 @@ def __init__(self, weights):
 
         # Create some obstacles, semi-randomly.
         # We'll create three and they'll move around to prevent over-fitting.
-        #self.obstacles = []
-        #self.obstacles.append(self.create_obstacle(230, 220, 100, "yellow"))
-        #self.obstacles.append(self.create_obstacle(250, 500, 100, "yellow"))
-        #self.obstacles.append(self.create_obstacle(780, 350, 100, "brown"))
-        #self.obstacles.append(self.create_obstacle(550, 200, 70, "brown"))
-        #self.obstacles.append(self.create_obstacle(530, 520, 100, "brown"))
-        #self.obstacles.append(self.create_obstacle(600, 600, 35))
+        self.obstacles = []
+        #self.obstacles.append(self.create_obstacle(380, 220, 70, "yellow"))
+        #self.obstacles.append(self.create_obstacle(250, 500, 70, "yellow"))
+        #self.obstacles.append(self.create_obstacle(780, 330, 70, "brown"))
+        #self.obstacles.append(self.create_obstacle(530, 500, 70, "brown"))
+
+        self.obstacles.append(self.create_obstacle([100, 100], [100, 585] , 7, "yellow"))
+        self.obstacles.append(self.create_obstacle([450, 600], [100, 600] , 7, "yellow"))
+        self.obstacles.append(self.create_obstacle([900, 100], [900, 585] , 7, "yellow"))
+        self.obstacles.append(self.create_obstacle([900, 600], [550, 600] , 7, "yellow"))
+
+        self.obstacles.append(self.create_obstacle([200, 100], [200, 480] , 7, "yellow"))
+        self.obstacles.append(self.create_obstacle([450, 500], [200, 500] , 7, "yellow"))
+        self.obstacles.append(self.create_obstacle([800, 100], [800, 480] , 7, "yellow"))
+        self.obstacles.append(self.create_obstacle([800, 500], [550, 500] , 7, "yellow"))
+
+        self.obstacles.append(self.create_obstacle([300, 100], [300, 350] , 7, "yellow"))
+        self.obstacles.append(self.create_obstacle([700, 380], [300, 380] , 7, "yellow"))
+        self.obstacles.append(self.create_obstacle([700, 100], [700, 350] , 7, "yellow"))
+
+        self.obstacles.append(self.create_obstacle([400, 100], [600, 100] , 7, "brown"))
+        self.obstacles.append(self.create_obstacle([400, 200], [600, 200] , 7, "brown"))
+        self.obstacles.append(self.create_obstacle([400, 300], [600, 300] , 7, "brown"))
+
+        self.obstacles.append(self.create_obstacle([700, 370], [300, 370] , 7, "brown"))
+        self.obstacles.append(self.create_obstacle([310, 100], [310, 350] , 7, "brown"))
+        self.obstacles.append(self.create_obstacle([690, 100], [690, 350] , 7, "brown"))
+
+
+
 
         # Create a cat.
-        self.create_cat()
+        #self.create_cat()
 
-    def create_obstacle(self, x, y, r, color):
+    def create_obstacle(self, xy1, xy2, r, color):
         c_body = pymunk.Body(pymunk.inf, pymunk.inf)
-        c_shape = pymunk.Circle(c_body, r)
-        c_shape.elasticity = 1.0
-        c_body.position = x, y
+        #c_shape = pymunk.Circle(c_body, r)
+        c_shape = pymunk.Segment(c_body, xy1, xy2, r)
+        #c_shape.elasticity = 1.0
+        #c_body.position = x, y
+        c_shape.friction = 1.
+        c_shape.group = 1
+        c_shape.collision_type = 1
         c_shape.color = THECOLORS[color]
         self.space.add(c_body, c_shape)
         return c_body
@@ -100,27 +128,27 @@ def create_car(self, x, y, r):
         inertia = pymunk.moment_for_circle(1, 0, 14, (0, 0))
         self.car_body = pymunk.Body(1, inertia)
         self.car_body.position = x, y
-        self.car_shape = pymunk.Circle(self.car_body, 25)
+        self.car_shape = pymunk.Circle(self.car_body, r)
         self.car_shape.color = THECOLORS["green"]
         self.car_shape.elasticity = 1.0
-        self.car_body.angle = r
+        self.car_body.angle = 1.4
         driving_direction = Vec2d(1, 0).rotated(self.car_body.angle)
         self.car_body.apply_impulse(driving_direction)
         self.space.add(self.car_body, self.car_shape)
 
     def frame_step(self, action):
         if action == 0:  # Turn left.
-            self.car_body.angle -= .2
+            self.car_body.angle -= .3
         elif action == 1:  # Turn right.
-            self.car_body.angle += .2
+            self.car_body.angle += .3
 
         #Move obstacles.
         #if self.num_steps % 100 == 0:
             #self.move_obstacles()
 
         # Move cat.
-        if self.num_steps % 5 == 0:
-            self.move_cat()
+        #if self.num_steps % 5 == 0:
+            #self.move_cat()
 
         driving_direction = Vec2d(1, 0).rotated(self.car_body.angle)
         self.car_body.velocity = 100 * driving_direction
@@ -141,11 +169,12 @@ def frame_step(self, action):
         # Car crashed when any reading == 1
         if self.car_is_crashed(readings):
             self.crashed = True
-            readings.append(-1)
+            readings.append(1)
             self.recover_from_crash(driving_direction)
         else:
             readings.append(0)
 
+
         reward = np.dot(self.W, readings)
         state = np.array([readings])
 
@@ -182,7 +211,7 @@ def recover_from_crash(self, driving_direction):
             self.crashed = False
             for i in range(10):
                 self.car_body.angle += .2  # Turn a little.
-                screen.fill(THECOLORS["red"])  # Red is scary!
+                #screen.fill(THECOLORS["red"])  # Red is scary!
                 draw(screen, self.space)
                 self.space.step(1./10)
                 if draw_screen:
@@ -227,16 +256,16 @@ def get_sonar_readings(self, x, y, angle):
                 ObstacleNumber[2] += 1
             elif i == 3:
                 ObstacleNumber[3] += 1
-
+           
 
         # Rotate them and get readings.
-        readings.append(1.0 - float(self.get_arm_distance(arm_left, x, y, angle, 0.75)[0]/39)) # 39 = max distance
-        readings.append(1.0 - float(self.get_arm_distance(arm_middle, x, y, angle, 0)[0]/39))
-        readings.append(1.0 - float(self.get_arm_distance(arm_right, x, y, angle, -0.75)[0]/39))
-        readings.append(float(ObstacleNumber[0]/3))
-        readings.append(float(ObstacleNumber[1]/3))
-        readings.append(float(ObstacleNumber[2]/3))
-        readings.append(float(ObstacleNumber[3]/3))
+        readings.append(1.0 - float(self.get_arm_distance(arm_left, x, y, angle, 0.75)[0]/39.0)) # 39 = max distance
+        readings.append(1.0 - float(self.get_arm_distance(arm_middle, x, y, angle, 0)[0]/39.0))
+        readings.append(1.0 - float(self.get_arm_distance(arm_right, x, y, angle, -0.75)[0]/39.0))
+        readings.append(float(ObstacleNumber[0]/3.0))
+        readings.append(float(ObstacleNumber[1]/3.0))
+        readings.append(float(ObstacleNumber[2]/3.0))
+        readings.append(float(ObstacleNumber[3]/3.0))
 
         if show_sensors:
             pygame.display.update()
@@ -274,8 +303,8 @@ def get_arm_distance(self, arm, x, y, angle, offset):
         return [i, 0] #sensor did not hit anything return 0 for black space
 
     def make_sonar_arm(self, x, y):
-        spread = 10  # Default spread.
-        distance = 20  # Gap before first sensor.
+        spread = 8  # Default spread.
+        distance = 7  # Gap before first sensor.
         arm_points = []
         # Make an arm. We build it flat because we'll rotate it about the
         # center later.

diff --git a/learning.py b/learning.py
@@ -17,7 +17,8 @@ def train_net(model, params, weights):
 
     observe = 1000  # Number of frames to observe before training.
     epsilon = 1
-    train_frames = 1000000  # Number of frames to play. 1000000
+    train_frames = 25000  # Number of frames to play. 1000000
+    train_frames_number = 25000
     batchSize = params['batchSize']
     buffer = params['buffer']
 
@@ -40,7 +41,7 @@ def train_net(model, params, weights):
     start_time = timeit.default_timer()
 
     # Run the frames.
-    while t < train_frames:
+    while t < train_frames_number:
 
         t += 1
         car_distance += 1
@@ -89,7 +90,7 @@ def train_net(model, params, weights):
             epsilon -= (1/train_frames)
 
         # We died, so update stuff.
-        if state[0][7] == -1:
+        if state[0][7] == 1:
             # Log the car's distance at this T.
             data_collect.append([t, car_distance])
 
@@ -102,16 +103,16 @@ def train_net(model, params, weights):
             fps = car_distance / tot_time
 
             # Output some stuff so we can watch.
-            print("Max: %d at %d\tepsilon %f\t(%d)\t%f fps" %
-                  (max_car_distance, t, epsilon, car_distance, fps))
+            #print("Max: %d at %d\tepsilon %f\t(%d)\t%f fps" %
+                  #(max_car_distance, t, epsilon, car_distance, fps))
 
             # Reset.
             car_distance = 0
             start_time = timeit.default_timer()
 
         # Save the model every 25,000 frames.
         if t % 25000 == 0:
-            model.save_weights('saved-models/' + filename + '-' +
+            model.save_weights('saved-models_brown/' + filename + '-' +
                                str(t) + '.h5',
                                overwrite=True)
             print("Saving model %s - %d" % (filename, t))
@@ -154,7 +155,7 @@ def process_minibatch(minibatch, model):
             #update = (reward_m + (GAMMA * maxQ))
         #else:  # terminal state
             #update = reward_m
-        if new_state_m[0][7] == -1:  #terminal state
+        if new_state_m[0][7] == 1:  #terminal state
             update = reward_m
         else:  # non-terminal state
             update = (reward_m + (GAMMA * maxQ))
@@ -202,8 +203,16 @@ def IRL_helper(weights):
 
 
 if __name__ == "__main__":
-    #weights = [-0.1884167 , -0.4677432  , 0.4095033 , -0.16976284 ,-0.03272345 , 0.70967888, -0.13741348 , 0.1600177 ] #plain anti with obstacles 1000
-    weights = [  5.39138627e-04 , -6.46492771e-02  , 5.83850255e-01 , -2.20347551e-02 ,-2.01624004e-07,  -8.21216705e-08 , -8.08983022e-01 ,  1.16252812e-05]
+    #weights = [ 0.04971919, -0.49727854 ,-0.26373486 ,-0.5413812 ,  0.16655347 ,-0.10348452  ,0.577155  ,  0.12663088] # clock obstacles 25000
+    #weights = [ 0.01919929, -0.45153034 ,-0.06908693, -0.75848042  ,0.33990325 ,-0.08139198,  0.29796847 , 0.0688629 ] # clock obstacles 75000
+    #weights =   [-0.82912921 , 0.09203298 , 0.41825967 , 0.23083735 ,-0.06848747 , 0.14014116 ,-0.12718711 ,-0.18799206]
+
+    #weights = [-0.08805555, -0.06245599 , 0.09146864 ,-0.01147858 , 0.66908548 ,-0.07713598 ,-0.66502319 ,-0.28976889]
+    weights = [ 0.2798415  , 0.54756635 ,-0.55969074 , 0.18558382 , 0.01366991 ,-0.1315585, -0.10050859 ,-0.4965564 ]
+
+
+
+
 
 
 

diff --git a/manualControl.py b/manualControl.py
@@ -22,28 +22,38 @@ def play(screen):
     _, state, __ = game_state.frame_step((2))
 
     featureExpectations = np.zeros(len(weights))
+    Prev = np.zeros(len(weights))
 
     # Move.
     while True:
         car_distance += 1
         event = screen.getch()
 
         if event == curses.KEY_LEFT:
-            action = 0
-        elif event == curses.KEY_RIGHT:
             action = 1
+        elif event == curses.KEY_RIGHT:
+            action = 0
         elif event == curses.KEY_DOWN:
             break
         else:
             action = 2
 
-
         # Take action.
         immediateReward , state, readings = game_state.frame_step(action)
-        featureExpectations += (GAMMA**(car_distance-1))*np.array(readings)
+        if car_distance > 100:
+            featureExpectations += (GAMMA**(car_distance-101))*np.array(readings)
+
+
         # Tell us something.
-        if car_distance % 2000 == 0:
+        changePercentage = (np.linalg.norm(featureExpectations - Prev)*100.0)/np.linalg.norm(featureExpectations)
+
+        print (car_distance)
+        print ("percentage change in Feature expectation ::", changePercentage)
+        Prev = np.array(featureExpectations)
+
+        if car_distance % 1000 == 0:
             break
+
 
     return featureExpectations
 

diff --git a/playing.py b/playing.py
@@ -26,19 +26,21 @@ def play(model, weights):
 
         # Choose action.
         action = (np.argmax(model.predict(state, batch_size=1)))
-        print ("Action ", action)
+        #print ("Action ", action)
 
         # Take action.
         immediateReward , state, readings = game_state.frame_step(action)
-        print ("immeditate reward:: ", immediateReward)
-        print ("readings :: ", readings)
-        featureExpectations += (GAMMA**(car_distance-1))*np.array(readings)
-        print ("Feature Expectations :: ", featureExpectations)
+        #print ("immeditate reward:: ", immediateReward)
+        #print ("readings :: ", readings)
+        if car_distance > 100:
+            featureExpectations += (GAMMA**(car_distance-101))*np.array(readings)
+        #print ("Feature Expectations :: ", featureExpectations)
         # Tell us something.
         if car_distance % 2000 == 0:
             print("Current distance: %d frames." % car_distance)
             break
 
+
     return featureExpectations
 
 if __name__ == "__main__":
@@ -47,8 +49,10 @@ def play(model, weights):
     #saved_model = 'saved-models/clock/164-150-100-50000-25000.h5' # [ 756.72859592  723.5764696   619.23933676  0.]
     #saved_model = 'saved-models/antiClock/164-150-100-50000-25000.h5' #[ 662.72064093  689.52239795  894.57495776    0.        ]
     #saved_model = 'saved-models/antiClock/164-150-100-50000-50000.h5' #[ 676.41503823  752.38417361  753.90576239    0.        ]
-    saved_model = 'saved-models/164-150-100-50000-25000.h5'
-    weights = [ -5.31699058e-01 , -6.03381696e-01 ,  5.56388439e-01 , -1.64570933e-01 , -1.71304905e-07 , -6.97726694e-08 , -9.89495334e-02 ,  8.23646987e-02] # plain anti without obs 2000
+    saved_model = 'saved-models_brown/164-150-100-50000-25000.h5'
+    #weights = [-0.41517549 ,-0.20823906  ,0.28402821 , 0.23587648  ,0.12459162 , 0.45047069 ] # around the brown obs 75000
+    weights = [-0.79380502 , 0.00704546 , 0.50866139 , 0.29466834, -0.07636144 , 0.09153848 ,-0.02632325 ,-0.09672041]
+
 
     model = neural_net(NUM_SENSORS, [164, 150], saved_model)
     print (play(model, weights))