Skip to content

Commit

Permalink
with obstacles, obstacles changed, update only upon finding a closer point
Browse files Browse the repository at this point in the history
  • Loading branch information
jangirrishabh committed Jul 3, 2016
1 parent 8eb5f4d commit 9fbdefe
Show file tree
Hide file tree
Showing 56 changed files with 29,129 additions and 24,322 deletions.
Binary file modified __pycache__/learning.cpython-34.pyc
Binary file not shown.
Binary file modified __pycache__/nn.cpython-34.pyc
Binary file not shown.
Binary file modified __pycache__/playing.cpython-34.pyc
Binary file not shown.
Binary file modified flat_game/__pycache__/carmunk.cpython-34.pyc
Binary file not shown.
95 changes: 62 additions & 33 deletions flat_game/carmunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@
screen.set_alpha(None)

# Showing sensors and redrawing slows things down.
show_sensors = 1
draw_screen = 1
flag = 1
show_sensors = flag
draw_screen = flag


class GameState:
Expand All @@ -35,7 +36,7 @@ def __init__(self, weights):
self.W = weights #weights for the reward function

# Create the car.
self.create_car(100, 100, 0.5)
self.create_car(150, 20, 15)

# Record steps.
self.num_steps = 0
Expand Down Expand Up @@ -65,22 +66,49 @@ def __init__(self, weights):

# Create some obstacles, semi-randomly.
# We'll create three and they'll move around to prevent over-fitting.
#self.obstacles = []
#self.obstacles.append(self.create_obstacle(230, 220, 100, "yellow"))
#self.obstacles.append(self.create_obstacle(250, 500, 100, "yellow"))
#self.obstacles.append(self.create_obstacle(780, 350, 100, "brown"))
#self.obstacles.append(self.create_obstacle(550, 200, 70, "brown"))
#self.obstacles.append(self.create_obstacle(530, 520, 100, "brown"))
#self.obstacles.append(self.create_obstacle(600, 600, 35))
self.obstacles = []
#self.obstacles.append(self.create_obstacle(380, 220, 70, "yellow"))
#self.obstacles.append(self.create_obstacle(250, 500, 70, "yellow"))
#self.obstacles.append(self.create_obstacle(780, 330, 70, "brown"))
#self.obstacles.append(self.create_obstacle(530, 500, 70, "brown"))

self.obstacles.append(self.create_obstacle([100, 100], [100, 585] , 7, "yellow"))
self.obstacles.append(self.create_obstacle([450, 600], [100, 600] , 7, "yellow"))
self.obstacles.append(self.create_obstacle([900, 100], [900, 585] , 7, "yellow"))
self.obstacles.append(self.create_obstacle([900, 600], [550, 600] , 7, "yellow"))

self.obstacles.append(self.create_obstacle([200, 100], [200, 480] , 7, "yellow"))
self.obstacles.append(self.create_obstacle([450, 500], [200, 500] , 7, "yellow"))
self.obstacles.append(self.create_obstacle([800, 100], [800, 480] , 7, "yellow"))
self.obstacles.append(self.create_obstacle([800, 500], [550, 500] , 7, "yellow"))

self.obstacles.append(self.create_obstacle([300, 100], [300, 350] , 7, "yellow"))
self.obstacles.append(self.create_obstacle([700, 380], [300, 380] , 7, "yellow"))
self.obstacles.append(self.create_obstacle([700, 100], [700, 350] , 7, "yellow"))

self.obstacles.append(self.create_obstacle([400, 100], [600, 100] , 7, "brown"))
self.obstacles.append(self.create_obstacle([400, 200], [600, 200] , 7, "brown"))
self.obstacles.append(self.create_obstacle([400, 300], [600, 300] , 7, "brown"))

self.obstacles.append(self.create_obstacle([700, 370], [300, 370] , 7, "brown"))
self.obstacles.append(self.create_obstacle([310, 100], [310, 350] , 7, "brown"))
self.obstacles.append(self.create_obstacle([690, 100], [690, 350] , 7, "brown"))




# Create a cat.
self.create_cat()
#self.create_cat()

def create_obstacle(self, x, y, r, color):
def create_obstacle(self, xy1, xy2, r, color):
c_body = pymunk.Body(pymunk.inf, pymunk.inf)
c_shape = pymunk.Circle(c_body, r)
c_shape.elasticity = 1.0
c_body.position = x, y
#c_shape = pymunk.Circle(c_body, r)
c_shape = pymunk.Segment(c_body, xy1, xy2, r)
#c_shape.elasticity = 1.0
#c_body.position = x, y
c_shape.friction = 1.
c_shape.group = 1
c_shape.collision_type = 1
c_shape.color = THECOLORS[color]
self.space.add(c_body, c_shape)
return c_body
Expand All @@ -100,27 +128,27 @@ def create_car(self, x, y, r):
inertia = pymunk.moment_for_circle(1, 0, 14, (0, 0))
self.car_body = pymunk.Body(1, inertia)
self.car_body.position = x, y
self.car_shape = pymunk.Circle(self.car_body, 25)
self.car_shape = pymunk.Circle(self.car_body, r)
self.car_shape.color = THECOLORS["green"]
self.car_shape.elasticity = 1.0
self.car_body.angle = r
self.car_body.angle = 1.4
driving_direction = Vec2d(1, 0).rotated(self.car_body.angle)
self.car_body.apply_impulse(driving_direction)
self.space.add(self.car_body, self.car_shape)

def frame_step(self, action):
if action == 0: # Turn left.
self.car_body.angle -= .2
self.car_body.angle -= .3
elif action == 1: # Turn right.
self.car_body.angle += .2
self.car_body.angle += .3

#Move obstacles.
#if self.num_steps % 100 == 0:
#self.move_obstacles()

# Move cat.
if self.num_steps % 5 == 0:
self.move_cat()
#if self.num_steps % 5 == 0:
#self.move_cat()

driving_direction = Vec2d(1, 0).rotated(self.car_body.angle)
self.car_body.velocity = 100 * driving_direction
Expand All @@ -141,11 +169,12 @@ def frame_step(self, action):
# Car crashed when any reading == 1
if self.car_is_crashed(readings):
self.crashed = True
readings.append(-1)
readings.append(1)
self.recover_from_crash(driving_direction)
else:
readings.append(0)


reward = np.dot(self.W, readings)
state = np.array([readings])

Expand Down Expand Up @@ -182,7 +211,7 @@ def recover_from_crash(self, driving_direction):
self.crashed = False
for i in range(10):
self.car_body.angle += .2 # Turn a little.
screen.fill(THECOLORS["red"]) # Red is scary!
#screen.fill(THECOLORS["red"]) # Red is scary!
draw(screen, self.space)
self.space.step(1./10)
if draw_screen:
Expand Down Expand Up @@ -227,16 +256,16 @@ def get_sonar_readings(self, x, y, angle):
ObstacleNumber[2] += 1
elif i == 3:
ObstacleNumber[3] += 1


# Rotate them and get readings.
readings.append(1.0 - float(self.get_arm_distance(arm_left, x, y, angle, 0.75)[0]/39)) # 39 = max distance
readings.append(1.0 - float(self.get_arm_distance(arm_middle, x, y, angle, 0)[0]/39))
readings.append(1.0 - float(self.get_arm_distance(arm_right, x, y, angle, -0.75)[0]/39))
readings.append(float(ObstacleNumber[0]/3))
readings.append(float(ObstacleNumber[1]/3))
readings.append(float(ObstacleNumber[2]/3))
readings.append(float(ObstacleNumber[3]/3))
readings.append(1.0 - float(self.get_arm_distance(arm_left, x, y, angle, 0.75)[0]/39.0)) # 39 = max distance
readings.append(1.0 - float(self.get_arm_distance(arm_middle, x, y, angle, 0)[0]/39.0))
readings.append(1.0 - float(self.get_arm_distance(arm_right, x, y, angle, -0.75)[0]/39.0))
readings.append(float(ObstacleNumber[0]/3.0))
readings.append(float(ObstacleNumber[1]/3.0))
readings.append(float(ObstacleNumber[2]/3.0))
readings.append(float(ObstacleNumber[3]/3.0))

if show_sensors:
pygame.display.update()
Expand Down Expand Up @@ -274,8 +303,8 @@ def get_arm_distance(self, arm, x, y, angle, offset):
return [i, 0] #sensor did not hit anything return 0 for black space

def make_sonar_arm(self, x, y):
spread = 10 # Default spread.
distance = 20 # Gap before first sensor.
spread = 8 # Default spread.
distance = 7 # Gap before first sensor.
arm_points = []
# Make an arm. We build it flat because we'll rotate it about the
# center later.
Expand Down
27 changes: 18 additions & 9 deletions learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ def train_net(model, params, weights):

observe = 1000 # Number of frames to observe before training.
epsilon = 1
train_frames = 1000000 # Number of frames to play. 1000000
train_frames = 25000 # Number of frames to play. 1000000
train_frames_number = 25000
batchSize = params['batchSize']
buffer = params['buffer']

Expand All @@ -40,7 +41,7 @@ def train_net(model, params, weights):
start_time = timeit.default_timer()

# Run the frames.
while t < train_frames:
while t < train_frames_number:

t += 1
car_distance += 1
Expand Down Expand Up @@ -89,7 +90,7 @@ def train_net(model, params, weights):
epsilon -= (1/train_frames)

# We died, so update stuff.
if state[0][7] == -1:
if state[0][7] == 1:
# Log the car's distance at this T.
data_collect.append([t, car_distance])

Expand All @@ -102,16 +103,16 @@ def train_net(model, params, weights):
fps = car_distance / tot_time

# Output some stuff so we can watch.
print("Max: %d at %d\tepsilon %f\t(%d)\t%f fps" %
(max_car_distance, t, epsilon, car_distance, fps))
#print("Max: %d at %d\tepsilon %f\t(%d)\t%f fps" %
#(max_car_distance, t, epsilon, car_distance, fps))

# Reset.
car_distance = 0
start_time = timeit.default_timer()

# Save the model every 25,000 frames.
if t % 25000 == 0:
model.save_weights('saved-models/' + filename + '-' +
model.save_weights('saved-models_brown/' + filename + '-' +
str(t) + '.h5',
overwrite=True)
print("Saving model %s - %d" % (filename, t))
Expand Down Expand Up @@ -154,7 +155,7 @@ def process_minibatch(minibatch, model):
#update = (reward_m + (GAMMA * maxQ))
#else: # terminal state
#update = reward_m
if new_state_m[0][7] == -1: #terminal state
if new_state_m[0][7] == 1: #terminal state
update = reward_m
else: # non-terminal state
update = (reward_m + (GAMMA * maxQ))
Expand Down Expand Up @@ -202,8 +203,16 @@ def IRL_helper(weights):


if __name__ == "__main__":
#weights = [-0.1884167 , -0.4677432 , 0.4095033 , -0.16976284 ,-0.03272345 , 0.70967888, -0.13741348 , 0.1600177 ] #plain anti with obstacles 1000
weights = [ 5.39138627e-04 , -6.46492771e-02 , 5.83850255e-01 , -2.20347551e-02 ,-2.01624004e-07, -8.21216705e-08 , -8.08983022e-01 , 1.16252812e-05]
#weights = [ 0.04971919, -0.49727854 ,-0.26373486 ,-0.5413812 , 0.16655347 ,-0.10348452 ,0.577155 , 0.12663088] # clock obstacles 25000
#weights = [ 0.01919929, -0.45153034 ,-0.06908693, -0.75848042 ,0.33990325 ,-0.08139198, 0.29796847 , 0.0688629 ] # clock obstacles 75000
#weights = [-0.82912921 , 0.09203298 , 0.41825967 , 0.23083735 ,-0.06848747 , 0.14014116 ,-0.12718711 ,-0.18799206]

#weights = [-0.08805555, -0.06245599 , 0.09146864 ,-0.01147858 , 0.66908548 ,-0.07713598 ,-0.66502319 ,-0.28976889]
weights = [ 0.2798415 , 0.54756635 ,-0.55969074 , 0.18558382 , 0.01366991 ,-0.1315585, -0.10050859 ,-0.4965564 ]







Expand Down
20 changes: 15 additions & 5 deletions manualControl.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,28 +22,38 @@ def play(screen):
_, state, __ = game_state.frame_step((2))

featureExpectations = np.zeros(len(weights))
Prev = np.zeros(len(weights))

# Move.
while True:
car_distance += 1
event = screen.getch()

if event == curses.KEY_LEFT:
action = 0
elif event == curses.KEY_RIGHT:
action = 1
elif event == curses.KEY_RIGHT:
action = 0
elif event == curses.KEY_DOWN:
break
else:
action = 2


# Take action.
immediateReward , state, readings = game_state.frame_step(action)
featureExpectations += (GAMMA**(car_distance-1))*np.array(readings)
if car_distance > 100:
featureExpectations += (GAMMA**(car_distance-101))*np.array(readings)


# Tell us something.
if car_distance % 2000 == 0:
changePercentage = (np.linalg.norm(featureExpectations - Prev)*100.0)/np.linalg.norm(featureExpectations)

print (car_distance)
print ("percentage change in Feature expectation ::", changePercentage)
Prev = np.array(featureExpectations)

if car_distance % 1000 == 0:
break


return featureExpectations

Expand Down
18 changes: 11 additions & 7 deletions playing.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,19 +26,21 @@ def play(model, weights):

# Choose action.
action = (np.argmax(model.predict(state, batch_size=1)))
print ("Action ", action)
#print ("Action ", action)

# Take action.
immediateReward , state, readings = game_state.frame_step(action)
print ("immeditate reward:: ", immediateReward)
print ("readings :: ", readings)
featureExpectations += (GAMMA**(car_distance-1))*np.array(readings)
print ("Feature Expectations :: ", featureExpectations)
#print ("immeditate reward:: ", immediateReward)
#print ("readings :: ", readings)
if car_distance > 100:
featureExpectations += (GAMMA**(car_distance-101))*np.array(readings)
#print ("Feature Expectations :: ", featureExpectations)
# Tell us something.
if car_distance % 2000 == 0:
print("Current distance: %d frames." % car_distance)
break


return featureExpectations

if __name__ == "__main__":
Expand All @@ -47,8 +49,10 @@ def play(model, weights):
#saved_model = 'saved-models/clock/164-150-100-50000-25000.h5' # [ 756.72859592 723.5764696 619.23933676 0.]
#saved_model = 'saved-models/antiClock/164-150-100-50000-25000.h5' #[ 662.72064093 689.52239795 894.57495776 0. ]
#saved_model = 'saved-models/antiClock/164-150-100-50000-50000.h5' #[ 676.41503823 752.38417361 753.90576239 0. ]
saved_model = 'saved-models/164-150-100-50000-25000.h5'
weights = [ -5.31699058e-01 , -6.03381696e-01 , 5.56388439e-01 , -1.64570933e-01 , -1.71304905e-07 , -6.97726694e-08 , -9.89495334e-02 , 8.23646987e-02] # plain anti without obs 2000
saved_model = 'saved-models_brown/164-150-100-50000-25000.h5'
#weights = [-0.41517549 ,-0.20823906 ,0.28402821 , 0.23587648 ,0.12459162 , 0.45047069 ] # around the brown obs 75000
weights = [-0.79380502 , 0.00704546 , 0.50866139 , 0.29466834, -0.07636144 , 0.09153848 ,-0.02632325 ,-0.09672041]


model = neural_net(NUM_SENSORS, [164, 150], saved_model)
print (play(model, weights))
Loading

0 comments on commit 9fbdefe

Please sign in to comment.