A few updates.

lil-lab · Dec 16, 2019 · beacb25 · beacb25
1 parent e7fcc2d
commit beacb25
Show file tree

Hide file tree

Showing 8 changed files with 2,914,040 additions and 6 deletions.
diff --git a/.gitattributes b/.gitattributes
@@ -1 +1,2 @@
 *.pkl filter=lfs diff=lfs merge=lfs -text
+*.json filter=lfs diff=lfs merge=lfs -text
diff --git a/agent/scripts/finetune_end_to_end.sh b/agent/scripts/finetune_end_to_end.sh
@@ -1,22 +1,28 @@
 #!/usr/bin/env bash
 export EXPERIMENT_NAME="test_experiment_finetuned"
-export PLAN_PREDICTOR_FILEPATH="SET THIS PARAMETER TO YOUR BEST MODEL SAVE"
-export ACTION_GENERATOR_FILEPATH="SET THIS PARAMETER TO YOUR BEST MODEL SAVE"
+export PLAN_PREDICTOR_FILEPATH="agent/experiments/fixed_plan_predictor/model_16_bestacc.pt"
+export ACTION_GENERATOR_FILEPATH="agent/experiments/fixed_action_generator_partial_observability/model_12_card.pt"
 
 CUDA_VISIBLE_DEVICES=0 python -m agent.scripts.main \
                   --saved_game_dir="data/" \
                   --game_state_filename="agent/preprocessed/game_states.pkl" \
                   --save_dir="agent/experiments/" \
                   --experiment_name=${EXPERIMENT_NAME} \
-                  --model_type=PLAN_PREDICTOR \
+                  --model_type=ACTION_GENERATOR \
                   --end_to_end=True \
-                  --aggregate_examples=True \
+                  --aggregate_examples=False \
+                  --maximum_number_examples=10 \
                   --generate_new_cards=True \
+                  --full_observability=False \
                   --finetune_auxiliary_coefficient_intermediate_goal_probabilities=1. \
                   --finetune_auxiliary_coefficient_final_goal_probabilities=1. \
                   --finetune_auxiliary_coefficient_obstacle_probabilities=0.1 \
                   --finetune_auxiliary_coefficient_avoid_probabilities=0.1 \
                   --finetune_auxiliary_coefficient_trajectory_distribution=1. \
                   --finetune_auxiliary_coefficient_implicit_actions=0.7 \
+                  --use_trajectory_distribution=True \
+                  --use_goal_probabilities=True \
+                  --use_obstacle_probabilities=True \
+                  --use_avoid_probabilities=True \
                   --pretrained_plan_predictor_filepath=${PLAN_PREDICTOR_FILEPATH} \
                   --pretrained_action_generator_filepath=${ACTION_GENERATOR_FILEPATH}
diff --git a/data/README.md b/data/README.md
@@ -21,6 +21,7 @@ Unity.
 * `num_cards`: The number of cards on the board in the game.
 All games use 21 cards.
 * `leader_id`: An anonymized Turker ID (consistently
+anonymized across all data).
+* `follower_id`: An anonymized Turker ID (consistently
 anonymized across all data).
 * `actions`: A list of actions taken by both players in the
 game.
@@ -121,6 +122,7 @@ the leader can give instructions.
 * `completed`: Whether the follower marked the instruction
 as completed. The last few instructions may be incomplete
 if the game ended before the follower could get to them.
+* `set_result` and `card_result` 
 
 Similar to above, the following field is stored for
 redundancy and data analysis only and will be re-computed
@@ -137,14 +139,15 @@ additional field:
 * `instruction_id`: The instruction index being marked
 as complete.
 
+It doesn't have the `turn_id` field for now.
 
 ## Preprocessed game states
 
 ## TODO
 
 - [ ] `turn_id` field for `finish command`
-- [ ] Remove fields like `card_result` and `set_result` in instruction
-- [ ] Anonymize Turker IDs
+- [x] Remove fields like `card_result` and `set_result` in instruction
+- [x] Anonymize Turker IDs
 - [x] Make sure that the game states file doesn't have any
 PII for turkers
 - [x] Add in the preprocessed game state pickle file
Original file line number	Diff line number	Diff line change
		@@ -1 +1,2 @@
		*.pkl filter=lfs diff=lfs merge=lfs -text
		*.json filter=lfs diff=lfs merge=lfs -text