manually copy il scripts from dyth-babyai-1.1
dyth committed Jun 17, 2020
1 parent 9aea47d commit 45ad3bf
Showing 6 changed files with 405 additions and 32 deletions.
45 changes: 29 additions & 16 deletions babyai/plotting.py
@@ -133,7 +133,7 @@ def plot_all_runs(df, regex, quantity='return_mean', x_axis='frames', window=1,
 
 def model_num_samples(model):
     # the number of samples is mangled in the name
-    return int(re.findall('([0-9]+)', model)[0])
+    return int(re.findall('_([0-9]+)', model)[0])
 
 
 def best_within_normal_time(df, regex, patience, limit='epochs', window=1, normal_time=None, summary_path=None):
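
The anchored pattern matters whenever a model name contains digits before the sample count. A quick sanity check of the change, using a hypothetical model name:

    import re

    model = 'GoToLocal-seed1_500000'         # hypothetical name: a seed digit precedes the sample count
    int(re.findall('([0-9]+)', model)[0])    # -> 1, the seed leaks in
    int(re.findall('_([0-9]+)', model)[0])   # -> 500000, anchored on the underscore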
@@ -190,7 +190,7 @@ def best_within_normal_time(df, regex, patience, limit='epochs', window=1, normal_time=None, summary_path=None):
                        else int(1e9))
         if df_model[limit].max() < normal_time:
             need_more_time = True
-        print("{: <100} {: <5.4g}\t{: <5.4g}\t{: <5.3g}\t{:.3g}".format(
+        print("{: <50} {: <5.4g}\t{: <5.4g}\t{: <5.3g}\t{:.3g}".format(
             model.split('/')[-1],
             max_within_normal_time * 100,
             success_rate.max() * 100,
@@ -222,26 +222,39 @@ def estimate_sample_efficiency(df, visualize=False, figure_path=None):
     # preprocess the data
     print("{} datapoints".format(len(df)))
     x = np.log2(df['num_samples'].values)
-    y = (df['success_rate'] - 0.99).values * 100
+    y = df['success_rate']
     indices = np.argsort(x)
     x = x[indices]
-    y = y[indices]
+    y = y[indices].values
 
-    if y.min() < -4:
-        print("dropping {} data points with too low performance".format((y < -4).sum()))
-        keep_indices = y > -4
-        x = x[keep_indices]
-        y = y[keep_indices]
-    print("min x: {}, max x: {}, min y: {}, max y: {}".format(x.min(), x.max(), y.min(), y.max()))
-    if (y < 0).sum() < 5:
-        raise ValueError("You have less than 5 datapoints below the threshold.\n"
-                         "Consider running experiments with less examples.")
-    if (y > 0).sum() < 5:
-        raise ValueError("You have less than 5 datapoints above the threshold.\n"
-                         "Consider running experiments with more examples.")
+    success_threshold = 0.99
+    min_datapoints = 5
+    almost_threshold = 0.95
+
+    if (y > success_threshold).sum() < min_datapoints:
+        raise ValueError(f"You have less than {min_datapoints} datapoints above the threshold.\n"
+                         "Consider running experiments with more examples.")
+    if ((y > almost_threshold) & (y < success_threshold)).sum() < min_datapoints:
+        raise ValueError(f"You have less than {min_datapoints} datapoints"
+                         " for which the threshold is almost crossed.\n"
+                         "Consider running experiments with less examples.")
+    # try to throw away the extra points with low performance
+    # the model is not suitable for handling those
+    while True:
+        if ((y[1:] > success_threshold).sum() >= min_datapoints
+                and ((y[1:] > almost_threshold) & (y[1:] < success_threshold)).sum()
+                    >= min_datapoints):
+            print('throwing away x={}, y={}'.format(x[0], y[0]))
+            x = x[1:]
+            y = y[1:]
+        else:
+            break
+
+    print("min x: {}, max x: {}, min y: {}, max y: {}".format(x.min(), x.max(), y.min(), y.max()))
+    y = (y - success_threshold) * 100
 
     # fit an RBF GP
-    kernel = 1.0 * RBF() + WhiteKernel(noise_level_bounds=(1e-10, 1))
+    kernel = 1.0 * RBF() + WhiteKernel(noise_level_bounds=(1e-10, 3))
     gp = GaussianProcessRegressor(kernel=kernel, alpha=0, normalize_y=False).fit(x[:, None], y)
     print("Kernel:", gp.kernel_)
     print("Marginal likelihood:", gp.log_marginal_likelihood_value_)
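
With this change, `estimate_sample_efficiency` keeps only the informative tail of the success-rate curve before fitting the GP: leading low-performance points are discarded one at a time as long as at least `min_datapoints` points stay above the 0.99 threshold and at least `min_datapoints` stay in the 0.95-0.99 band. A minimal sketch of the trimming condition on invented data, with `min_datapoints` shrunk to 2 so the toy arrays stay short (the commit's while-True/else-break loop is condensed into a plain while):

    import numpy as np

    x = np.log2(np.array([5, 10, 25, 50, 100, 250, 500]) * 1000)   # hypothetical sample counts
    y = np.array([0.62, 0.80, 0.96, 0.97, 0.98, 0.992, 0.997])     # hypothetical success rates, sorted by x

    success_threshold, almost_threshold, min_datapoints = 0.99, 0.95, 2  # the real code uses 5

    while ((y[1:] > success_threshold).sum() >= min_datapoints
           and ((y[1:] > almost_threshold) & (y[1:] < success_threshold)).sum() >= min_datapoints):
        x, y = x[1:], y[1:]  # drop the weakest leading point

    print(y)  # [0.97  0.98  0.992 0.997] -- the 0.62, 0.80 and 0.96 points are gone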
13 changes: 7 additions & 6 deletions scripts/il_dataeff.py
@@ -4,6 +4,7 @@
 import pandas
 import os
 import json
+import shutil
 
 from babyai import plotting
 
@@ -17,9 +18,9 @@
 parser.add_argument("report")
 args = parser.parse_args()
 
-# if os.path.exists(args.report):
-#     raise ValueError("report directory already exists")
-# os.mkdir(args.report)
+if os.path.exists(args.report):
+    shutil.rmtree(args.report)
+    raise ValueError("report directory already exists")
 
 summary_path = os.path.join(args.report, 'summary.csv')
 figure_path = os.path.join(args.report, 'visualization.png')
@@ -29,10 +30,10 @@
 df_success_rate, normal_time = plotting.best_within_normal_time(
     df_logs, args.regex,
     patience=args.patience, window=args.window, limit=args.limit,
-    summary_path=None)
+    summary_path=summary_path)
 result = plotting.estimate_sample_efficiency(
     df_success_rate, visualize=True, figure_path=figure_path)
 result['normal_time'] = normal_time
 
-# with open(result_path, 'w') as dst:
-#     json.dump(result, dst)
+with open(result_path, 'w') as dst:
+    json.dump(result, dst)
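
With these changes the script removes any stale report directory and persists the sample-efficiency estimate to disk. Note that, as committed, the `raise` fires right after `shutil.rmtree` whenever the report directory already exists, so such a run stops after clearing the old directory and must be invoked again. A hypothetical invocation, assuming the script defines the same --regex/--patience/--window flags its sibling scripts do (the report path is invented):

    python scripts/il_dataeff.py --regex '.*GoToLocal.*' --patience 2 --window 10 reports/gotolocal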
127 changes: 127 additions & 0 deletions scripts/il_fps.py
@@ -0,0 +1,127 @@
#!/usr/bin/env python3
import argparse
import pandas
import os
import json
import re
import numpy as np

from babyai import plotting
from babyai.plotting import model_num_samples


parser = argparse.ArgumentParser("Analyze data efficiency of imitation learning")
parser.add_argument('--path', default='.')
parser.add_argument("--regex", default='.*')
parser.add_argument("--patience", default=2, type=int)
parser.add_argument("--window", default=10, type=int)
parser.add_argument("--limit", default="frames")
args = parser.parse_args()



def best_within_normal_time_mutilated(df, regex, patience, limit='epochs', window=1, normal_time=None, summary_path=None):
    """
    Compute the best FPS that is achieved in all runs within the normal time.
    The normal time is defined as `patience * T`, where `T` is the time it takes for the run
    with the most demonstrations to converge. `window` is the size of the sliding window that is
    used for smoothing.
    Returns an array with the best FPS for the runs that match `regex`.
    """
    models = [model for model in df['model'].unique() if re.match(regex, model)]
    num_samples = [model_num_samples(model) for model in models]
    print(len(num_samples))
    # sort models according to the number of samples
    models, num_samples = zip(*sorted(list(zip(models, num_samples)), key=lambda tupl: tupl[1]))

    maxes = []
    for model, num in zip(models, num_samples):
        df_model = df[df['model'] == model]
        fps = df_model['FPS']
        # success_rate = df_model['validation_success_rate'].rolling(window, center=True).mean()
        # print(success_rate.tolist())
        # success_rate = success_rate[np.logical_not(np.isnan(success_rate))]
        maxes.append(max(fps))
    return np.array(maxes)
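
The zip(*sorted(zip(...))) line is the standard idiom for sorting two parallel sequences by one of them; a tiny standalone check with made-up run names:

    models = ('run_500000', 'run_5000', 'run_50000')
    num_samples = (500000, 5000, 50000)
    models, num_samples = zip(*sorted(zip(models, num_samples), key=lambda tupl: tupl[1]))
    print(models)       # ('run_5000', 'run_50000', 'run_500000')
    print(num_samples)  # (5000, 50000, 500000)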





# levels = ['GoTo']
# archs = ['expert_filmcnn', 'expert_filmcnn_endpool_res_not_conv_bow']
#
# all_results = []
# for level in levels:
#     arch_results = []
#     for arch in archs:
#         results = ''
#         path = f'../beluga/GoTo/{arch}/'
#         print(path)
#
#         df_logs = pandas.concat(plotting.load_logs(path), sort=True)
#
#         maxes = best_within_normal_time_mutilated(
#             df_logs, args.regex,
#             patience=args.patience, window=args.window, limit=args.limit,
#             summary_path=None)
#         results += f"{np.mean(maxes)}\t"
#         arch_results.append(results)
#     all_results.append(arch_results)
#
# demos = '\t'
# for sample in samples:
#     demos += f'\t{sample}K'
#
# print(demos)
# for level, ar in zip(levels, all_results):
#     for arch, ar in zip(archs, ar):
#         print(f'{level}\t{arch}\t' + ar)



levels = ['GoToRedBallGrey', 'GoToRedBall', 'GoToLocal', 'PutNextLocal', 'PickupLoc']
archs = ['expert_filmcnn', 'expert_filmcnn_endpool_res', 'expert_filmcnn_endpool_res_not_conv_bow']
# samples = [5, 10, 25, 50, 100, 250, 500]
samples = [5, 10, 50, 100, 500]

# all_results = []
# for level in levels:
#     arch_results = []
#     for sample in samples:
#         results = ''
#         for arch in archs:
#             path = f'../beluga/il/{level}/{arch}/{sample}000/'
#             print(path)
#
#             df_logs = pandas.concat(plotting.load_logs(path), sort=True)
#             # print(df_logs['model'].unique().tolist())
#             models = [model for model in df_logs['model'].unique() if re.match(args.regex, model)]
#
#             maxes = best_within_normal_time_mutilated(
#                 df_logs, args.regex,
#                 patience=args.patience, window=args.window, limit=args.limit,
#                 summary_path=None)
#             results += f"{maxes.mean()}\t{maxes.std()}\t"
#             # results += f" & {100.*maxes.mean():.1f} $\pm$ {100.*maxes.std():.1f}"
#         arch_results.append(results)
#     all_results.append(arch_results)



for arch in archs:
    fps = []
    for level in levels:
        path = f'../beluga/il/{level}/{arch}/'
        df_logs = pandas.concat(plotting.load_logs(path), sort=True)
        fps.append(df_logs)

    fps = pandas.concat(fps, sort=True)
    maxes = best_within_normal_time_mutilated(
        fps, args.regex,
        patience=args.patience, window=args.window, limit=args.limit,
        summary_path=None)
    print(f'{maxes.mean()}\t{maxes.std()}')
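
Each pass through this loop pools the logs for all five levels of one architecture and prints a single tab-separated line: the mean and standard deviation of the per-run FPS maxima. Note that the log locations are hard-coded relative paths (`../beluga/il/...`), so the --path argument defined at the top of the script is never read.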
123 changes: 123 additions & 0 deletions scripts/il_performance.py
@@ -0,0 +1,123 @@
#!/usr/bin/env python3
import argparse
import pandas
import os
import json
import re
import numpy as np

from babyai import plotting
from babyai.plotting import model_num_samples


parser = argparse.ArgumentParser("Analyze data efficiency of imitation learning")
parser.add_argument('--path', default='.')
parser.add_argument("--regex", default='.*')
parser.add_argument("--patience", default=2, type=int)
parser.add_argument("--window", default=10, type=int)
parser.add_argument("--limit", default="frames")
args = parser.parse_args()



def best_within_normal_time_mutilated(df, regex, patience, limit='epochs', window=1, normal_time=None, summary_path=None):
    """
    Compute the best success rate that is achieved in all runs within the normal time.
    The normal time is defined as `patience * T`, where `T` is the time it takes for the run
    with the most demonstrations to converge. `window` is the size of the sliding window that is
    used for smoothing.
    Returns an array with the best success rate for the runs that match `regex`.
    """
    models = [model for model in df['model'].unique() if re.match(regex, model)]
    num_samples = [model_num_samples(model) for model in models]
    print(len(num_samples))
    # sort models according to the number of samples
    models, num_samples = zip(*sorted(list(zip(models, num_samples)), key=lambda tupl: tupl[1]))

    maxes = []
    for model, num in zip(models, num_samples):
        df_model = df[df['model'] == model]
        success_rate = df_model['validation_success_rate'].rolling(window, center=True).mean()
        # print(success_rate.tolist())
        success_rate = success_rate[np.logical_not(np.isnan(success_rate))]
        maxes.append(max(success_rate))
    print(maxes)
    return np.array(maxes)
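
The smoothing relies on pandas' centered rolling mean, which leaves NaNs at both ends of the series; these are stripped before taking the max. A minimal check with an invented validation curve:

    import numpy as np
    import pandas as pd

    s = pd.Series([0.90, 0.92, 0.95, 0.94, 0.97])
    smooth = s.rolling(3, center=True).mean()          # NaN at both ends
    smooth = smooth[np.logical_not(np.isnan(smooth))]  # keep the valid middle
    print(smooth.max())                                # 0.9533..., max of the smoothed curve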





# levels = ['GoTo']
# archs = ['expert_filmcnn', 'expert_filmcnn_endpool_res', 'expert_filmcnn_endpool_res_not_conv_bow']
# samples = [10, 100]
#
# all_results = []
# for level in levels:
#     arch_results = []
#     for arch in archs:
#         results = ''
#         path = f'../beluga/logs/il/GoTo/{arch}/'
#         print(path)
#
#         df_logs = pandas.concat(plotting.load_logs(path), sort=True)
#
#         maxes = best_within_normal_time_mutilated(
#             df_logs, args.regex,
#             patience=args.patience, window=args.window, limit=args.limit,
#             summary_path=None)
#         results += f"{np.mean(maxes)}\t"
#         arch_results.append(results)
#     all_results.append(arch_results)
#
# demos = '\t'
# for sample in samples:
#     demos += f'\t{sample}K'
#
# print(demos)
# for level, ar in zip(levels, all_results):
#     for arch, ar in zip(archs, ar):
#         print(f'{level}\t{arch}\t' + ar)



levels = ['GoToRedBallGrey', 'GoToRedBall', 'GoToLocal', 'PutNextLocal', 'PickupLoc']
levels = ['GoTo']
archs = ['expert_filmcnn', 'expert_filmcnn_endpool_res', 'expert_filmcnn_endpool_res_not_conv_bow']
samples = [5, 10, 25, 50, 100, 250, 500]
samples = [5, 10, 50, 100, 500]
samples = [10, 100]

all_results = []
for level in levels:
    arch_results = []
    for sample in samples:
        results = ''
        for arch in archs:
            path = f'../beluga/logs/il/{level}/{arch}/{sample}000/'
            print(path)

            df_logs = pandas.concat(plotting.load_logs(path), sort=True)
            maxes = best_within_normal_time_mutilated(
                df_logs, args.regex,
                patience=args.patience, window=args.window, limit=args.limit,
                summary_path=None)
            # results += f"{maxes.mean()}\t{maxes.std()}\t"
            results += f" & {100.*maxes.mean():.1f} $\pm$ {100.*maxes.std():.1f}"
        arch_results.append(results)
    all_results.append(arch_results)



demos = '\t'
for sample in samples:
    demos += f'\t{sample}K'
print(demos)

for level, ar in zip(levels, all_results):
    for sample, ar in zip(samples, ar):
        print('&' + ar + ' \\\ ')
        # print(f'{level}\t{arch}\t' + ar)
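
Each printed row is meant to be pasted into a LaTeX table. For the three architectures above, one row would render roughly like this (the numbers are invented):

    & & 97.3 $\pm$ 0.8 & 98.1 $\pm$ 0.5 & 96.4 $\pm$ 1.2 \\

where the leading '&' comes from the print statement and each subsequent cell from the f-string accumulated per architecture.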
