Scrubbing Caffe2 out of the codebase (facebookresearch#169)
Summary:
Pull Request resolved: facebookresearch#169

Dead code: removes the unused Caffe2-based output transformers, the ParametricDQNPredictor, the ParametricDQNExporter, and their remaining imports.

Reviewed By: MisterTea

Differential Revision: D17944627

fbshipit-source-id: 0d4af7df7a9c47cea72dbf75e6a5b437c4e23703
kittipatv authored and facebook-github-bot committed Oct 16, 2019
1 parent 9a4c719 commit 11529f0
Showing 12 changed files with 4 additions and 279 deletions.
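
Before a removal like this lands, one way to confirm the summary's "dead code" claim is to scan the tree for remaining references to the deleted symbols. The sketch below is a hypothetical helper, not part of this commit; the symbol list is taken from the classes and module removed in the diff.

# Hypothetical helper (not part of this commit): flag any surviving references
# to the symbols whose definitions are deleted here.
import pathlib

REMOVED = [
    "DiscreteActionOutputTransformer",
    "ParametricActionOutputTransformer",
    "ParametricDQNPredictor",
    "ParametricDQNExporter",
]

for path in pathlib.Path("ml/rl").rglob("*.py"):
    for lineno, line in enumerate(path.read_text().splitlines(), start=1):
        for symbol in REMOVED:
            # Skip the class definition itself; report imports and call sites.
            if symbol in line and f"class {symbol}" not in line:
                print(f"{path}:{lineno}: {line.strip()}")

Any hits outside the files touched by this commit would mean the code is not actually dead.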
79 changes: 0 additions & 79 deletions ml/rl/models/output_transformer.py
@@ -94,85 +94,6 @@ def export_q_values(self, net, q_values, action_names, action_name_blob):
)


class DiscreteActionOutputTransformer(OutputTransformerBase):
def __init__(self, action_names, temperature=1.0):
self.action_names = action_names
self.temperature = temperature

def create_net(self, original_output):
net = core.Net("output_transformer")
init_net = core.Net("output_transformer_init")

action_name_blob = self.create_action_name_blob(init_net, self.action_names)

q_values = original_output.q_values()
self.export_q_values(net, q_values, self.action_names, action_name_blob)

max_q_idx = net.ArgMax(q_values, net.NextScopedBlob("max_q_idx"), keepdims=0)
max_q_idx = net.Cast(
max_q_idx, net.NextScopedBlob("max_q_idx_int"), to=core.DataType.INT32
)

temperature = self.create_const(init_net, "temperature", self.temperature)
tempered_q_values = net.Div(
[q_values, temperature],
net.NextScopedBlob("tempered_q_values"),
broadcast=1,
)
softmax_values = net.Softmax(tempered_q_values, net.NextScopedBlob("softmax"))
softmax_act_idx = net.WeightedSample(
[softmax_values], net.NextScopedBlob("softmax_act_idx")
)

action_indices, _ = net.Concat(
[max_q_idx, softmax_act_idx],
[
net.NextScopedBlob("action_indices"),
net.NextScopedBlob("action_indices_spilt_info"),
],
axis=1,
add_axis=1,
)
flatten_action_indices = net.FlattenToVec(
action_indices, net.NextScopedBlob("flatten_action_indices")
)

lengths = core.BlobReference(
"output/string_single_categorical_features.lengths"
)
keys = core.BlobReference("output/string_single_categorical_features.keys")
values = core.BlobReference("output/string_single_categorical_features.values")

net.Gather([action_name_blob, flatten_action_indices], values)

net.ConstantFill([max_q_idx], lengths, value=2, dtype=core.DataType.INT32)

action_keys = self.create_const(
init_net, "action_keys", value=[0, 1], shape=[2], dtype=core.DataType.INT64
)
batch_size = net.Shape(max_q_idx, 1)
net.Tile([action_keys, batch_size], keys, axis=0)

net.AddExternalOutput(lengths, keys, values)

return OutputTransformerNet(net=net, init_net=init_net)


class ParametricActionOutputTransformer(OutputTransformerBase):
def __init__(self):
self.action_names = ["Q"]

def create_net(self, original_output):
net = core.Net("output_transformer")
init_net = core.Net("output_transformer_init")

action_name_blob = self.create_action_name_blob(init_net, self.action_names)

q_value = original_output.q_value()
self.export_q_values(net, q_value, self.action_names, action_name_blob)
return OutputTransformerNet(net=net, init_net=init_net)


class ActorOutputTransformer(OutputTransformerBase):
def __init__(
self,
1 change: 0 additions & 1 deletion ml/rl/test/gridworld/test_gridworld_parametric.py
@@ -6,7 +6,6 @@

import numpy as np
import torch
from ml.rl.models.output_transformer import ParametricActionOutputTransformer
from ml.rl.models.parametric_dqn import FullyConnectedParametricDQN
from ml.rl.parameters import (
ContinuousActionModelParameters,
1 change: 0 additions & 1 deletion ml/rl/test/gridworld/test_gridworld_pytorch.py
@@ -13,7 +13,6 @@
from ml.rl.models.dqn import FullyConnectedDQN
from ml.rl.models.dueling_q_network import DuelingQNetwork
from ml.rl.models.dueling_quantile_dqn import DuelingQuantileDQN
from ml.rl.models.output_transformer import DiscreteActionOutputTransformer
from ml.rl.models.quantile_dqn import QuantileDQN
from ml.rl.parameters import (
DiscreteActionModelParameters,
5 changes: 1 addition & 4 deletions ml/rl/test/gridworld/test_gridworld_sac.py
@@ -11,10 +11,7 @@
import torch
from ml.rl.models.actor import DirichletFullyConnectedActor, GaussianFullyConnectedActor
from ml.rl.models.fully_connected_network import FullyConnectedNetwork
from ml.rl.models.output_transformer import (
ActorOutputTransformer,
ParametricActionOutputTransformer,
)
from ml.rl.models.output_transformer import ActorOutputTransformer
from ml.rl.models.parametric_dqn import FullyConnectedParametricDQN
from ml.rl.parameters import (
FeedForwardParameters,
5 changes: 1 addition & 4 deletions ml/rl/test/gridworld/test_gridworld_td3.py
@@ -10,10 +10,7 @@
import numpy.testing as npt
import torch
from ml.rl.models.actor import FullyConnectedActor
from ml.rl.models.output_transformer import (
ActorOutputTransformer,
ParametricActionOutputTransformer,
)
from ml.rl.models.output_transformer import ActorOutputTransformer
from ml.rl.models.parametric_dqn import FullyConnectedParametricDQN
from ml.rl.parameters import (
FeedForwardParameters,
18 changes: 0 additions & 18 deletions ml/rl/test/gym/open_ai_gym_environment.py
@@ -16,7 +16,6 @@
from ml.rl.training.dqn_predictor import DQNPredictor
from ml.rl.training.off_policy_predictor import RLPredictor
from ml.rl.training.on_policy_predictor import OnPolicyPredictor
from ml.rl.training.parametric_dqn_predictor import ParametricDQNPredictor


logger = logging.getLogger(__name__)
@@ -217,23 +216,6 @@ def policy(
action_idx = int(max(q_values[0], key=q_values[0].get)) - self.state_dim
action[action_idx] = 1.0
return action, action_probability
elif isinstance(predictor, ParametricDQNPredictor):
# Needs to get a list of candidate actions if actions are continuous
if self.action_type == EnvType.CONTINUOUS_ACTION:
raise NotImplementedError()
action_probability = 1.0 if test else 1.0 - self.epsilon
state = np.repeat(state, repeats=self.action_dim, axis=0)
sparse_states = predictor.in_order_dense_to_sparse(state)
sparse_actions = [
{str(i + self.state_dim): 1} for i in range(self.action_dim)
]
q_values = predictor.predict(sparse_states, sparse_actions)
q_values = np.fromiter(
map(lambda x: x["Q"], q_values), np.float # type: ignore
).reshape(self.action_dim)
action_idx = np.argmax(q_values)
action[action_idx] = 1.0
return action, action_probability
elif predictor.policy_net(): # type: ignore
action_set = predictor.policy(state) # type: ignore
action, action_probability = action_set.greedy, action_set.greedy_propensity
103 changes: 1 addition & 102 deletions ml/rl/test/models/test_output_transformer.py
@@ -6,108 +6,7 @@
import numpy as np
import numpy.testing as npt
from caffe2.python import core, schema, workspace
from ml.rl.models.output_transformer import (
ActorOutputTransformer,
DiscreteActionOutputTransformer,
ParametricActionOutputTransformer,
)


class DiscreteActionOutputTransformerTest(unittest.TestCase):
def test_create_net(self):
q_value_blob = core.BlobReference("q_values")
N = 10
# NOTE: We add `b` prefix here to match the return type of FetchBlob
actions = [b"yes", b"no"]
q_values = np.random.randn(N, len(actions)).astype(np.float32)
workspace.FeedBlob(q_value_blob, q_values)
ot = DiscreteActionOutputTransformer(actions)
output_record = schema.Struct(("q_values", schema.Scalar(blob=q_value_blob)))
nets = ot.create_net(output_record)
workspace.RunNetOnce(nets.init_net)
workspace.RunNetOnce(nets.net)

external_outputs = {str(b) for b in nets.net.external_outputs}

def fetch_blob(b):
self.assertIn(b, external_outputs)
return workspace.FetchBlob(b)

feature_lengths = fetch_blob(
"output/string_weighted_multi_categorical_features.lengths"
)
feature_keys = fetch_blob(
"output/string_weighted_multi_categorical_features.keys"
)
values_lengths = fetch_blob(
"output/string_weighted_multi_categorical_features.values.lengths"
)
values_keys = fetch_blob(
"output/string_weighted_multi_categorical_features.values.keys"
)
values_values = fetch_blob(
"output/string_weighted_multi_categorical_features.values.values"
)
action_lengths = fetch_blob("output/string_single_categorical_features.lengths")
action_keys = fetch_blob("output/string_single_categorical_features.keys")
action_values = fetch_blob("output/string_single_categorical_features.values")

npt.assert_array_equal(np.ones(N, dtype=np.int32), feature_lengths)
npt.assert_array_equal(np.zeros(N, dtype=np.int64), feature_keys)
npt.assert_array_equal([len(actions)] * N, values_lengths)
npt.assert_array_equal(np.array(actions * N, dtype=np.object), values_keys)
npt.assert_array_equal(q_values.reshape(-1), values_values)
npt.assert_array_equal([len(actions)] * N, action_lengths)
npt.assert_array_equal(list(range(len(actions))) * N, action_keys)

# We can only assert max-Q policy
max_q_actions = action_values.reshape(-1, len(actions))[::, 0]
npt.assert_array_equal(
[actions[i] for i in np.argmax(q_values, axis=1)], max_q_actions
)


class ParametricActionOutputTransformerTest(unittest.TestCase):
def test_create_net(self):
q_value_blob = core.BlobReference("q_values")
N = 10
# NOTE: We add `b` prefix here to match the return type of FetchBlob
actions = [b"Q"]
q_values = np.random.randn(N, len(actions)).astype(np.float32)
workspace.FeedBlob(q_value_blob, q_values)
ot = ParametricActionOutputTransformer()
output_record = schema.Struct(("q_value", schema.Scalar(blob=q_value_blob)))
nets = ot.create_net(output_record)
workspace.RunNetOnce(nets.init_net)
workspace.RunNetOnce(nets.net)

external_outputs = {str(b) for b in nets.net.external_outputs}

def fetch_blob(b):
self.assertIn(b, external_outputs)
return workspace.FetchBlob(b)

feature_lengths = fetch_blob(
"output/string_weighted_multi_categorical_features.lengths"
)
feature_keys = fetch_blob(
"output/string_weighted_multi_categorical_features.keys"
)
values_lengths = fetch_blob(
"output/string_weighted_multi_categorical_features.values.lengths"
)
values_keys = fetch_blob(
"output/string_weighted_multi_categorical_features.values.keys"
)
values_values = fetch_blob(
"output/string_weighted_multi_categorical_features.values.values"
)

npt.assert_array_equal(np.ones(N, dtype=np.int32), feature_lengths)
npt.assert_array_equal(np.zeros(N, dtype=np.int64), feature_keys)
npt.assert_array_equal([len(actions)] * N, values_lengths)
npt.assert_array_equal(np.array(actions * N, dtype=np.object), values_keys)
npt.assert_array_equal(q_values.reshape(-1), values_values)
from ml.rl.models.output_transformer import ActorOutputTransformer


class ActorOutputTransformerTest(unittest.TestCase):
1 change: 0 additions & 1 deletion ml/rl/test/workflow/test_oss_workflows.py
@@ -14,7 +14,6 @@
)
from ml.rl.test.base.horizon_test_base import HorizonTestBase
from ml.rl.training.dqn_predictor import DQNPredictor
from ml.rl.training.parametric_dqn_predictor import ParametricDQNPredictor
from ml.rl.workflow import dqn_workflow, parametric_dqn_workflow


19 changes: 0 additions & 19 deletions ml/rl/training/parametric_dqn_predictor.py

This file was deleted.

49 changes: 1 addition & 48 deletions ml/rl/training/rl_exporter.py
@@ -4,16 +4,11 @@
import logging

from ml.rl.models.actor import ActorWithPreprocessing
from ml.rl.models.output_transformer import (
ActorOutputTransformer,
ParametricActionOutputTransformer,
)
from ml.rl.models.parametric_dqn import ParametricDQNWithPreprocessing
from ml.rl.models.output_transformer import ActorOutputTransformer
from ml.rl.preprocessing.feature_extractor import PredictorFeatureExtractor
from ml.rl.preprocessing.normalization import get_action_output_parameters
from ml.rl.training.actor_predictor import ActorPredictor
from ml.rl.training.dqn_predictor import DQNPredictor
from ml.rl.training.parametric_dqn_predictor import ParametricDQNPredictor


logger = logging.getLogger(__name__)
@@ -65,48 +60,6 @@ def export(self):
return self.predictor_class(pem, ws, **self.kwargs)


class ParametricDQNExporter(SandboxedRLExporter):
def __init__(
self,
dnn,
feature_extractor=None,
output_transformer=None,
state_preprocessor=None,
action_preprocessor=None,
):
super().__init__(
dnn,
ParametricDQNPredictor,
ParametricDQNWithPreprocessing,
feature_extractor,
output_transformer,
state_preprocessor,
action_preprocessor,
)

@classmethod
def from_state_action_normalization(
cls,
dnn,
state_normalization,
action_normalization,
state_preprocessor=None,
action_preprocessor=None,
**kwargs,
):
return cls(
dnn=dnn,
feature_extractor=PredictorFeatureExtractor(
state_normalization_parameters=state_normalization,
action_normalization_parameters=action_normalization,
),
output_transformer=ParametricActionOutputTransformer(),
state_preprocessor=state_preprocessor,
action_preprocessor=action_preprocessor,
**kwargs,
)


class ActorExporter(SandboxedRLExporter):
def __init__(
self,
1 change: 0 additions & 1 deletion ml/rl/workflow/dqn_workflow.py
@@ -11,7 +11,6 @@
import torch
from ml.rl.evaluation.evaluator import Evaluator
from ml.rl.json_serialize import from_json
from ml.rl.models.output_transformer import DiscreteActionOutputTransformer
from ml.rl.parameters import (
DiscreteActionModelParameters,
NormalizationParameters,
1 change: 0 additions & 1 deletion ml/rl/workflow/parametric_dqn_workflow.py
@@ -10,7 +10,6 @@
import torch
from ml.rl.evaluation.evaluator import Evaluator
from ml.rl.json_serialize import from_json
from ml.rl.models.output_transformer import ParametricActionOutputTransformer
from ml.rl.parameters import (
ContinuousActionModelParameters,
NormalizationParameters,
