Commit
fix issue in reward function design; RL is working properly now.
yun-long committed Aug 28, 2020
1 parent b665717 commit 9e6d228
Showing 12 changed files with 120 additions and 133 deletions.
9 changes: 5 additions & 4 deletions flightlib/configs/quadrotor_env.yaml
@@ -14,7 +14,8 @@ quadrotor_dynamics:
omega_max: [6.0, 6.0, 6.0] # body rate constraint (x, y, z)

rl:
Q_pos: [-0.01, -0.01, -0.01]
Q_ori: [-0.01, -0.01, -0.01]
Q_lin_vel: [-0.001, -0.001, -0.001]
Q_cmd: [-0.0001, -0.0001, -0.0001, -0.0001]
pos_coeff: -0.002 # reward coefficient for position
ori_coeff: -0.002 # reward coefficient for orientation
lin_vel_coeff: -0.0002 # reward coefficient for linear velocity
ang_vel_coeff: -0.0002 # reward coefficient for angular velocity
act_coeff: -0.0002 # reward coefficient for control actions
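The new scheme replaces the diagonal quadratic-cost matrices (Q_pos, Q_ori, Q_lin_vel, Q_cmd) with one scalar coefficient per error term. A minimal sketch of the reward shape this implies, using plain Eigen types as stand-ins for flightlib's Vector<> aliases:

#include <Eigen/Dense>

using Vec3 = Eigen::Vector3d;

// Sketch only: each negative coefficient scales a squared error norm, so the
// reward peaks at zero on the goal and falls off quadratically away from it.
double stageReward(const Vec3 &pos_err, const Vec3 &ori_err,
                   const Vec3 &lin_vel_err, const Vec3 &ang_vel_err) {
  const double pos_coeff = -0.002, ori_coeff = -0.002;
  const double lin_vel_coeff = -0.0002, ang_vel_coeff = -0.0002;
  return pos_coeff * pos_err.squaredNorm() +
         ori_coeff * ori_err.squaredNorm() +
         lin_vel_coeff * lin_vel_err.squaredNorm() +
         ang_vel_coeff * ang_vel_err.squaredNorm();
}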
50 changes: 24 additions & 26 deletions flightlib/include/flightlib/envs/quadrotor_env/quadrotor_env.hpp
@@ -19,24 +19,26 @@

namespace flightlib {

enum CtlObsAct {
namespace quadenv {

enum Ctl : int {
// observations
kObs = 0,
//
kObsPos = 0,
kObsPosSize = 3,
kObsOri = 3,
kObsOriSize = 3,
kObsLinVel = 6,
kObsLinVelSize = 3,
kObsAngVel = 9,
kObsAngVelSize = 3,
kObsSize = 12,
kPos = 0,
kNPos = 3,
kOri = 3,
kNOri = 3,
kLinVel = 6,
kNLinVel = 3,
kAngVel = 9,
kNAngVel = 3,
kNObs = 12,
// control actions
kAct = 0,
kActSize = 4,
kNAct = 4,
};
};

class QuadrotorEnv final : public EnvBase {
public:
EIGEN_MAKE_ALIGNED_OPERATOR_NEW
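The paired offset/size constants in the new quadenv::Ctl enum (kPos/kNPos, kOri/kNOri, ...) let Eigen's fixed-size segment slice the stacked 12-dimensional observation without magic numbers. A small self-contained illustration, using Eigen::VectorXd in place of flightlib's Vector<> alias:

#include <Eigen/Dense>
#include <iostream>

int main() {
  // Stacked observation: [pos(3), ori(3), lin_vel(3), ang_vel(3)].
  Eigen::VectorXd obs = Eigen::VectorXd::LinSpaced(12, 0.0, 11.0);
  // Offset/length pairs mirroring quadenv::kPos, quadenv::kNPos, etc.
  constexpr int kPos = 0, kNPos = 3, kLinVel = 6, kNLinVel = 3;
  std::cout << "position: " << obs.segment<kNPos>(kPos).transpose() << "\n"
            << "lin vel:  " << obs.segment<kNLinVel>(kLinVel).transpose()
            << std::endl;
  return 0;
}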
@@ -56,9 +58,6 @@ class QuadrotorEnv final : public EnvBase {
bool getObs(Ref<Vector<>> obs) override;
bool getAct(Ref<Vector<>> act) const;
bool getAct(Command *const cmd) const;
void getQ(Ref<Matrix<CtlObsAct::kObsSize, CtlObsAct::kObsSize>> Q) const;
void getQAct(
Ref<Matrix<CtlObsAct::kActSize, CtlObsAct::kActSize>> Q_act) const;

// - auxiliary functions
bool isTerminalState(Scalar &reward) override;
@@ -74,22 +73,21 @@ class QuadrotorEnv final : public EnvBase {
Command cmd_;
Logger logger_{"QuadrotorEnv"};

// Define reward for training
Scalar pos_coeff_, ori_coeff_, lin_vel_coeff_, ang_vel_coeff_, act_coeff_;

// observations and actions (for RL)
Vector<CtlObsAct::kObsSize> quad_obs_;
Vector<CtlObsAct::kActSize> quad_act_;
Vector<quadenv::kNObs> quad_obs_;
Vector<quadenv::kNAct> quad_act_;

// reward function design (for model-free reinforcement learning)
Matrix<CtlObsAct::kObsSize, CtlObsAct::kObsSize> Q_;
Matrix<CtlObsAct::kActSize, CtlObsAct::kActSize> Q_act_;
Vector<CtlObsAct::kObsSize> goal_state_;
Vector<quadenv::kNObs> goal_state_;

// action and observation normalization (for learning)
// Vector<CtlObsAct::kActSize> act_mean_{-Gz, 0.0, 0.0, 0.0};
// Vector<CtlObsAct::kActSize> act_std_{10.0, 6.0, 6.0, 6.0};
Vector<CtlObsAct::kActSize> act_mean_;
Vector<CtlObsAct::kActSize> act_std_;
Vector<CtlObsAct::kObsSize> obs_mean_ = Vector<CtlObsAct::kObsSize>::Zero();
Vector<CtlObsAct::kObsSize> obs_std_ = Vector<CtlObsAct::kObsSize>::Ones();
Vector<quadenv::kNAct> act_mean_;
Vector<quadenv::kNAct> act_std_;
Vector<quadenv::kNObs> obs_mean_ = Vector<quadenv::kNObs>::Zero();
Vector<quadenv::kNObs> obs_std_ = Vector<quadenv::kNObs>::Ones();

YAML::Node cfg_;
Matrix<3, 2> world_box_;
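With obs_mean_ initialized to zero and obs_std_ to ones, observation normalization starts out as the identity; presumably the members exist so that real statistics can be substituted later. The usual whitening pattern, as a sketch:

#include <Eigen/Dense>

// Standard observation whitening; reduces to the identity when mean = 0
// and std = 1, as in the defaults above.
Eigen::VectorXd normalizeObs(const Eigen::VectorXd &obs,
                             const Eigen::VectorXd &mean,
                             const Eigen::VectorXd &std_dev) {
  return (obs - mean).cwiseQuotient(std_dev);
}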
4 changes: 2 additions & 2 deletions flightlib/include/flightlib/objects/quadrotor.hpp
@@ -67,8 +67,8 @@ class Quadrotor : ObjectBase {
// quadrotor dynamics, integrators
QuadrotorDynamics dynamics_;
IMU imu_;
// std::unique_ptr<IntegratorRK4> integrator_ptr_;
std::unique_ptr<IntegratorEuler> integrator_ptr_;
std::unique_ptr<IntegratorRK4> integrator_ptr_;
// std::unique_ptr<IntegratorEuler> integrator_ptr_;
std::vector<RGBCamera*> rgb_cameras_;

// quad control command
2 changes: 2 additions & 0 deletions flightlib/setup.py
@@ -21,6 +21,7 @@ class CMakeBuild(build_ext):
def run(self):
FLIGHTLIB_EXTERNAL_FILES = os.environ["FLIGHTMARE_PATH"] + \
"/flightlib/externals/"
# --------------------------------
# remove cached external files
# a hack to solve some cmake error when using "pip install ."
try:
@@ -29,6 +30,7 @@ def run(self):
print("Removing some cache file: ", p)
except:
pass
# --------------------------------
try:
out = subprocess.check_output(['cmake', '--version'])
except OSError:
100 changes: 51 additions & 49 deletions flightlib/src/envs/quadrotor_env/quadrotor_env.cpp
@@ -8,14 +8,13 @@ QuadrotorEnv::QuadrotorEnv()

QuadrotorEnv::QuadrotorEnv(const std::string &cfg_path)
: EnvBase(),
Q_((Vector<CtlObsAct::kObsSize>() << -1e-2, -1e-2, -1e-2, -1e-2, -1e-2,
-1e-2, -1e-3, -1e-3, -1e-3, -1e-3, -1e-3, -1e-3)
.finished()
.asDiagonal()),
Q_act_(
Vector<CtlObsAct::kActSize>(-1e-4, -1e-4, -1e-4, -1e-4).asDiagonal()),
goal_state_((Vector<CtlObsAct::kObsSize>() << 0.0, 0.0, 5.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0, 0.0)
pos_coeff_(0.0),
ori_coeff_(0.0),
lin_vel_coeff_(0.0),
ang_vel_coeff_(0.0),
act_coeff_(0.0),
goal_state_((Vector<quadenv::kNObs>() << 0.0, 0.0, 5.0, 0.0, 0.0, 0.0, 0.0,
0.0, 0.0, 0.0, 0.0, 0.0)
.finished()) {
// load configuration file
YAML::Node cfg_ = YAML::LoadFile(cfg_path);
@@ -30,13 +29,13 @@ QuadrotorEnv::QuadrotorEnv(const std::string &cfg_path)
quadrotor_.setWorldBox(world_box_);

// define input and output dimension for the environment
obs_dim_ = CtlObsAct::kObsSize;
act_dim_ = CtlObsAct::kActSize;
obs_dim_ = quadenv::kNObs;
act_dim_ = quadenv::kNAct;


Scalar mass = quadrotor_.getMass();
act_mean_ = Vector<4>::Ones() * (-mass * Gz) / 4;
act_std_ = Vector<4>::Ones() * (-mass * 2 * Gz) / 4;
act_mean_ = Vector<quadenv::kNAct>::Ones() * (-mass * Gz) / 4;
act_std_ = Vector<quadenv::kNAct>::Ones() * (-mass * 2 * Gz) / 4;

// load parameters
loadParam(cfg_);
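Since Gz is the gravitational acceleration along z (negative in flightlib), -mass * Gz / 4 is the per-motor thrust that exactly cancels gravity, so actions are centered on the hover point with a spread of twice the hover thrust. A standalone check of that arithmetic, assuming Gz = -9.81 and a hypothetical 1 kg quadrotor:

#include <cassert>
#include <cmath>

int main() {
  const double Gz = -9.81;  // gravity along z, negative as in flightlib
  const double mass = 1.0;  // hypothetical quadrotor mass [kg]
  const double act_mean = -mass * Gz / 4.0;       // per-motor hover thrust
  const double act_std = -mass * 2.0 * Gz / 4.0;  // twice the hover thrust
  // Four motors at hover thrust exactly balance the weight m * g.
  assert(std::abs(4.0 * act_mean - mass * 9.81) < 1e-9);
  assert(std::abs(act_std - 2.0 * act_mean) < 1e-9);
  return 0;
}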
@@ -83,11 +82,11 @@ bool QuadrotorEnv::getObs(Ref<Vector<>> obs) {
quadrotor_.getState(&quad_state_);

// convert quaternion to euler angle
Vector<3> euler = quad_state_.q().toRotationMatrix().eulerAngles(0, 1, 2);
Vector<3> euler_zyx = quad_state_.q().toRotationMatrix().eulerAngles(2, 1, 0);
// quaternionToEuler(quad_state_.q(), euler);
quad_obs_ << quad_state_.p, euler, quad_state_.v, quad_state_.w;
quad_obs_ << quad_state_.p, euler_zyx, quad_state_.v, quad_state_.w;

obs.segment<CtlObsAct::kObsSize>(kObs) = quad_obs_;
obs.segment<quadenv::kNObs>(quadenv::kObs) = quad_obs_;
return true;
}
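Eigen's eulerAngles(2, 1, 0) performs the intrinsic Z-Y-X (yaw-pitch-roll) decomposition — hence the rename to euler_zyx — and constrains the first returned angle to [0, pi]. A quick round-trip sketch of the convention:

#include <Eigen/Geometry>
#include <iostream>

int main() {
  // Build a rotation from known yaw/pitch/roll, then recover the angles.
  const double yaw = 0.3, pitch = -0.2, roll = 0.1;
  const Eigen::Quaterniond q =
      Eigen::AngleAxisd(yaw, Eigen::Vector3d::UnitZ()) *
      Eigen::AngleAxisd(pitch, Eigen::Vector3d::UnitY()) *
      Eigen::AngleAxisd(roll, Eigen::Vector3d::UnitX());
  // eulerAngles(2, 1, 0) returns (yaw, pitch, roll) for this composition;
  // note that Eigen restricts the first angle to [0, pi].
  const Eigen::Vector3d zyx = q.toRotationMatrix().eulerAngles(2, 1, 0);
  std::cout << zyx.transpose() << std::endl;  // approximately 0.3 -0.2 0.1
  return 0;
}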

@@ -102,13 +101,38 @@ Scalar QuadrotorEnv::step(const Ref<Vector<>> act, Ref<Vector<>> obs) {
// update observations
getObs(obs);

// reward function design
Scalar stage_reward =
(quad_obs_ - goal_state_).transpose() * Q_ * (quad_obs_ - goal_state_);
Scalar act_reward =
(quad_act_ - act_mean_).transpose() * Q_act_ * (quad_act_ - act_mean_);

Scalar total_reward = stage_reward + act_reward + 1;
Matrix<3, 3> rot = quad_state_.q().toRotationMatrix();

// ---------------------- reward function design
// - position tracking
Scalar pos_reward =
pos_coeff_ * (quad_obs_.segment<quadenv::kNPos>(quadenv::kPos) -
goal_state_.segment<quadenv::kNPos>(quadenv::kPos))
.squaredNorm();
// - orientation tracking
Scalar ori_reward =
ori_coeff_ * (quad_obs_.segment<quadenv::kNOri>(quadenv::kOri) -
goal_state_.segment<quadenv::kNOri>(quadenv::kOri))
.squaredNorm();
// - linear velocity tracking
Scalar lin_vel_reward =
lin_vel_coeff_ * (quad_obs_.segment<quadenv::kNLinVel>(quadenv::kLinVel) -
goal_state_.segment<quadenv::kNLinVel>(quadenv::kLinVel))
.squaredNorm();
// - angular velocity tracking
Scalar ang_vel_reward =
ang_vel_coeff_ * (quad_obs_.segment<quadenv::kNAngVel>(quadenv::kAngVel) -
goal_state_.segment<quadenv::kNAngVel>(quadenv::kAngVel))
.squaredNorm();

// - control action penalty
Scalar act_reward = act_coeff_ * act.cast<Scalar>().norm();

Scalar total_reward =
pos_reward + ori_reward + lin_vel_reward + ang_vel_reward + act_reward;

// survival reward
total_reward += 0.1;

return total_reward;
}
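Splitting the old quadratic form into named per-term penalties makes each coefficient independently tunable from the YAML config and keeps every tracking term non-positive, while the flat +0.1 survival bonus keeps the total reward positive near the goal. Under the config above, for instance, hovering 1 m from the goal with all other errors zero yields roughly -0.002 * 1^2 + 0.1 = 0.098 per step (ignoring the small action penalty), so the agent is still paid for staying airborne but paid more for closing the distance.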
@@ -132,37 +156,17 @@ bool QuadrotorEnv::loadParam(const YAML::Node &cfg) {

if (cfg["rl"]) {
// load reinforcement learning related parameters
std::vector<Scalar> Q_pos(3), Q_ori(3), Q_lin_vel(3), Q_cmd(4);
Q_pos = cfg["rl"]["Q_pos"].as<std::vector<Scalar>>();
Q_ori = cfg["rl"]["Q_ori"].as<std::vector<Scalar>>();
Q_lin_vel = cfg["rl"]["Q_lin_vel"].as<std::vector<Scalar>>();
Q_cmd = cfg["rl"]["Q_cmd"].as<std::vector<Scalar>>();

Q_ =
(Vector<CtlObsAct::kObsSize>() << Q_pos[0], Q_pos[1], Q_pos[2], Q_ori[0],
Q_ori[1], Q_ori[2], Q_lin_vel[0], Q_lin_vel[1], Q_lin_vel[2])
.finished()
.asDiagonal();
Q_act_ =
(Vector<CtlObsAct::kActSize>() << Q_cmd[0], Q_cmd[1], Q_cmd[2], Q_cmd[3])
.finished()
.asDiagonal();
pos_coeff_ = cfg["rl"]["pos_coeff"].as<Scalar>();
ori_coeff_ = cfg["rl"]["ori_coeff"].as<Scalar>();
lin_vel_coeff_ = cfg["rl"]["lin_vel_coeff"].as<Scalar>();
ang_vel_coeff_ = cfg["rl"]["ang_vel_coeff"].as<Scalar>();
act_coeff_ = cfg["rl"]["act_coeff"].as<Scalar>();
} else {
return false;
}
return true;
}
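A self-contained sketch of the same yaml-cpp pattern, parsing the rl block from an inline string rather than quadrotor_env.yaml:

#include <iostream>
#include <yaml-cpp/yaml.h>

int main() {
  // Inline stand-in for flightlib/configs/quadrotor_env.yaml.
  const YAML::Node cfg = YAML::Load(
      "rl:\n"
      "  pos_coeff: -0.002\n"
      "  ori_coeff: -0.002\n");
  if (cfg["rl"]) {
    // as<Scalar>() in loadParam; plain double here. Missing keys throw.
    const double pos_coeff = cfg["rl"]["pos_coeff"].as<double>();
    const double ori_coeff = cfg["rl"]["ori_coeff"].as<double>();
    std::cout << pos_coeff << " " << ori_coeff << std::endl;
  }
  return 0;
}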

void QuadrotorEnv::getQ(
Ref<Matrix<CtlObsAct::kObsSize, CtlObsAct::kObsSize>> Q) const {
Q = Q_;
}

void QuadrotorEnv::getQAct(
Ref<Matrix<CtlObsAct::kActSize, CtlObsAct::kActSize>> Q_act) const {
Q_act = Q_act_;
}

bool QuadrotorEnv::getAct(Ref<Vector<>> act) const {
if (cmd_.t >= 0.0 && quad_act_.allFinite()) {
act = quad_act_;
@@ -192,8 +196,6 @@ std::ostream &operator<<(std::ostream &os, const QuadrotorEnv &quad_env) {
<< "act_std = [" << quad_env.act_std_.transpose() << "]\n"
<< "obs_mean = [" << quad_env.obs_mean_.transpose() << "]\n"
<< "obs_std = [" << quad_env.obs_std_.transpose() << "]\n"
<< "Q = [" << quad_env.Q_.diagonal().transpose() << "]\n"
<< "Q_act = [" << quad_env.Q_act_.diagonal().transpose()
<< "]" << std::endl;
os.precision();
return os;
9 changes: 5 additions & 4 deletions flightlib/src/objects/quadrotor.cpp
@@ -69,7 +69,8 @@ bool Quadrotor::run(const Scalar ctl_dt) {
integrator_ptr_->step(state_.x, sim_dt, next_state.x);

// update state and sim time
next_state.qx.normalize();
state_.qx /= state_.qx.norm();

//
state_.x = next_state.x;
remain_ctl_dt -= sim_dt;
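Numerical integration does not preserve the quaternion's unit norm, so the attitude has to be renormalized every step or the rotation slowly distorts. A minimal illustration of the drift and the fix, propagating q_dot = 0.5 * q ⊗ (0, omega) with naive Euler steps on a hypothetical constant body rate:

#include <Eigen/Geometry>
#include <iostream>

int main() {
  Eigen::Quaterniond q = Eigen::Quaterniond::Identity();
  const Eigen::Quaterniond omega(0.0, 0.0, 0.0, 2.0);  // body rate about z
  const double dt = 1e-2;
  for (int i = 0; i < 1000; ++i) {
    const Eigen::Quaterniond dq = q * omega;
    q.coeffs() += 0.5 * dt * dq.coeffs();  // unconstrained update: norm drifts
  }
  std::cout << "norm before renormalizing: " << q.norm() << "\n";
  q.normalize();  // the per-step fix applied in Quadrotor::run()
  std::cout << "norm after renormalizing:  " << q.norm() << std::endl;
  return 0;
}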
@@ -224,10 +225,10 @@ bool Quadrotor::updateDynamics(const QuadrotorDynamics &dynamics) {
return false;
}
dynamics_ = dynamics;
// integrator_ptr_ =
// std::make_unique<IntegratorRK4>(dynamics_.getDynamicsFunction(), 2.5e-3);
integrator_ptr_ =
std::make_unique<IntegratorEuler>(dynamics_.getDynamicsFunction(), 2.5e-3);
std::make_unique<IntegratorRK4>(dynamics_.getDynamicsFunction(), 2.5e-3);
// integrator_ptr_ =
// std::make_unique<IntegratorEuler>(dynamics_.getDynamicsFunction(), 2.5e-3);

B_allocation_ = dynamics_.getAllocationMatrix();
B_allocation_inv_ = B_allocation_.inverse();
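The commit also swaps the forward-Euler integrator for classical fourth-order Runge-Kutta at the same 2.5e-3 s sub-step, trading four dynamics evaluations per step for roughly O(h^4) global accuracy instead of O(h). A generic sketch of one RK4 step (not the flightlib IntegratorRK4 interface):

#include <Eigen/Dense>
#include <functional>

using State = Eigen::VectorXd;
using Dynamics = std::function<State(const State &)>;

// One classical RK4 step for x_dot = f(x): four slope samples, weighted 1-2-2-1.
State rk4Step(const Dynamics &f, const State &x, double dt) {
  const State k1 = f(x);
  const State k2 = f(x + 0.5 * dt * k1);
  const State k3 = f(x + 0.5 * dt * k2);
  const State k4 = f(x + dt * k3);
  return x + (dt / 6.0) * (k1 + 2.0 * k2 + 2.0 * k3 + k4);
}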
20 changes: 2 additions & 18 deletions flightlib/tests/envs/quadrotor_env/quadrotor_env.cpp
@@ -36,34 +36,16 @@ TEST(QuadrotorEnv, Constructor) {

// evaluate parameters
Scalar expect_sim_dt = cfg["quadrotor_env"]["sim_dt"].as<Scalar>();
std::vector<Scalar> Q_pos(3), Q_ori(3), Q_lin_vel(3), Q_cmd(4);
Q_pos = cfg["rl"]["Q_pos"].as<std::vector<Scalar>>();
Q_ori = cfg["rl"]["Q_ori"].as<std::vector<Scalar>>();
Q_lin_vel = cfg["rl"]["Q_lin_vel"].as<std::vector<Scalar>>();
Q_cmd = cfg["rl"]["Q_cmd"].as<std::vector<Scalar>>();
Matrix<OBS_DIM, OBS_DIM> expect_Q =
(Vector<OBS_DIM>() << Q_pos[0], Q_pos[1], Q_pos[2], Q_ori[0], Q_ori[1],
Q_ori[2], Q_lin_vel[0], Q_lin_vel[1], Q_lin_vel[2])
.finished()
.asDiagonal();
Matrix<ACT_DIM, ACT_DIM> expect_Q_act =
(Vector<ACT_DIM>() << Q_cmd[0], Q_cmd[1], Q_cmd[2], Q_cmd[3])
.finished()
.asDiagonal();

const int obs_dim1 = env1.getObsDim();
const int act_dim1 = env1.getActDim();
const Scalar sim_dt = env1.getSimTimeStep();
Matrix<OBS_DIM, OBS_DIM> Q;
Matrix<ACT_DIM, ACT_DIM> Q_act;
env1.getQ(Q);
env1.getQAct(Q_act);

EXPECT_EQ(obs_dim1, OBS_DIM);
EXPECT_EQ(act_dim1, ACT_DIM);
EXPECT_EQ(expect_sim_dt, sim_dt);
EXPECT_TRUE(Q.isApprox(expect_Q));
EXPECT_TRUE(Q_act.isApprox(expect_Q_act));

//
std::cout << env1 << std::endl;
@@ -85,6 +67,8 @@ TEST(QuadrotorEnv, ResetEnv) {
EXPECT_EQ(obs(0), 0.0);
EXPECT_EQ(obs(1), 0.0);
EXPECT_EQ(obs(2), 0.0);

env.reset(obs);
}

TEST(QuadrotorEnv, StepEnv) {
9 changes: 3 additions & 6 deletions flightlib/tests/objects/quadrotor.cpp
@@ -102,7 +102,7 @@ TEST(Quadrotor, RunQuadCmdFeedThrough) {
quad.getState(&final_state);

EXPECT_NEAR(final_state.t, quad_state.t, 1e-9);
EXPECT_TRUE(quad_state.x.isApprox(final_state.x));
EXPECT_TRUE(quad_state.x.isApprox(final_state.x, 1e-3));

// free fall
quad_state.setZero();
@@ -128,9 +128,7 @@
quad.getState(&final_state);

EXPECT_NEAR(final_state.t, quad_state.t, 1e-9);
std::cout << quad_state.x << std::endl;
std::cout << final_state.x << std::endl;
EXPECT_TRUE(quad_state.x.isApprox(final_state.x));
EXPECT_TRUE(quad_state.x.isApprox(final_state.x, 1e-3));

// taking off
quad_state.setZero();
@@ -156,11 +154,10 @@
quad_state.t += ctl_dt;
}

std::cout << quad_state << std::endl;
final_state.setZero();
quad.getState(&final_state);
EXPECT_NEAR(final_state.t, quad_state.t, 1e-9);
EXPECT_TRUE(quad_state.x.isApprox(final_state.x));
EXPECT_TRUE(quad_state.x.isApprox(final_state.x, 1e-3));
EXPECT_GT(quad_state.x(QS::POSZ), 1.0);
}
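Eigen's isApprox(other, prec) uses relative precision — for vectors, ||a - b|| <= prec * min(||a||, ||b||) — so the loosened 1e-3 tolerance presumably absorbs the small mismatch introduced by the integrator change while still catching real divergence. A tiny demonstration:

#include <Eigen/Dense>
#include <cassert>

int main() {
  const Eigen::Vector3d a(1.0, 2.0, 3.0);
  const Eigen::Vector3d b = a + Eigen::Vector3d::Constant(1e-4);
  // ||a - b|| is about 1.7e-4 against a norm of about 3.74:
  assert(a.isApprox(b, 1e-3));   // passes at the loosened test tolerance
  assert(!a.isApprox(b, 1e-6));  // fails at a tight one
  return 0;
}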

5 changes: 5 additions & 0 deletions flightrender/.gitignore
@@ -0,0 +1,5 @@
# Ignore everything in this directory
*
# Except this file
!.gitignore
!.clang-format
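This is the standard whitelist idiom: ignore everything in the directory, then re-include only the files that should stay under version control (here the .gitignore itself and the .clang-format).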
2 changes: 1 addition & 1 deletion flightrl/examples/run_drone_control.py
@@ -37,7 +37,7 @@ def parser():
help="Directory where to load weights")
parser.add_argument('-m', '--mode', type=str, default='train',
help='set mode either train or test')
parser.add_argument('-w', '--weight', type=str, default='./saved/2020-08-27-21-48-07.zip',
parser.add_argument('-w', '--weight', type=str, default='./saved/2020-08-28-13-54-34_Iteration_393.zip',
help='trained weight path')
return parser

Expand Down
