Skip to content

Commit

Permalink
tweak scoring - deliberate practice v.04
Browse files Browse the repository at this point in the history
  • Loading branch information
Sohojoe committed May 7, 2018
1 parent 32261bb commit c9b6afb
Showing 1 changed file with 33 additions and 12 deletions.
45 changes: 33 additions & 12 deletions Assets/MLA-MujocoUnity/Scripts/MujocoAgent.cs
Original file line number Diff line number Diff line change
Expand Up @@ -422,9 +422,10 @@ float GetEffort(string[] ignorJoints = null)
for (int i = 0; i < Actions.Count; i++)
{
var name = _mujocoController.MujocoJoints[i].JointName;
var jointEffort = Mathf.Pow(Mathf.Abs(_actions[i]),2);
if (!ignorJoints.Contains(name))
effort += jointEffort;
if (ignorJoints != null && ignorJoints.Contains(name))
continue;
var jointEffort = Mathf.Pow(Mathf.Abs(Actions[i]),2);
effort += jointEffort;
}
return (float)effort;
// var effort = _actions
Expand All @@ -434,6 +435,21 @@ float GetEffort(string[] ignorJoints = null)
// // Monitor.Log("effort", effort, MonitorType.text);
// return effort;
}
/// <summary>
/// Computes a penalty proportional to the number of actions whose magnitude
/// is at or beyond 1.
/// </summary>
/// <param name="ignorJoints">
/// Optional joint names to exclude from the count. When null, every joint is considered.
/// </param>
/// <returns>0.2 for each counted action with an absolute value of at least 1.</returns>
float GetJointsAtLimitPenality(string[] ignorJoints = null)
{
    // Fixed cost charged for each action that is at its limit.
    const float penaltyPerJoint = 0.2f;

    int saturatedJoints = 0;
    for (int idx = 0; idx < Actions.Count; idx++)
    {
        // Action idx is paired with the joint at the same index in the controller's joint list.
        var jointName = _mujocoController.MujocoJoints[idx].JointName;
        bool excluded = ignorJoints != null && ignorJoints.Contains(jointName);

        // NOTE(review): treats |action| >= 1 as "at limit"; presumably actions are
        // normalised to [-1, 1] - confirm against the agent's action handling.
        if (!excluded && Mathf.Abs(Actions[idx]) >= 1f)
        {
            saturatedJoints++;
        }
    }

    return saturatedJoints * penaltyPerJoint;
}
float GetEffortSum()
{
var effort = Actions
Expand Down Expand Up @@ -540,25 +556,30 @@ float StepReward_OaiHumanoidRun()
float limbPenalty = leftThighPenality + rightThighPenality + leftUarmPenality + rightUarmPenality;
limbPenalty = Mathf.Min(0.5f, limbPenalty);
// GetDirectionDebug("right_thigh");
float rightThighBonus = Mathf.Abs(GetUprightBonus("right_thigh")) / 2;
float leftThighBonus = Mathf.Abs(GetUprightBonus("left_thigh")) / 2;
float thighBonus = Mathf.Min(0.25f, leftThighBonus+rightThighBonus);

float effort = GetEffort(new string []{"right_hip_y", "left_hip_y"});
var effortPenality = 0.2f * (float)effort;
float rightThighBonus = GetUprightBonus("right_thigh");
float leftThighBonus = GetUprightBonus("left_thigh");
float thighBonus = Mathf.Abs(Mathf.Max(leftThighBonus, rightThighBonus));
thighBonus = Mathf.Min(0.25f, thighBonus / 2);
var jointsAtLimitPenality = GetJointsAtLimitPenality();
float effort = GetEffort(new string []{"right_hip_y", "right_knee", "left_hip_y", "left_knee"});
var effortPenality = 0.5f * (float)effort;
var reward = velocity
+ shouldersUprightBonus
+ pelvisUprightBonus
+ headForwardBonus
+ pelvisForwardBonus
+ thighBonus
- heightPenality
- effortPenality
- limbPenalty;
- limbPenalty
- jointsAtLimitPenality
- effortPenality;
// - armPenalty;
if (ShowMonitor) {
// var hist = new []{reward,velocity, shouldersUprightBonus, pelvisUprightBonus, headForwardBonus,- heightPenality,-effortPenality}.ToList();
var hist = new []{reward,velocity, shouldersUprightBonus, pelvisUprightBonus, headForwardBonus, pelvisForwardBonus, thighBonus,- heightPenality,-effortPenality, -limbPenalty}.ToList();
var hist = new []{
reward, velocity, shouldersUprightBonus, pelvisUprightBonus,
headForwardBonus, pelvisForwardBonus, thighBonus,
-heightPenality, -limbPenalty, -jointsAtLimitPenality, -effortPenality}.ToList();
Monitor.Log("rewardHist", hist, MonitorType.hist);
}
return reward;
Expand Down

0 comments on commit c9b6afb

Please sign in to comment.