Merge pull request #24 from osudrl/development
Development
yeshg authored Feb 22, 2020
2 parents 5016a3a + 585d1fa commit 875b9ad
Showing 10 changed files with 178 additions and 216 deletions.
apex.py: 143 changes (74 additions, 69 deletions)
@@ -144,95 +144,100 @@ def eval_policy(policy, args, run_args):
import termios
import select
import numpy as np
from cassie import CassieEnv, CassieStandingEnv

def isData():
return select.select([sys.stdin], [], [], 0) == ([sys.stdin], [], [])

max_traj_len = args.traj_len
visualize = True

env = env_factory(run_args.env_name, traj=run_args.traj, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random, mirror=run_args.mirror, clock_based=run_args.clock_based, history=run_args.history)()
if run_args.env_name == "Cassie-v0":
env = CassieEnv(traj=run_args.traj, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, history=run_args.history)
else:
env = CassieStandingEnv(state_est=run_args.state_est)


old_settings = termios.tcgetattr(sys.stdin)

orient_add = 0

try:
tty.setcbreak(sys.stdin.fileno())

state = env.reset_for_test()
done = False
timesteps = 0
eval_reward = 0
speed = 0.0

while True:

if isData():
c = sys.stdin.read(1)
if c == 'w':
speed += 0.1
elif c == 's':
speed -= 0.1
elif c == 'l':
orient_add += .1
print("Increasing orient_add to: ", orient_add)
elif c == 'k':
orient_add -= .1
print("Decreasing orient_add to: ", orient_add)
elif c == 'p':
push = 100
push_dir = 2
force_arr = np.zeros(6)
force_arr[push_dir] = push
env.sim.apply_force(force_arr)

env.update_speed(speed)
print("speed: ", env.speed)

state = env.reset()
done = False
timesteps = 0
eval_reward = 0

while not done and timesteps < max_traj_len:

if isData():
c = sys.stdin.read(1)
if c == 'w':
env.speed += .1
print("speed: ", env.speed)
elif c == 's':
env.speed -= .1
print("speed: ", env.speed)
elif c == 'l':
orient_add += .1
print("Increasing orient_add to: ", orient_add)
elif c == 'k':
orient_add -= .1
print("Decreasing orient_add to: ", orient_add)
elif c == 'p':
push = 100
push_dir = 2
force_arr = np.zeros(6)
force_arr[push_dir] = push
env.sim.apply_force(force_arr)

# Update Orientation
quaternion = euler2quat(z=orient_add, y=0, x=0)
iquaternion = inverse_quaternion(quaternion)

if env.state_est:
curr_orient = state[1:5]
curr_transvel = state[14:17]
else:
curr_orient = state[2:6]
curr_transvel = state[20:23]

new_orient = quaternion_product(iquaternion, curr_orient)
# Update Orientation
quaternion = euler2quat(z=orient_add, y=0, x=0)
iquaternion = inverse_quaternion(quaternion)

if new_orient[0] < 0:
new_orient = -new_orient
if env.state_est:
curr_orient = state[1:5]
curr_transvel = state[14:17]
else:
curr_orient = state[2:6]
curr_transvel = state[20:23]

new_orient = quaternion_product(iquaternion, curr_orient)

if new_orient[0] < 0:
new_orient = -new_orient

new_translationalVelocity = rotate_by_quaternion(curr_transvel, iquaternion)

if env.state_est:
state[1:5] = torch.FloatTensor(new_orient)
state[14:17] = torch.FloatTensor(new_translationalVelocity)
# state[0] = 1 # For use with StateEst. Replicate hack that height is always set to one on hardware.
else:
state[2:6] = torch.FloatTensor(new_orient)
state[20:23] = torch.FloatTensor(new_translationalVelocity)

new_translationalVelocity = rotate_by_quaternion(curr_transvel, iquaternion)
if hasattr(env, 'simrate'):
start = time.time()

if env.state_est:
state[1:5] = torch.FloatTensor(new_orient)
state[14:17] = torch.FloatTensor(new_translationalVelocity)
# state[0] = 1 # For use with StateEst. Replicate hack that height is always set to one on hardware.
else:
state[2:6] = torch.FloatTensor(new_orient)
state[20:23] = torch.FloatTensor(new_translationalVelocity)

if hasattr(env, 'simrate'):
start = time.time()

action = policy.forward(torch.Tensor(state)).detach().numpy()
state, reward, done, _ = env.step(action)
if visualize:
env.render()
eval_reward += reward
timesteps += 1

if hasattr(env, 'simrate'):
# assume 30hz (hack)
end = time.time()
delaytime = max(0, 1000 / 30000 - (end-start))
time.sleep(delaytime)

print("Eval reward: ", eval_reward)
action = policy.forward(torch.Tensor(state), deterministic=True).detach().numpy()
state, reward, done, _ = env.step(action)
if visualize:
env.render()
eval_reward += reward
timesteps += 1

if hasattr(env, 'simrate'):
# assume 30hz (hack)
end = time.time()
delaytime = max(0, 1000 / 30000 - (end-start))
time.sleep(delaytime)

print("Eval reward: ", eval_reward)

finally:
termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
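
A minimal standalone sketch (not from the diff) of the yaw-offset correction above: the policy input is rotated by the inverse of the commanded heading offset orient_add, so the policy keeps seeing the pelvis orientation and translational velocity in its trained frame. Quaternions are assumed to be w-first [w, x, y, z], matching the new_orient[0] < 0 sign fix; the helper names and input values below are illustrative stand-ins for euler2quat, inverse_quaternion, quaternion_product, and rotate_by_quaternion.

import numpy as np

def yaw_quat(yaw):
    # Unit quaternion for a rotation of `yaw` radians about the z axis.
    return np.array([np.cos(yaw / 2), 0.0, 0.0, np.sin(yaw / 2)])

def quat_inverse(q):
    # For a unit quaternion the inverse is the conjugate.
    return np.array([q[0], -q[1], -q[2], -q[3]])

def quat_product(a, b):
    # Hamilton product of two w-first quaternions.
    w1, x1, y1, z1 = a
    w2, x2, y2, z2 = b
    return np.array([w1*w2 - x1*x2 - y1*y2 - z1*z2,
                     w1*x2 + x1*w2 + y1*z2 - z1*y2,
                     w1*y2 - x1*z2 + y1*w2 + z1*x2,
                     w1*z2 + x1*y2 - y1*x2 + z1*w2])

def rotate_vec(v, q):
    # Rotate 3-vector v by quaternion q: v' = q * (0, v) * q^-1.
    qv = np.concatenate(([0.0], v))
    return quat_product(quat_product(q, qv), quat_inverse(q))[1:]

orient_add = 0.3                                  # commanded yaw offset (rad), illustrative
iq = quat_inverse(yaw_quat(orient_add))
curr_orient = np.array([1.0, 0.0, 0.0, 0.0])      # pelvis orientation slice of the state
curr_transvel = np.array([0.5, 0.0, 0.0])         # pelvis translational velocity slice
new_orient = quat_product(iq, curr_orient)
if new_orient[0] < 0:                             # keep the scalar part non-negative
    new_orient = -new_orient
new_transvel = rotate_vec(curr_transvel, iq)      # written back into the state before policy.forward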
cassie/cassie.py: 32 changes (29 additions, 3 deletions)
@@ -25,7 +25,7 @@ def __init__(self, traj='walking', simrate=60, clock_based=False, state_est=Fals
self.no_delta = no_delta
self.dynamics_randomization = dynamics_randomization

# Configure reference trajectory to use
# CONFIGURE REF TRAJECTORY to use
if traj == "aslip":
self.speeds = np.array([x / 10 for x in range(0, 21)])
self.trajectories = getAllTrajectories(self.speeds)
@@ -82,7 +82,12 @@ def __init__(self, traj='walking', simrate=60, clock_based=False, state_est=Fals
self.pos_index = np.array([1,2,3,4,5,6,7,8,9,14,15,16,20,21,22,23,28,29,30,34])
self.vel_index = np.array([0,1,2,3,4,5,6,7,8,12,13,14,18,19,20,21,25,26,27,31])

self.offset = np.array([0.0045, 0.0, 0.4973, -1.1997, -1.5968, 0.0045, 0.0, 0.4973, -1.1997, -1.5968])
# CONFIGURE OFFSET for No Delta Policies
if self.aslip_traj:
ref_pos, ref_vel = self.get_ref_state(self.phase)
self.offset = ref_pos[self.pos_idx]
else:
self.offset = np.array([0.0045, 0.0, 0.4973, -1.1997, -1.5968, 0.0045, 0.0, 0.4973, -1.1997, -1.5968])

# global flat foot orientation, can be useful part of reward function:
self.global_initial_foot_orient = np.array([-0.24135469773826795, -0.24244324494623198, -0.6659363823866352, 0.6629463911006771])
@@ -445,6 +450,21 @@ def reset_for_test(self):

return actor_state

# Helper function for updating the speed, used in visualization tests
def update_speed(self, new_speed):
if self.aslip_traj:
self.speed = new_speed
self.trajectory = self.trajectories[(np.abs(self.speeds - self.speed)).argmin()]
old_phaselen = self.phaselen
self.phaselen = self.trajectory.length - 1
# update phase
self.phase = int(self.phaselen * self.phase / old_phaselen)
# new offset
ref_pos, ref_vel = self.get_ref_state(self.phase)
self.offset = ref_pos[self.pos_idx]
else:
self.speed = new_speed

# NOTE: this reward is slightly different from the one in Xie et al
# see notes for details
def compute_reward(self, action):
@@ -491,7 +511,8 @@ def get_ref_state(self, phase=None):
pos[1] = 0

vel = np.copy(self.trajectory.qvel[phase * self.simrate]) if not self.aslip_traj else np.copy(self.trajectory.qvel[phase])
vel[0] *= self.speed
if not self.aslip_traj:
vel[0] *= self.speed

return pos, vel

@@ -514,19 +535,24 @@ def get_full_state(self):
# trajectory despite being global coord. Y is only invariant to straight
# line trajectories.

# CLOCK BASED (NO TRAJECTORY)
if self.clock_based and not self.aslip_traj:
clock = [np.sin(2 * np.pi * self.phase / self.phaselen),
np.cos(2 * np.pi * self.phase / self.phaselen)]

ext_state = np.concatenate((clock, [self.speed]))

# ASLIP TRAJECTORY
elif self.aslip_traj:
if(self.phase == 0):
ext_state = np.concatenate(get_ref_aslip_ext_state(self, self.phaselen - 1))
else:
ext_state = np.concatenate(get_ref_aslip_ext_state(self, self.phase))

# OTHER TRAJECTORY
else:
ext_state = np.concatenate([ref_pos[self.pos_index], ref_vel[self.vel_index]])
print(ext_state.shape)

# Use state estimator
robot_state = np.concatenate([
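
A short sketch (not from the diff) of the external input labeled "CLOCK BASED (NO TRAJECTORY)" above: when no reference trajectory drives the policy, get_full_state() appends a two-dimensional phase clock and the commanded speed to the robot state. The phase, phaselen, and speed values below are illustrative.

import numpy as np

phase, phaselen, speed = 12, 27, 0.8              # illustrative values
clock = [np.sin(2 * np.pi * phase / phaselen),    # phase clock, one full period per gait cycle
         np.cos(2 * np.pi * phase / phaselen)]
ext_state = np.concatenate((clock, [speed]))      # 3 extra policy inputs: sin, cos, speed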
deprecated/renderpol.py: 49 changes (19 additions, 30 deletions)
@@ -1,40 +1,29 @@
import sys
sys.path.append("..") # Adds higher directory to python modules path.

from rl.utils import renderpolicy, rendermultipolicy, renderpolicy_speedinput, rendermultipolicy_speedinput
from rl.distributions.gaussian import GaussianMLP
from cassie import CassieEnv
# from rl.policies import GaussianMLP, BetaMLP

# from cassie.slipik_env import CassieIKEnv
# from cassie.no_delta_env import CassieEnv_nodelta
# from cassie.speed_env import CassieEnv_speed
# from cassie.speed_double_freq_env import CassieEnv_speed_dfreq
# from cassie.speed_no_delta_env import CassieEnv_speed_no_delta
# from cassie.speed_no_delta_neutral_foot_env import CassieEnv_speed_no_delta_neutral_foot
# from cassie.standing_env import CassieEnv_stand
# from cassie.speed_sidestep_env import CassieEnv_speed_sidestep
from cassie.aslipik_unified_env import UnifiedCassieIKEnv
from cassie.aslipik_unified_env_alt_reward import UnifiedCassieIKEnvAltReward
from cassie.aslipik_unified_env_task_reward import UnifiedCassieIKEnvTaskReward
from cassie.aslipik_unified_no_delta_env import UnifiedCassieIKEnvNoDelta

import torch

import numpy as np
import os
import time

# cassie_env = CassieEnv("walking", clock_based=True, state_est=True)
# cassie_env = CassieEnv_nodelta("walking", clock_based=True, state_est=False)
# cassie_env = CassieEnv_speed("walking", clock_based=True, state_est=True)
# cassie_env = CassieEnv_speed_dfreq("walking", clock_based=True, state_est=False)
# cassie_env = CassieEnv_speed_no_delta("walking", clock_based=True, state_est=False)
# cassie_env = CassieEnv_speed_no_delta_neutral_foot("walking", clock_based=True, state_est=True)
# cassie_env = CassieEnv_speed_sidestep("walking", clock_based=True, state_est=True)
cassie_env = UnifiedCassieIKEnvNoDelta("walking", clock_based=True, state_est=True, debug=True)
# cassie_env = CassieEnv_stand(state_est=False)

# policy = torch.load("./trained_models/stiff_spring/stiff_StateEst_speed2.pt")
# policy = torch.load("./trained_models/sidestep_StateEst_footxypenaltysmall_forcepenalty_hipyaw_limittargs_pelaccel3_speed-05-1_side03_freq1.pt")
policy = torch.load("./trained_models/aslip_unified_no_delta_70_TS_only.pt")
# policy = torch.load("./trained_models/aslip_unified_no_delta_0_v4.pt")
import argparse
import pickle

parser = argparse.ArgumentParser()
parser.add_argument("--path", type=str, default="./trained_models/ppo/Cassie-v0/7b7e24-seed0/", help="path to folder containing policy and run details")
args = parser.parse_args()
run_args = pickle.load(open(args.path + "experiment.pkl", "rb"))

cassie_env = CassieEnv(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random, no_delta=run_args.no_delta)
policy = torch.load(args.path + "actor.pt")
policy.eval()
renderpolicy_speedinput(cassie_env, policy, deterministic=True, dt=0.05, speedup = 2)

# cassie_env = CassieEnv(traj="aslip", clock_based=False, state_est=True, dynamics_randomization=False, no_delta=False)
# policy = torch.load(args.path + "aslip_unified_task10_v7.pt")
# policy.eval()

renderpolicy_speedinput(cassie_env, policy, deterministic=False, dt=0.05, speedup = 2)
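
The new renderpol.py reloads the environment arguments from experiment.pkl next to the saved actor, so the evaluation environment is rebuilt exactly as it was configured for training. A hedged sketch of the matching save-side convention this assumes (save_run and its default path are illustrative, not code from this repository):

import os
import pickle
import torch

def save_run(policy, args, save_dir="./trained_models/ppo/Cassie-v0/7b7e24-seed0/"):
    # Store the env-construction arguments alongside the actor so the eval scripts
    # (renderpol.py, eval_perturb.py, apex.py) can rebuild the same environment.
    os.makedirs(save_dir, exist_ok=True)
    with open(os.path.join(save_dir, "experiment.pkl"), "wb") as f:
        pickle.dump(args, f)
    torch.save(policy, os.path.join(save_dir, "actor.pt"))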
File renamed without changes
tools/eval_perturb.py: 9 changes (4 additions, 5 deletions)
@@ -81,7 +81,7 @@ def compute_perturbs(cassie_env, policy, wait_time, perturb_duration, perturb_si
print("search time: ", time.time() - curr_start)

print("Total compute time: ", time.time() - eval_start)
# np.save("test_perturb_eval_phase.npy", max_force)
np.save("test_perturb_eval_phase.npy", max_force)

@ray.remote
@torch.no_grad()
@@ -226,10 +226,10 @@ def plot_perturb(filename):
ax_image.imshow(img, alpha=.3)
ax_image.axis('off')
plt.show()
# plt.savefig("./test_perturb_eval_phase.png")
plt.savefig("./test_perturb_eval_phase.png")

# plot_perturb("./test_perturb_eval_phase.npy")
# exit()
plot_perturb("./test_perturb_eval_phase.npy")
exit()

import argparse
import pickle
@@ -246,7 +246,6 @@ def plot_perturb(filename):
# env_fn = partial(CassieEnv_speed_no_delta_neutral_foot, "walking", clock_based=True, state_est=True)
cassie_env = CassieEnv(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random)
policy = torch.load(args.path + "actor.pt")
policy.eval()

wait_time = 4
perturb_duration = 0.2
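
A hedged sketch (not part of the diff) of a timed push during a rollout, in the spirit of the perturbation sweep above and the 'p' key handler in apex.py. env.sim.apply_force and policy.forward(..., deterministic=True) appear in the diff; the apply_push helper, the [Fx, Fy, Fz, Mx, My, Mz] layout of the 6-element force array, and the 30 Hz policy rate are assumptions.

import numpy as np
import torch

def apply_push(env, policy, state, force=100.0, direction=1, duration=0.2, policy_rate=30):
    # Hold a constant external force on the pelvis for `duration` seconds of policy steps.
    force_arr = np.zeros(6)
    force_arr[direction] = force                  # e.g. direction=1 for a lateral push
    done = False
    for _ in range(int(duration * policy_rate)):
        env.sim.apply_force(force_arr)            # assumed to set the external wrench each step
        action = policy.forward(torch.Tensor(state), deterministic=True).detach().numpy()
        state, _, done, _ = env.step(action)
        if done:
            break
    env.sim.apply_force(np.zeros(6))              # clear the push afterwards
    return state, done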