diff --git a/apex.py b/apex.py
index 790d0964..240b412c 100644
--- a/apex.py
+++ b/apex.py
@@ -144,6 +144,7 @@ def eval_policy(policy, args, run_args):
     import termios
     import select
     import numpy as np
+    from cassie import CassieEnv, CassieStandingEnv
 
     def isData():
         return select.select([sys.stdin], [], [], 0) == ([sys.stdin], [], [])
@@ -151,8 +152,12 @@ def isData():
     max_traj_len = args.traj_len
     visualize = True
 
-    env = env_factory(run_args.env_name, traj=run_args.traj, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random, mirror=run_args.mirror, clock_based=run_args.clock_based, history=run_args.history)()
+    if run_args.env_name == "Cassie-v0":
+        env = CassieEnv(traj=run_args.traj, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, history=run_args.history)
+    else:
+        env = CassieStandingEnv(state_est=run_args.state_est)
+
     old_settings = termios.tcgetattr(sys.stdin)
 
     orient_add = 0
@@ -160,79 +165,79 @@ def isData():
 
     try:
         tty.setcbreak(sys.stdin.fileno())
 
+        state = env.reset_for_test()
+        done = False
+        timesteps = 0
+        eval_reward = 0
+        speed = 0.0
+        while True:
+
+            if isData():
+                c = sys.stdin.read(1)
+                if c == 'w':
+                    speed += 0.1
+                elif c == 's':
+                    speed -= 0.1
+                elif c == 'l':
+                    orient_add += .1
+                    print("Increasing orient_add to: ", orient_add)
+                elif c == 'k':
+                    orient_add -= .1
+                    print("Decreasing orient_add to: ", orient_add)
+                elif c == 'p':
+                    push = 100
+                    push_dir = 2
+                    force_arr = np.zeros(6)
+                    force_arr[push_dir] = push
+                    env.sim.apply_force(force_arr)
+
+            env.update_speed(speed)
+            print("speed: ", env.speed)
 
-        state = env.reset()
-        done = False
-        timesteps = 0
-        eval_reward = 0
-
-        while not done and timesteps < max_traj_len:
-
-            if isData():
-                c = sys.stdin.read(1)
-                if c == 'w':
-                    env.speed += .1
-                    print("speed: ", env.speed)
-                elif c == 's':
-                    env.speed -= .1
-                    print("speed: ", env.speed)
-                elif c == 'l':
-                    orient_add += .1
-                    print("Increasing orient_add to: ", orient_add)
-                elif c == 'k':
-                    orient_add -= .1
-                    print("Decreasing orient_add to: ", orient_add)
-                elif c == 'p':
-                    push = 100
-                    push_dir = 2
-                    force_arr = np.zeros(6)
-                    force_arr[push_dir] = push
-                    env.sim.apply_force(force_arr)
-
-            # Update Orientation
-            quaternion = euler2quat(z=orient_add, y=0, x=0)
-            iquaternion = inverse_quaternion(quaternion)
-
-            if env.state_est:
-                curr_orient = state[1:5]
-                curr_transvel = state[14:17]
-            else:
-                curr_orient = state[2:6]
-                curr_transvel = state[20:23]
-
-            new_orient = quaternion_product(iquaternion, curr_orient)
+            # Update Orientation
+            quaternion = euler2quat(z=orient_add, y=0, x=0)
+            iquaternion = inverse_quaternion(quaternion)
 
-            if new_orient[0] < 0:
-                new_orient = -new_orient
+            if env.state_est:
+                curr_orient = state[1:5]
+                curr_transvel = state[14:17]
+            else:
+                curr_orient = state[2:6]
+                curr_transvel = state[20:23]
+
+            new_orient = quaternion_product(iquaternion, curr_orient)
+
+            if new_orient[0] < 0:
+                new_orient = -new_orient
+
+            new_translationalVelocity = rotate_by_quaternion(curr_transvel, iquaternion)
+
+            if env.state_est:
+                state[1:5] = torch.FloatTensor(new_orient)
+                state[14:17] = torch.FloatTensor(new_translationalVelocity)
+                # state[0] = 1 # For use with StateEst. Replicate hack that height is always set to one on hardware.
+            else:
+                state[2:6] = torch.FloatTensor(new_orient)
+                state[20:23] = torch.FloatTensor(new_translationalVelocity)
 
-            new_translationalVelocity = rotate_by_quaternion(curr_transvel, iquaternion)
+            if hasattr(env, 'simrate'):
+                start = time.time()
 
-            if env.state_est:
-                state[1:5] = torch.FloatTensor(new_orient)
-                state[14:17] = torch.FloatTensor(new_translationalVelocity)
-                # state[0] = 1 # For use with StateEst. Replicate hack that height is always set to one on hardware.
-            else:
-                state[2:6] = torch.FloatTensor(new_orient)
-                state[20:23] = torch.FloatTensor(new_translationalVelocity)
-
-            if hasattr(env, 'simrate'):
-                start = time.time()
-
-            action = policy.forward(torch.Tensor(state)).detach().numpy()
-            state, reward, done, _ = env.step(action)
-            if visualize:
-                env.render()
-            eval_reward += reward
-            timesteps += 1
-
-            if hasattr(env, 'simrate'):
-                # assume 30hz (hack)
-                end = time.time()
-                delaytime = max(0, 1000 / 30000 - (end-start))
-                time.sleep(delaytime)
-
-        print("Eval reward: ", eval_reward)
+            action = policy.forward(torch.Tensor(state), deterministic=True).detach().numpy()
+            state, reward, done, _ = env.step(action)
+            if visualize:
+                env.render()
+            eval_reward += reward
+            timesteps += 1
+
+            if hasattr(env, 'simrate'):
+                # assume 30hz (hack)
+                end = time.time()
+                delaytime = max(0, 1000 / 30000 - (end-start))
+                time.sleep(delaytime)
+
+            print("Eval reward: ", eval_reward)
 
     finally:
         termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings)
diff --git a/cassie/cassie.py b/cassie/cassie.py
index 3f14cb9d..ca2230f6 100644
--- a/cassie/cassie.py
+++ b/cassie/cassie.py
@@ -25,7 +25,7 @@ def __init__(self, traj='walking', simrate=60, clock_based=False, state_est=Fals
         self.no_delta = no_delta
         self.dynamics_randomization = dynamics_randomization
 
-        # Configure reference trajectory to use
+        # CONFIGURE REF TRAJECTORY to use
         if traj == "aslip":
            self.speeds = np.array([x / 10 for x in range(0, 21)])
            self.trajectories = getAllTrajectories(self.speeds)
@@ -82,7 +82,12 @@ def __init__(self, traj='walking', simrate=60, clock_based=False, state_est=Fals
         self.pos_index = np.array([1,2,3,4,5,6,7,8,9,14,15,16,20,21,22,23,28,29,30,34])
         self.vel_index = np.array([0,1,2,3,4,5,6,7,8,12,13,14,18,19,20,21,25,26,27,31])
 
-        self.offset = np.array([0.0045, 0.0, 0.4973, -1.1997, -1.5968, 0.0045, 0.0, 0.4973, -1.1997, -1.5968])
+        # CONFIGURE OFFSET for No Delta Policies
+        if self.aslip_traj:
+            ref_pos, ref_vel = self.get_ref_state(self.phase)
+            self.offset = ref_pos[self.pos_idx]
+        else:
+            self.offset = np.array([0.0045, 0.0, 0.4973, -1.1997, -1.5968, 0.0045, 0.0, 0.4973, -1.1997, -1.5968])
 
         # global flat foot orientation, can be useful part of reward function:
         self.global_initial_foot_orient = np.array([-0.24135469773826795, -0.24244324494623198, -0.6659363823866352, 0.6629463911006771])
@@ -445,6 +450,21 @@ def reset_for_test(self):
 
         return actor_state
 
+    # Helper function for updating the speed, used in visualization tests
+    def update_speed(self, new_speed):
+        if self.aslip_traj:
+            self.speed = new_speed
+            self.trajectory = self.trajectories[(np.abs(self.speeds - self.speed)).argmin()]
+            old_phaselen = self.phaselen
+            self.phaselen = self.trajectory.length - 1
+            # update phase
+            self.phase = int(self.phaselen * self.phase / old_phaselen)
+            # new offset
+            ref_pos, ref_vel = self.get_ref_state(self.phase)
+            self.offset = ref_pos[self.pos_idx]
+        else:
+            self.speed = new_speed
+
     # NOTE: this reward is slightly different from the one in Xie et al
     # see notes for details
     def compute_reward(self, action):
@@ -491,7 +511,8 @@ def get_ref_state(self, phase=None):
         pos[1] = 0
 
         vel = np.copy(self.trajectory.qvel[phase * self.simrate]) if not self.aslip_traj else np.copy(self.trajectory.qvel[phase])
-        vel[0] *= self.speed
+        if not self.aslip_traj:
+            vel[0] *= self.speed
 
         return pos, vel
 
@@ -514,19 +535,24 @@ def get_full_state(self):
         # trajectory despite being global coord. Y is only invariant to straight
         # line trajectories.
 
+        # CLOCK BASED (NO TRAJECTORY)
         if self.clock_based and not self.aslip_traj:
             clock = [np.sin(2 * np.pi * self.phase / self.phaselen),
                      np.cos(2 * np.pi * self.phase / self.phaselen)]
 
             ext_state = np.concatenate((clock, [self.speed]))
 
+        # ASLIP TRAJECTORY
         elif self.aslip_traj:
             if(self.phase == 0):
                 ext_state = np.concatenate(get_ref_aslip_ext_state(self, self.phaselen - 1))
             else:
                 ext_state = np.concatenate(get_ref_aslip_ext_state(self, self.phase))
+
+        # OTHER TRAJECTORY
         else:
             ext_state = np.concatenate([ref_pos[self.pos_index], ref_vel[self.vel_index]])
+        print(ext_state.shape)
 
         # Use state estimator
         robot_state = np.concatenate([
diff --git a/deprecated/renderpol.py b/deprecated/renderpol.py
index 2e1f5706..69f092ff 100644
--- a/deprecated/renderpol.py
+++ b/deprecated/renderpol.py
@@ -1,20 +1,8 @@
+import sys
+sys.path.append("..") # Adds higher directory to python modules path.
+
 from rl.utils import renderpolicy, rendermultipolicy, renderpolicy_speedinput, rendermultipolicy_speedinput
-from rl.distributions.gaussian import GaussianMLP
 from cassie import CassieEnv
-# from rl.policies import GaussianMLP, BetaMLP
-
-# from cassie.slipik_env import CassieIKEnv
-# from cassie.no_delta_env import CassieEnv_nodelta
-# from cassie.speed_env import CassieEnv_speed
-# from cassie.speed_double_freq_env import CassieEnv_speed_dfreq
-# from cassie.speed_no_delta_env import CassieEnv_speed_no_delta
-# from cassie.speed_no_delta_neutral_foot_env import CassieEnv_speed_no_delta_neutral_foot
-# from cassie.standing_env import CassieEnv_stand
-# from cassie.speed_sidestep_env import CassieEnv_speed_sidestep
-from cassie.aslipik_unified_env import UnifiedCassieIKEnv
-from cassie.aslipik_unified_env_alt_reward import UnifiedCassieIKEnvAltReward
-from cassie.aslipik_unified_env_task_reward import UnifiedCassieIKEnvTaskReward
-from cassie.aslipik_unified_no_delta_env import UnifiedCassieIKEnvNoDelta
 
 import torch
 
@@ -22,19 +10,20 @@
 import os
 import time
 
-# cassie_env = CassieEnv("walking", clock_based=True, state_est=True)
-# cassie_env = CassieEnv_nodelta("walking", clock_based=True, state_est=False)
-# cassie_env = CassieEnv_speed("walking", clock_based=True, state_est=True)
-# cassie_env = CassieEnv_speed_dfreq("walking", clock_based=True, state_est=False)
-# cassie_env = CassieEnv_speed_no_delta("walking", clock_based=True, state_est=False)
-# cassie_env = CassieEnv_speed_no_delta_neutral_foot("walking", clock_based=True, state_est=True)
-# cassie_env = CassieEnv_speed_sidestep("walking", clock_based=True, state_est=True)
-cassie_env = UnifiedCassieIKEnvNoDelta("walking", clock_based=True, state_est=True, debug=True)
-# cassie_env = CassieEnv_stand(state_est=False)
-
-# policy = torch.load("./trained_models/stiff_spring/stiff_StateEst_speed2.pt")
-# policy = torch.load("./trained_models/sidestep_StateEst_footxypenaltysmall_forcepenalty_hipyaw_limittargs_pelaccel3_speed-05-1_side03_freq1.pt")
-policy = torch.load("./trained_models/aslip_unified_no_delta_70_TS_only.pt")
-# policy = torch.load("./trained_models/aslip_unified_no_delta_0_v4.pt")
+import argparse
+import pickle
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--path", type=str, default="./trained_models/ppo/Cassie-v0/7b7e24-seed0/", help="path to folder containing policy and run details")
+args = parser.parse_args()
+run_args = pickle.load(open(args.path + "experiment.pkl", "rb"))
+
+cassie_env = CassieEnv(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random, no_delta=run_args.no_delta)
+policy = torch.load(args.path + "actor.pt")
 policy.eval()
-renderpolicy_speedinput(cassie_env, policy, deterministic=True, dt=0.05, speedup = 2)
\ No newline at end of file
+
+# cassie_env = CassieEnv(traj="aslip", clock_based=False, state_est=True, dynamics_randomization=False, no_delta=False)
+# policy = torch.load(args.path + "aslip_unified_task10_v7.pt")
+# policy.eval()
+
+renderpolicy_speedinput(cassie_env, policy, deterministic=False, dt=0.05, speedup = 2)
\ No newline at end of file
diff --git a/cassie_top_white.png b/tools/cassie_top_white.png
similarity index 100%
rename from cassie_top_white.png
rename to tools/cassie_top_white.png
diff --git a/tools/eval_perturb.py b/tools/eval_perturb.py
index 4aad8f8a..b08c0941 100644
--- a/tools/eval_perturb.py
+++ b/tools/eval_perturb.py
@@ -81,7 +81,7 @@ def compute_perturbs(cassie_env, policy, wait_time, perturb_duration, perturb_si
         print("search time: ", time.time() - curr_start)
 
     print("Total compute time: ", time.time() - eval_start)
-    # np.save("test_perturb_eval_phase.npy", max_force)
+    np.save("test_perturb_eval_phase.npy", max_force)
 
 @ray.remote
 @torch.no_grad()
@@ -226,10 +226,10 @@ def plot_perturb(filename):
     ax_image.imshow(img, alpha=.3)
     ax_image.axis('off')
     plt.show()
-    # plt.savefig("./test_perturb_eval_phase.png")
+    plt.savefig("./test_perturb_eval_phase.png")
 
-# plot_perturb("./test_perturb_eval_phase.npy")
-# exit()
+plot_perturb("./test_perturb_eval_phase.npy")
+exit()
 
 import argparse
 import pickle
@@ -246,7 +246,6 @@ def plot_perturb(filename):
 # env_fn = partial(CassieEnv_speed_no_delta_neutral_foot, "walking", clock_based=True, state_est=True)
 cassie_env = CassieEnv(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random)
 policy = torch.load(args.path + "actor.pt")
-policy.eval()
 
 wait_time = 4
 perturb_duration = 0.2
diff --git a/tools/plotData.py b/tools/plotData.py
index a94bc03e..31e7d8c9 100644
--- a/tools/plotData.py
+++ b/tools/plotData.py
@@ -24,25 +24,24 @@
 
 # Load environment and policy
 # env_fn = partial(CassieEnv_speed_no_delta_neutral_foot, "walking", clock_based=True, state_est=True)
-cassie_env = CassieEnv(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random)
+cassie_env = CassieEnv(traj=run_args.traj, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random, clock_based=run_args.clock_based, history=run_args.history)
 policy = torch.load(args.path + "actor.pt")
-policy.eval()
 
 def avg_pols(policies, state):
     total_act = np.zeros(10)
     for policy in policies:
-        _, action = policy(state, False)
-        total_act += action.data[0].numpy()
+        action = policy.forward(torch.Tensor(state), True).detach().numpy()
+        total_act += action
     return total_act / len(policies)
 
 obs_dim = cassie_env.observation_space.shape[0] # TODO: could make obs and ac space static properties
 action_dim = cassie_env.action_space.shape[0]
 
 do_multi = False
-no_delta = True
+no_delta = cassie_env.no_delta
 limittargs = False
 lininterp = False
-offset = np.array([0.0045, 0.0, 0.4973, -1.1997, -1.5968, 0.0045, 0.0, 0.4973, -1.1997, -1.5968])
+offset = cassie_env.offset
 
 policies = []
 if do_multi:
@@ -58,7 +57,6 @@ def avg_pols(policies, state):
     #     policies.append(policy)
     for i in [1, 2, 3, 5]:
         policy = torch.load("./trained_models/stiff_spring/stiff_StateEst_speed{}.pt".format(i))
-        policy.eval()
         policies.append(policy)
 
 num_steps = 100
@@ -83,22 +81,20 @@ def avg_pols(policies, state):
 vel_idx = [6, 7, 8, 12, 18, 19, 20, 21, 25, 31]
 # Execute policy and save torques
 with torch.no_grad():
-    state = torch.Tensor(cassie_env.reset_for_test())
+    state = cassie_env.reset_for_test()
     cassie_env.speed = 0
     # cassie_env.side_speed = .2
-    cassie_env.phase_add = 1
     for i in range(pre_steps):
         if not do_multi:
-            action = policy(state, True)
-            state, reward, done, _ = cassie_env.step(action.data.numpy())
+            action = policy.forward(torch.Tensor(state), deterministic=True).detach().numpy()
+            state, reward, done, _ = cassie_env.step(action)
         else:
            action = avg_pols(policies, state)
           state, reward, done, _ = cassie_env.step(action)
        state = torch.Tensor(state)
     for i in range(num_steps):
         if not do_multi:
-            action = policy(state, True)
-            action = action.data.numpy()
+            action = policy.forward(torch.Tensor(state), deterministic=True).detach().numpy()
         else:
             action = avg_pols(policies, state)
         # state, reward, done, _ = cassie_env.step(action)
@@ -166,7 +162,6 @@ def avg_pols(policies, state):
 
         cassie_env.counter += 1
     state = cassie_env.get_full_state()
-    state = torch.Tensor(state)
 
 # Graph torque data
 fig, ax = plt.subplots(2, 5, figsize=(15, 5))
diff --git a/tools/policy_eval_suite.py b/tools/policy_eval_suite.py
index c218623f..63c8e02a 100644
--- a/tools/policy_eval_suite.py
+++ b/tools/policy_eval_suite.py
@@ -1,3 +1,6 @@
+import time
+import numpy as np
+import torch
 #validate the sensitivity of policy to environment parameters
 
 ##########To do#########
@@ -5,21 +8,23 @@
 #Get to run with user input of env and policy without iteration
 #Iterate through each parameter
 
-def iterativeValidation(policy, env, max_traj_len=1000, visualize=True, env_name=None, speed=0.0, state_est=True, clock_based=False):
-    # Follow apex.py and use an env_factory, or expect an env on input?
-
-    #if env_name is None:
-    #    env = env_factory(policy.env_name, speed=speed, state_est=state_est, clock_based=clock_based)()
-    #else:
-    #    env = env_factory(env_name, speed=speed, state_est=state_est, clock_based=clock_based)()
+def iterativeValidation(cassie_env, policy):
+    state = torch.Tensor(cassie_env.reset_for_test())
+    eval_start = time.time()
 
     # This stuff was ripped out from Jonah's dynamics randomization.
     # We are leaving it as is while building the infrastructure for testing.
     # Future plan: Put all the damp/mass ranges into a tuple and iterate through,
     # at each iteration running a test with the current parameter randomized, and all
     # others default. Then log results and output in a sane and useful way.
-    damp = env.default_damping
+    # TODO: Edit below into usable format for setting up a sweep of values for
+    # dof damping and mass changing. Also setup ground friction values. The
+    # ultimate plan is to sweep over all joints and bodies that we care about
+    # and change their values, then run the sim for a few seconds and see if
+    # cassie falls over. then we will return an array of values representing
+    # the survival data for every parameter.
+    damp = cassie_env.default_damping
     weak_factor = 0.5
     strong_factor = 1.5
 
     pelvis_damp_range = [[damp[0], damp[0]],
@@ -33,19 +38,19 @@ def iterativeValidation(policy, env, max_traj_len=1000, visualize=True, env_name
                       [damp[7]*weak_factor, damp[7]*strong_factor],
                       [damp[8]*weak_factor, damp[8]*strong_factor]]  # 6->8 and 19->21
 
-    achilles_damp_range = [[damp[9]*weak_factor, damp[9]*strong_factor],
-                           [damp[10]*weak_factor, damp[10]*strong_factor],
-                           [damp[11]*weak_factor, damp[11]*strong_factor]]  # 9->11 and 22->24
+    #achilles_damp_range = [[damp[9]*weak_factor, damp[9]*strong_factor],
+    #                       [damp[10]*weak_factor, damp[10]*strong_factor],
+    #                       [damp[11]*weak_factor, damp[11]*strong_factor]]  # 9->11 and 22->24
 
     knee_damp_range   = [[damp[12]*weak_factor, damp[12]*strong_factor]]  # 12 and 25
     shin_damp_range   = [[damp[13]*weak_factor, damp[13]*strong_factor]]  # 13 and 26
     tarsus_damp_range = [[damp[14], damp[14]]]                            # 14 and 27
 
-    heel_damp_range   = [[damp[15], damp[15]]]                            # 15 and 28
-    fcrank_damp_range = [[damp[16]*weak_factor, damp[16]*strong_factor]]  # 16 and 29
-    prod_damp_range   = [[damp[17], damp[17]]]                            # 17 and 30
+    #heel_damp_range   = [[damp[15], damp[15]]]                            # 15 and 28
+    #fcrank_damp_range = [[damp[16]*weak_factor, damp[16]*strong_factor]]  # 16 and 29
+    #prod_damp_range   = [[damp[17], damp[17]]]                            # 17 and 30
     foot_damp_range   = [[damp[18]*weak_factor, damp[18]*strong_factor]]  # 18 and 31
 
-    side_damp = hip_damp_range + achilles_damp_range + knee_damp_range + shin_damp_range + tarsus_damp_range + heel_damp_range + fcrank_damp_range + prod_damp_range + foot_damp_range
+    side_damp = hip_damp_range + knee_damp_range + shin_damp_range + tarsus_damp_range + foot_damp_range
     damp_range = pelvis_damp_range + side_damp + side_damp
     damp_noise = [np.random.uniform(a, b) for a, b in damp_range]
@@ -82,35 +87,29 @@ def iterativeValidation(policy, env, max_traj_len=1000, visualize=True, env_name
 
     fric_noise = [np.random.uniform(0.4, 1.4)] + [np.random.uniform(3e-3, 8e-3)] + list(env.default_fric[2:])
 
-    env.sim.set_dof_damping(np.clip(damp_noise, 0, None))
-    env.sim.set_body_mass(np.clip(mass_noise, 0, None))
-    env.sim.set_body_ipos(com_noise)
-    env.sim.set_ground_friction(np.clip(fric_noise, 0, None))
-
-    # From policy_eval
-    while True:
-        state = env.reset()
-        done = False
-        timesteps = 0
-        eval_reward = 0
-        while not done and timesteps < max_traj_len:
-            if hasattr(env, 'simrate'):
-                start = time.time()
-
-            action = policy.forward(torch.Tensor(state)).detach().numpy()
-            state, reward, done, _ = env.step(action)
-            if visualize:
-                env.render()
-            eval_reward += reward
-            timesteps += 1
-
-            if hasattr(env, 'simrate'):
-                # assume 30hz (hack)
-                end = time.time()
-                delaytime = max(0, 1000 / 30000 - (end-start))
-                time.sleep(delaytime)
-
-        print("Eval reward: ", eval_reward)
+    cassie_env.sim.set_dof_damping(np.clip(damp_noise, 0, None))
+    cassie_env.sim.set_body_mass(np.clip(mass_noise, 0, None))
+    cassie_env.sim.set_body_ipos(com_noise)
+    cassie_env.sim.set_ground_friction(np.clip(fric_noise, 0, None))
+
+    #TODO: Set a range of values to sweep for dof damping
+
+    for i in range(10): # 10 is just a placeholder for how granular
+                        # our sweep will be.
+
+        while not done:
+            reset(cassie_env, policy)
+            curr_time = cassie_env.sim.time()
+
+            while curr_time < start_t + wait_time:
+                action = policy(state, True)
+                action = action.data.numpy()
+                state, reward, done, _ = cassie_env.step(action)
+                state = torch.Tensor(state)
+                curr_time = cassie_env.sim.time()
+                if cassie_env.sim.qpos()[2] < 0.4:
+                    done = True
+                    break
 
     # Testing to see if the above is even working
 
@@ -189,24 +188,3 @@ def iterativeValidation(policy, env, max_traj_len=1000, visualize=True, env_name
 # [33] Right plantar rod
 # [34] Right foot (Motor [9], Joint [5])
 
-# qvel layout
-# [ 0] Pelvis x
-# [ 1] Pelvis y
-# [ 2] Pelvis z
-# [ 3] Pelvis orientation wx
-# [ 4] Pelvis orientation wy
-# [ 5] Pelvis orientation wz
-# [ 6] Left hip roll (Motor [0])
-# [ 7] Left hip yaw (Motor [1])
-# [ 8] Left hip pitch (Motor [2])
-# [ 9] Left knee (Motor [3])
-# [10] Left shin (Joint [0])
-# [11] Left tarsus (Joint [1])
-# [12] Left foot (Motor [4], Joint [2])
-# [13] Right hip roll (Motor [5])
-# [14] Right hip yaw (Motor [6])
-# [15] Right hip pitch (Motor [7])
-# [16] Right knee (Motor [8])
-# [17] Right shin (Joint [3])
-# [18] Right tarsus (Joint [4])
-# [19] Right foot (Motor [9], Joint [5])
\ No newline at end of file
diff --git a/tools/renderpol.py b/tools/renderpol.py
deleted file mode 100644
index 69f092ff..00000000
--- a/tools/renderpol.py
+++ /dev/null
@@ -1,29 +0,0 @@
-import sys
-sys.path.append("..") # Adds higher directory to python modules path.
-
-from rl.utils import renderpolicy, rendermultipolicy, renderpolicy_speedinput, rendermultipolicy_speedinput
-from cassie import CassieEnv
-
-import torch
-
-import numpy as np
-import os
-import time
-
-import argparse
-import pickle
-
-parser = argparse.ArgumentParser()
-parser.add_argument("--path", type=str, default="./trained_models/ppo/Cassie-v0/7b7e24-seed0/", help="path to folder containing policy and run details")
-args = parser.parse_args()
-run_args = pickle.load(open(args.path + "experiment.pkl", "rb"))
-
-cassie_env = CassieEnv(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random, no_delta=run_args.no_delta)
-policy = torch.load(args.path + "actor.pt")
-policy.eval()
-
-# cassie_env = CassieEnv(traj="aslip", clock_based=False, state_est=True, dynamics_randomization=False, no_delta=False)
-# policy = torch.load(args.path + "aslip_unified_task10_v7.pt")
-# policy.eval()
-
-renderpolicy_speedinput(cassie_env, policy, deterministic=False, dt=0.05, speedup = 2)
\ No newline at end of file
diff --git a/tools/test_perturb_eval_phase.npy b/tools/test_perturb_eval_phase.npy
new file mode 100644
index 00000000..eb3a5e63
Binary files /dev/null and b/tools/test_perturb_eval_phase.npy differ
diff --git a/tools/vis_perturb.py b/tools/vis_perturb.py
index e99001f5..0bfe0340 100644
--- a/tools/vis_perturb.py
+++ b/tools/vis_perturb.py
@@ -39,7 +39,6 @@ def reset_to_phase(env, policy, phase):
 # env_fn = partial(CassieEnv_speed_no_delta_neutral_foot, "walking", clock_based=True, state_est=True)
 cassie_env = CassieEnv(traj=run_args.traj, clock_based=run_args.clock_based, state_est=run_args.state_est, dynamics_randomization=run_args.dyn_random)
 policy = torch.load(args.path + "actor.pt")
-policy.eval()
 
 state = torch.Tensor(cassie_env.reset_for_test())
 # cassie_env.sim.step_pd(self.u)
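A possible follow-up to the eval_policy changes in apex.py: the state-estimator and non-state-estimator branches of the orientation update are nearly identical and could be factored into a single helper. The sketch below is only an illustration, not part of this diff; it reuses the quaternion helpers apex.py already calls, and the `cassie.quaternion_function` import path is an assumption.

```python
import numpy as np
# NOTE: import path is a guess; these are the same helpers eval_policy already uses.
from cassie.quaternion_function import (euler2quat, inverse_quaternion,
                                        quaternion_product, rotate_by_quaternion)

def remap_heading(state, orient_add, state_est=True):
    """Rotate the pelvis orientation and translational velocity in `state`
    by the commanded heading offset, using the same index ranges as eval_policy."""
    state = np.array(state, dtype=np.float64, copy=True)
    iquaternion = inverse_quaternion(euler2quat(z=orient_add, y=0, x=0))
    # index ranges from the diff: (1:5, 14:17) with the state estimator, (2:6, 20:23) without
    orient, transvel = (slice(1, 5), slice(14, 17)) if state_est else (slice(2, 6), slice(20, 23))

    new_orient = np.asarray(quaternion_product(iquaternion, state[orient]))
    if new_orient[0] < 0:
        new_orient = -new_orient  # keep the scalar part positive (canonical quaternion)

    state[orient] = new_orient
    state[transvel] = rotate_by_quaternion(state[transvel], iquaternion)
    return state
```

With a helper like this, the loop in eval_policy could call `state = remap_heading(state, orient_add, env.state_est)` once before the policy forward pass instead of duplicating the branch logic.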
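For the TODO in iterativeValidation about sweeping damping values and recording whether Cassie stays upright, here is a rough sketch of one way the loop could look. The function name `survival_sweep`, the scale grid, the 300-step horizon, and the 0.4 m pelvis-height fall threshold are placeholders and not part of this diff; the environment and policy calls mirror the ones used elsewhere in the changed files.

```python
import numpy as np
import torch

def survival_sweep(cassie_env, policy, scales=np.linspace(0.5, 1.5, 11), horizon=300):
    """Scale one DOF-damping entry at a time, roll the policy out for a fixed
    horizon, and record whether the pelvis stays above a fall threshold."""
    default_damp = np.array(cassie_env.default_damping)
    survived = np.ones((len(default_damp), len(scales)), dtype=bool)
    for dof in range(len(default_damp)):
        for j, scale in enumerate(scales):
            damp = default_damp.copy()
            damp[dof] *= scale
            cassie_env.sim.set_dof_damping(np.clip(damp, 0, None))
            state = cassie_env.reset_for_test()
            for _ in range(horizon):
                with torch.no_grad():
                    action = policy.forward(torch.Tensor(state), deterministic=True).detach().numpy()
                state, reward, done, _ = cassie_env.step(action)
                if cassie_env.sim.qpos()[2] < 0.4:  # pelvis below threshold, count as a fall
                    survived[dof, j] = False
                    break
    cassie_env.sim.set_dof_damping(default_damp)  # restore defaults when finished
    return survived
```

The returned boolean array is one possible form of the "survival data for every parameter" mentioned in the TODO; mass and friction sweeps could follow the same pattern.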