Merge pull request utiasDSL#95 from spencerteetaert/pypi
Spencer's PyPI Changes
JacopoPan authored Jun 21, 2022
2 parents 36da0bf + 2f84a2b commit d27f30d
Showing 44 changed files with 747 additions and 364 deletions.
35 changes: 35 additions & 0 deletions .github/workflows/push.yml
@@ -0,0 +1,35 @@
# This workflow installs the package and runs its unit tests on every push
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Test package quality

on: push

permissions:
contents: read

jobs:
run_tests:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.8'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest
- name: Install package
run: |
pip install -e .
- name: Unit tests
run: |
python -m pytest tests/
rm -rf tmp/
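
The same test step can be reproduced locally before pushing; the driver below is a sketch (not part of this commit) that mirrors the workflow's "Unit tests" step when run from the repository root.

import shutil
import pytest

# Equivalent of `python -m pytest tests/` in the workflow above
exit_code = pytest.main(["tests/"])
# The workflow also removes the tmp/ directory left behind by the tests
shutil.rmtree("tmp/", ignore_errors=True)
raise SystemExit(exit_code)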
70 changes: 70 additions & 0 deletions .github/workflows/release.yml
@@ -0,0 +1,70 @@
# This workflow will upload a Python Package using Twine when a release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Publish Python package to PyPI

on:
release:
types: [published]

permissions:
contents: read

jobs:
run_tests:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.8'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install pytest
- name: Install package
run: |
pip install -e .
- name: Unit tests
run: |
python -m pytest tests/
rm -rf tmp/
deploy:
needs: run_tests
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v3
with:
python-version: '3.8'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install poetry
- name: Build package
run: |
rm -rf dist/
poetry build
- name: Publish distribution 📦 to Test PyPI
uses: pypa/gh-action-pypi-publish@master
with:
password: ${{ secrets.TEST_PYPI_API_TOKEN }}
repository_url: https://test.pypi.org/legacy/
# - name: pypi-publish
# # You may pin to the exact commit or the version.
# uses: pypa/gh-action-pypi-publish@release/v1
# with:
# # PyPI user
# user: __token__
# # Password for your PyPI user or an access token
# password: ${{ secrets.PYPI_API_TOKEN }}
# # The repository URL to use
# repository_url: https://github.com/utiasDSL/gym-pybullet-drones
3 changes: 3 additions & 0 deletions .gitignore
@@ -6,6 +6,8 @@ examples/test.py
# NumPy saves and videos
files/logs/*.npy
files/videos/*.mp4
results/
tmp/

# Learning results
experiments/learning/results/save-*
@@ -41,6 +43,7 @@ share/python-wheels/
.installed.cfg
*.egg
MANIFEST
.vscode/

# PyInstaller
# Usually these files are written by a python script from a template
2 changes: 1 addition & 1 deletion assignments/aer1216_fall2020_hw2_sim.py
@@ -21,7 +21,7 @@
from gym_pybullet_drones.envs.CtrlAviary import CtrlAviary
from gym_pybullet_drones.utils.Logger import Logger
from gym_pybullet_drones.utils.utils import sync
from gym_pybullet_drones.envs.BaseAviary import DroneModel
from gym_pybullet_drones.utils.enums import DroneModel
from aer1216_fall2020_hw2_ctrl import HW2Control

DURATION = 10
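
Several files in this commit switch their enum imports from gym_pybullet_drones.envs.BaseAviary to the new gym_pybullet_drones.utils.enums module. A minimal usage sketch of the new import path, assuming the usual CF2X and PYB members:

from gym_pybullet_drones.utils.enums import DroneModel, Physics

drone_model = DroneModel.CF2X   # Crazyflie 2.x in the "X" configuration
physics = Physics.PYB           # plain PyBullet physics stepping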
8 changes: 8 additions & 0 deletions build_project.sh
@@ -0,0 +1,8 @@
echo "Y" | pip uninstall gym_pybullet_drones
rm -rf dist/
poetry build
pip install dist/gym_pybullet_drones-1.0.0-py3-none-any.whl
cd tests
python test_build.py
rm -rf results
cd ..
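
The script above expects a tests/test_build.py smoke test, which is not among the files shown here. A minimal, hypothetical test in that spirit (CtrlAviary is imported the same way as in the assignment script above; the test body itself is an assumption) could be:

from gym_pybullet_drones.envs.CtrlAviary import CtrlAviary

def test_import_and_step():
    # Headless environment so the test can run in CI
    env = CtrlAviary(gui=False)
    obs = env.reset()
    for _ in range(10):
        obs, reward, done, info = env.step(env.action_space.sample())
    env.close()

if __name__ == "__main__":
    test_import_and_step()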
2 changes: 1 addition & 1 deletion experiments/learning/multiagent.py
@@ -48,7 +48,7 @@
from ray.rllib.policy.sample_batch import SampleBatch
from ray.rllib.env.multi_agent_env import ENV_STATE

from gym_pybullet_drones.envs.BaseAviary import DroneModel, Physics
from gym_pybullet_drones.utils.enums import DroneModel, Physics
from gym_pybullet_drones.envs.multi_agent_rl.FlockAviary import FlockAviary
from gym_pybullet_drones.envs.multi_agent_rl.LeaderFollowerAviary import LeaderFollowerAviary
from gym_pybullet_drones.envs.multi_agent_rl.MeetupAviary import MeetupAviary
102 changes: 62 additions & 40 deletions experiments/learning/singleagent.py
@@ -56,72 +56,79 @@
EPISODE_REWARD_THRESHOLD = -0 # Upper bound: rewards are always negative, but non-zero
"""float: Reward threshold to halt the script."""

if __name__ == "__main__":
DEFAULT_ENV = 'hover'
DEFAULT_ALGO = 'ppo'
DEFAULT_OBS = ObservationType('kin')
DEFAULT_ACT = ActionType('one_d_rpm')
DEFAULT_CPU = 1
DEFAULT_STEPS = 35000
DEFAULT_OUTPUT_FOLDER = 'results'

#### Define and parse (optional) arguments for the script ##
parser = argparse.ArgumentParser(description='Single agent reinforcement learning experiments script')
parser.add_argument('--env', default='hover', type=str, choices=['takeoff', 'hover', 'flythrugate', 'tune'], help='Task (default: hover)', metavar='')
parser.add_argument('--algo', default='ppo', type=str, choices=['a2c', 'ppo', 'sac', 'td3', 'ddpg'], help='RL agent (default: ppo)', metavar='')
parser.add_argument('--obs', default='kin', type=ObservationType, help='Observation space (default: kin)', metavar='')
parser.add_argument('--act', default='one_d_rpm', type=ActionType, help='Action space (default: one_d_rpm)', metavar='')
parser.add_argument('--cpu', default='1', type=int, help='Number of training environments (default: 1)', metavar='')
ARGS = parser.parse_args()
def run(
env=DEFAULT_ENV,
algo=DEFAULT_ALGO,
obs=DEFAULT_OBS,
act=DEFAULT_ACT,
cpu=DEFAULT_CPU,
steps=DEFAULT_STEPS,
output_folder=DEFAULT_OUTPUT_FOLDER
):

#### Save directory ########################################
filename = os.path.dirname(os.path.abspath(__file__))+'/results/save-'+ARGS.env+'-'+ARGS.algo+'-'+ARGS.obs.value+'-'+ARGS.act.value+'-'+datetime.now().strftime("%m.%d.%Y_%H.%M.%S")
filename = os.path.join(output_folder, 'save-'+env+'-'+algo+'-'+obs.value+'-'+act.value+'-'+datetime.now().strftime("%m.%d.%Y_%H.%M.%S"))
if not os.path.exists(filename):
os.makedirs(filename+'/')

#### Print out current git commit hash #####################
if platform == "linux" or platform == "darwin":
if (platform == "linux" or platform == "darwin") and ('GITHUB_ACTIONS' not in os.environ.keys()):
git_commit = subprocess.check_output(["git", "describe", "--tags"]).strip()
with open(filename+'/git_commit.txt', 'w+') as f:
f.write(str(git_commit))

#### Warning ###############################################
if ARGS.env == 'tune' and ARGS.act != ActionType.TUN:
if env == 'tune' and act != ActionType.TUN:
print("\n\n\n[WARNING] TuneAviary is intended for use with ActionType.TUN\n\n\n")
if ARGS.act == ActionType.ONE_D_RPM or ARGS.act == ActionType.ONE_D_DYN or ARGS.act == ActionType.ONE_D_PID:
if act == ActionType.ONE_D_RPM or act == ActionType.ONE_D_DYN or act == ActionType.ONE_D_PID:
print("\n\n\n[WARNING] Simplified 1D problem for debugging purposes\n\n\n")
#### Errors ################################################
if not ARGS.env in ['takeoff', 'hover']:
if not env in ['takeoff', 'hover']:
print("[ERROR] 1D action space is only compatible with Takeoff and HoverAviary")
exit()
if ARGS.act == ActionType.TUN and ARGS.env != 'tune' :
if act == ActionType.TUN and env != 'tune' :
print("[ERROR] ActionType.TUN is only compatible with TuneAviary")
exit()
if ARGS.algo in ['sac', 'td3', 'ddpg'] and ARGS.cpu!=1:
if algo in ['sac', 'td3', 'ddpg'] and cpu!=1:
print("[ERROR] The selected algorithm does not support multiple environments")
exit()

#### Uncomment to debug slurm scripts ######################
# exit()

env_name = ARGS.env+"-aviary-v0"
sa_env_kwargs = dict(aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, obs=ARGS.obs, act=ARGS.act)
# train_env = gym.make(env_name, aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, obs=ARGS.obs, act=ARGS.act) # single environment instead of a vectorized one
env_name = env+"-aviary-v0"
sa_env_kwargs = dict(aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, obs=obs, act=act)
# train_env = gym.make(env_name, aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS, obs=obs, act=act) # single environment instead of a vectorized one
if env_name == "takeoff-aviary-v0":
train_env = make_vec_env(TakeoffAviary,
env_kwargs=sa_env_kwargs,
n_envs=ARGS.cpu,
n_envs=cpu,
seed=0
)
if env_name == "hover-aviary-v0":
train_env = make_vec_env(HoverAviary,
env_kwargs=sa_env_kwargs,
n_envs=ARGS.cpu,
n_envs=cpu,
seed=0
)
if env_name == "flythrugate-aviary-v0":
train_env = make_vec_env(FlyThruGateAviary,
env_kwargs=sa_env_kwargs,
n_envs=ARGS.cpu,
n_envs=cpu,
seed=0
)
if env_name == "tune-aviary-v0":
train_env = make_vec_env(TuneAviary,
env_kwargs=sa_env_kwargs,
n_envs=ARGS.cpu,
n_envs=cpu,
seed=0
)
print("[INFO] Action space:", train_env.action_space)
@@ -132,25 +139,25 @@
onpolicy_kwargs = dict(activation_fn=torch.nn.ReLU,
net_arch=[512, 512, dict(vf=[256, 128], pi=[256, 128])]
) # or None
if ARGS.algo == 'a2c':
if algo == 'a2c':
model = A2C(a2cppoMlpPolicy,
train_env,
policy_kwargs=onpolicy_kwargs,
tensorboard_log=filename+'/tb/',
verbose=1
) if ARGS.obs == ObservationType.KIN else A2C(a2cppoCnnPolicy,
) if obs == ObservationType.KIN else A2C(a2cppoCnnPolicy,
train_env,
policy_kwargs=onpolicy_kwargs,
tensorboard_log=filename+'/tb/',
verbose=1
)
if ARGS.algo == 'ppo':
if algo == 'ppo':
model = PPO(a2cppoMlpPolicy,
train_env,
policy_kwargs=onpolicy_kwargs,
tensorboard_log=filename+'/tb/',
verbose=1
) if ARGS.obs == ObservationType.KIN else PPO(a2cppoCnnPolicy,
) if obs == ObservationType.KIN else PPO(a2cppoCnnPolicy,
train_env,
policy_kwargs=onpolicy_kwargs,
tensorboard_log=filename+'/tb/',
@@ -161,51 +168,51 @@
offpolicy_kwargs = dict(activation_fn=torch.nn.ReLU,
net_arch=[512, 512, 256, 128]
) # or None # or dict(net_arch=dict(qf=[256, 128, 64, 32], pi=[256, 128, 64, 32]))
if ARGS.algo == 'sac':
if algo == 'sac':
model = SAC(sacMlpPolicy,
train_env,
policy_kwargs=offpolicy_kwargs,
tensorboard_log=filename+'/tb/',
verbose=1
) if ARGS.obs==ObservationType.KIN else SAC(sacCnnPolicy,
) if obs==ObservationType.KIN else SAC(sacCnnPolicy,
train_env,
policy_kwargs=offpolicy_kwargs,
tensorboard_log=filename+'/tb/',
verbose=1
)
if ARGS.algo == 'td3':
if algo == 'td3':
model = TD3(td3ddpgMlpPolicy,
train_env,
policy_kwargs=offpolicy_kwargs,
tensorboard_log=filename+'/tb/',
verbose=1
) if ARGS.obs==ObservationType.KIN else TD3(td3ddpgCnnPolicy,
) if obs==ObservationType.KIN else TD3(td3ddpgCnnPolicy,
train_env,
policy_kwargs=offpolicy_kwargs,
tensorboard_log=filename+'/tb/',
verbose=1
)
if ARGS.algo == 'ddpg':
if algo == 'ddpg':
model = DDPG(td3ddpgMlpPolicy,
train_env,
policy_kwargs=offpolicy_kwargs,
tensorboard_log=filename+'/tb/',
verbose=1
) if ARGS.obs==ObservationType.KIN else DDPG(td3ddpgCnnPolicy,
) if obs==ObservationType.KIN else DDPG(td3ddpgCnnPolicy,
train_env,
policy_kwargs=offpolicy_kwargs,
tensorboard_log=filename+'/tb/',
verbose=1
)

#### Create evaluation environment #########################
if ARGS.obs == ObservationType.KIN:
if obs == ObservationType.KIN:
eval_env = gym.make(env_name,
aggregate_phy_steps=shared_constants.AGGR_PHY_STEPS,
obs=ARGS.obs,
act=ARGS.act
obs=obs,
act=act
)
elif ARGS.obs == ObservationType.RGB:
elif obs == ObservationType.RGB:
if env_name == "takeoff-aviary-v0":
eval_env = make_vec_env(TakeoffAviary,
env_kwargs=sa_env_kwargs,
@@ -242,11 +249,11 @@
verbose=1,
best_model_save_path=filename+'/',
log_path=filename+'/',
eval_freq=int(2000/ARGS.cpu),
eval_freq=int(2000/cpu),
deterministic=True,
render=False
)
model.learn(total_timesteps=35000, #int(1e12),
model.learn(total_timesteps=steps, #int(1e12),
callback=eval_callback,
log_interval=100,
)
@@ -258,4 +265,19 @@
#### Print training progression ############################
with np.load(filename+'/evaluations.npz') as data:
for j in range(data['timesteps'].shape[0]):
print(str(data['timesteps'][j])+","+str(data['results'][j][0][0]))
print(str(data['timesteps'][j])+","+str(data['results'][j][0]))


if __name__ == "__main__":
#### Define and parse (optional) arguments for the script ##
parser = argparse.ArgumentParser(description='Single agent reinforcement learning experiments script')
parser.add_argument('--env', default=DEFAULT_ENV, type=str, choices=['takeoff', 'hover', 'flythrugate', 'tune'], help='Task (default: hover)', metavar='')
parser.add_argument('--algo', default=DEFAULT_ALGO, type=str, choices=['a2c', 'ppo', 'sac', 'td3', 'ddpg'], help='RL agent (default: ppo)', metavar='')
parser.add_argument('--obs', default=DEFAULT_OBS, type=ObservationType, help='Observation space (default: kin)', metavar='')
parser.add_argument('--act', default=DEFAULT_ACT, type=ActionType, help='Action space (default: one_d_rpm)', metavar='')
parser.add_argument('--cpu', default=DEFAULT_CPU, type=int, help='Number of training environments (default: 1)', metavar='')
parser.add_argument('--steps', default=DEFAULT_STEPS, type=int, help='Number of training time steps (default: 35000)', metavar='')
parser.add_argument('--output_folder', default=DEFAULT_OUTPUT_FOLDER, type=str, help='Folder where to save logs (default: "results")', metavar='')
ARGS = parser.parse_args()

run(**vars(ARGS))
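
With argument parsing now confined to the __main__ block, the training entry point can also be driven programmatically; a minimal sketch, assuming the working directory is experiments/learning/ so that singleagent is importable:

from singleagent import run

# Same defaults as the CLI; only plain string/int arguments are overridden here
run(env='hover', algo='ppo', cpu=1, steps=10000, output_folder='results')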
2 changes: 1 addition & 1 deletion experiments/learning/test_multiagent.py
@@ -38,7 +38,7 @@
from ray.rllib.models import ModelCatalog
from ray.rllib.policy.sample_batch import SampleBatch

from gym_pybullet_drones.envs.BaseAviary import DroneModel, Physics
from gym_pybullet_drones.utils.enums import DroneModel, Physics
from gym_pybullet_drones.envs.multi_agent_rl.FlockAviary import FlockAviary
from gym_pybullet_drones.envs.multi_agent_rl.LeaderFollowerAviary import LeaderFollowerAviary
from gym_pybullet_drones.envs.multi_agent_rl.MeetupAviary import MeetupAviary