Commit

formating and added documentation
vincentpierre committed Sep 18, 2017
1 parent 3bc9675 commit a3f1645
Showing 20 changed files with 558 additions and 492 deletions.
3 changes: 1 addition & 2 deletions README.md
@@ -1,5 +1,4 @@
![alt text](images/banner.png "Unity ML - Agents")

<img src="images/unity-wide.png" align="middle" width="3000"/>

# Unity ML - Agents
**Unity Machine Learning Agents** allows researchers and developers to create games and simulations using the Unity Editor which serve as environments where intelligent agents can be trained using reinforcement learning, neuroevolution, or other machine learning methods through a simple-to-use Python API. For more information, see the [wiki page](../../wiki).
Binary file added images/unity-wide.png
20 changes: 12 additions & 8 deletions python/Basics.ipynb
@@ -6,7 +6,7 @@
"source": [
"# Unity ML Agents\n",
"## Environment Basics\n",
"This notebook contains a walkthrough of the basic functions of the Python API for Unity ML Agents. For instructions on building a Unity environment, see [here](https://github.com/Unity-Technologies/python-rl-control/tree/master/Projects)."
"This notebook contains a walkthrough of the basic functions of the Python API for Unity ML Agents. For instructions on building a Unity environment, see [here](https://github.com/Unity-Technologies/ml-agents/wiki/Getting-Started-with-Balance-Ball)."
]
},
{
@@ -66,7 +66,7 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"env = UnityEnvironment(file_name=env_name)\n",
@@ -90,7 +92,9 @@
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Reset the environment\n",
@@ -163,21 +167,21 @@
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 2",
"display_name": "Python 3",
"language": "python",
"name": "python2"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
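For readers following along outside Jupyter, here is a minimal sketch of the reset/step loop that the Basics notebook builds up, using the `unityagents` package from this repository. It is only a sketch: attribute names such as `brain_names`, `brains`, `action_space_size`, and `agents` are assumptions about this version of the API (the `reset(train_mode=...)[brain_name]` pattern and `global_done` appear in the ppo.py diff below), and they may differ in later releases.

import numpy as np
from unityagents import UnityEnvironment

env = UnityEnvironment(file_name="simple")       # name of the built environment, as used elsewhere in this commit
brain_name = env.brain_names[0]                  # assumed: names of the brains present in the scene
brain = env.brains[brain_name]                   # assumed: per-brain parameters (action space size, etc.)

info = env.reset(train_mode=True)[brain_name]    # reset returns a dict of BrainInfo keyed by brain name
for _ in range(100):
    # Random continuous actions, one row per agent controlled by this brain (illustration only).
    action = np.random.randn(len(info.agents), brain.action_space_size)
    info = env.step(action)[brain_name]          # advance the simulation one step
    if env.global_done:                          # all agents have finished their episodes
        info = env.reset(train_mode=True)[brain_name]
env.close()
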
14 changes: 7 additions & 7 deletions python/PPO.ipynb
@@ -47,8 +47,8 @@
"run_path = \"ppo\" # The sub-directory name for model and summary statistics\n",
"load_model = False # Whether to load a saved model.\n",
"train_model = True # Whether to train the model.\n",
"summary_freq = 1000 # Frequency at which to save training statistics.\n",
"save_freq = 20000 # Frequency at which to save model.\n",
"summary_freq = 10000 # Frequency at which to save training statistics.\n",
"save_freq = 50000 # Frequency at which to save model.\n",
"env_name = \"simple\" # Name of the training environment file.\n",
"\n",
"### Algorithm-specific parameters for tuning\n",
@@ -182,21 +182,21 @@
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 2",
"display_name": "Python 3",
"language": "python",
"name": "python2"
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
"pygments_lexer": "ipython3",
"version": "3.6.2"
}
},
"nbformat": 4,
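The two frequencies raised above are step counts, not wall-clock times. The loop below is only a schematic illustration of what they control, not the notebook's actual training code; the two helper functions and the value of max_steps are hypothetical stand-ins.

summary_freq = 10000     # write training statistics every 10,000 steps
save_freq = 50000        # checkpoint the model every 50,000 steps
max_steps = 200000       # arbitrary value for the illustration

def write_training_statistics(step):    # hypothetical stand-in for the summary writer
    print("summary at step", step)

def save_model(step):                   # hypothetical stand-in for saving a checkpoint
    print("checkpoint at step", step)

for step in range(1, max_steps + 1):
    # ... collect experience and update the policy here ...
    if step % summary_freq == 0:
        write_training_statistics(step)
    if step % save_freq == 0:
        save_model(step)
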
4 changes: 3 additions & 1 deletion python/README.md
@@ -1,3 +1,5 @@
![alt text](../images/banner.png "Unity ML - Agents")

# Unity ML - Agents (Python API)

## Python Setup
@@ -45,7 +47,7 @@ Then navigate to `localhost:6006` to monitor progress with Tensorboard.

To train using PPO without the notebook, run:

`python3 ppo.py <env_name> --train-model`
`python3 ppo.py <env_name> --train`

For a list of additional hyperparameters, run:

14 changes: 7 additions & 7 deletions python/ppo.py
@@ -17,10 +17,10 @@
--help Show this message.
--max-step=<n> Maximum number of steps to run environment [default: 5e6].
--run-path=<path> The sub-directory name for model and summary statistics [default: ppo].
--load-model Whether to load the model or randomly initialize [default: False].
--train-model Whether to train model, or only run inference [default: True].
--summary-freq=<n> Frequency at which to save training statistics [default: 5000].
--save-freq=<n> Frequency at which to save model [default: 20000].
--load Whether to load the model or randomly initialize [default: False].
--train Whether to train model, or only run inference [default: True].
--summary-freq=<n> Frequency at which to save training statistics [default: 10000].
--save-freq=<n> Frequency at which to save model [default: 50000].
--gamma=<n> Reward discount rate [default: 0.99].
--lambd=<n> Lambda parameter for GAE [default: 0.95].
--time-horizon=<n> How many steps to collect per agent before adding to buffer [default: 2048].
@@ -40,8 +40,8 @@
max_steps = float(options['--max-step'])
model_path = './models/{}'.format(str(options['--run-path']))
summary_path = './summaries/{}'.format(str(options['--run-path']))
load_model = options['--load-model']
train_model = options['--train-model']
load_model = options['--load']
train_model = options['--train']
summary_freq = int(options['--summary-freq'])
save_freq = int(options['--save-freq'])
env_name = options['<env>']
@@ -93,7 +93,7 @@
summary_writer = tf.summary.FileWriter(summary_path)
info = env.reset(train_mode=train_model)[brain_name]
trainer = Trainer(ppo_model, sess, info, is_continuous, use_observations)
while steps <= max_steps:
while steps <= max_steps or not train_model:
if env.global_done:
info = env.reset(train_mode=train_model)[brain_name]
# Decide and take an action
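The option block at the top of ppo.py is written in docopt style, and the script reads parsed values from an `options` dictionary keyed by flag name, so the renamed flags presumably flow into the code roughly as sketched below. The usage string here is a trimmed, hypothetical stand-in for the real module docstring, not a copy of it.

from docopt import docopt

_USAGE = """
Usage:
  ppo.py <env> [options]

Options:
  --load             Whether to load the model or randomly initialize.
  --train            Whether to train model, or only run inference.
  --max-step=<n>     Maximum number of steps to run environment [default: 5e6].
"""

options = docopt(_USAGE)                  # e.g. invoked as: python3 ppo.py simple --train
load_model = options['--load']            # bare flags parse to True when passed, False otherwise
train_model = options['--train']
max_steps = float(options['--max-step'])

# Note the changed loop condition, `while steps <= max_steps or not train_model`:
# an inference-only run (no --train) now keeps stepping the environment past
# max_steps instead of exiting.
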
2 changes: 1 addition & 1 deletion python/ppo/trainer.py
@@ -165,7 +165,7 @@ def write_summary(self, summary_writer, steps):
summary = tf.Summary()
for key in self.stats:
if len(self.stats[key]) > 0:
stat_mean = np.mean(self.stats[key])
stat_mean = float(np.mean(self.stats[key]))
summary.value.add(tag='Info/{}'.format(key), simple_value=stat_mean)
self.stats[key] = []
summary_writer.add_summary(summary, steps)
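For context, this is the TensorFlow 1.x summary-writing pattern the changed line sits in (the tf.Summary protobuf plus tf.summary.FileWriter); the added cast presumably just ensures a plain Python float reaches the protobuf's simple_value field. The stats values below are made up for the example.

import numpy as np
import tensorflow as tf    # TensorFlow 1.x API assumed

stats = {'cumulative_reward': [1.0, 2.5, 3.0], 'episode_length': [12, 15, 9]}
summary_writer = tf.summary.FileWriter('./summaries/ppo')

summary = tf.Summary()
for key, values in stats.items():
    if len(values) > 0:
        stat_mean = float(np.mean(values))    # cast the numpy scalar to a built-in float
        summary.value.add(tag='Info/{}'.format(key), simple_value=stat_mean)
summary_writer.add_summary(summary, 10000)    # 10000 = current step count
summary_writer.flush()
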
2 changes: 1 addition & 1 deletion python/setup.py
@@ -11,7 +11,7 @@
description='Unity Machine Learning Agents',
license='Apache License 2.0',
author='Unity Technologies',
author_email='ml@unity3d.com',
author_email='ML-Agents@unity3d.com',
url='https://github.com/Unity-Technologies/ml-agents',
packages=find_packages(exclude = ['ppo']),
install_requires = required,
2 changes: 2 additions & 0 deletions python/unityagents/__init__.py
@@ -1 +1,3 @@
from .environment import *
from .brain import *
from .exception import *
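With brain and exception now re-exported from the package root, downstream code can import the public classes directly from `unityagents`. A small sketch: the class names match the explicit imports added to environment.py below, and "simple" is the environment name used elsewhere in this commit.

from unityagents import UnityEnvironment, BrainInfo, BrainParameters
from unityagents import UnityEnvironmentException, UnityActionException

try:
    env = UnityEnvironment(file_name="simple")   # launches the built Unity environment
    env.close()
except UnityEnvironmentException as exc:
    print("Could not launch the environment:", exc)
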
4 changes: 2 additions & 2 deletions python/unityagents/environment.py
@@ -9,8 +9,8 @@
import subprocess
import signal

from .brain import *
from .exception import *
from .brain import BrainInfo, BrainParameters
from .exception import UnityEnvironmentException, UnityActionException

from PIL import Image
from sys import platform
@@ -2,20 +2,15 @@
using System.Collections.Generic;
using UnityEngine;

(removed: old formatting)
public class Ball3DAcademy : Academy {



    public override void AcademyReset()
    {


    }

    public override void AcademyStep()
    {

    }

}

(added: reformatted)
public class Ball3DAcademy : Academy
{
    public override void AcademyReset()
    {

    }

    public override void AcademyStep()
    {

    }
}