Showing preview only (957K chars total). Download the full file or copy to clipboard to get everything.
Repository: Observerspy/CS294
Branch: master
Commit: 849aa311e276
Files: 224
Total size: 77.1 MB
Directory structure:
gitextract_ql1s9b61/
├── README.md
├── hw1/
│ ├── .idea/
│ │ ├── hw1.iml
│ │ ├── misc.xml
│ │ ├── modules.xml
│ │ └── workspace.xml
│ ├── BehavioralCloning.py
│ ├── DAgger.py
│ ├── README.md
│ ├── data/
│ │ ├── Ant-v1.train.npz
│ │ ├── HalfCheetah-v1.train.npz
│ │ ├── Hopper-v1.train.npz
│ │ ├── Humanoid-v1.train.npz
│ │ ├── Reacher-v1.train.npz
│ │ └── Walker2d-v1.train.npz
│ ├── demo.bash
│ ├── experts/
│ │ ├── Ant-v1.pkl
│ │ ├── HalfCheetah-v1.pkl
│ │ ├── Hopper-v1.pkl
│ │ ├── Humanoid-v1.pkl
│ │ ├── Reacher-v1.pkl
│ │ └── Walker2d-v1.pkl
│ ├── load_policy.py
│ ├── log/
│ │ ├── Ant-v1_BC_30-01-2018_10-32-45/
│ │ │ └── log.txt
│ │ ├── Ant-v1_DA_30-01-2018_10-51-03/
│ │ │ └── log.txt
│ │ ├── HalfCheetah-v1_BC_30-01-2018_10-51-19/
│ │ │ └── log.txt
│ │ ├── HalfCheetah-v1_DA_30-01-2018_11-11-33/
│ │ │ └── log.txt
│ │ ├── Hopper-v1_BC_30-01-2018_10-55-39/
│ │ │ └── log.txt
│ │ ├── Hopper-v1_BCbig_30-01-2018_11-02-29/
│ │ │ └── log.txt
│ │ ├── Hopper-v1_DA_30-01-2018_11-24-58/
│ │ │ └── log.txt
│ │ ├── Humanoid-v1_BC_30-01-2018_10-28-53/
│ │ │ └── log.txt
│ │ ├── Humanoid-v1_DA_30-01-2018_10-31-26/
│ │ │ └── log.txt
│ │ ├── Reacher-v1_BC_30-01-2018_10-57-25/
│ │ │ └── log.txt
│ │ ├── Reacher-v1_DA_30-01-2018_11-27-44/
│ │ │ └── log.txt
│ │ ├── Walker2d-v1_BC_30-01-2018_10-58-02/
│ │ │ └── log.txt
│ │ └── Walker2d-v1_DA_30-01-2018_11-28-50/
│ │ └── log.txt
│ ├── logz.py
│ ├── plot.py
│ ├── run_expert.py
│ └── tf_util.py
├── hw2/
│ ├── .idea/
│ │ ├── hw2.iml
│ │ ├── misc.xml
│ │ ├── modules.xml
│ │ └── workspace.xml
│ ├── data/
│ │ ├── HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── InvertedPendulum_sb_rtg_na_bl_0.02_InvertedPendulum-v1_02-02-2018_10-42-44/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── lb_no_rtg_dna_CartPole-v0_24-01-2018_09-28-29/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── lb_rtg_dna_CartPole-v0_24-01-2018_09-20-37/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── lb_rtg_na_CartPole-v0_24-01-2018_09-11-55/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── sb_no_rtg_dna_CartPole-v0_24-01-2018_09-00-15/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── sb_rtg_dna_CartPole-v0_24-01-2018_09-04-19/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── sb_rtg_na_CartPole-v0_24-01-2018_09-08-49/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ └── sb_rtg_na_l2_CartPole-v0_25-01-2018_09-22-07/
│ │ ├── 1/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── 11/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── 21/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── 31/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ └── 41/
│ │ ├── log.txt
│ │ ├── params.json
│ │ └── vars.pkl
│ ├── logz.py
│ ├── plot.py
│ └── train_pg.py
├── hw3/
│ ├── .idea/
│ │ ├── hw3.iml
│ │ ├── misc.xml
│ │ ├── modules.xml
│ │ └── workspace.xml
│ ├── README
│ ├── atari_wrappers.py
│ ├── dqn.py
│ ├── dqn_utils.py
│ ├── log/
│ │ ├── _RAM_30-01-2018_15-20-56/
│ │ │ └── log.txt
│ │ ├── _RAM_30-01-2018_22-29-12/
│ │ │ └── log.txt
│ │ └── _RAM_31-01-2018_08-28-28/
│ │ └── log.txt
│ ├── logz.py
│ ├── plot.py
│ ├── run_dqn_atari.py
│ └── run_dqn_ram.py
└── hw4/
├── .idea/
│ ├── hw4.iml
│ ├── misc.xml
│ ├── modules.xml
│ └── workspace.xml
├── cheetah_env.py
├── controllers.py
├── cost_functions.py
├── data/
│ ├── mb_mpc_HalfCheetah-v1_28-01-2018_16-06-09/
│ │ └── log.txt
│ └── mb_mpc_HalfCheetah-v1_30-01-2018_09-57-32/
│ └── log.txt
├── dynamics.py
├── logz.py
├── main.py
└── plot.py
================================================
FILE CONTENTS
================================================
================================================
FILE: README.md
================================================
# CS294
Homework for CS294, Fall 2017.
================================================
FILE: hw1/.idea/hw1.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="projectConfiguration" value="Nosetests" />
<option name="PROJECT_TEST_RUNNER" value="Nosetests" />
</component>
</module>
================================================
FILE: hw1/.idea/misc.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6.2 (~/anaconda3/bin/python)" project-jdk-type="Python SDK" />
</project>
================================================
FILE: hw1/.idea/modules.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/hw1.iml" filepath="$PROJECT_DIR$/.idea/hw1.iml" />
</modules>
</component>
</project>
================================================
FILE: hw1/.idea/workspace.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="fc5b0bfb-f4a5-436c-bf6d-cd2f66c61056" name="Default" comment="" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="TRACKING_ENABLED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="FileEditorManager">
<leaf>
<file leaf-file-name="DAgger.py" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/DAgger.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="459">
<caret line="17" column="22" lean-forward="true" selection-start-line="17" selection-start-column="22" selection-end-line="17" selection-end-column="22" />
<folding>
<element signature="e#0#23#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file leaf-file-name="run_expert.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/run_expert.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="402">
<caret line="71" column="0" lean-forward="false" selection-start-line="71" selection-start-column="0" selection-end-line="75" selection-end-column="74" />
<folding>
<element signature="e#312#325#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="FindInProjectRecents">
<findStrings>
<find>int64_list</find>
<find>value=</find>
</findStrings>
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/2layersNN.py" />
<option value="$PROJECT_DIR$/BehavioralCloning.py" />
<option value="$PROJECT_DIR$/run_expert.py" />
<option value="$PROJECT_DIR$/DAgger.py" />
</list>
</option>
</component>
<component name="ProjectFrameBounds">
<option name="x" value="-10" />
<option name="width" value="1430" />
<option name="height" value="1070" />
</component>
<component name="ProjectView">
<navigator currentView="ProjectPane" proportions="" version="1">
<flattenPackages />
<showMembers />
<showModules />
<showLibraryContents />
<hideEmptyPackages />
<abbreviatePackageNames />
<autoscrollToSource />
<autoscrollFromSource />
<sortByType />
<manualOrder />
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="Scope" />
<pane id="Scratches" />
<pane id="ProjectPane">
<subPane>
<expand>
<path>
<item name="hw1" type="b2602c69:ProjectViewProjectNode" />
<item name="hw1" type="462c0819:PsiDirectoryNode" />
</path>
</expand>
<select />
</subPane>
</pane>
</panes>
</component>
<component name="PropertiesComponent">
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
<property name="last_opened_file_path" value="$PROJECT_DIR$/../hw2" />
</component>
<component name="RunDashboard">
<option name="ruleStates">
<list>
<RuleState>
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
</RuleState>
<RuleState>
<option name="name" value="StatusDashboardGroupingRule" />
</RuleState>
</list>
</option>
</component>
<component name="RunManager" selected="Python.DAgger">
<configuration name="2layersNN" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="hw1" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/BehavioralCloning.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
</configuration>
<configuration name="DAgger" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="hw1" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/DAgger.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
</configuration>
<configuration name="run_expert" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="hw1" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/run_expert.py" />
<option name="PARAMETERS" value="experts/Hopper-v1.pkl Hopper-v1 --render --num_rollouts=10" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
</configuration>
<list size="3">
<item index="0" class="java.lang.String" itemvalue="Python.run_expert" />
<item index="1" class="java.lang.String" itemvalue="Python.2layersNN" />
<item index="2" class="java.lang.String" itemvalue="Python.DAgger" />
</list>
<recent_temporary>
<list size="3">
<item index="0" class="java.lang.String" itemvalue="Python.DAgger" />
<item index="1" class="java.lang.String" itemvalue="Python.run_expert" />
<item index="2" class="java.lang.String" itemvalue="Python.2layersNN" />
</list>
</recent_temporary>
</component>
<component name="ShelveChangesManager" show_recycled="false">
<option name="remove_strategy" value="false" />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="fc5b0bfb-f4a5-436c-bf6d-cd2f66c61056" name="Default" comment="" />
<created>1516085912078</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1516085912078</updated>
</task>
<servers />
</component>
<component name="ToolWindowManager">
<frame x="-10" y="0" width="1430" height="1070" extended-state="0" />
<editor active="true" />
<layout>
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="combo" />
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="10" side_tool="false" content_ui="tabs" />
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="true" content_ui="tabs" />
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="false" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32916668" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="true" content_ui="tabs" />
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="7" side_tool="false" content_ui="tabs" />
<window_info id="Data View" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32983193" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="8" side_tool="false" content_ui="tabs" />
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="3" side_tool="false" content_ui="combo" />
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="9" side_tool="false" content_ui="tabs" />
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
</layout>
</component>
<component name="VcsContentAnnotationSettings">
<option name="myLimit" value="2678400000" />
</component>
<component name="XDebuggerManager">
<breakpoint-manager>
<breakpoints>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/run_expert.py</url>
<line>33</line>
<option name="timeStamp" value="24" />
</line-breakpoint>
</breakpoints>
<option name="time" value="35" />
</breakpoint-manager>
<watches-manager>
<configuration name="PythonConfigurationType">
<watch expression="expert_data['actions']" language="Python" />
<watch expression="str(expert_data['observations'][0])" language="Python" />
<watch expression="obs[None,:]" language="Python" />
<watch expression="shape(o)" language="Python" />
<watch expression="args.expert_policy_file" language="Python" />
</configuration>
</watches-manager>
</component>
<component name="editorHistoryManager">
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/mujoco_py-0.5.7.dist-info/INSTALLER">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/mujoco_py-0.5.7.dist-info/RECORD">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/mujoco_py-0.5.7.dist-info/top_level.txt">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/mujoco_py-0.5.7.dist-info/WHEEL">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/mujoco_py/util.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding>
<element signature="e#0#22#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/mujoco_py/config.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="567">
<caret line="30" column="0" lean-forward="true" selection-start-line="30" selection-start-column="0" selection-end-line="30" selection-end-column="0" />
<folding>
<element signature="e#0#24#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/mujoco_py/__init__.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="27">
<caret line="1" column="3" lean-forward="true" selection-start-line="1" selection-start-column="3" selection-end-line="1" selection-end-column="3" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/tf_util.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-12388">
<caret line="7" column="0" lean-forward="true" selection-start-line="7" selection-start-column="0" selection-end-line="7" selection-end-column="0" />
<folding>
<element signature="e#0#18#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/README.md">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="459">
<caret line="17" column="53" lean-forward="true" selection-start-line="17" selection-start-column="20" selection-end-line="17" selection-end-column="53" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/tensorflow/core/example/feature_pb2.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="165">
<caret line="68" column="24" lean-forward="false" selection-start-line="68" selection-start-column="18" selection-end-line="68" selection-end-column="24" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/../../../../drebin/data2TFrecords.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="94">
<caret line="8" column="0" lean-forward="false" selection-start-line="8" selection-start-column="0" selection-end-line="9" selection-end-column="56" />
<folding>
<element signature="e#0#23#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/../../../../drebin/data_process.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="877">
<caret line="37" column="16" lean-forward="true" selection-start-line="37" selection-start-column="16" selection-end-line="37" selection-end-column="16" />
<folding>
<element signature="e#0#9#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/../../../../drebin/load.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="459">
<caret line="17" column="35" lean-forward="true" selection-start-line="17" selection-start-column="35" selection-end-line="17" selection-end-column="35" />
<folding>
<element signature="e#0#9#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/numpy/lib/npyio.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="264">
<caret line="724" column="0" lean-forward="false" selection-start-line="724" selection-start-column="0" selection-end-line="724" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/../../../../KDD99/data_process.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="334">
<caret line="147" column="35" lean-forward="false" selection-start-line="147" selection-start-column="29" selection-end-line="147" selection-end-column="35" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/losses/util.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="237">
<caret line="92" column="24" lean-forward="false" selection-start-line="92" selection-start-column="24" selection-end-line="92" selection-end-column="24" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/../../../../drebin/2layersNN.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="297">
<caret line="308" column="18" lean-forward="false" selection-start-line="308" selection-start-column="8" selection-end-line="308" selection-end-column="18" />
<folding>
<element signature="e#0#23#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/losses/losses_impl.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="233">
<caret line="497" column="23" lean-forward="false" selection-start-line="497" selection-start-column="12" selection-end-line="497" selection-end-column="23" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/numpy/core/numeric.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="206">
<caret line="530" column="0" lean-forward="false" selection-start-line="530" selection-start-column="0" selection-end-line="530" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/argparse.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="206">
<caret line="1728" column="8" lean-forward="false" selection-start-line="1728" selection-start-column="8" selection-end-line="1728" selection-end-column="8" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/../../../../KDD99/2layersNN.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="310">
<caret line="196" column="0" lean-forward="false" selection-start-line="196" selection-start-column="0" selection-end-line="199" selection-end-column="75" />
<folding>
<element signature="e#0#23#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/../../../../KDD99/load_data.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="216">
<caret line="8" column="20" lean-forward="false" selection-start-line="8" selection-start-column="20" selection-end-line="8" selection-end-column="20" />
<folding>
<element signature="e#0#9#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/load_policy.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="324">
<caret line="12" column="0" lean-forward="true" selection-start-line="12" selection-start-column="0" selection-end-line="12" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/codecs.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="183">
<caret line="308" column="0" lean-forward="false" selection-start-line="308" selection-start-column="0" selection-end-line="308" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/tensorflow/python/client/session.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="507">
<caret line="1099" column="0" lean-forward="false" selection-start-line="1099" selection-start-column="0" selection-end-line="1099" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/.PyCharmCE2017.2/system/python_stubs/2065551341/numpy/core/multiarray.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="183">
<caret line="560" column="4" lean-forward="false" selection-start-line="560" selection-start-column="4" selection-end-line="560" selection-end-column="4" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/BehavioralCloning.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="148">
<caret line="172" column="0" lean-forward="false" selection-start-line="172" selection-start-column="0" selection-end-line="172" selection-end-column="0" />
<folding>
<element signature="e#0#23#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/run_expert.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="402">
<caret line="71" column="0" lean-forward="false" selection-start-line="71" selection-start-column="0" selection-end-line="75" selection-end-column="74" />
<folding>
<element signature="e#312#325#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/DAgger.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="459">
<caret line="17" column="22" lean-forward="true" selection-start-line="17" selection-start-column="22" selection-end-line="17" selection-end-column="22" />
<folding>
<element signature="e#0#23#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</component>
</project>
================================================
FILE: hw1/BehavioralCloning.py
================================================
import tensorflow as tf
import os
import numpy as np
import tqdm
import gym
import logz
import time
import math
class Config(object):
    """Hyper-parameters for behavioral cloning on a single gym environment.

    NOTE(review): n_features/n_classes are the observation/action dims and
    must match `envname` (11/3 fits Hopper-v1) -- change all three together.
    """
    n_features = 11        # observation dimension fed to the network
    n_classes = 3          # action dimension predicted by the network
    dropout = 0.5          # value fed to the dropout placeholder (dropout layer is commented out in the graph)
    hidden_size_1 = 128    # units in the first fully-connected layer
    hidden_size_2 = 256    # units in the second fully-connected layer
    hidden_size_3 = 64     # units in the third fully-connected layer
    batch_size = 256       # minibatch size for tf.train.shuffle_batch
    lr = 0.0005            # initial Adam learning rate (exponentially decayed)
    itera = 20             # outer iterations: train then evaluate rollouts
    train_itera = 20       # passes-worth of minibatches per outer iteration
    envname = 'Hopper-v1'  # gym environment id (also selects the data file)
    max_steps = 1000       # cap on steps per evaluation rollout
class NN(object):
    """Three-hidden-layer MLP regressor (TF1 graph mode).

    Maps observation vectors (Config.n_features) to action vectors
    (Config.n_classes); trained with an MSE loss and Adam under an
    exponentially decaying learning rate.
    """

    def __init__(self, config):
        self.config = config
        self.build()

    def add_placeholders(self):
        """Create the input/label/dropout/is_training placeholders."""
        self.input_placeholder = tf.placeholder(tf.float32, shape=(None, Config.n_features), name="input")
        self.labels_placeholder = tf.placeholder(tf.float32, shape=(None, Config.n_classes), name="label")
        self.dropout_placeholder = tf.placeholder(tf.float32, name="drop")
        self.is_training = tf.placeholder(tf.bool)

    def create_feed_dict(self, inputs_batch, labels_batch=None, dropout=1, is_training=False):
        """Build a feed dict; labels_batch is optional so the same helper
        serves both training and inference."""
        feed_dict = {self.input_placeholder: inputs_batch,
                     self.dropout_placeholder: dropout, self.is_training: is_training}
        if labels_batch is not None:
            feed_dict[self.labels_placeholder] = labels_batch
        return feed_dict

    def add_prediction_op(self):
        """Build the 3-layer ReLU MLP with a linear output head; returns pred."""
        # trainable=False: the step counter is bookkeeping, not a model weight
        # (it was previously a trainable variable by default).
        self.global_step = tf.Variable(0, trainable=False)
        with tf.name_scope('layer1'):
            hidden1 = tf.contrib.layers.fully_connected(self.input_placeholder, num_outputs=Config.hidden_size_1,
                                                        activation_fn=tf.nn.relu)
        with tf.name_scope('layer2'):
            hidden2 = tf.contrib.layers.fully_connected(hidden1, num_outputs=Config.hidden_size_2,
                                                        activation_fn=tf.nn.relu)
        with tf.name_scope('layer3'):
            hidden3 = tf.contrib.layers.fully_connected(hidden2, num_outputs=Config.hidden_size_3,
                                                        activation_fn=tf.nn.relu)
        # hidden3 = tf.nn.dropout(hidden3, self.dropout_placeholder)
        with tf.name_scope('output'):
            pred = tf.contrib.layers.fully_connected(hidden3, num_outputs=Config.n_classes,
                                                     activation_fn=None)
        return pred

    def add_loss_op(self, pred):
        """MSE between predicted and expert actions; also logged to TensorBoard."""
        loss = tf.losses.mean_squared_error(predictions=pred, labels=self.labels_placeholder)
        tf.summary.scalar('loss', loss)
        return loss

    def add_training_op(self, loss):
        """Adam step; LR decays x0.8 every 1000 global steps (staircase)."""
        extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(extra_update_ops):
            learning_rate = tf.train.exponential_decay(Config.lr, self.global_step, 1000, 0.8, staircase=True)
            train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=self.global_step)
        return train_op

    def train_on_batch(self, sess, inputs_batch, labels_batch, merged, train_writer, i):
        """Run one optimization step and write merged summaries at step i.

        Returns the batch loss.
        """
        feed = self.create_feed_dict(inputs_batch, labels_batch, self.config.dropout, True)
        rs, _, loss = sess.run([merged, self.train_op, self.loss], feed_dict=feed)
        train_writer.add_summary(rs, i)
        return loss

    def fit(self, sess, train_x, train_y):
        """Run one optimization step without summary logging; returns the loss.

        Fixed: the previous version forwarded to train_on_batch with too few
        arguments and would have raised TypeError if called.
        """
        feed = self.create_feed_dict(train_x, train_y, self.config.dropout, True)
        _, loss = sess.run([self.train_op, self.loss], feed_dict=feed)
        return loss

    def build(self):
        """Assemble the graph: placeholders -> prediction -> loss -> train op."""
        with tf.name_scope('inputs'):
            self.add_placeholders()
        with tf.name_scope('predict'):
            self.pred = self.add_prediction_op()
        with tf.name_scope('loss'):
            self.loss = self.add_loss_op(self.pred)
        with tf.name_scope('train'):
            self.train_op = self.add_training_op(self.loss)

    def get_pred(self, sess, inputs_batch):
        """Predict actions for a batch of observations (inference mode)."""
        feed = self.create_feed_dict(inputs_batch, dropout=1, is_training=False)
        p = sess.run(self.pred, feed_dict=feed)
        return p
def load(path):
    """Load a training set saved as an .npz archive.

    Args:
        path: path to a .npz file whose "arr_0" holds observations (N, D)
            and "arr_1" holds actions, typically stored as (N, 1, A).

    Returns:
        (X, y): observations unchanged and actions flattened to 2-D (N, A).
        The old code hard-required a 3-D y of shape (N, 1, A); reshape(-1)
        accepts any trailing layout with the same row count.
    """
    # context manager closes the underlying archive file (the old version
    # leaked the NpzFile handle)
    with np.load(path) as data:
        X = data["arr_0"]
        y = data["arr_1"]
    # expert actions carry a singleton middle axis; flatten to (N, A)
    y = y.reshape(y.shape[0], -1)
    return X, y
def main():
    """Behavioral-cloning training loop for Config.envname.

    Trains the MLP on pre-recorded expert data fed through a TF shuffle
    queue, then each outer iteration evaluates the clone over 20 rollouts
    and logs mean/std return via logz.
    """
    PROJECT_ROOT = os.path.dirname(os.path.realpath(__file__))
    train_path = os.path.join(PROJECT_ROOT, "data/"+Config.envname+".train.npz")
    train_log_path = os.path.join(PROJECT_ROOT, "log/train/")
    # one timestamped log dir per run
    logz.configure_output_dir(os.path.join(PROJECT_ROOT, "log/"+Config.envname+"_BC_"+time.strftime("%d-%m-%Y_%H-%M-%S")))
    X_train, y_train = load(train_path)#debug
    print("train size :", X_train.shape, y_train.shape)
    print("start training")
    with tf.Graph().as_default():
        config = Config()
        nn = NN(config)
        init = tf.global_variables_initializer()
        saver = tf.train.Saver(max_to_keep=10, keep_checkpoint_every_n_hours=0.5)
        # must be created outside the session: queue ops belong to the graph
        shuffle_batch_x, shuffle_batch_y = tf.train.shuffle_batch(
            [X_train, y_train], batch_size=Config.batch_size, capacity=10000,
            min_after_dequeue=5000, enqueue_many=True)
        with tf.Session() as session:
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(train_log_path, session.graph)
            session.run(init)
            # queue runners feed the shuffle_batch op above
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(session, coord)
            for j in tqdm.tqdm(range(Config.itera)):
                i = 0
                try:
                    # train_itera passes over the dataset, one minibatch per step
                    for i in range(int(math.ceil(Config.train_itera * X_train.shape[0] / Config.batch_size))):
                        batch_x, batch_y = session.run([shuffle_batch_x, shuffle_batch_y])
                        loss = nn.train_on_batch(session, batch_x, batch_y, merged, train_writer, i)
                        i += 1
                        if i % 1000 == 0:
                            print("step:", i, "loss:", loss)
                            saver.save(session, os.path.join(PROJECT_ROOT, "model/model_ckpt"), global_step=i)
                except tf.errors.OutOfRangeError:
                    print("")
                finally:
                    # NOTE(review): the coordinator is stopped inside the outer
                    # loop, so queue runners are asked to stop after the first
                    # iteration -- confirm this is intended.
                    coord.request_stop()
                    coord.join(threads)
                # evaluate the cloned policy over 20 fresh rollouts
                env = gym.make(Config.envname)
                rollouts = 20
                returns = []
                for _ in range(rollouts):
                    obs = env.reset()
                    done = False
                    totalr = 0.
                    steps = 0
                    while not done:
                        # network expects a batch axis, hence obs[None, :]
                        action = nn.get_pred(session, obs[None, :])
                        obs, r, done, _ = env.step(action)
                        totalr += r
                        steps += 1
                        # if args.render:
                        #     env.render()
                        if steps >= Config.max_steps:
                            break
                    returns.append(totalr)
                # print('results for ', Config.envname)
                # print('returns', returns)
                # print('mean return', np.mean(returns))
                # print('std of return', np.std(returns))
                logz.log_tabular('Iteration', j)
                logz.log_tabular('AverageReturn', np.mean(returns))
                logz.log_tabular('StdReturn', np.std(returns))
                logz.dump_tabular()
================================================
FILE: hw1/DAgger.py
================================================
import tensorflow as tf
import os
import numpy as np
import tqdm
import gym
import load_policy
import math
import logz
import time
class Config(object):
    """Hyper-parameters for DAgger on a single gym environment.

    NOTE(review): n_features/n_classes are the observation/action dims and
    must match `envname` (17/6 fits Walker2d-v1) -- change all three together.
    """
    n_features = 17          # observation dimension fed to the network
    n_classes = 6            # action dimension predicted by the network
    dropout = 0.5            # value fed to the dropout placeholder
    hidden_size_1 = 128      # units in the first fully-connected layer
    hidden_size_2 = 256      # units in the second fully-connected layer
    hidden_size_3 = 64       # units in the third fully-connected layer
    batch_size = 256         # minibatch size for training
    lr = 0.0005              # initial Adam learning rate (exponentially decayed)
    itera = 20               # outer DAgger iterations (train/relabel/evaluate)
    train_itera = 20         # passes-worth of minibatches per outer iteration
    envname = 'Walker2d-v1'  # gym environment id (selects data + expert files)
    max_steps = 1000         # cap on steps per rollout
class NN(object):
    """Three-hidden-layer MLP regressor (TF1 graph mode).

    Maps observation vectors (Config.n_features) to action vectors
    (Config.n_classes); trained with an MSE loss and Adam under an
    exponentially decaying learning rate.
    """

    def add_placeholders(self):
        """Create the input/label/dropout/is_training placeholders."""
        self.input_placeholder = tf.placeholder(tf.float32, shape=(None, Config.n_features), name="input")
        self.labels_placeholder = tf.placeholder(tf.float32, shape=(None, Config.n_classes), name="label")
        self.dropout_placeholder = tf.placeholder(tf.float32, name="drop")
        self.is_training = tf.placeholder(tf.bool)

    def create_feed_dict(self, inputs_batch, labels_batch=None, dropout=1, is_training=False):
        """Build a feed dict; labels_batch is optional so the same helper
        serves both training and inference."""
        if labels_batch is None:
            feed_dict = {self.input_placeholder: inputs_batch,
                         self.dropout_placeholder: dropout, self.is_training: is_training}
        else:
            feed_dict = {self.input_placeholder: inputs_batch, self.labels_placeholder: labels_batch,
                         self.dropout_placeholder: dropout, self.is_training: is_training}
        return feed_dict

    def add_prediction_op(self):
        """Build the 3-layer ReLU MLP with a linear output head; returns pred."""
        self.global_step = tf.Variable(0)
        with tf.name_scope('layer1'):
            hidden1 = tf.contrib.layers.fully_connected(self.input_placeholder, num_outputs=Config.hidden_size_1,
                                                        activation_fn=tf.nn.relu)
        with tf.name_scope('layer2'):
            hidden2 = tf.contrib.layers.fully_connected(hidden1, num_outputs=Config.hidden_size_2,
                                                        activation_fn=tf.nn.relu)
        with tf.name_scope('layer3'):
            hidden3 = tf.contrib.layers.fully_connected(hidden2, num_outputs=Config.hidden_size_3,
                                                        activation_fn=tf.nn.relu)
        with tf.name_scope('output'):
            pred = tf.contrib.layers.fully_connected(hidden3, num_outputs=Config.n_classes,
                                                     activation_fn=None)
        return pred

    def add_loss_op(self, pred):
        """MSE between predicted and expert actions; also logged to TensorBoard."""
        loss = tf.losses.mean_squared_error(predictions=pred, labels=self.labels_placeholder)
        tf.summary.scalar('loss', loss)
        return loss

    def add_training_op(self, loss):
        """Adam step; LR decays x0.8 every 1000 global steps (staircase)."""
        extra_update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(extra_update_ops):
            learning_rate = tf.train.exponential_decay(Config.lr, self.global_step, 1000, 0.8, staircase=True)
            train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss, global_step=self.global_step)
        return train_op

    def train_on_batch(self, sess, inputs_batch, labels_batch, merged, train_writer, i):
        """Run one optimization step and write merged summaries at step i.

        Returns the batch loss.
        """
        feed = self.create_feed_dict(inputs_batch, labels_batch, self.config.dropout, True)
        rs, _, loss = sess.run([merged, self.train_op, self.loss], feed_dict=feed)
        train_writer.add_summary(rs, i)
        return loss

    def __init__(self, config):
        self.config = config
        self.build()

    def fit(self, sess, train_x, train_y):
        # NOTE(review): train_on_batch also requires merged/train_writer/i, so
        # this 3-argument call would raise TypeError if fit() were ever used;
        # it appears unused in this file.
        loss = self.train_on_batch(sess, train_x, train_y)

    def build(self):
        """Assemble the graph: placeholders -> prediction -> loss -> train op."""
        with tf.name_scope('inputs'):
            self.add_placeholders()
        with tf.name_scope('predict'):
            self.pred = self.add_prediction_op()
        with tf.name_scope('loss'):
            self.loss = self.add_loss_op(self.pred)
        with tf.name_scope('train'):
            self.train_op = self.add_training_op(self.loss)

    def get_pred(self, sess, inputs_batch):
        """Predict actions for a batch of observations (inference mode)."""
        feed = self.create_feed_dict(inputs_batch, dropout=1, is_training=False)
        p = sess.run(self.pred, feed_dict=feed)
        return p
def load(path):
    """Load a training set saved as an .npz archive.

    Args:
        path: path to a .npz file whose "arr_0" holds observations (N, D)
            and "arr_1" holds actions, typically stored as (N, 1, A).

    Returns:
        (X, y): observations unchanged and actions flattened to 2-D (N, A).
        The old code hard-required a 3-D y of shape (N, 1, A); reshape(-1)
        accepts any trailing layout with the same row count.
    """
    # context manager closes the underlying archive file (the old version
    # leaked the NpzFile handle)
    with np.load(path) as data:
        X = data["arr_0"]
        y = data["arr_1"]
    # expert actions carry a singleton middle axis; flatten to (N, A)
    y = y.reshape(y.shape[0], -1)
    return X, y
def run_env(env, nn, session):
    """Roll out policy `nn` in `env` for one episode.

    The episode ends when the environment reports done or after
    Config.max_steps steps, whichever comes first.

    Returns:
        (total_reward, observations): summed reward and the list of
        observations the policy acted on, in visit order.
    """
    seen = []
    reward_sum = 0.
    n_steps = 0
    ob = env.reset()
    finished = False
    while not finished:
        # network expects a batch axis, hence ob[None, :]
        act = nn.get_pred(session, ob[None, :])
        seen.append(ob)
        ob, rew, finished, _ = env.step(act)
        reward_sum += rew
        n_steps += 1
        # if args.render:
        #     env.render()
        if n_steps >= Config.max_steps:
            break
    return reward_sum, seen
def shuffle(X_train, y_train):
    """Shuffle X_train and y_train in unison (rows stay paired).

    Uses a single random permutation of the row indices instead of the old
    concatenate/shuffle/split round-trip, which copied both arrays, forced
    them to a common dtype, and required y_train to have exactly
    Config.n_classes columns. This version preserves dtypes and works for
    any y width (including 1-D y).

    Returns:
        (X, y): new arrays with rows reordered by the same permutation.
    """
    perm = np.random.permutation(X_train.shape[0])
    return X_train[perm], y_train[perm]
def main():
    """DAgger training loop for Config.envname.

    Each outer iteration: (1) train the clone on the aggregated dataset,
    (2) roll out the clone and relabel the visited observations with the
    expert policy, (3) append the relabeled data, (4) evaluate and log
    mean/std return via logz.
    """
    PROJECT_ROOT = os.path.dirname(os.path.realpath(__file__))
    train_path = os.path.join(PROJECT_ROOT, "data/"+Config.envname+".train.npz")
    policy_path = os.path.join(PROJECT_ROOT, "experts/"+Config.envname+".pkl")
    train_log_path = os.path.join(PROJECT_ROOT, "log/train/")
    # one timestamped log dir per run
    logz.configure_output_dir(os.path.join(PROJECT_ROOT, "log/"+Config.envname+"_DA_"+time.strftime("%d-%m-%Y_%H-%M-%S")))
    X_train, y_train = load(train_path)#debug
    print("train size :", X_train.shape, y_train.shape)
    print("start training")
    with tf.Graph().as_default():
        config = Config()
        nn = NN(config)
        init = tf.global_variables_initializer()
        # saver kept for the (commented-out) checkpointing below
        saver = tf.train.Saver(max_to_keep=10, keep_checkpoint_every_n_hours=0.5)
        print('loading and building expert policy')
        policy_fn = load_policy.load_policy(policy_path)
        print('loaded and built')
        with tf.Session() as session:
            merged = tf.summary.merge_all()
            train_writer = tf.summary.FileWriter(train_log_path, session.graph)
            session.run(init)
            # NOTE(review): no queue ops are built in this file, so the
            # coordinator/queue-runner machinery is effectively inert here;
            # it is also stopped inside the first loop iteration (see finally).
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(session, coord)
            #iter
            for j in tqdm.tqdm(range(Config.itera)):
                #train
                X_train, y_train = shuffle(X_train, y_train)
                i = 0
                try:
                    # train_itera passes over the (growing) dataset, by minibatch
                    for i in range(int(math.ceil(Config.train_itera * X_train.shape[0] / Config.batch_size))):
                        # moving offset wraps around the dataset
                        offset = (i * Config.batch_size) % X_train.shape[0]
                        # shuffle
                        batch_x = X_train[offset:(offset + Config.batch_size), :]
                        batch_y = y_train[offset:(offset + Config.batch_size)]
                        loss = nn.train_on_batch(session, batch_x, batch_y, merged, train_writer, i)
                        i += 1
                        print("step:", i, "loss:", loss)
                        # saver.save(session, os.path.join(PROJECT_ROOT, "model/model_ckpt"), global_step=i)
                except tf.errors.OutOfRangeError:
                    print("done")
                finally:
                    coord.request_stop()
                    coord.join(threads)
                #get new data and label
                observations = []
                actions = []
                env = gym.make(Config.envname)
                # DAgger step: run the clone, have the expert label the states it visited
                for _ in range(10):
                    _, o = run_env(env, nn, session)
                    observations.extend(o)
                    action = policy_fn(o)
                    actions.extend(action)
                new_x = np.array(observations)
                new_y = np.array(actions)
                # aggregate: append relabeled data to the training set
                X_train = np.concatenate((X_train, new_x))
                y_train = np.concatenate((y_train, new_y))
                print("train size :", X_train.shape, y_train.shape)
                #test
                # print("iter:", j, " train finished")
                # print(Config.envname + " start")
                rollouts = 20
                returns = []
                for _ in range(rollouts):
                    totalr, _ = run_env(env, nn, session)
                    returns.append(totalr)
                # print('results for ', Config.envname)
                # print('returns', returns)
                # print('mean return', np.mean(returns), 'std of return', np.std(returns))
                # print('mean return', np.mean(returns))
                print()
                logz.log_tabular('Iteration', j)
                logz.log_tabular('AverageReturn', np.mean(returns))
                logz.log_tabular('StdReturn', np.std(returns))
                logz.dump_tabular()
================================================
FILE: hw1/README.md
================================================
# CS294-112 HW 1: Imitation Learning
Dependencies: TensorFlow, MuJoCo version 1.31, OpenAI Gym
**Note**: MuJoCo versions prior to 1.5 do not support NVMe disks and therefore won't be compatible with recent Mac machines.
There is a request for OpenAI to support it that can be followed [here](https://github.com/openai/gym/issues/638).
The only file that you need to look at is `run_expert.py`, which is code to load up an expert policy, run a specified number of roll-outs, and save out data.
In `experts/`, the provided expert policies are:
* Ant-v1.pkl
* HalfCheetah-v1.pkl
* Hopper-v1.pkl
* Humanoid-v1.pkl
* Reacher-v1.pkl
* Walker2d-v1.pkl
The name of the pickle file corresponds to the name of the gym environment.
================================================
FILE: hw1/data/Ant-v1.train.npz
================================================
[File too large to display: 17.5 MB]
================================================
FILE: hw1/data/Humanoid-v1.train.npz
================================================
[File too large to display: 58.7 MB]
================================================
FILE: hw1/demo.bash
================================================
#!/bin/bash
# Smoke test: run each expert policy for a single rendered rollout.
set -eux
for e in Hopper-v1 Ant-v1 HalfCheetah-v1 Humanoid-v1 Reacher-v1 Walker2d-v1
do
    # Quote expansions so the command stays robust to unusual values of $e.
    python run_expert.py "experts/$e.pkl" "$e" --render --num_rollouts=1
done
================================================
FILE: hw1/load_policy.py
================================================
import pickle, tensorflow as tf, tf_util, numpy as np
def load_policy(filename):
    """Rebuild a pickled expert Gaussian policy as TF ops.

    The pickle stores raw numpy weights plus observation-normalization
    statistics (openai/imitation format). Returns a callable
    policy_fn(obs_batch) -> action_batch backed by the default session.
    """
    with open(filename, 'rb') as f:
        # NOTE(review): pickle.loads can execute arbitrary code -- only load
        # trusted expert files.
        data = pickle.loads(f.read())
    # assert len(data.keys()) == 2
    nonlin_type = data['nonlin_type']
    # the one remaining top-level key names the policy class
    policy_type = [k for k in data.keys() if k != 'nonlin_type'][0]
    assert policy_type == 'GaussianPolicy', 'Policy type {} not supported'.format(policy_type)
    policy_params = data[policy_type]
    assert set(policy_params.keys()) == {'logstdevs_1_Da', 'hidden', 'obsnorm', 'out'}
    # Keep track of input and output dims (i.e. observation and action dims) for the user

    def build_policy(obs_bo):
        """Build the forward pass for a batch of observations obs_bo."""
        def read_layer(l):
            # each layer is stored as {'AffineLayer': {'W': ..., 'b': ...}}
            assert list(l.keys()) == ['AffineLayer']
            assert sorted(l['AffineLayer'].keys()) == ['W', 'b']
            return l['AffineLayer']['W'].astype(np.float32), l['AffineLayer']['b'].astype(np.float32)

        def apply_nonlin(x):
            # activation chosen by the checkpoint's 'nonlin_type' field
            if nonlin_type == 'lrelu':
                return tf_util.lrelu(x, leak=.01) # openai/imitation nn.py:233
            elif nonlin_type == 'tanh':
                return tf.tanh(x)
            else:
                raise NotImplementedError(nonlin_type)

        # Build the policy. First, observation normalization.
        assert list(policy_params['obsnorm'].keys()) == ['Standardizer']
        obsnorm_mean = policy_params['obsnorm']['Standardizer']['mean_1_D']
        obsnorm_meansq = policy_params['obsnorm']['Standardizer']['meansq_1_D']
        # stdev from E[x^2] - E[x]^2, clipped at 0 to guard against round-off
        obsnorm_stdev = np.sqrt(np.maximum(0, obsnorm_meansq - np.square(obsnorm_mean)))
        print('obs', obsnorm_mean.shape, obsnorm_stdev.shape)
        normedobs_bo = (obs_bo - obsnorm_mean) / (obsnorm_stdev + 1e-6) # 1e-6 constant from Standardizer class in nn.py:409 in openai/imitation

        curr_activations_bd = normedobs_bo
        # Hidden layers next
        assert list(policy_params['hidden'].keys()) == ['FeedforwardNet']
        layer_params = policy_params['hidden']['FeedforwardNet']
        # layers are applied in sorted-name order
        for layer_name in sorted(layer_params.keys()):
            l = layer_params[layer_name]
            W, b = read_layer(l)
            curr_activations_bd = apply_nonlin(tf.matmul(curr_activations_bd, W) + b)

        # Output layer
        W, b = read_layer(policy_params['out'])
        output_bo = tf.matmul(curr_activations_bd, W) + b
        return output_bo

    obs_bo = tf.placeholder(tf.float32, [None, None])
    a_ba = build_policy(obs_bo)
    # compile placeholder -> output into a numpy-in/numpy-out callable
    policy_fn = tf_util.function([obs_bo], a_ba)
    return policy_fn
================================================
FILE: hw1/log/Ant-v1_BC_30-01-2018_10-32-45/log.txt
================================================
Iteration AverageReturn StdReturn
0 4460.34148867 504.993919545
1 4361.62089192 787.910341533
2 4238.35328327 1021.28077134
3 4551.3024466 220.243030137
4 4587.54091281 168.335418179
5 4536.1299135 153.39311344
6 4571.65449705 123.782802078
7 4468.68140776 489.42171743
8 4363.04400464 670.930898282
9 4477.84996454 448.159752304
10 4428.80299023 509.894288663
11 4219.75072886 982.726078559
12 4315.68265715 819.480876493
13 4549.6823578 116.079270449
14 4108.95615517 1174.93913987
15 4215.44535307 936.071898708
16 4431.84062818 476.960091374
17 4586.95139781 110.834919291
18 4431.21646369 610.424550643
19 4520.70269831 347.196475846
================================================
FILE: hw1/log/Ant-v1_DA_30-01-2018_10-51-03/log.txt
================================================
Iteration AverageReturn StdReturn
0 4543.92556297 163.700815617
1 4532.86754019 651.078836035
2 4718.40341227 334.079528476
3 4804.59842376 93.0563181661
4 4785.65700683 127.465976282
5 4831.53888416 93.6407853724
6 4767.63356673 112.208596337
7 4801.27090284 91.1866352943
8 4601.36647927 951.83089995
9 4787.38593852 275.842000254
10 4664.15443565 710.401178585
11 4804.25359337 113.174008233
12 4806.84341705 88.881374471
13 4772.95084954 103.853744593
14 4646.54370371 603.505425269
15 4604.3418062 770.681770359
16 4809.23885621 100.484625256
17 4703.23717096 418.746724941
18 4790.78266831 113.777606254
19 4783.07195431 108.151801332
================================================
FILE: hw1/log/HalfCheetah-v1_BC_30-01-2018_10-51-19/log.txt
================================================
Iteration AverageReturn StdReturn
0 3817.96495454 121.424448089
1 3936.04442016 149.995208225
2 3852.30571246 120.093945537
3 3906.3257817 137.039474081
4 3927.96557883 92.3172335412
5 3905.73231232 132.250735059
6 3894.78388609 104.606664587
7 3927.04549651 108.76612839
8 3891.86357872 120.74589099
9 3857.56540305 135.371682439
10 3876.91409242 118.576842405
11 3907.41133635 109.120138546
12 3877.17466 141.174229105
13 3875.88491827 149.663161746
14 3893.42180931 125.035365858
15 3927.75757089 114.108252154
16 3901.79379918 131.083160122
17 3884.49668982 145.973126754
18 3926.50703736 107.673754351
19 3902.56928807 121.983220478
================================================
FILE: hw1/log/HalfCheetah-v1_DA_30-01-2018_11-11-33/log.txt
================================================
Iteration AverageReturn StdReturn
0 3976.17700506 114.034047864
1 4080.46221353 84.1893455539
2 4133.95250959 81.0167961747
3 4101.48136021 94.9826005054
4 4135.3255688 80.7781973991
5 4140.00386975 97.4145974268
6 4127.59048502 98.0957605992
7 4136.09651306 87.8447357049
8 4168.61448977 58.1743328113
9 4175.37724354 76.5500116081
10 4152.01156376 100.515577257
11 4121.61130162 86.6234243495
12 4134.83157964 109.120217262
13 4128.08832282 70.0049383219
14 4142.06574835 99.4376440103
15 4145.22541853 68.6286481839
16 4164.73279931 89.1040051378
17 4096.95377812 90.370097307
18 4148.86261807 88.9318617818
19 4160.6740329 50.5316630273
================================================
FILE: hw1/log/Hopper-v1_BC_30-01-2018_10-55-39/log.txt
================================================
Iteration AverageReturn StdReturn
0 696.648776946 27.07925021
1 748.47888752 151.429815664
2 700.073941219 72.9177435482
3 731.887652211 113.92031914
4 715.92036383 112.632748448
5 702.746427086 72.807579689
6 737.028510731 118.896270982
7 753.175757248 123.58988432
8 728.528427466 88.1725040827
9 767.419942833 135.862982943
10 704.908491269 37.7345201986
11 820.057900984 162.74570912
12 722.067580433 95.4241626287
13 710.949850348 96.7033252778
14 721.164026913 74.5446602988
15 748.766636126 126.705015019
16 732.385777969 99.6523391183
17 732.09846089 121.161806134
18 754.424397187 137.548581585
19 718.219629462 97.6934148631
================================================
FILE: hw1/log/Hopper-v1_BCbig_30-01-2018_11-02-29/log.txt
================================================
Iteration AverageReturn StdReturn
0 1138.58195909 332.371517359
1 896.423291123 254.391048933
2 1151.49985307 695.596874896
3 933.454536471 275.828983102
4 1060.26470964 711.936395806
5 943.419330413 322.135059245
6 914.833224501 272.414134066
7 999.866949869 522.351021601
8 1061.79869625 347.475856152
9 1000.63941619 386.03117231
10 975.857956381 353.259897077
11 862.772266804 337.564889514
12 997.908818987 339.37936976
13 1161.14782133 389.451785869
14 994.682731442 313.7296743
15 1108.21639631 427.538386532
16 1014.65476481 321.52762664
17 1051.94580151 372.616078853
18 1072.21491361 383.494569749
19 1168.59860878 531.649394659
================================================
FILE: hw1/log/Hopper-v1_DA_30-01-2018_11-24-58/log.txt
================================================
Iteration AverageReturn StdReturn
0 1054.7477548 44.5027933049
1 2294.09469315 467.288023567
2 3707.44200499 260.030021057
3 3172.08454655 856.691867273
4 3778.9154217 4.68171457329
5 3775.94642069 2.83087791895
6 3776.23605724 3.29135654141
7 3776.0847846 4.88839549559
8 3775.58082393 3.35472305027
9 3776.7114718 3.99064616387
10 3775.31472267 2.87022549201
11 3776.24774355 2.89276807641
12 3775.22554377 3.05415386535
13 3775.01634936 4.56489670726
14 3777.48680881 3.09514046612
15 3776.23465168 4.09691179256
16 3777.64782077 4.50033528355
17 3775.7392206 3.79610240281
18 3777.38831693 3.00064812486
19 3775.72011362 4.24151812416
================================================
FILE: hw1/log/Humanoid-v1_BC_30-01-2018_10-28-53/log.txt
================================================
Iteration AverageReturn StdReturn
0 295.814730365 88.5813632879
1 238.349607493 76.3277190139
2 243.129245237 81.1495698412
3 259.916241087 57.306794984
4 247.425765644 56.0124802139
5 241.914222088 46.0838442162
6 253.931450644 63.2003431654
7 251.555778685 78.5263630011
8 268.208717827 64.7656412232
9 247.556079083 72.1923660942
10 251.774811937 69.0744558299
11 254.696568892 45.9480436897
12 253.242338361 76.8047371933
13 228.623935494 59.7224981115
14 229.862209508 57.370454754
15 229.956419335 55.6306188663
16 234.963972145 54.0542848773
17 265.384523529 68.401751621
18 254.279404448 82.3064869712
19 232.650468408 63.4344756441
================================================
FILE: hw1/log/Humanoid-v1_DA_30-01-2018_10-31-26/log.txt
================================================
Iteration AverageReturn StdReturn
0 277.763885265 74.5699695095
1 340.013799005 83.8174345466
2 281.151168831 58.8467520166
3 266.38751159 29.5122759836
4 273.078970953 22.275127559
5 245.575394751 18.4615901498
6 328.153923421 40.2691355568
7 388.722635173 78.4953628383
8 398.844472211 117.74492814
9 474.793752092 205.53054157
10 423.280078799 99.1227466344
11 500.104497995 187.592754877
12 454.008649777 133.582121407
13 508.123738555 171.632223784
14 464.06309332 136.519517931
15 505.570051114 158.128001299
16 537.379418303 162.645475863
17 496.810935012 105.635839043
18 539.956389194 170.934547794
19 500.060948891 133.550493957
================================================
FILE: hw1/log/Reacher-v1_BC_30-01-2018_10-57-25/log.txt
================================================
Iteration AverageReturn StdReturn
0 -9.14313732169 2.1057737525
1 -10.2464604937 2.91303780186
2 -9.36505170342 2.89134055057
3 -9.6073462012 2.38382175938
4 -9.0086068586 2.22443507996
5 -9.14235838387 1.41554386753
6 -9.32533608551 1.92161010184
7 -9.52716548162 2.15173210654
8 -9.18721318721 2.43739650362
9 -8.76734130122 1.87788078057
10 -9.31907915487 2.54378382632
11 -8.70922062576 1.71550860527
12 -9.42346594896 2.18220439629
13 -8.48235034653 1.9664693231
14 -9.52440599013 2.67361159834
15 -10.0461903527 2.14692809316
16 -10.4576060332 2.28085450116
17 -9.40975591137 2.4899395935
18 -9.7550098025 2.27262128127
19 -8.75621744838 1.7181797476
================================================
FILE: hw1/log/Reacher-v1_DA_30-01-2018_11-27-44/log.txt
================================================
Iteration AverageReturn StdReturn
0 -9.514077968 4.67060656967
1 -6.8483366701 2.22806332751
2 -7.28919411373 3.00020247685
3 -5.14896746284 1.78107674457
4 -4.61655540225 1.47905228634
5 -4.17923458664 1.21148217865
6 -4.23271202296 1.57480039431
7 -4.30797880136 1.56205175651
8 -3.86158286292 1.32237728902
9 -4.11994541821 1.39324099865
10 -4.33389428086 1.80342014308
11 -4.66312498685 1.19078597682
12 -3.88081879349 1.56953718566
13 -4.16823813172 1.25349473697
14 -3.78993609335 1.38466284504
15 -4.19153633158 1.7151942544
16 -3.35060677979 1.29864476833
17 -4.76815479509 1.75597118833
18 -4.48480997587 1.46280164822
19 -3.52572072065 1.45917975484
================================================
FILE: hw1/log/Walker2d-v1_BC_30-01-2018_10-58-02/log.txt
================================================
Iteration AverageReturn StdReturn
0 731.896679135 356.841079416
1 245.597115443 370.464617925
2 517.360851842 524.222617
3 375.242482521 396.621897565
4 586.719647307 560.506067304
5 452.02708465 572.73838478
6 315.448568014 432.609821525
7 605.402818104 488.550645007
8 399.929967675 379.255007212
9 662.501627219 561.574809086
10 464.163909044 642.583052804
11 572.401418705 554.032101995
12 446.562993296 583.800058368
13 408.118004614 472.033537121
14 341.961541246 468.061476134
15 537.260068744 668.650000523
16 455.874748994 496.489902983
17 403.340101964 466.662009166
18 391.087910426 636.457336138
19 596.027603618 501.401229289
================================================
FILE: hw1/log/Walker2d-v1_DA_30-01-2018_11-28-50/log.txt
================================================
Iteration AverageReturn StdReturn
0 672.000153601 623.031121462
1 1867.64727184 877.010515332
2 5434.69323044 45.0795878762
3 5421.01186299 47.2655173625
4 5454.79401462 90.7731058749
5 5453.22811906 123.152769513
6 5460.86284453 63.7242425634
7 5473.87194235 65.0663030077
8 5469.63486304 67.2099162387
9 5484.99459822 44.1331706997
10 5282.960857 866.813632308
11 5460.96627823 80.2270775643
12 5469.98407369 63.2927678597
13 5472.67758044 53.748518416
14 5283.16625756 864.037643147
15 5480.75283479 61.9244106372
16 5388.20256771 512.71497588
17 5085.95568353 984.286351731
18 5496.42654867 51.7899718995
19 5285.86858374 857.049898125
================================================
FILE: hw1/logz.py
================================================
import json
"""
Some simple logging functionality, inspired by rllab's logging.
Assumes that each diagnostic gets logged each iteration
Call logz.configure_output_dir() to start logging to a
tab-separated-values file (some_folder_name/log.txt)
To load the learning curves, you can do, for example
A = np.genfromtxt('/tmp/expt_1468984536/log.txt',delimiter='\t',dtype=None, names=True)
A['EpRewMean']
"""
import os.path as osp, shutil, time, atexit, os, subprocess
import pickle
import tensorflow as tf
# ANSI foreground color codes, keyed by human-readable name.
color2num = dict(
    gray=30, red=31, green=32, yellow=33, blue=34,
    magenta=35, cyan=36, white=37, crimson=38,
)

def colorize(string, color, bold=False, highlight=False):
    """Wrap `string` in ANSI escape codes for terminal coloring.

    `highlight` shifts the code by +10 (bright/background variant) and
    `bold` appends the bold attribute.
    """
    code = color2num[color] + (10 if highlight else 0)
    attrs = [str(code)]
    if bold:
        attrs.append('1')
    return '\x1b[%sm%s\x1b[0m' % (';'.join(attrs), string)
class G:
    """Module-level logging state; used as a namespace via class attributes."""
    output_dir = None       # directory currently being logged to
    output_file = None      # open handle to <output_dir>/log.txt
    first_row = True        # True until the first dump_tabular() completes
    log_headers = []        # column names, fixed by the first iteration
    log_current_row = {}    # values accumulated for the current iteration
def configure_output_dir(d=None):
    """
    Set output directory to d, or to /tmp/somerandomnumber if d is None
    """
    G.output_dir = d or "/tmp/experiments/%i"%int(time.time())
    if osp.exists(G.output_dir):
        # NOTE(review): this only warns -- log.txt below is still opened in
        # 'w' mode and will overwrite any previous log in this directory.
        print("Log dir %s already exists! Delete it first or use a different dir"%G.output_dir)
    else:
        os.makedirs(G.output_dir)
    G.output_file = open(osp.join(G.output_dir, "log.txt"), 'w')
    # ensure the log file is closed (and flushed) at interpreter exit
    atexit.register(G.output_file.close)
    print(colorize("Logging data to %s"%G.output_file.name, 'green', bold=True))
def log_tabular(key, val):
    """Record one diagnostic value for the current iteration.

    The first iteration fixes the set of column headers; later iterations
    may only log keys seen then, and each key at most once per iteration
    (until dump_tabular() clears the row).
    """
    if not G.first_row:
        assert key in G.log_headers, "Trying to introduce a new key %s that you didn't include in the first iteration"%key
    else:
        G.log_headers.append(key)
    assert key not in G.log_current_row, "You already set %s this iteration. Maybe you forgot to call dump_tabular()"%key
    G.log_current_row[key] = val
def save_params(params):
    """Serialize the experiment's hyper-parameters to params.json in the log dir."""
    params_path = osp.join(G.output_dir, "params.json")
    with open(params_path, 'w') as out:
        json.dump(params, out, separators=(',\n', '\t:\t'), sort_keys=True)
def pickle_tf_vars():
    """
    Saves tensorflow variables
    Requires them to be initialized first, also a default session must exist
    """
    # snapshot name -> value for every global variable via the default session
    _dict = {v.name : v.eval() for v in tf.global_variables()}
    with open(osp.join(G.output_dir, "vars.pkl"), 'wb') as f:
        pickle.dump(_dict, f)
def dump_tabular():
    """
    Write all of the diagnostics from the current iteration

    Prints an aligned table to stdout and appends one tab-separated row to
    log.txt (with a header row the first time), then clears the current row.
    """
    vals = []
    # column width: at least 15 chars, or the longest header
    key_lens = [len(key) for key in G.log_headers]
    max_key_len = max(15,max(key_lens))
    keystr = '%'+'%d'%max_key_len
    fmt = "| " + keystr + "s | %15s |"
    n_slashes = 22 + max_key_len
    print("-"*n_slashes)
    for key in G.log_headers:
        # missing keys render as an empty cell
        val = G.log_current_row.get(key, "")
        # anything float-convertible is shown in compact %8.3g form
        if hasattr(val, "__float__"): valstr = "%8.3g"%val
        else: valstr = val
        print(fmt%(key, valstr))
        vals.append(val)
    print("-"*n_slashes)
    if G.output_file is not None:
        if G.first_row:
            # header row, written exactly once per file
            G.output_file.write("\t".join(G.log_headers))
            G.output_file.write("\n")
        G.output_file.write("\t".join(map(str,vals)))
        G.output_file.write("\n")
        # flush so partial results survive a crash
        G.output_file.flush()
    G.log_current_row.clear()
    G.first_row=False
================================================
FILE: hw1/plot.py
================================================
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import json
import os
"""
Using the plotter:
Call it from the command line, and supply it with logdirs to experiments.
Suppose you ran an experiment with name 'test', and you ran 'test' for 10
random seeds. The runner code stored it in the directory structure
data
L test_EnvName_DateTime
L 0
L log.txt
L params.json
L 1
L log.txt
L params.json
.
.
.
L 9
L log.txt
L params.json
To plot learning curves from the experiment, averaged over all random
seeds, call
python plot.py data/test_EnvName_DateTime --value AverageReturn
and voila. To see a different statistics, change what you put in for
the keyword --value. You can also enter /multiple/ values, and it will
make all of them in order.
Suppose you ran two experiments: 'test1' and 'test2'. In 'test2' you tried
a different set of hyperparameters from 'test1', and now you would like
to compare them -- see their learning curves side-by-side. Just call
python plot.py data/test1 data/test2
and it will plot them both! They will be given titles in the legend according
to their exp_name parameters. If you want to use custom legend titles, use
the --legend flag and then provide a title for each logdir.
"""
def plot_data(data, value="AverageReturn"):
    """Plot `value` vs. Iteration with seaborn and show the figure.

    Runs sharing a 'Condition' are grouped into one line, averaged over the
    'Unit' column (tsplot semantics).

    Args:
        data: a DataFrame or a list of DataFrames (concatenated if a list).
        value: name of the column to plot on the y-axis.
    """
    if isinstance(data, list):
        data = pd.concat(data, ignore_index=True)
    sns.set(style="darkgrid", font_scale=1.5)
    sns.tsplot(data=data, time="Iteration", value=value, unit="Unit", condition="Condition")
    plt.legend(loc='best').draggable()
    plt.show()
def get_datasets(fpath, condition=None):
    """Collect every log.txt under `fpath` into a list of DataFrames.

    Each DataFrame gets a 'Unit' column (run index, in discovery order) and
    a 'Condition' column (`condition`, or a label derived from the path) so
    seaborn can group and average the runs.

    Args:
        fpath: experiment directory; when `condition` is None the legend
            label is parsed from a "<prefix>/<EnvName>_<ExpName>..." layout,
            so fpath must then contain at least one "/" and one "_".
        condition: optional explicit legend label.

    Returns:
        list of pandas DataFrames, one per discovered log.txt.
    """
    unit = 0
    datasets = []
    # `dirs` intentionally unused (was previously named `dir`, shadowing the
    # builtin); the dead `flag` variable was removed.
    for root, _dirs, files in os.walk(fpath):
        if 'log.txt' not in files:
            continue
        # param_path = open(os.path.join(root,'params.json'))
        # params = json.load(param_path)
        # exp_name = params['exp_name']
        # derive a label like "<EnvName>_<ExpName>" from the directory name
        parts = fpath.split("_")
        exp_name = parts[0].split("/")[1] + "_" + parts[1]
        log_path = os.path.join(root, 'log.txt')
        experiment_data = pd.read_table(log_path)
        experiment_data.insert(len(experiment_data.columns), 'Unit', unit)
        experiment_data.insert(len(experiment_data.columns), 'Condition',
                               condition or exp_name)
        datasets.append(experiment_data)
        unit += 1
    return datasets
def main():
    """CLI entry point: parse log directories, load their data, and plot."""
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('logdir', nargs='*')
    parser.add_argument('--legend', nargs='*')
    parser.add_argument('--value', default='AverageReturn', nargs='*')
    args = parser.parse_args()

    # custom legend titles must pair up one-to-one with the log dirs
    use_legend = args.legend is not None
    if use_legend:
        assert len(args.legend) == len(args.logdir), \
            "Must give a legend title for each set of experiments."

    data = []
    if use_legend:
        for run_dir, title in zip(args.logdir, args.legend):
            data.extend(get_datasets(run_dir, title))
    else:
        for run_dir in args.logdir:
            data.extend(get_datasets(run_dir))

    # --value is a plain string when defaulted, a list when given on the CLI
    values = args.value if isinstance(args.value, list) else [args.value]
    for value in values:
        plot_data(data, value=value)
================================================
FILE: hw1/run_expert.py
================================================
#!/usr/bin/env python
"""
Code to load an expert policy and generate roll-out data for behavioral cloning.
Example usage:
python run_expert.py experts/Humanoid-v1.pkl Humanoid-v1 --render \
--num_rollouts 20
Author of this script and included expert policies: Jonathan Ho (hoj@openai.com)
"""
import pickle
import tensorflow as tf
import numpy as np
import tf_util
import gym
import load_policy
import os
def main():
    """Roll out a pre-trained expert policy and save (observation, action) pairs.

    Parses the expert pickle path and gym env name from the command line, runs
    `--num_rollouts` episodes (optionally rendering), prints return statistics,
    and writes the collected arrays to data/<envname>.train.npz for behavioral
    cloning.

    Fix: the --render flag was parsed and advertised in the module docstring
    but the env.render() call had been commented out; it is restored.
    """
    import argparse
    parser = argparse.ArgumentParser()
    parser.add_argument('expert_policy_file', type=str)
    parser.add_argument('envname', type=str)
    parser.add_argument('--render', action='store_true')
    parser.add_argument("--max_timesteps", type=int)
    parser.add_argument('--num_rollouts', type=int, default=20,
                        help='Number of expert roll outs')
    args = parser.parse_args()

    print('loading and building expert policy')
    policy_fn = load_policy.load_policy(args.expert_policy_file)
    print('loaded and built')

    with tf.Session():
        tf_util.initialize()

        env = gym.make(args.envname)
        max_steps = args.max_timesteps or env.spec.timestep_limit
        print('max_steps:', max_steps)

        returns = []
        observations = []
        actions = []
        for i in range(args.num_rollouts):
            print('iter', i)
            obs = env.reset()
            done = False
            totalr = 0.
            steps = 0
            while not done:
                action = policy_fn(obs[None, :])
                observations.append(obs)
                actions.append(action)
                obs, r, done, _ = env.step(action)
                totalr += r
                steps += 1
                if args.render:
                    env.render()
                if steps % 100 == 0:
                    print("%i/%i" % (steps, max_steps))
                if steps >= max_steps:
                    break
            returns.append(totalr)

        print('returns', returns)
        print('mean return', np.mean(returns))
        print('std of return', np.std(returns))

        # e.g. observations: (1000, 11); actions: (1000, 1, 3)
        expert_data = {'observations': np.array(observations),
                       'actions': np.array(actions)}

        # Save next to this script so the training code can find it.
        PROJECT_ROOT = os.path.dirname(os.path.realpath(__file__))
        save_dir = os.path.join(PROJECT_ROOT, "data/")
        out = os.path.join(save_dir, args.envname + '.train')
        np.savez(out, expert_data['observations'], expert_data['actions'])
        print("finished")


if __name__ == '__main__':
    main()
================================================
FILE: hw1/tf_util.py
================================================
import numpy as np
import tensorflow as tf # pylint: ignore-module
#import builtins
import functools
import copy
import os
import collections
# ================================================================
# Import all names into common namespace
# ================================================================
clip = tf.clip_by_value
# Make consistent with numpy
# ----------------------------------------
def sum(x, axis=None, keepdims=False):
    """Numpy-style sum: reduce over `axis`, or over all axes when axis is None."""
    axes = None if axis is None else [axis]
    return tf.reduce_sum(x, reduction_indices=axes, keep_dims=keepdims)
def mean(x, axis=None, keepdims=False):
    """Numpy-style mean: reduce over `axis`, or over all axes when axis is None."""
    axes = None if axis is None else [axis]
    return tf.reduce_mean(x, reduction_indices=axes, keep_dims=keepdims)
def var(x, axis=None, keepdims=False):
    """Numpy-style variance: mean squared deviation from the mean along `axis`."""
    centered = x - mean(x, axis=axis, keepdims=keepdims)
    return mean(tf.square(centered), axis=axis, keepdims=keepdims)
def std(x, axis=None, keepdims=False):
    """Numpy-style standard deviation: square root of `var` along `axis`."""
    variance = var(x, axis=axis, keepdims=keepdims)
    return tf.sqrt(variance)
def max(x, axis=None, keepdims=False):
    """Numpy-style max: reduce over `axis`, or over all axes when axis is None."""
    axes = None if axis is None else [axis]
    return tf.reduce_max(x, reduction_indices=axes, keep_dims=keepdims)
def min(x, axis=None, keepdims=False):
    """Numpy-style min: reduce over `axis`, or over all axes when axis is None."""
    axes = None if axis is None else [axis]
    return tf.reduce_min(x, reduction_indices=axes, keep_dims=keepdims)
def concatenate(arrs, axis=0):
    """Numpy-style concatenation (note: old tf.concat argument order, axis first)."""
    return tf.concat(axis, arrs)
def argmax(x, axis=None):
    """Numpy-style argmax along `axis` (old TF `dimension` keyword)."""
    return tf.argmax(x, dimension=axis)
def switch(condition, then_expression, else_expression):
    """Select between two same-shaped tensors based on a scalar condition.

    Args:
        condition: scalar tensor (int or bool); cast to bool.
        then_expression: TensorFlow operation.
        else_expression: TensorFlow operation, same shape as then_expression.
    """
    static_shape = copy.copy(then_expression.get_shape())
    pred = tf.cast(condition, 'bool')
    out = tf.cond(pred,
                  lambda: then_expression,
                  lambda: else_expression)
    # tf.cond loses static shape information; restore it from the then-branch.
    out.set_shape(static_shape)
    return out
# Extras
# ----------------------------------------
def l2loss(params):
    """Sum of squared entries over all tensors in `params` (0.0 for an empty list)."""
    if not params:
        return tf.constant(0.0)
    return tf.add_n([sum(tf.square(p)) for p in params])
def lrelu(x, leak=0.2):
    """Leaky ReLU expressed as a single linear combination of x and |x|."""
    pos_coef = 0.5 * (1 + leak)
    neg_coef = 0.5 * (1 - leak)
    return pos_coef * x + neg_coef * abs(x)
def categorical_sample_logits(X):
    """Sample one index per row from unnormalized log-probs via the Gumbel-max trick."""
    # https://github.com/tensorflow/tensorflow/issues/456
    gumbel_noise = -tf.log(-tf.log(tf.random_uniform(tf.shape(X))))
    return argmax(X + gumbel_noise, axis=1)
# ================================================================
# Global session
# ================================================================
def get_session():
    """Return the current default TF session."""
    return tf.get_default_session()
def single_threaded_session():
    """Build a TF session restricted to one inter-op and one intra-op thread."""
    config = tf.ConfigProto(inter_op_parallelism_threads=1,
                            intra_op_parallelism_threads=1)
    return tf.Session(config=config)
def make_session(num_cpu):
    """Build a TF session using `num_cpu` threads for both op-level thread pools."""
    config = tf.ConfigProto(inter_op_parallelism_threads=num_cpu,
                            intra_op_parallelism_threads=num_cpu)
    return tf.Session(config=config)
# Variables already initialized by a previous initialize() call.
ALREADY_INITIALIZED = set()


def initialize():
    """Initialize only the TF variables not yet covered by a previous call."""
    fresh = set(tf.all_variables()) - ALREADY_INITIALIZED
    get_session().run(tf.initialize_variables(fresh))
    ALREADY_INITIALIZED.update(fresh)
def eval(expr, feed_dict=None):
    """Run `expr` in the default session with an optional feed dict.

    Note: intentionally shadows the builtin `eval` within this module's API.
    """
    feeds = {} if feed_dict is None else feed_dict
    return get_session().run(expr, feed_dict=feeds)
def set_value(v, val):
    """Assign `val` to TF variable `v` via the default session."""
    assign_op = v.assign(val)
    get_session().run(assign_op)
def load_state(fname):
    """Restore all saved variables from checkpoint `fname` into the default session."""
    tf.train.Saver().restore(get_session(), fname)
def save_state(fname):
    """Save all variables to checkpoint `fname`, creating parent directories as needed."""
    os.makedirs(os.path.dirname(fname), exist_ok=True)
    tf.train.Saver().save(get_session(), fname)
# ================================================================
# Model components
# ================================================================
def normc_initializer(std=1.0):
    """Initializer drawing Gaussian weights rescaled so each column has norm `std`."""
    def _initializer(shape, dtype=None, partition_info=None):  # pylint: disable=W0613
        weights = np.random.randn(*shape).astype(np.float32)
        weights *= std / np.sqrt(np.square(weights).sum(axis=0, keepdims=True))
        return tf.constant(weights)
    return _initializer
def conv2d(x, num_filters, name, filter_size=(3, 3), stride=(1, 1), pad="SAME", dtype=tf.float32, collections=None,
           summary_tag=None):
    """2-D convolution layer with Glorot-style uniform weight initialization.

    Args:
        x: input tensor; channel count is read from x.get_shape()[3] (NHWC assumed — TODO confirm).
        num_filters: number of output channels.
        name: variable scope under which W and b are created.
        filter_size, stride: (height, width) pairs.
        pad: tf.nn.conv2d padding mode ("SAME"/"VALID").
        collections: variable collections for W and b.
        summary_tag: if set, also emits an image summary of the filters.
    """
    with tf.variable_scope(name):
        stride_shape = [1, stride[0], stride[1], 1]
        filter_shape = [filter_size[0], filter_size[1], int(x.get_shape()[3]), num_filters]

        # there are "num input feature maps * filter height * filter width"
        # inputs to each hidden unit
        fan_in = intprod(filter_shape[:3])
        # each unit in the lower layer receives a gradient from:
        # "num output feature maps * filter height * filter width" /
        # pooling size
        fan_out = intprod(filter_shape[:2]) * num_filters
        # initialize weights with random weights drawn uniformly from
        # [-w_bound, w_bound], the Glorot/Xavier bound.
        w_bound = np.sqrt(6. / (fan_in + fan_out))

        w = tf.get_variable("W", filter_shape, dtype, tf.random_uniform_initializer(-w_bound, w_bound),
                            collections=collections)
        b = tf.get_variable("b", [1, 1, 1, num_filters], initializer=tf.zeros_initializer,
                            collections=collections)

        if summary_tag is not None:
            # One grayscale image per input-channel/filter pair.
            tf.image_summary(summary_tag,
                             tf.transpose(tf.reshape(w, [filter_size[0], filter_size[1], -1, 1]),
                                          [2, 0, 1, 3]),
                             max_images=10)

        return tf.nn.conv2d(x, w, stride_shape, pad) + b
def dense(x, size, name, weight_init=None, bias=True):
    """Fully-connected layer: x @ W (+ b when `bias`), variables named under `name`."""
    w = tf.get_variable(name + "/w", [x.get_shape()[1], size], initializer=weight_init)
    out = tf.matmul(x, w)
    if not bias:
        return out
    b = tf.get_variable(name + "/b", [size], initializer=tf.zeros_initializer)
    return out + b
def wndense(x, size, name, init_scale=1.0):
    """Dense layer with weight normalization (Salimans & Kingma, 2016)."""
    v = tf.get_variable(name + "/V", [int(x.get_shape()[1]), size],
                        initializer=tf.random_normal_initializer(0, 0.05))
    g = tf.get_variable(name + "/g", [size], initializer=tf.constant_initializer(init_scale))
    b = tf.get_variable(name + "/b", [size], initializer=tf.constant_initializer(0.0))
    # Direction comes from V; per-unit magnitude is g / ||V||.
    xv = tf.matmul(x, v)
    scaler = g / tf.sqrt(sum(tf.square(v), axis=0, keepdims=True))
    return tf.reshape(scaler, [1, size]) * xv + tf.reshape(b, [1, size])
def densenobias(x, size, name, weight_init=None):
    """Dense layer without a bias term; see `dense`."""
    return dense(x, size, name, weight_init=weight_init, bias=False)
def dropout(x, pkeep, phase=None, mask=None):
    """Dropout keeping each unit with probability `pkeep`.

    With `phase` given, switches between the masked input (train) and the
    deterministically scaled input pkeep*x (test).
    """
    if mask is None:
        # floor(pkeep + U[0,1)) is 1 with probability pkeep, else 0.
        mask = tf.floor(pkeep + tf.random_uniform(tf.shape(x)))
    return mask * x if phase is None else switch(phase, mask * x, pkeep * x)
def batchnorm(x, name, phase, updates, gamma=0.96):
    """Batch normalization with exponentially-averaged running statistics.

    Args:
        x: (batch, k) input tensor.
        name: variable-name prefix for running stats and affine parameters.
        phase: scalar switch — batch statistics (train) vs. running stats (test).
        updates: list the running-stat assign ops are appended to; the caller
            is responsible for running them.
        gamma: decay rate of the running averages.
    """
    k = x.get_shape()[1]
    runningmean = tf.get_variable(name+"/mean", shape=[1, k], initializer=tf.constant_initializer(0.0), trainable=False)
    runningvar = tf.get_variable(name+"/var", shape=[1, k], initializer=tf.constant_initializer(1e-4), trainable=False)
    testy = (x - runningmean) / tf.sqrt(runningvar)
    mean_ = mean(x, axis=0, keepdims=True)
    # NOTE(review): this is the raw second moment E[x^2], not the centered
    # variance E[(x - mean)^2] — confirm this is intentional.
    var_ = mean(tf.square(x), axis=0, keepdims=True)
    std = tf.sqrt(var_)
    trainy = (x - mean_) / std
    updates.extend([
        tf.assign(runningmean, runningmean * gamma + mean_ * (1 - gamma)),
        tf.assign(runningvar, runningvar * gamma + var_ * (1 - gamma))
    ])
    y = switch(phase, trainy, testy)
    # Learned per-feature affine transform (scale then shift).
    out = y * tf.get_variable(name+"/scaling", shape=[1, k], initializer=tf.constant_initializer(1.0), trainable=True)\
        + tf.get_variable(name+"/translation", shape=[1,k], initializer=tf.constant_initializer(0.0), trainable=True)
    return out
# ================================================================
# Basic Stuff
# ================================================================
def function(inputs, outputs, updates=None, givens=None):
    """theano.function-like factory: returns a callable evaluating `outputs`.

    `outputs` may be a single tensor, a list, or an (ordered) dict; the
    return structure of the resulting callable mirrors it.
    """
    if isinstance(outputs, list):
        return _Function(inputs, outputs, updates, givens=givens)
    if isinstance(outputs, (dict, collections.OrderedDict)):
        f = _Function(inputs, outputs.values(), updates, givens=givens)
        return lambda *args: type(outputs)(zip(outputs.keys(), f(*args)))
    f = _Function(inputs, [outputs], updates, givens=givens)
    return lambda *args: f(*args)[0]
class _Function(object):
    """Callable wrapping a single session.run of `outputs` plus grouped `updates`."""
    def __init__(self, inputs, outputs, updates, givens, check_nan=False):
        # Inputs must be placeholders (ops with no inputs of their own).
        assert all(len(i.op.inputs)==0 for i in inputs), "inputs should all be placeholders"
        self.inputs = inputs
        updates = updates or []
        self.update_group = tf.group(*updates)
        # Outputs and the update group run in one session.run call; the group
        # result is sliced off again in __call__.
        self.outputs_update = list(outputs) + [self.update_group]
        self.givens = {} if givens is None else givens
        self.check_nan = check_nan
    def __call__(self, *inputvals):
        assert len(inputvals) == len(self.inputs)
        feed_dict = dict(zip(self.inputs, inputvals))
        feed_dict.update(self.givens)
        # [:-1] drops the trailing update-group result.
        results = get_session().run(self.outputs_update, feed_dict=feed_dict)[:-1]
        if self.check_nan:
            if any(np.isnan(r).any() for r in results):
                raise RuntimeError("Nan detected")
        return results
def mem_friendly_function(nondata_inputs, data_inputs, outputs, batch_size):
    """Like `function`, but feeds `data_inputs` in minibatches and averages results."""
    if isinstance(outputs, list):
        return _MemFriendlyFunction(nondata_inputs, data_inputs, outputs, batch_size)
    f = _MemFriendlyFunction(nondata_inputs, data_inputs, [outputs], batch_size)
    return lambda *args: f(*args)[0]
class _MemFriendlyFunction(object):
    """Evaluates `outputs` over `data_inputs` in minibatches, averaging over rows."""
    def __init__(self, nondata_inputs, data_inputs, outputs, batch_size):
        self.nondata_inputs = nondata_inputs
        self.data_inputs = data_inputs
        self.outputs = list(outputs)
        self.batch_size = batch_size
    def __call__(self, *inputvals):
        assert len(inputvals) == len(self.nondata_inputs) + len(self.data_inputs)
        nondata_vals = inputvals[0:len(self.nondata_inputs)]
        data_vals = inputvals[len(self.nondata_inputs):]
        # Non-data inputs are fed whole on every minibatch.
        feed_dict = dict(zip(self.nondata_inputs, nondata_vals))
        n = data_vals[0].shape[0]
        # All data inputs must share the same leading (row) dimension.
        for v in data_vals[1:]:
            assert v.shape[0] == n
        for i_start in range(0, n, self.batch_size):
            slice_vals = [v[i_start:min(i_start+self.batch_size, n)] for v in data_vals]
            for (var,val) in zip(self.data_inputs, slice_vals):
                feed_dict[var]=val
            results = tf.get_default_session().run(self.outputs, feed_dict=feed_dict)
            if i_start==0:
                sum_results = results
            else:
                for i in range(len(results)):
                    sum_results[i] = sum_results[i] + results[i]
        # NOTE(review): dividing the accumulated totals by n assumes each
        # output is a per-minibatch *sum*; also raises NameError when n == 0 —
        # confirm callers never pass empty data.
        for i in range(len(results)):
            sum_results[i] = sum_results[i] / n
        return sum_results
# ================================================================
# Modules
# ================================================================
class Module(object):
    """Caching, scope-reusing wrapper for graph-building functions.

    The first call builds variables under a fresh variable scope `name`;
    subsequent calls reuse that scope. Outputs are memoized per args tuple.
    """
    def __init__(self, name):
        self.name = name
        self.first_time = True  # controls variable_scope reuse on later calls
        self.scope = None       # resolved scope name, set on first call
        self.cache = {}         # args tuple -> previously built output
    def __call__(self, *args):
        if args in self.cache:
            print("(%s) retrieving value from cache"%self.name)
            return self.cache[args]
        with tf.variable_scope(self.name, reuse=not self.first_time):
            scope = tf.get_variable_scope().name
            if self.first_time:
                self.scope = scope
                print("(%s) running function for the first time"%self.name)
            else:
                assert self.scope == scope, "Tried calling function with a different scope"
                print("(%s) running function on new inputs"%self.name)
            self.first_time = False
            out = self._call(*args)
            self.cache[args] = out
            return out
    def _call(self, *args):
        # Subclasses implement the actual graph construction here.
        raise NotImplementedError
    @property
    def trainable_variables(self):
        assert self.scope is not None, "need to call module once before getting variables"
        return tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, self.scope)
    @property
    def variables(self):
        assert self.scope is not None, "need to call module once before getting variables"
        return tf.get_collection(tf.GraphKeys.VARIABLES, self.scope)
def module(name):
    """Decorator factory turning a graph-building function into a cached `Module`.

    Fix: the original applied `@functools.wraps` *without* arguments to the
    inner `wrapper`, so `module(name)` returned
    `functools.partial(functools.update_wrapper, wrapped=wrapper)` instead of
    the decorator — decorating a function with it returned the function
    unchanged rather than a WrapperModule. `wraps` is dropped; it cannot apply
    here anyway since the decorator returns a Module instance, not a function.
    """
    def wrapper(f):
        class WrapperModule(Module):
            def _call(self, *args):
                return f(*args)
        return WrapperModule(name)
    return wrapper
# ================================================================
# Graph traversal
# ================================================================
VARIABLES = {}


def get_parents(node):
    """Return the input tensors feeding `node`'s op."""
    return node.op.inputs
def topsorted(outputs):
    """
    Topological sort via non-recursive depth-first search

    Returns every node reachable from `outputs` (following op inputs via
    get_parents) ordered so that each node's parents precede it.
    Raises ValueError when a cycle is detected.
    """
    assert isinstance(outputs, (list,tuple))
    marks = {}
    out = []
    stack = [] #pylint: disable=W0621
    # stack entries are (i, jidx) pairs:
    # i: node
    # jidx = number of children visited so far from that node
    # marks: state of each node, which is one of
    #   0: haven't visited
    #   1: have visited, but not done visiting children
    #   2: done visiting children
    for x in outputs:
        stack.append((x,0))
    while stack:
        (i,jidx) = stack.pop()
        if jidx == 0:
            m = marks.get(i,0)
            if m == 0:
                marks[i] = 1
            elif m == 1:
                # Re-entered a node whose children are still being visited.
                raise ValueError("not a dag")
            else:
                continue
        ps = get_parents(i)
        if jidx == len(ps):
            # All parents emitted; node is finished.
            marks[i] = 2
            out.append(i)
        else:
            # Re-push this node with the next child index, then descend.
            stack.append((i,jidx+1))
            j = ps[jidx]
            stack.append((j,0))
    return out
# ================================================================
# Flat vectors
# ================================================================
def var_shape(x):
    """Return x's static shape as a list of ints; asserts every dim is known."""
    dims = [dim.value for dim in x.get_shape()]
    assert all(isinstance(d, int) for d in dims), \
        "shape function assumes that shape is fully known"
    return dims
def numel(x):
    """Total number of elements in tensor `x` (product of its static shape)."""
    return intprod(var_shape(x))
def intprod(x):
    """Product of the entries of `x`, coerced to a Python int (1 for empty input)."""
    product = np.prod(x)
    return int(product)
def flatgrad(loss, var_list):
    """Gradient of `loss` w.r.t. `var_list`, flattened into one 1-D tensor."""
    grads = tf.gradients(loss, var_list)
    flat_parts = [tf.reshape(g, [numel(v)]) for (v, g) in zip(var_list, grads)]
    return tf.concat(0, flat_parts)
class SetFromFlat(object):
    """Op that scatters a flat parameter vector back into a list of variables.

    Fix: the original initialized `assigns = []` twice; the first, dead
    assignment is removed.
    """
    def __init__(self, var_list, dtype=tf.float32):
        shapes = list(map(var_shape, var_list))
        total_size = np.sum([intprod(shape) for shape in shapes])
        # One placeholder holds the whole flat vector; each variable is
        # assigned its reshaped slice.
        self.theta = theta = tf.placeholder(dtype, [total_size])
        start = 0
        assigns = []
        for (shape, v) in zip(shapes, var_list):
            size = intprod(shape)
            assigns.append(tf.assign(v, tf.reshape(theta[start:start + size], shape)))
            start += size
        self.op = tf.group(*assigns)
    def __call__(self, theta):
        get_session().run(self.op, feed_dict={self.theta: theta})
class GetFlat(object):
    """Op that reads all variables in `var_list` concatenated into one flat vector."""
    def __init__(self, var_list):
        flat_parts = [tf.reshape(v, [numel(v)]) for v in var_list]
        self.op = tf.concat(0, flat_parts)
    def __call__(self):
        return get_session().run(self.op)
# ================================================================
# Misc
# ================================================================
def fancy_slice_2d(X, inds0, inds1):
    """
    like numpy X[inds0, inds1]
    XXX this implementation is bad
    """
    rows = tf.cast(inds0, tf.int64)
    cols = tf.cast(inds1, tf.int64)
    ncols = tf.cast(tf.shape(X), tf.int64)[1]
    # Flatten X and gather by linear index rows * ncols + cols.
    return tf.gather(tf.reshape(X, [-1]), rows * ncols + cols)
def scope_vars(scope, trainable_only):
    """
    Get variables inside a scope
    The scope can be specified as a string
    """
    key = tf.GraphKeys.TRAINABLE_VARIABLES if trainable_only else tf.GraphKeys.VARIABLES
    scope_name = scope if isinstance(scope, str) else scope.name
    return tf.get_collection(key, scope=scope_name)
def lengths_to_mask(lengths_b, max_length):
    """
    Turns a vector of lengths into a boolean mask

    Args:
        lengths_b: an integer vector of lengths
        max_length: maximum length to fill the mask

    Returns:
        a boolean array of shape (batch_size, max_length)
        row[i] consists of True repeated lengths_b[i] times, followed by False
    """
    lengths_b = tf.convert_to_tensor(lengths_b)
    assert lengths_b.get_shape().ndims == 1
    # Broadcast a row of positions [0..max_length) against each length.
    positions = tf.expand_dims(tf.range(max_length), 0)
    return positions < tf.expand_dims(lengths_b, 1)
def in_session(f):
    """Decorator running `f` inside a fresh default TF session."""
    @functools.wraps(f)
    def newfunc(*args, **kwargs):
        with tf.Session():
            f(*args, **kwargs)
    return newfunc
_PLACEHOLDER_CACHE = {} # name -> (placeholder, dtype, shape)


def get_placeholder(name, dtype, shape):
    """Return a cached placeholder for `name`, creating it on first use.

    Asserts that repeated requests for the same name use the same dtype/shape.
    """
    print("calling get_placeholder", name)
    cached = _PLACEHOLDER_CACHE.get(name)
    if cached is not None:
        out, dtype1, shape1 = cached
        assert dtype1 == dtype and shape1 == shape
        return out
    out = tf.placeholder(dtype=dtype, shape=shape, name=name)
    _PLACEHOLDER_CACHE[name] = (out, dtype, shape)
    return out
def get_placeholder_cached(name):
    """Look up a placeholder previously created via get_placeholder (KeyError if absent)."""
    return _PLACEHOLDER_CACHE[name][0]
def flattenallbut0(x):
    """Flatten every axis of `x` except the leading (batch) axis."""
    trailing = intprod(x.get_shape().as_list()[1:])
    return tf.reshape(x, [-1, trailing])
def reset():
    """Clear the placeholder/variable caches and reset the default TF graph."""
    global _PLACEHOLDER_CACHE
    global VARIABLES
    _PLACEHOLDER_CACHE = {}
    VARIABLES = {}
    tf.reset_default_graph()
================================================
FILE: hw2/.idea/hw2.iml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<module type="PYTHON_MODULE" version="4">
<component name="NewModuleRootManager">
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="TestRunnerService">
<option name="projectConfiguration" value="Nosetests" />
<option name="PROJECT_TEST_RUNNER" value="Nosetests" />
</component>
</module>
================================================
FILE: hw2/.idea/misc.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.6.2 (~/anaconda3/bin/python)" project-jdk-type="Python SDK" />
</project>
================================================
FILE: hw2/.idea/modules.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/hw2.iml" filepath="$PROJECT_DIR$/.idea/hw2.iml" />
</modules>
</component>
</project>
================================================
FILE: hw2/.idea/workspace.xml
================================================
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ChangeListManager">
<list default="true" id="1f7ae058-573d-4474-ba07-210bddc97818" name="Default" comment="" />
<option name="EXCLUDED_CONVERTED_TO_IGNORED" value="true" />
<option name="TRACKING_ENABLED" value="true" />
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="FileEditorManager">
<leaf>
<file leaf-file-name="train_pg.py" pinned="false" current-in-tab="false">
<entry file="file://$PROJECT_DIR$/train_pg.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-1667">
<caret line="265" column="26" lean-forward="false" selection-start-line="265" selection-start-column="26" selection-end-line="265" selection-end-column="26" />
<folding>
<element signature="e#0#18#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
<file leaf-file-name="plot.py" pinned="false" current-in-tab="true">
<entry file="file://$PROJECT_DIR$/plot.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="694">
<caret line="117" column="5" lean-forward="false" selection-start-line="117" selection-start-column="5" selection-end-line="117" selection-end-column="5" />
<folding>
<element signature="e#0#21#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</file>
</leaf>
</component>
<component name="FindInProjectRecents">
<findStrings>
<find>ob_no</find>
<find>loss</find>
<find>#bl2</find>
<find>build_mlp</find>
</findStrings>
</component>
<component name="IdeDocumentHistory">
<option name="CHANGED_PATHS">
<list>
<option value="$PROJECT_DIR$/train_pg.py" />
</list>
</option>
</component>
<component name="ProjectFrameBounds">
<option name="x" value="1" />
<option name="width" value="1505" />
<option name="height" value="1070" />
</component>
<component name="ProjectView">
<navigator currentView="ProjectPane" proportions="" version="1">
<flattenPackages />
<showMembers />
<showModules />
<showLibraryContents />
<hideEmptyPackages />
<abbreviatePackageNames />
<autoscrollToSource />
<autoscrollFromSource />
<sortByType />
<manualOrder />
<foldersAlwaysOnTop value="true" />
</navigator>
<panes>
<pane id="ProjectPane">
<subPane>
<expand>
<path>
<item name="hw2" type="b2602c69:ProjectViewProjectNode" />
<item name="hw2" type="462c0819:PsiDirectoryNode" />
</path>
</expand>
<select />
</subPane>
</pane>
<pane id="Scratches" />
<pane id="Scope" />
</panes>
</component>
<component name="PropertiesComponent">
<property name="settings.editor.selected.configurable" value="com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable" />
<property name="last_opened_file_path" value="$PROJECT_DIR$/../hw3" />
</component>
<component name="RunDashboard">
<option name="ruleStates">
<list>
<RuleState>
<option name="name" value="ConfigurationTypeDashboardGroupingRule" />
</RuleState>
<RuleState>
<option name="name" value="StatusDashboardGroupingRule" />
</RuleState>
</list>
</option>
</component>
<component name="RunManager" selected="Python.plot">
<configuration name="plot" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="hw2" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/plot.py" />
<option name="PARAMETERS" value="data/HalfCheetah_b50000_rtg_na_bl_l5_100_s128_HalfCheetah-v1_26-01-2018_08-32-35" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
</configuration>
<configuration name="train_pg" type="PythonConfigurationType" factoryName="Python" temporary="true">
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<module name="hw2" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/train_pg.py" />
<option name="PARAMETERS" value="HalfCheetah-v1 -ep 150 -lr 0.02 --discount 0.9 -rtg -e 5 -n 100 -b 50000 -l 5 -bl -s 128 --exp_name HalfCheetah_b50000_rtg_na_bl_l5_100_s128" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
</configuration>
<list size="2">
<item index="0" class="java.lang.String" itemvalue="Python.train_pg" />
<item index="1" class="java.lang.String" itemvalue="Python.plot" />
</list>
<recent_temporary>
<list size="2">
<item index="0" class="java.lang.String" itemvalue="Python.plot" />
<item index="1" class="java.lang.String" itemvalue="Python.train_pg" />
</list>
</recent_temporary>
</component>
<component name="ShelveChangesManager" show_recycled="false">
<option name="remove_strategy" value="false" />
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="Default task">
<changelist id="1f7ae058-573d-4474-ba07-210bddc97818" name="Default" comment="" />
<created>1516669850648</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1516669850648</updated>
</task>
<servers />
</component>
<component name="ToolWindowManager">
<frame x="1" y="0" width="1505" height="1070" extended-state="0" />
<layout>
<window_info id="Project" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="true" show_stripe_button="true" weight="0.23752496" sideWeight="0.5" order="0" side_tool="false" content_ui="combo" />
<window_info id="TODO" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="6" side_tool="false" content_ui="tabs" />
<window_info id="Event Log" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
<window_info id="Run" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32604167" sideWeight="0.5" order="2" side_tool="false" content_ui="tabs" />
<window_info id="Version Control" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="false" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Python Console" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Structure" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Terminal" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Debug" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="3" side_tool="false" content_ui="tabs" />
<window_info id="Favorites" active="false" anchor="left" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="-1" side_tool="true" content_ui="tabs" />
<window_info id="Data View" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.32934132" sideWeight="0.5" order="-1" side_tool="false" content_ui="tabs" />
<window_info id="Cvs" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="4" side_tool="false" content_ui="tabs" />
<window_info id="Hierarchy" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="2" side_tool="false" content_ui="combo" />
<window_info id="Message" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Commander" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="0" side_tool="false" content_ui="tabs" />
<window_info id="Find" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.33" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
<window_info id="Inspection" active="false" anchor="bottom" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.4" sideWeight="0.5" order="5" side_tool="false" content_ui="tabs" />
<window_info id="Ant Build" active="false" anchor="right" auto_hide="false" internal_type="DOCKED" type="DOCKED" visible="false" show_stripe_button="true" weight="0.25" sideWeight="0.5" order="1" side_tool="false" content_ui="tabs" />
</layout>
</component>
<component name="VcsContentAnnotationSettings">
<option name="myLimit" value="2678400000" />
</component>
<component name="XDebuggerManager">
<breakpoint-manager>
<option name="time" value="7" />
</breakpoint-manager>
<watches-manager>
<configuration name="PythonConfigurationType">
<watch expression="action.size" />
</configuration>
</watches-manager>
</component>
<component name="editorHistoryManager">
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/tensorflow/python/layers/core.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="365">
<caret line="159" column="14" lean-forward="false" selection-start-line="159" selection-start-column="4" selection-end-line="159" selection-end-column="14" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/nn_ops.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="68">
<caret line="1616" column="4" lean-forward="false" selection-start-line="1616" selection-start-column="4" selection-end-line="1616" selection-end-column="4" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/variable_scope.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="311">
<caret line="1047" column="4" lean-forward="false" selection-start-line="1047" selection-start-column="4" selection-end-line="1047" selection-end-column="4" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/.PyCharmCE2017.2/system/python_stubs/2065551341/numpy/core/umath.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="149">
<caret line="3860" column="4" lean-forward="false" selection-start-line="3860" selection-start-column="4" selection-end-line="3860" selection-end-column="4" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/.PyCharmCE2017.2/system/python_stubs/2065551341/numpy/core/multiarray.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="311">
<caret line="560" column="4" lean-forward="false" selection-start-line="560" selection-start-column="4" selection-end-line="560" selection-end-column="4" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/numpy/core/fromnumeric.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="311">
<caret line="2805" column="17" lean-forward="true" selection-start-line="2805" selection-start-column="12" selection-end-line="2805" selection-end-column="17" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/gym/wrappers/time_limit.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="206">
<caret line="35" column="0" lean-forward="false" selection-start-line="35" selection-start-column="0" selection-end-line="35" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/gym/core.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="198">
<caret line="95" column="0" lean-forward="false" selection-start-line="95" selection-start-column="0" selection-end-line="95" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/multiprocessing/process.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="198">
<caret line="92" column="0" lean-forward="false" selection-start-line="92" selection-start-column="0" selection-end-line="92" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/gym/envs/classic_control/cartpole.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="206">
<caret line="56" column="14" lean-forward="false" selection-start-line="56" selection-start-column="8" selection-end-line="56" selection-end-column="14" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/random_ops.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="399">
<caret line="338" column="0" lean-forward="true" selection-start-line="338" selection-start-column="0" selection-end-line="338" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_array_ops.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-60">
<caret line="2548" column="25" lean-forward="false" selection-start-line="2548" selection-start-column="20" selection-end-line="2548" selection-end-column="25" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$USER_HOME$/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/gen_nn_ops.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-37">
<caret line="1470" column="4" lean-forward="false" selection-start-line="1470" selection-start-column="4" selection-end-line="1470" selection-end-column="4" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/logz.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="0">
<caret line="0" column="0" lean-forward="false" selection-start-line="0" selection-start-column="0" selection-end-line="0" selection-end-column="0" />
<folding />
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/train_pg.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="-1667">
<caret line="265" column="26" lean-forward="false" selection-start-line="265" selection-start-column="26" selection-end-line="265" selection-end-column="26" />
<folding>
<element signature="e#0#18#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
<entry file="file://$PROJECT_DIR$/plot.py">
<provider selected="true" editor-type-id="text-editor">
<state relative-caret-position="694">
<caret line="117" column="5" lean-forward="false" selection-start-line="117" selection-start-column="5" selection-end-line="117" selection-end-column="5" />
<folding>
<element signature="e#0#21#0" expanded="true" />
</folding>
</state>
</provider>
</entry>
</component>
</project>
================================================
FILE: hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/1/log.txt
================================================
LossDelta Time Iteration AverageReturn StdReturn MaxReturn MinReturn EpLenMean EpLenStd TimestepsThisBatch TimestepsSoFar
0.133293 25.448426246643066 0 -179.33068933 39.2898608549 -64.8887104153 -299.604089513 151.0 0.0 50132 50132
0.0511833 42.846628189086914 1 -148.606064338 42.533900249 -49.9120428547 -276.510779299 151.0 0.0 50132 100264
0.0572527 61.10300421714783 2 -120.536215955 39.4846258104 -5.9060736545 -240.029126655 151.0 0.0 50132 150396
0.0298718 79.34754776954651 3 -97.9565585579 35.6283471029 -11.0812879966 -260.096540085 151.0 0.0 50132 200528
0.0322279 97.38736867904663 4 -94.2633140782 32.3465301386 9.16578404748 -202.962048552 151.0 0.0 50132 250660
0.016316 115.01092791557312 5 -93.9071039577 31.2213294903 0.401268223837 -227.466183436 151.0 0.0 50132 300792
0.0167591 132.51984119415283 6 -94.8293881237 29.1252327052 50.2664705411 -195.40892016 151.0 0.0 50132 350924
0.0264593 149.86110424995422 7 -95.7902107153 31.3967198491 -17.6409104329 -201.065445477 151.0 0.0 50132 401056
0.0203816 167.2442409992218 8 -86.1974336082 29.9126825368 -11.1624613763 -229.269790683 151.0 0.0 50132 451188
0.0195637 184.57763361930847 9 -75.3971787064 28.4099883616 -2.91159419919 -201.487737878 151.0 0.0 50132 501320
0.015681 201.94242334365845 10 -66.5291743251 27.3617021986 8.65267583443 -151.060197382 151.0 0.0 50132 551452
0.0202405 219.30347180366516 11 -63.0298191441 27.2448855245 -6.47570459077 -170.185939103 151.0 0.0 50132 601584
0.0156047 236.7127959728241 12 -57.3076110784 25.1331179495 3.6367831527 -137.297945971 151.0 0.0 50132 651716
0.015131 254.0550982952118 13 -50.3196637051 24.4452423525 17.5518845558 -152.020824906 151.0 0.0 50132 701848
0.0159931 271.41443967819214 14 -44.389283081 21.2386244378 11.3105494853 -122.173791072 151.0 0.0 50132 751980
0.0165923 288.7827877998352 15 -44.5700555413 22.5160967597 12.0620372677 -112.419782168 151.0 0.0 50132 802112
0.0185337 306.18880701065063 16 -38.8550387528 20.2803802874 29.8354785787 -106.702525151 151.0 0.0 50132 852244
0.019082 323.7642719745636 17 -36.0911314892 20.9151817188 22.393443393 -109.884108994 151.0 0.0 50132 902376
0.0225671 341.1691167354584 18 -34.0105432078 20.8576622287 15.754886997 -107.961152444 151.0 0.0 50132 952508
0.0102453 358.6126923561096 19 -35.0251613878 22.0372439003 29.8823515524 -127.517257298 151.0 0.0 50132 1002640
0.0161908 375.9906620979309 20 -31.5873060429 22.7862157451 34.1843875213 -117.737950912 151.0 0.0 50132 1052772
0.0169231 393.39186453819275 21 -28.5671175488 19.4448337439 27.8019358833 -97.6354796118 151.0 0.0 50132 1102904
0.017502 410.7973198890686 22 -22.4539025367 20.6658131139 26.8015904639 -126.852927183 151.0 0.0 50132 1153036
0.0144892 428.25348806381226 23 -16.8346210051 18.4712133575 34.3619588892 -93.4809736692 151.0 0.0 50132 1203168
0.0175294 445.75735449790955 24 -13.7300606352 16.5279274615 45.1834395994 -68.6667838992 151.0 0.0 50132 1253300
0.0269088 463.25252509117126 25 -11.6646432237 18.931788653 44.3404809664 -85.5989880009 151.0 0.0 50132 1303432
0.0183992 480.66894268989563 26 -8.8125187567 17.0538149711 40.0261280321 -64.0965170699 151.0 0.0 50132 1353564
0.0270967 498.06955766677856 27 -7.61458414919 15.5363423438 38.6545067547 -73.5994991664 151.0 0.0 50132 1403696
0.0202837 515.4838519096375 28 -7.5643906963 13.1692132795 28.8422193159 -58.7653458719 151.0 0.0 50132 1453828
0.032324 533.6044790744781 29 -7.88732992076 13.6205604434 23.7089055113 -52.7262926622 151.0 0.0 50132 1503960
0.017226 551.0418577194214 30 -4.81328252996 14.4281045663 40.3022453044 -60.7769945891 151.0 0.0 50132 1554092
0.0237148 568.540479183197 31 0.503191612289 14.7629697393 42.6004038947 -57.6291822069 151.0 0.0 50132 1604224
0.0241208 586.0562088489532 32 5.31118377239 17.3671926803 44.7379480195 -53.1878315807 151.0 0.0 50132 1654356
0.0157807 604.7549903392792 33 10.89034033 19.0480939249 55.0128118318 -73.1248891571 151.0 0.0 50132 1704488
0.020365 622.1728205680847 34 17.7671103225 17.7904016311 58.4101610036 -43.7355593981 151.0 0.0 50132 1754620
0.0129134 639.5883619785309 35 22.42801249 18.1763262835 66.1780468444 -46.6517369961 151.0 0.0 50132 1804752
0.00868595 656.9805538654327 36 24.6239850216 20.690837396 67.0156129223 -68.2607863022 151.0 0.0 50132 1854884
0.01336 674.3199441432953 37 28.7251779744 18.2555714257 69.4029800012 -41.9340622203 151.0 0.0 50132 1905016
0.0126717 692.2312703132629 38 29.6963350254 23.933793475 83.9837053273 -49.838575588 151.0 0.0 50132 1955148
0.0204675 709.6087455749512 39 33.0208667623 25.6643776793 98.4086242795 -80.8446065203 151.0 0.0 50132 2005280
0.0232269 726.9732248783112 40 40.6926470132 21.3550453036 92.3745433988 -52.7401874135 151.0 0.0 50132 2055412
-0.0107402 744.4198088645935 41 47.7884448095 20.3955976231 94.760215677 -29.0397346658 151.0 0.0 50132 2105544
-0.0106151 761.82421708107 42 53.1096144229 27.7124088791 108.505558718 -62.2108937376 151.0 0.0 50132 2155676
-0.00608519 779.2861967086792 43 51.8660344441 27.2646686222 103.014925578 -63.4603096516 151.0 0.0 50132 2205808
0.0137518 796.7234201431274 44 48.5393239361 35.3895358101 102.183362434 -61.5291894318 151.0 0.0 50132 2255940
0.0155841 814.1130645275116 45 48.6577722028 30.2651964165 100.343212064 -54.0211094641 151.0 0.0 50132 2306072
0.00917006 831.4584109783173 46 56.7313055657 26.0706073002 99.0665713258 -61.2469985286 151.0 0.0 50132 2356204
0.000182971 848.8481845855713 47 71.4240228836 26.6106093683 117.391216718 -24.3428371699 151.0 0.0 50132 2406336
0.00452052 866.282511472702 48 74.1239186246 28.1015744381 125.933737237 -23.664714475 151.0 0.0 50132 2456468
0.0138416 883.5756363868713 49 66.893572219 31.160214229 145.319864383 -46.7460073729 151.0 0.0 50132 2506600
0.0719024 901.0127730369568 50 71.5834981937 29.261178731 143.144330252 -21.1289007802 151.0 0.0 50132 2556732
-0.0407039 918.319787979126 51 76.4148287165 20.3878858901 118.227515096 1.67577172767 151.0 0.0 50132 2606864
-0.0166612 935.6414361000061 52 78.8036213533 20.4435520398 128.780111637 -25.4414792569 151.0 0.0 50132 2656996
-0.151419 952.9851241111755 53 87.6104612233 22.346310915 134.274052772 -23.4992014494 151.0 0.0 50132 2707128
-0.0136276 970.4374308586121 54 77.8166196929 23.6091843875 130.683760336 -6.52469421412 151.0 0.0 50132 2757260
-0.0302435 987.770161151886 55 81.0041288771 25.0533640845 139.95905789 -17.8392294624 151.0 0.0 50132 2807392
0.0260241 1005.0541090965271 56 78.3369537352 30.0950142681 126.539841377 -45.7873870624 151.0 0.0 50132 2857524
0.037265 1022.390073299408 57 81.0994985703 32.8203956722 128.559321558 -29.8774157352 151.0 0.0 50132 2907656
0.000206351 1039.7500381469727 58 80.1786041875 29.6551329964 133.21483671 -49.0817390529 151.0 0.0 50132 2957788
0.00672939 1057.593801021576 59 81.8096754822 23.7911667265 127.593774963 -12.6151574985 151.0 0.0 50132 3007920
0.0307942 1074.9470241069794 60 88.1199699425 20.7148027326 122.087145452 -24.3563414879 151.0 0.0 50132 3058052
0.0181423 1092.2785639762878 61 96.7669312931 21.7270481497 134.439164635 -16.063387008 151.0 0.0 50132 3108184
-0.0951916 1109.5738620758057 62 109.207726365 26.5131899319 161.72765998 2.68148326057 151.0 0.0 50132 3158316
-0.0543005 1126.9086530208588 63 103.894204594 28.5914713128 160.952670048 -17.6150390193 151.0 0.0 50132 3208448
-0.109457 1144.4107794761658 64 103.223968281 31.806931454 165.112164853 -47.0618067798 151.0 0.0 50132 3258580
-0.119589 1161.7094781398773 65 108.609907959 29.8939910654 171.629600104 -12.8924113624 151.0 0.0 50132 3308712
-0.0522546 1178.9669692516327 66 110.733140574 22.5958373242 144.517156909 -20.4782387412 151.0 0.0 50132 3358844
0.021341 1196.3099405765533 67 96.6009209841 21.2273345108 137.211783659 -14.1044777758 151.0 0.0 50132 3408976
-0.00994848 1213.7535479068756 68 108.917960397 18.6427279424 145.423670085 -40.7204619291 151.0 0.0 50132 3459108
-0.00197615 1231.0432980060577 69 120.291524999 16.8451213817 157.450465138 50.859044525 151.0 0.0 50132 3509240
0.0263877 1248.5932233333588 70 132.397156157 19.8816245645 175.026190802 35.1192246829 151.0 0.0 50132 3559372
-0.000850692 1266.052000284195 71 124.351170344 21.2110330596 170.371379569 15.0654154297 151.0 0.0 50132 3609504
0.0281867 1283.4100875854492 72 117.992789746 25.1851159455 166.57367373 -1.24354605101 151.0 0.0 50132 3659636
-0.0152832 1300.7378389835358 73 124.586990639 24.5938885653 170.878334584 7.81948911508 151.0 0.0 50132 3709768
-0.0137309 1318.1021897792816 74 128.61676341 27.8226730477 185.34528317 -4.14498221351 151.0 0.0 50132 3759900
-0.00500324 1335.4428339004517 75 131.302294374 28.5093408432 175.439084762 -6.57414068791 151.0 0.0 50132 3810032
0.0298657 1352.786170721054 76 135.503006655 22.6088686337 171.659523591 10.2975411796 151.0 0.0 50132 3860164
0.0430857 1369.9960262775421 77 135.012632709 17.9255098169 178.105758033 16.6179029283 151.0 0.0 50132 3910296
-0.000391383 1387.2642958164215 78 128.120669702 16.156769715 165.704975806 39.7954755939 151.0 0.0 50132 3960428
0.0278823 1404.5721728801727 79 123.90797326 17.9023473024 165.591772792 16.4438175941 151.0 0.0 50132 4010560
0.0431979 1421.912649154663 80 133.730214763 19.2199251931 189.347381836 39.545368668 151.0 0.0 50132 4060692
0.0640927 1439.2352764606476 81 133.076134323 22.8076704668 182.248326883 50.8470641852 151.0 0.0 50132 4110824
0.000945015 1456.4754464626312 82 113.582360316 41.2400371818 175.522275478 -8.19092507599 151.0 0.0 50132 4160956
-0.00385883 1473.756596326828 83 120.111571484 37.3903531712 166.492059968 -14.654343465 151.0 0.0 50132 4211088
0.2068 1491.0191214084625 84 127.278022046 26.8683626835 168.060325895 -15.7795142641 151.0 0.0 50132 4261220
0.0606409 1508.2981476783752 85 132.445201222 22.7112558845 169.298954893 -9.93169153637 151.0 0.0 50132 4311352
0.0986238 1525.5257413387299 86 137.673055324 17.5801615814 177.260248395 4.26701663692 151.0 0.0 50132 4361484
0.0580756 1542.7471117973328 87 137.952956407 16.1518806479 180.779046073 40.0552755122 151.0 0.0 50132 4411616
0.0152075 1559.9642806053162 88 118.090676764 41.1089102579 191.687041477 -21.2552235901 151.0 0.0 50132 4461748
0.0871946 1577.3119723796844 89 78.0761778291 62.6995555419 177.279672828 -32.172458644 151.0 0.0 50132 4511880
0.0861108 1594.578145980835 90 122.934924499 33.4669167603 172.162768163 -17.9278211916 151.0 0.0 50132 4562012
0.327106 1611.8292326927185 91 129.888923775 13.8975333486 166.656061132 86.3408857062 151.0 0.0 50132 4612144
0.0406095 1629.1160552501678 92 151.76482434 16.7412807028 190.689096854 25.6853591997 151.0 0.0 50132 4662276
-0.0338379 1646.4187574386597 93 148.798054311 21.1627531468 193.143202885 55.5361234051 151.0 0.0 50132 4712408
-0.326612 1663.6832489967346 94 142.325134132 20.9087122605 180.1284824 32.90802496 151.0 0.0 50132 4762540
-0.112979 1680.9754304885864 95 140.834106068 18.7779267411 185.781068192 7.76995464114 151.0 0.0 50132 4812672
-0.0305067 1698.2448518276215 96 136.959483761 19.5082067964 190.485582299 43.9965753337 151.0 0.0 50132 4862804
0.156979 1715.5525453090668 97 143.838990449 20.1867732249 195.767328563 64.0663237813 151.0 0.0 50132 4912936
-0.0117669 1732.8337411880493 98 150.5317061 18.1365910766 197.701370993 93.4293046108 151.0 0.0 50132 4963068
0.443599 1750.078330039978 99 154.288889537 16.9159632689 197.161612264 62.827653579 151.0 0.0 50132 5013200
================================================
FILE: hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/1/params.json
================================================
{"animate" : false,
"env_name" : "HalfCheetah-v1",
"exp_name" : "HalfCheetah_b50000_rtg_na_25bl",
"gamma" : 0.9,
"learning_rate" : 0.025,
"logdir" : "data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/1",
"max_path_length" : 150.0,
"min_timesteps_per_batch" : 50000,
"n_iter" : 100,
"n_layers" : 1,
"nn_baseline" : true,
"normalize_advantages" : true,
"reward_to_go" : true,
"seed" : 1,
"size" : 32}
================================================
FILE: hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/11/log.txt
================================================
LossDelta Time Iteration AverageReturn StdReturn MaxReturn MinReturn EpLenMean EpLenStd TimestepsThisBatch TimestepsSoFar
0.156345 24.443801641464233 0 -270.807132938 51.144434011 -124.247270511 -408.722559886 151.0 0.0 50132 50132
0.0745756 41.68954515457153 1 -208.174620004 42.8050799888 -96.1271702736 -322.408903503 151.0 0.0 50132 100264
0.0524621 58.94227194786072 2 -163.86329966 39.0648217774 -33.541631339 -297.024382203 151.0 0.0 50132 150396
0.0376806 76.17933058738708 3 -143.120324724 38.1613540631 -39.9368740615 -269.867959189 151.0 0.0 50132 200528
0.0194735 93.40901017189026 4 -131.582923395 37.4306005119 -41.4770662667 -262.189696779 151.0 0.0 50132 250660
0.019406 110.68946814537048 5 -123.347786916 36.2345390801 -16.1396148984 -245.603064289 151.0 0.0 50132 300792
0.0184991 127.94105243682861 6 -112.889513591 34.2592915777 -24.9485828047 -256.295959957 151.0 0.0 50132 350924
0.0212448 145.12072658538818 7 -108.85053352 34.216731248 -23.4573843867 -223.650869786 151.0 0.0 50132 401056
0.0176691 162.28984999656677 8 -100.777447103 33.0336073048 35.1413112999 -198.531529377 151.0 0.0 50132 451188
0.0136455 179.48703360557556 9 -96.4646472196 33.4928776315 7.01913793564 -207.898906884 151.0 0.0 50132 501320
0.0215281 196.7747299671173 10 -91.0654907222 34.8100614151 27.756712229 -188.680406673 151.0 0.0 50132 551452
0.0223921 214.01161670684814 11 -84.0375624954 35.4480411913 67.3296271314 -189.716663059 151.0 0.0 50132 601584
0.0215174 231.31497406959534 12 -76.0735008025 32.7773397301 20.2344331449 -186.426370003 151.0 0.0 50132 651716
0.0169964 248.65626192092896 13 -69.2143664409 31.9517135993 20.7611302791 -190.809494716 151.0 0.0 50132 701848
0.0142232 265.9935550689697 14 -57.9622883197 29.9415468864 26.8067527548 -170.49599844 151.0 0.0 50132 751980
0.0127593 283.34787821769714 15 -57.0049868941 29.7788677367 42.0555399094 -150.776807077 151.0 0.0 50132 802112
0.01255 300.75698709487915 16 -52.4070161588 27.7440190479 33.0667652876 -156.973353495 151.0 0.0 50132 852244
0.0157212 318.07285737991333 17 -50.0178988864 26.7919215559 26.1940825337 -147.869435494 151.0 0.0 50132 902376
0.0116063 335.4031457901001 18 -47.058347119 27.2101543524 23.8799112773 -147.117599183 151.0 0.0 50132 952508
0.0103434 352.7371082305908 19 -46.1477791211 24.7258875208 17.455954359 -136.159242166 151.0 0.0 50132 1002640
0.0141757 370.0111255645752 20 -44.7421885101 26.5518099028 18.9852314352 -158.481592444 151.0 0.0 50132 1052772
0.0170411 387.3496460914612 21 -37.6113435456 24.610869755 21.3342743107 -118.900679407 151.0 0.0 50132 1102904
0.0172864 404.6649658679962 22 -34.4413393363 24.8922226735 24.7217700601 -132.064007648 151.0 0.0 50132 1153036
0.0125309 422.0147588253021 23 -32.5791391956 25.1413827803 36.8870949818 -128.689781434 151.0 0.0 50132 1203168
0.0113816 439.35777401924133 24 -28.9719738336 19.8485325046 14.9354028209 -96.2432326478 151.0 0.0 50132 1253300
0.0127849 456.69245052337646 25 -26.6872084597 22.4459651495 30.3132533702 -106.262946466 151.0 0.0 50132 1303432
0.014384 474.02413630485535 26 -26.492911924 21.1923023928 31.8756407404 -121.618312114 151.0 0.0 50132 1353564
0.0150111 491.3865704536438 27 -22.5471363888 21.7282683118 51.4644042612 -139.288884604 151.0 0.0 50132 1403696
0.0107308 508.7335605621338 28 -21.8625348799 22.4595421731 59.1765901322 -98.4716067074 151.0 0.0 50132 1453828
0.0106887 526.0901181697845 29 -19.122674373 22.0124968795 52.5566398805 -122.764009032 151.0 0.0 50132 1503960
0.0135209 543.3986639976501 30 -20.1571496979 19.2183764843 35.8787573842 -90.683500138 151.0 0.0 50132 1554092
0.0148105 560.7340970039368 31 -18.1932959063 20.1338105964 42.5484711637 -114.017780143 151.0 0.0 50132 1604224
0.0168172 578.1244428157806 32 -17.3113719715 18.3242251908 32.1464217304 -95.2990426232 151.0 0.0 50132 1654356
0.0239558 595.9245526790619 33 -14.8479686503 15.621951826 33.904274897 -57.0699736474 151.0 0.0 50132 1704488
0.0141534 613.3564751148224 34 -13.1165029339 15.3295797531 29.245383332 -63.4801824559 151.0 0.0 50132 1754620
0.0158751 631.1353979110718 35 -12.6115729411 14.1498657802 31.5550869502 -62.2967103864 151.0 0.0 50132 1804752
0.0187876 649.3035516738892 36 -9.74873734089 12.9087835191 31.4540828312 -52.1836839756 151.0 0.0 50132 1854884
0.0205946 667.242800951004 37 -6.39376571649 13.3664898159 33.5742004633 -50.6052205571 151.0 0.0 50132 1905016
0.020675 687.1851966381073 38 -1.64690829924 16.1347912646 50.8222697974 -53.8731015827 151.0 0.0 50132 1955148
0.0180232 705.7867012023926 39 5.58895562653 17.3748414648 61.5233847372 -62.4213041612 151.0 0.0 50132 2005280
0.0214217 724.5065524578094 40 8.44418711816 16.390860669 66.8289876494 -63.5820963371 151.0 0.0 50132 2055412
0.0159438 749.6698753833771 41 8.61961584828 18.1544462687 60.8472784198 -44.707776249 151.0 0.0 50132 2105544
0.0165443 772.5674297809601 42 13.0264712926 16.0456063036 51.4251914293 -44.5981381416 151.0 0.0 50132 2155676
0.0143073 799.0406270027161 43 16.4536457806 15.6875054846 65.3031723392 -46.5263470391 151.0 0.0 50132 2205808
0.0191986 823.7559452056885 44 23.1974222047 16.7922518446 71.8889865064 -59.6553980292 151.0 0.0 50132 2255940
0.0169812 845.5439846515656 45 30.8654521791 21.452045185 85.2246107747 -57.0336624229 151.0 0.0 50132 2306072
0.0195603 865.8228287696838 46 35.7817425256 25.1589567016 86.6341985197 -93.5013420866 151.0 0.0 50132 2356204
0.0205392 887.0430307388306 47 39.7167506798 25.0403232768 94.2910649383 -46.7231567874 151.0 0.0 50132 2406336
0.00594468 908.2338352203369 48 51.2064485835 27.9200442591 103.265646891 -67.0813596421 151.0 0.0 50132 2456468
0.0130448 925.3884208202362 49 50.3234196493 29.7048632605 100.007991175 -46.522801209 151.0 0.0 50132 2506600
0.0242518 942.632294178009 50 49.8872459679 30.3881984586 104.918423251 -54.1735014646 151.0 0.0 50132 2556732
0.0204469 959.8109889030457 51 60.4612486257 26.8470099409 108.431421818 -50.2798967417 151.0 0.0 50132 2606864
0.0274831 977.0340876579285 52 64.0796140974 24.0725047216 105.907963541 -40.1578980498 151.0 0.0 50132 2656996
0.0240855 994.2316606044769 53 70.0180890763 18.146930569 117.538417677 0.0539951720255 151.0 0.0 50132 2707128
0.0291589 1011.4263353347778 54 77.0394790875 21.9981690846 120.661425928 -28.9163894042 151.0 0.0 50132 2757260
0.0152475 1028.5558805465698 55 86.9063687535 28.4297652798 142.984622225 -39.0498226022 151.0 0.0 50132 2807392
0.0214814 1045.6518015861511 56 87.7389149336 30.1476799697 147.91640431 -32.5425927192 151.0 0.0 50132 2857524
0.0197347 1062.789380788803 57 91.9224177279 28.5295269597 146.520222155 -44.2126761433 151.0 0.0 50132 2907656
0.0244948 1079.828236579895 58 95.1937808035 24.4394575485 143.225390406 -36.8699767283 151.0 0.0 50132 2957788
0.0291605 1096.8878691196442 59 91.2198217279 19.6436648462 128.851842416 -2.52456424265 151.0 0.0 50132 3007920
0.0170105 1114.0013291835785 60 86.0783659977 22.0866557276 129.556461159 -44.4423881159 151.0 0.0 50132 3058052
0.0585194 1131.1018915176392 61 92.4430588896 27.5265606691 139.718015317 -32.3077818066 151.0 0.0 50132 3108184
0.0107379 1148.2288718223572 62 87.8335394781 42.3994360402 152.307235048 -36.3599110433 151.0 0.0 50132 3158316
0.0738931 1165.37731051445 63 95.1449936882 29.9372617955 148.221298645 -17.4016885397 151.0 0.0 50132 3208448
0.00196401 1182.9774658679962 64 113.366769111 22.8176478714 158.342030284 -20.8055941882 151.0 0.0 50132 3258580
0.0257016 1200.0777685642242 65 101.891218234 21.5139339988 152.21256244 10.6277785019 151.0 0.0 50132 3308712
0.035666 1217.0094940662384 66 106.993209315 22.2765695381 153.008343057 -3.98400623564 151.0 0.0 50132 3358844
0.0267421 1233.9572584629059 67 116.750444257 23.0781954654 156.692886787 5.28665443411 151.0 0.0 50132 3408976
0.00536058 1252.9021184444427 68 107.992246665 36.3715297805 168.829405001 -14.5241811204 151.0 0.0 50132 3459108
0.0388299 1276.3621952533722 69 119.818148259 25.3014594114 164.534762626 -14.4663782028 151.0 0.0 50132 3509240
0.0289253 1296.22412276268 70 118.958933142 16.2935292019 158.573217925 45.1061127994 151.0 0.0 50132 3559372
0.00399954 1319.5692286491394 71 122.253037304 19.2025817564 166.385413012 45.5740948036 151.0 0.0 50132 3609504
-0.0025171 1336.4224970340729 72 133.059699773 23.6835090439 191.334367118 29.8291021627 151.0 0.0 50132 3659636
0.0254654 1353.2402832508087 73 129.887894452 27.2782072848 183.121149262 7.03850994806 151.0 0.0 50132 3709768
0.0378958 1370.1964020729065 74 131.436466745 25.9695131805 181.828270811 15.9956452175 151.0 0.0 50132 3759900
0.0287882 1387.1644208431244 75 137.462854549 21.213564358 181.657198203 13.5230592931 151.0 0.0 50132 3810032
0.0363687 1404.0474846363068 76 134.283955796 20.2636771391 187.253073008 29.5261382737 151.0 0.0 50132 3860164
0.0276075 1421.1449337005615 77 136.21439875 17.5353492814 184.462083085 76.9448736842 151.0 0.0 50132 3910296
0.0166955 1437.9838910102844 78 147.848918428 18.0620904164 193.859455075 40.651529847 151.0 0.0 50132 3960428
0.0121087 1454.828234910965 79 151.439633187 19.861722188 191.388273873 41.8960327984 151.0 0.0 50132 4010560
0.030101 1471.7287895679474 80 152.720079589 25.4702452017 202.231301104 -6.03768322686 151.0 0.0 50132 4060692
0.0331579 1488.5854642391205 81 152.109954982 22.7632641386 193.223533603 33.7299732257 151.0 0.0 50132 4110824
0.00135813 1505.4706330299377 82 157.323813746 19.8485591495 198.796394131 6.45282378137 151.0 0.0 50132 4160956
0.0423434 1522.3156311511993 83 152.527799863 19.5720795101 204.471543769 58.8893421734 151.0 0.0 50132 4211088
0.0253047 1539.0953946113586 84 154.654856521 22.7286714125 216.412050839 60.2356178898 151.0 0.0 50132 4261220
0.209883 1556.6230838298798 85 138.964597373 26.7855433292 197.007959878 26.0988262075 151.0 0.0 50132 4311352
0.0507359 1576.3277297019958 86 155.025858592 28.069273723 204.862363459 24.7556513974 151.0 0.0 50132 4361484
-0.151607 1593.2155303955078 87 142.980756837 29.1081306133 190.263756091 -27.3028635101 151.0 0.0 50132 4411616
0.0597955 1611.778851032257 88 127.859529801 38.5270466418 186.050405817 -29.7251590232 151.0 0.0 50132 4461748
0.0718467 1630.3496758937836 89 125.487502983 34.38721446 183.777501867 -13.2943008097 151.0 0.0 50132 4511880
0.00945624 1647.4859533309937 90 149.194999799 25.9601188005 212.143210583 11.5337576863 151.0 0.0 50132 4562012
0.0269386 1668.2153010368347 91 147.241223461 26.8564662814 207.567658497 26.0897613812 151.0 0.0 50132 4612144
0.0975457 1685.1769154071808 92 129.705021685 27.0230976014 177.503046355 5.77588939463 151.0 0.0 50132 4662276
0.0341558 1702.838742017746 93 134.968933171 24.2360338229 187.492442221 16.3165210175 151.0 0.0 50132 4712408
0.146843 1721.7922065258026 94 151.391529616 18.353520309 199.56123239 76.1616950038 151.0 0.0 50132 4762540
-0.070617 1739.300965309143 95 153.134481426 19.4791113047 199.244787185 42.8832644866 151.0 0.0 50132 4812672
0.0234003 1757.9904987812042 96 146.572508337 26.529475938 191.706583041 -15.5332168737 151.0 0.0 50132 4862804
0.0162911 1775.992045879364 97 146.041561919 31.7945806263 202.258060192 -30.044358145 151.0 0.0 50132 4912936
0.0659341 1794.1720831394196 98 136.124565595 29.6728462594 191.418251642 -10.8424923183 151.0 0.0 50132 4963068
0.0651561 1812.3902924060822 99 128.064240825 22.3645523068 174.156419102 31.9355341804 151.0 0.0 50132 5013200
================================================
FILE: hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/11/params.json
================================================
{"animate" : false,
"env_name" : "HalfCheetah-v1",
"exp_name" : "HalfCheetah_b50000_rtg_na_25bl",
"gamma" : 0.9,
"learning_rate" : 0.025,
"logdir" : "data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/11",
"max_path_length" : 150.0,
"min_timesteps_per_batch" : 50000,
"n_iter" : 100,
"n_layers" : 1,
"nn_baseline" : true,
"normalize_advantages" : true,
"reward_to_go" : true,
"seed" : 11,
"size" : 32}
================================================
FILE: hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/21/log.txt
================================================
LossDelta Time Iteration AverageReturn StdReturn MaxReturn MinReturn EpLenMean EpLenStd TimestepsThisBatch TimestepsSoFar
0.102255 25.70679473876953 0 -323.969673021 43.1211691714 -217.811784236 -472.659356681 151.0 0.0 50132 50132
0.0718359 42.85335898399353 1 -250.27580682 40.1936177359 -156.424604331 -374.707362593 151.0 0.0 50132 100264
0.0435652 59.91646480560303 2 -211.692384319 42.9265427551 -102.852937032 -352.736889921 151.0 0.0 50132 150396
0.026953 77.0094940662384 3 -188.875982318 43.8484870388 -83.3887182982 -319.687682358 151.0 0.0 50132 200528
0.0228926 94.11019897460938 4 -174.544094296 41.2176119966 -39.5838291301 -308.938616966 151.0 0.0 50132 250660
0.0197197 111.01148295402527 5 -167.771087918 37.3406721385 -77.1771885839 -276.662634771 151.0 0.0 50132 300792
0.0203707 127.54880833625793 6 -159.484659432 37.1298473241 -44.9099404577 -262.807247182 151.0 0.0 50132 350924
0.0203714 144.100172996521 7 -152.546285016 34.3399475656 -51.9027151929 -249.941427996 151.0 0.0 50132 401056
0.0213025 160.65814924240112 8 -148.334343787 33.0527210193 -50.6568253621 -257.001018225 151.0 0.0 50132 451188
0.0225206 177.2000379562378 9 -132.206156129 30.4062844509 -36.7580883033 -229.444688309 151.0 0.0 50132 501320
0.0210093 193.96677803993225 10 -123.431683276 28.1405986935 -37.1403380317 -235.797141132 151.0 0.0 50132 551452
0.0217689 211.28513169288635 11 -113.047048108 27.9222395298 -48.2442085882 -251.136819168 151.0 0.0 50132 601584
0.0236207 227.737380027771 12 -105.388254515 26.5204241642 -42.2873947152 -234.612112767 151.0 0.0 50132 651716
0.0204891 243.94443535804749 13 -98.0988877784 27.15046787 -26.9000314373 -233.693521394 151.0 0.0 50132 701848
0.0149134 260.25616121292114 14 -89.4653586628 22.947040142 -21.9975423918 -181.150584705 151.0 0.0 50132 751980
0.015993 276.4550313949585 15 -80.9568825201 23.8378058116 -14.3509826819 -181.321452913 151.0 0.0 50132 802112
0.0157254 292.6598074436188 16 -74.6336739486 26.3237757111 2.46803542256 -174.076436597 151.0 0.0 50132 852244
0.0158872 308.81813526153564 17 -70.850949906 25.678098135 1.15593412153 -153.042407926 151.0 0.0 50132 902376
0.0142792 324.9872281551361 18 -66.5133763792 25.5341598637 1.10210784721 -143.269514166 151.0 0.0 50132 952508
0.01619 341.1147994995117 19 -60.3243259469 25.444427888 36.596611818 -190.402268596 151.0 0.0 50132 1002640
0.0156251 357.30160903930664 20 -53.8180871352 24.239016034 15.7492563142 -169.296391319 151.0 0.0 50132 1052772
0.0137708 373.5174825191498 21 -51.55170019 24.6630451187 23.4155576904 -167.134562627 151.0 0.0 50132 1102904
0.0142438 389.690792798996 22 -45.5551235839 22.4433246618 13.3640874197 -148.185348854 151.0 0.0 50132 1153036
0.0143602 405.8326666355133 23 -43.9457356134 20.3547346281 12.6920359877 -117.680771557 151.0 0.0 50132 1203168
0.0155879 422.0374765396118 24 -42.9179297498 18.3397747538 10.1915958873 -105.586127798 151.0 0.0 50132 1253300
0.0204003 438.1307604312897 25 -40.3388933606 18.1256027294 20.4143013065 -111.281708383 151.0 0.0 50132 1303432
0.0255731 454.292268037796 26 -33.6774479916 17.4437129626 21.3489022474 -91.3885648604 151.0 0.0 50132 1353564
0.0140367 470.48041915893555 27 -30.6840460282 14.8707003227 16.4483022744 -70.811585945 151.0 0.0 50132 1403696
0.0133913 486.66046118736267 28 -28.3152501779 14.5460187026 16.1757897819 -84.9330387246 151.0 0.0 50132 1453828
0.0150401 502.7844190597534 29 -26.8132272684 13.5034840718 12.8957620058 -71.1787846657 151.0 0.0 50132 1503960
0.0159091 518.8856642246246 30 -23.8083704466 14.1947632241 11.9608637222 -67.3223615711 151.0 0.0 50132 1554092
0.0182149 535.0031504631042 31 -20.7848430514 13.0379433706 31.8350867446 -70.0749656611 151.0 0.0 50132 1604224
0.0214277 551.1289420127869 32 -18.3675213046 13.2768588224 21.5335140569 -57.4363776166 151.0 0.0 50132 1654356
0.0123329 567.297483921051 33 -14.0103907226 13.5160295985 26.7475426565 -69.2939908596 151.0 0.0 50132 1704488
0.0179343 583.4274439811707 34 -12.4618964833 14.0772799483 14.4490440327 -62.2395811357 151.0 0.0 50132 1754620
0.0140565 599.5822839736938 35 -8.32411653353 13.9311029153 27.8670320248 -63.1880865031 151.0 0.0 50132 1804752
0.016101 615.7283067703247 36 -4.76575933409 14.9172887945 34.1090437139 -68.2811508459 151.0 0.0 50132 1854884
0.0159986 631.8710033893585 37 -0.846634538965 14.3527046952 31.9478789707 -48.2931346211 151.0 0.0 50132 1905016
0.0131352 647.9776313304901 38 6.66422018579 16.5929490783 43.6131263883 -62.0069955394 151.0 0.0 50132 1955148
0.0149884 664.0657420158386 39 10.8310135299 21.5580188082 53.4050203982 -76.6274250649 151.0 0.0 50132 2005280
0.0133125 680.1107017993927 40 14.8660972496 24.4449729926 63.9009317675 -108.881512104 151.0 0.0 50132 2055412
0.0151188 696.1627497673035 41 23.1667380124 21.9864323198 69.2704215861 -69.6767194088 151.0 0.0 50132 2105544
0.0144031 712.1865272521973 42 23.7011563519 28.6488370844 74.0668269084 -91.4865969773 151.0 0.0 50132 2155676
0.0230641 728.2595946788788 43 29.6903315333 26.445817871 85.4311855333 -70.6201135742 151.0 0.0 50132 2205808
0.0330029 744.2708251476288 44 34.8996347338 24.533056575 83.5357651963 -82.8109500333 151.0 0.0 50132 2255940
0.0152059 760.3170561790466 45 35.7263670174 23.4637735099 86.4952645798 -76.2326316108 151.0 0.0 50132 2306072
0.0268034 776.377941608429 46 42.1218530537 22.9748166419 91.4909521042 -78.9933552093 151.0 0.0 50132 2356204
0.0162035 792.4250874519348 47 43.889804171 27.2636028428 94.0750157397 -68.602567579 151.0 0.0 50132 2406336
0.0163368 808.8426806926727 48 44.7562404424 33.0763994267 109.239524884 -69.8231436181 151.0 0.0 50132 2456468
0.0308516 825.3182489871979 49 54.6337905603 32.6978476247 105.288561197 -69.2987236584 151.0 0.0 50132 2506600
0.0241431 841.5765602588654 50 58.9986152298 26.4014086158 121.081281687 -51.5185729953 151.0 0.0 50132 2556732
0.021702 857.894558429718 51 58.1795111332 23.6259701738 108.012422855 -46.4501184846 151.0 0.0 50132 2606864
0.0199002 873.9185883998871 52 59.7864584712 22.9908694071 111.933247601 -56.1759173409 151.0 0.0 50132 2656996
0.0266708 889.9072835445404 53 72.8960772825 21.2535299426 117.789043563 -43.0505662623 151.0 0.0 50132 2707128
0.0209752 905.854489326477 54 79.6919922396 25.3710036598 129.299555008 -46.8741768659 151.0 0.0 50132 2757260
0.0210029 921.8397271633148 55 76.5678888021 33.7933325393 132.949564083 -55.9922751254 151.0 0.0 50132 2807392
0.0228996 937.8692770004272 56 68.508640693 35.1560415122 124.522167635 -61.2194289418 151.0 0.0 50132 2857524
0.05528 953.9288382530212 57 66.0477123406 28.7872587295 131.369078599 -46.5718718896 151.0 0.0 50132 2907656
0.0420829 969.8998148441315 58 73.7220115476 19.4707475593 119.384121605 0.515974206919 151.0 0.0 50132 2957788
0.0325335 985.8666784763336 59 86.8149087823 21.4657180367 147.9180831 -8.99830459291 151.0 0.0 50132 3007920
0.00410522 1001.8111519813538 60 99.3138106975 20.774695375 144.681356625 -8.10624391235 151.0 0.0 50132 3058052
0.0114449 1017.7370657920837 61 98.3421971169 27.3775953935 140.450383548 -26.6840265504 151.0 0.0 50132 3108184
0.0944026 1033.7344601154327 62 93.1629008936 34.7118301181 141.069634442 -37.487192472 151.0 0.0 50132 3158316
0.0596393 1049.660225391388 63 103.957806283 28.2253049919 142.375389778 -29.4456424345 151.0 0.0 50132 3208448
0.0266865 1065.54434132576 64 107.274604237 23.8590676332 144.963016754 -18.1222261813 151.0 0.0 50132 3258580
0.0198457 1081.4847359657288 65 108.701044317 19.7778646222 149.205726045 -19.8966574521 151.0 0.0 50132 3308712
0.0384747 1097.3906419277191 66 114.264637926 17.8230535463 156.360585072 18.7021231799 151.0 0.0 50132 3358844
0.0277865 1113.3256862163544 67 120.474405171 20.1692552051 167.083537244 18.3312619966 151.0 0.0 50132 3408976
0.143776 1129.2098016738892 68 116.423750256 22.1935147692 166.015400599 22.4382833843 151.0 0.0 50132 3459108
0.0200211 1145.2448682785034 69 115.515474841 28.1035450734 165.126234972 -0.670809441993 151.0 0.0 50132 3509240
0.0901678 1161.1808726787567 70 122.691701527 20.0957039741 165.942393894 24.6172318293 151.0 0.0 50132 3559372
0.0515626 1177.0716366767883 71 119.741289628 18.542192245 162.816997271 22.8979288109 151.0 0.0 50132 3609504
0.0216305 1192.9946014881134 72 119.253601648 14.4973023425 153.166738005 27.4342175452 151.0 0.0 50132 3659636
0.00866893 1208.887521982193 73 127.805758594 17.7487036426 171.467227092 52.4729986828 151.0 0.0 50132 3709768
0.0404077 1224.8040578365326 74 131.761632754 24.6841543941 180.883252508 -1.72452677977 151.0 0.0 50132 3759900
0.0289831 1240.7240166664124 75 138.543600374 24.6644739572 192.873929705 39.8066315724 151.0 0.0 50132 3810032
0.024745 1256.5996561050415 76 139.112191065 24.0078266343 199.800306519 -16.2100125198 151.0 0.0 50132 3860164
0.0253608 1272.4813752174377 77 136.343215269 28.0751326835 193.314465817 -16.4726678741 151.0 0.0 50132 3910296
0.0258509 1288.3132491111755 78 135.525932112 25.7890004981 196.866017985 -21.3685099615 151.0 0.0 50132 3960428
0.0300737 1304.2520060539246 79 145.862719115 20.2253936598 188.853754882 42.5244451034 151.0 0.0 50132 4010560
0.0223602 1320.219141960144 80 147.177975689 25.1961125966 196.503986966 -19.3069965066 151.0 0.0 50132 4060692
0.0367636 1336.2501208782196 81 131.345400948 44.4840618872 192.184920074 -34.4540549913 151.0 0.0 50132 4110824
0.0406775 1352.2665147781372 82 127.77501305 51.8557140199 199.352991085 -37.4636368382 151.0 0.0 50132 4160956
-0.0077692 1368.2241833209991 83 147.726069939 28.6886626731 204.624845761 -11.9716458347 151.0 0.0 50132 4211088
-0.043617 1384.1780722141266 84 154.528153399 20.3905168874 199.045802437 67.6875360231 151.0 0.0 50132 4261220
-0.019866 1400.1033561229706 85 154.433144883 22.228242372 202.50099925 40.5043530744 151.0 0.0 50132 4311352
0.0238131 1415.9775621891022 86 154.651550929 21.2147744236 224.480479495 40.5992400976 151.0 0.0 50132 4361484
0.00343529 1431.8327820301056 87 157.761814048 18.1808725956 208.392225681 82.1925561271 151.0 0.0 50132 4411616
-0.00244395 1447.7117676734924 88 160.445862764 19.5062224268 206.999202814 74.4782152449 151.0 0.0 50132 4461748
0.0528812 1463.596872806549 89 160.386689119 23.1501666331 211.408343845 9.09378989967 151.0 0.0 50132 4511880
0.0344855 1479.5526022911072 90 149.895101373 22.9329635835 199.304230206 39.2703671724 151.0 0.0 50132 4562012
0.00789516 1495.5338101387024 91 160.676660258 18.2853667056 208.600834902 91.7438790128 151.0 0.0 50132 4612144
0.0565331 1511.4780023097992 92 165.268686187 16.439704144 207.552721623 99.366429248 151.0 0.0 50132 4662276
0.0195069 1527.3645250797272 93 163.099531936 18.9345012753 210.435798978 97.9324245657 151.0 0.0 50132 4712408
0.0201567 1543.2536556720734 94 163.690341008 19.8303935404 211.5204182 72.8971792427 151.0 0.0 50132 4762540
0.0578299 1559.1140539646149 95 166.319303534 21.1021512464 225.583249453 23.0924826496 151.0 0.0 50132 4812672
-0.0281224 1575.030605316162 96 165.501761168 18.9318046148 219.171207163 66.5306777305 151.0 0.0 50132 4862804
0.0629756 1590.9852712154388 97 156.530854022 24.887882918 208.232027537 -12.7468570691 151.0 0.0 50132 4912936
0.00399466 1606.9721901416779 98 145.083674598 26.8406610254 190.727577801 4.9207258872 151.0 0.0 50132 4963068
0.0460498 1622.927895307541 99 149.925630217 26.7623627599 197.210988875 -27.3056911812 151.0 0.0 50132 5013200
================================================
FILE: hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/21/params.json
================================================
{"animate" : false,
"env_name" : "HalfCheetah-v1",
"exp_name" : "HalfCheetah_b50000_rtg_na_25bl",
"gamma" : 0.9,
"learning_rate" : 0.025,
"logdir" : "data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/21",
"max_path_length" : 150.0,
"min_timesteps_per_batch" : 50000,
"n_iter" : 100,
"n_layers" : 1,
"nn_baseline" : true,
"normalize_advantages" : true,
"reward_to_go" : true,
"seed" : 21,
"size" : 32}
================================================
FILE: hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/31/log.txt
================================================
LossDelta Time Iteration AverageReturn StdReturn MaxReturn MinReturn EpLenMean EpLenStd TimestepsThisBatch TimestepsSoFar
0.137417 21.064739227294922 0 -190.897240893 46.6089352185 -27.2258743391 -372.279547816 151.0 0.0 50132 50132
0.0548721 37.01219916343689 1 -140.687068262 37.5550347376 -15.4241633182 -347.560100712 151.0 0.0 50132 100264
0.0254206 52.98334240913391 2 -113.443101657 34.0651847357 6.5220181492 -226.412047749 151.0 0.0 50132 150396
0.0229544 68.96394920349121 3 -105.550082432 33.0630189002 -11.4026858985 -272.110759389 151.0 0.0 50132 200528
0.0215282 84.92519497871399 4 -100.724926887 29.7726846102 -25.4093405734 -220.793134011 151.0 0.0 50132 250660
0.0358222 100.9263162612915 5 -95.3767389644 30.6686572264 -22.3396122994 -213.260223785 151.0 0.0 50132 300792
0.0333784 116.99193334579468 6 -85.999404569 30.0432932559 -2.8104806923 -190.148792127 151.0 0.0 50132 350924
0.0253062 133.03315663337708 7 -72.1831591657 30.5999142771 17.7072532409 -200.888970509 151.0 0.0 50132 401056
0.0263611 149.11539363861084 8 -62.3333370128 30.3892770612 9.6332473869 -149.657489918 151.0 0.0 50132 451188
0.0255504 165.21782279014587 9 -57.4430553633 30.8875057162 18.7988287752 -183.82465023 151.0 0.0 50132 501320
0.0260348 181.41210007667542 10 -52.7501158126 25.3706754653 9.88592885505 -142.767834982 151.0 0.0 50132 551452
0.0201632 197.60471081733704 11 -51.2299891808 25.5608389428 19.5304245515 -141.964951906 151.0 0.0 50132 601584
0.015544 213.8857123851776 12 -47.3960241402 22.4114420524 26.3482275294 -132.262491589 151.0 0.0 50132 651716
0.0227444 230.15306186676025 13 -43.0149192138 20.9134460855 15.5373348516 -105.819302002 151.0 0.0 50132 701848
0.0270518 246.42102670669556 14 -38.7075874201 21.1408371093 16.2063365962 -131.815378584 151.0 0.0 50132 751980
0.0172788 262.65814113616943 15 -31.2693537199 18.7788736951 9.01618803073 -95.9511355041 151.0 0.0 50132 802112
0.0234184 278.8505423069 16 -29.0126709299 21.0707990403 28.2062867717 -125.234625004 151.0 0.0 50132 852244
0.0141834 294.98457193374634 17 -28.4471979452 19.7859834242 30.174272082 -117.61103905 151.0 0.0 50132 902376
0.0166277 311.05715465545654 18 -22.2930285644 19.1015513297 32.3666313002 -95.1723354391 151.0 0.0 50132 952508
0.0125452 327.1181809902191 19 -23.9325267928 19.8857207975 64.9427207886 -102.838066938 151.0 0.0 50132 1002640
0.0189832 343.1677055358887 20 -20.8642412711 20.2847755858 45.3923779366 -86.0235282556 151.0 0.0 50132 1052772
0.013688 359.22139048576355 21 -19.3619143454 19.4877900823 45.6431997911 -110.105098728 151.0 0.0 50132 1102904
0.0125831 375.23385977745056 22 -17.8529041217 19.9704261934 45.9422125709 -91.4852726421 151.0 0.0 50132 1153036
0.0162405 391.25556445121765 23 -14.5962164577 18.1935978051 42.2348051417 -107.280705997 151.0 0.0 50132 1203168
0.0202018 407.3027181625366 24 -13.1290494069 18.0261722097 34.4999885246 -62.8706914659 151.0 0.0 50132 1253300
0.0125985 423.3434066772461 25 -9.11078535316 16.4889916967 37.3181087863 -68.8006536647 151.0 0.0 50132 1303432
0.0200693 439.427223443985 26 -5.98068360088 15.7397560912 33.6409458905 -59.202459919 151.0 0.0 50132 1353564
0.0182452 455.47803831100464 27 -3.80501670307 15.1027528677 39.6468017656 -59.7213575001 151.0 0.0 50132 1403696
0.0243192 471.5159430503845 28 -2.45456027794 16.4637145066 33.5793841049 -60.4364516043 151.0 0.0 50132 1453828
0.0172235 487.513060092926 29 0.397111874907 15.7741482805 43.0901944993 -62.7576281623 151.0 0.0 50132 1503960
0.0181344 503.5424602031708 30 4.13516144519 17.4775060875 51.5956691576 -60.8039150411 151.0 0.0 50132 1554092
0.0219125 519.5819807052612 31 9.50153589248 18.5738665156 44.169005618 -63.324388225 151.0 0.0 50132 1604224
0.0157505 535.6305468082428 32 16.1005977172 20.6026930298 58.0264111574 -70.9330947236 151.0 0.0 50132 1654356
-0.005294 551.6901745796204 33 24.9695532164 21.1432296201 82.9368017193 -43.9526836755 151.0 0.0 50132 1704488
-0.00591889 567.7029840946198 34 31.7967625286 24.0974367746 79.0996233926 -54.2324659506 151.0 0.0 50132 1754620
0.0044767 583.6846339702606 35 36.7059585589 23.7288604499 89.4803002521 -67.4458974113 151.0 0.0 50132 1804752
0.0128981 599.671145439148 36 36.7625442245 26.081213187 99.9900685996 -57.6794475541 151.0 0.0 50132 1854884
0.0192258 615.6673228740692 37 41.0395612631 20.3254110718 93.4537018014 -23.2376133829 151.0 0.0 50132 1905016
0.0185188 631.6752853393555 38 45.4456976307 19.6714241482 104.124720889 -19.8603178666 151.0 0.0 50132 1955148
0.0228116 647.7017242908478 39 50.9457191851 19.3041116212 103.358485316 -16.1255487068 151.0 0.0 50132 2005280
0.0180701 663.755931854248 40 57.6866521229 21.7429541533 111.104701745 -32.1495732232 151.0 0.0 50132 2055412
0.0106462 679.7545435428619 41 65.3190478679 26.0372874473 116.342681955 -29.6507731649 151.0 0.0 50132 2105544
0.00469465 695.7248232364655 42 71.2761746527 27.0363985822 117.515091394 -22.5523999252 151.0 0.0 50132 2155676
0.0160797 711.6601173877716 43 80.6191865677 23.4371856629 129.832033136 -19.3377565008 151.0 0.0 50132 2205808
0.0163915 727.5960166454315 44 84.6768397483 24.5951832231 128.871368921 -17.4015387765 151.0 0.0 50132 2255940
0.0139268 743.5306708812714 45 86.0862117043 22.8650820411 128.747166869 -23.8155350137 151.0 0.0 50132 2306072
0.0137846 759.4366879463196 46 87.2088382449 22.7556186433 135.253044616 -5.87497609026 151.0 0.0 50132 2356204
0.0203777 775.3622851371765 47 88.3591791027 23.0475673916 142.627940218 -16.762946082 151.0 0.0 50132 2406336
0.0268116 791.2670509815216 48 91.0385866577 21.9926116962 136.526814219 -24.6036446253 151.0 0.0 50132 2456468
0.0257699 807.2021043300629 49 99.9906003041 22.9885674879 149.712585927 12.6479903912 151.0 0.0 50132 2506600
0.0217925 823.1649551391602 50 104.611142617 22.0755950715 151.034175448 -26.934059419 151.0 0.0 50132 2556732
0.0261802 839.4466128349304 51 106.218069624 22.5821631552 147.519430053 0.986892782937 151.0 0.0 50132 2606864
0.0204284 855.4352321624756 52 107.626738118 25.2706140748 147.806524856 -22.2732395798 151.0 0.0 50132 2656996
0.0320903 871.473474740982 53 104.53489423 30.1239150721 166.368379604 -8.60838109098 151.0 0.0 50132 2707128
0.0100599 887.474080324173 54 105.658615772 31.2614493293 162.987306642 -32.1394791563 151.0 0.0 50132 2757260
0.0107538 903.4120011329651 55 105.896688067 30.2527260044 165.584853701 -8.81360683908 151.0 0.0 50132 2807392
0.022464 919.3731837272644 56 105.32372695 34.6778233199 167.041198655 -40.077478116 151.0 0.0 50132 2857524
0.0321536 935.292423248291 57 116.312775875 26.8671333107 172.506968886 13.7994565857 151.0 0.0 50132 2907656
0.0353605 951.2215664386749 58 122.264585082 22.8301050567 165.951528707 14.7650316181 151.0 0.0 50132 2957788
-0.0119476 967.1317303180695 59 124.32549326 26.5050807841 170.429519205 1.95642025583 151.0 0.0 50132 3007920
-0.0143712 983.0468466281891 60 120.569354417 27.7430991638 172.523001202 -3.79190992541 151.0 0.0 50132 3058052
-0.0167701 998.9787476062775 61 121.200116812 24.4472005983 160.563812104 5.74553897134 151.0 0.0 50132 3108184
0.0276545 1014.8646898269653 62 117.285467142 22.3092820979 156.194329845 -31.5740898392 151.0 0.0 50132 3158316
0.0691551 1030.7945408821106 63 119.735544482 24.3995869152 165.593272125 10.8347979966 151.0 0.0 50132 3208448
0.0366182 1046.7075304985046 64 120.829524317 28.580546142 173.049784169 -5.68561368214 151.0 0.0 50132 3258580
0.0370144 1062.6959280967712 65 112.524071897 23.8027319316 165.804496396 31.2370132772 151.0 0.0 50132 3308712
0.0410086 1078.6244604587555 66 119.44230413 25.836517742 175.575571767 38.8972999883 151.0 0.0 50132 3358844
0.0946967 1094.5336291790009 67 135.399392361 22.4698488347 193.159269423 54.4406988058 151.0 0.0 50132 3408976
-0.110919 1110.4563348293304 68 136.510087407 23.9653281527 213.377574093 37.065811337 151.0 0.0 50132 3459108
-0.0295646 1126.4224982261658 69 124.864810199 17.1312447219 175.76342357 27.0759522219 151.0 0.0 50132 3509240
0.00613683 1142.4214551448822 70 123.843163252 17.3028488197 162.86355707 44.2659596417 151.0 0.0 50132 3559372
0.0318044 1158.3676855564117 71 123.05398977 18.5697683321 163.464111825 40.0986141339 151.0 0.0 50132 3609504
-0.0080424 1174.3955261707306 72 126.986674957 15.3685943703 158.887686276 56.0884548705 151.0 0.0 50132 3659636
0.0310976 1190.3974478244781 73 123.018958227 30.5229205001 166.095622963 -46.7811467477 151.0 0.0 50132 3709768
-0.000173947 1206.3737201690674 74 128.834892594 33.0510381498 185.451717526 1.05212078254 151.0 0.0 50132 3759900
0.0503898 1222.3190937042236 75 108.135696171 30.193786233 175.333513676 5.06389558324 151.0 0.0 50132 3810032
0.0605938 1238.2503597736359 76 127.366785702 28.638840751 195.951592661 11.3634381923 151.0 0.0 50132 3860164
7.23675e-05 1254.1596319675446 77 146.785480524 28.9668171373 194.432402821 -16.7274724239 151.0 0.0 50132 3910296
0.0877231 1270.0584568977356 78 144.385623936 30.2308058821 192.418453892 -1.52514026908 151.0 0.0 50132 3960428
-0.00821127 1285.9651894569397 79 141.063429493 31.0539604671 193.28060745 6.04300520934 151.0 0.0 50132 4010560
-0.0067739 1301.8844435214996 80 144.245392853 32.0111066051 208.416420987 -17.3969057581 151.0 0.0 50132 4060692
0.0179708 1317.7896411418915 81 145.15146033 28.8281195075 199.236234651 -6.10514908489 151.0 0.0 50132 4110824
0.0915122 1333.6866042613983 82 146.28937227 29.2069902817 201.807066435 -5.158123083 151.0 0.0 50132 4160956
0.062984 1349.6002659797668 83 139.017868502 33.2192232155 208.0613458 -6.15201574544 151.0 0.0 50132 4211088
0.059793 1365.5366797447205 84 126.864133614 40.4792665584 196.544770556 -7.3524048053 151.0 0.0 50132 4261220
0.0496565 1381.4220111370087 85 128.052448353 35.77401687 194.236839139 -7.35784445802 151.0 0.0 50132 4311352
0.0478142 1397.2881262302399 86 134.963429725 26.6651546718 194.454548263 5.70977726239 151.0 0.0 50132 4361484
0.0924642 1413.1531331539154 87 136.213102083 25.9502107517 191.550277921 7.42879903451 151.0 0.0 50132 4411616
-0.00198442 1428.998836517334 88 147.565589072 24.0131481892 201.679760648 13.513128648 151.0 0.0 50132 4461748
-0.0310461 1444.8442780971527 89 156.948905202 22.6537496866 217.69771094 37.6546074469 151.0 0.0 50132 4511880
0.0500035 1460.7586226463318 90 158.064444312 26.6572565049 209.557967109 34.3832976218 151.0 0.0 50132 4562012
0.0319117 1476.6869475841522 91 164.103193677 29.3934628212 218.489585414 25.8229741521 151.0 0.0 50132 4612144
0.018292 1492.6327545642853 92 154.910139111 27.5757373039 209.881530496 14.6025504371 151.0 0.0 50132 4662276
0.0548965 1508.5741333961487 93 160.094527516 34.2672127714 220.856103055 3.59092864054 151.0 0.0 50132 4712408
-0.02815 1524.54474568367 94 169.125404351 30.3557143004 221.326089617 34.2464946659 151.0 0.0 50132 4762540
0.0105981 1540.5418820381165 95 172.517137357 24.74626262 216.631143465 3.85256406492 151.0 0.0 50132 4812672
0.034569 1556.4314408302307 96 170.725648839 24.7441750658 221.565513713 0.620424055449 151.0 0.0 50132 4862804
0.0150373 1572.3306019306183 97 170.558405329 22.5123383908 220.845101031 24.5149662247 151.0 0.0 50132 4912936
0.0321328 1588.204396724701 98 173.092370678 26.0050740862 224.217380234 21.99377081 151.0 0.0 50132 4963068
0.0411354 1604.059950351715 99 177.459250372 22.9436883216 223.817000099 78.6554298667 151.0 0.0 50132 5013200
================================================
FILE: hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/31/params.json
================================================
{"animate" : false,
"env_name" : "HalfCheetah-v1",
"exp_name" : "HalfCheetah_b50000_rtg_na_25bl",
"gamma" : 0.9,
"learning_rate" : 0.025,
"logdir" : "data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/31",
"max_path_length" : 150.0,
"min_timesteps_per_batch" : 50000,
"n_iter" : 100,
"n_layers" : 1,
"nn_baseline" : true,
"normalize_advantages" : true,
"reward_to_go" : true,
"seed" : 31,
"size" : 32}
================================================
FILE: hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/41/log.txt
================================================
LossDelta Time Iteration AverageReturn StdReturn MaxReturn MinReturn EpLenMean EpLenStd TimestepsThisBatch TimestepsSoFar
0.12708 21.07270121574402 0 -220.923293543 41.8852142808 -84.9146953094 -365.906994022 151.0 0.0 50132 50132
0.0828835 36.975881814956665 1 -152.368270919 39.6479378083 -22.9116375258 -283.23569561 151.0 0.0 50132 100264
0.0541419 52.87936806678772 2 -114.866907633 38.2358158574 -6.42254067536 -256.0310684 151.0 0.0 50132 150396
0.0277502 68.8259904384613 3 -94.8193382495 36.6486287629 7.19623192755 -199.112537248 151.0 0.0 50132 200528
0.0162455 84.75660634040833 4 -91.404183621 31.5728078088 -0.122650228138 -197.039697753 151.0 0.0 50132 250660
0.0239755 100.7247838973999 5 -85.8920768798 28.9281341705 5.42222038764 -170.248391172 151.0 0.0 50132 300792
0.0262909 116.67663073539734 6 -79.5540397663 31.4061724845 -9.69072756 -200.215836372 151.0 0.0 50132 350924
0.027545 132.68261218070984 7 -71.5500120659 27.4276084428 13.3135340861 -173.290089517 151.0 0.0 50132 401056
0.0249774 148.73846530914307 8 -61.4299746673 28.860293709 19.6304159498 -163.082549169 151.0 0.0 50132 451188
0.0244535 164.80112862586975 9 -52.9705518414 28.522470284 49.8972559042 -156.563903992 151.0 0.0 50132 501320
0.0227323 180.86191201210022 10 -51.8480606971 26.5960537989 16.4989694823 -152.835243066 151.0 0.0 50132 551452
0.0238927 196.9472119808197 11 -50.1130165535 26.0689582863 37.1180119778 -121.637885079 151.0 0.0 50132 601584
0.0207184 213.1076855659485 12 -46.9240699581 22.3102127087 22.8842104682 -102.635826569 151.0 0.0 50132 651716
0.019643 229.25678372383118 13 -41.825285099 21.707630837 18.0107051749 -120.668560988 151.0 0.0 50132 701848
0.0155793 245.43969988822937 14 -36.2214581322 19.107626561 21.4518346061 -91.1213900714 151.0 0.0 50132 751980
0.0154737 261.58561658859253 15 -31.1491137562 19.9187426527 37.4903697888 -91.0116023285 151.0 0.0 50132 802112
0.0117041 277.74159717559814 16 -29.3128868326 18.6327247978 20.5705600009 -95.1644777332 151.0 0.0 50132 852244
0.0145751 293.88827538490295 17 -26.3793280215 20.6436833015 29.5633176691 -98.1250890621 151.0 0.0 50132 902376
0.0164439 310.3397397994995 18 -23.853876261 18.6645127205 19.4776324886 -102.910120506 151.0 0.0 50132 952508
0.0193119 326.73000836372375 19 -21.5594629322 17.8896655289 31.4067387932 -82.5182753938 151.0 0.0 50132 1002640
0.0175859 343.23356652259827 20 -18.498051078 17.2768930112 26.4530861336 -108.945833152 151.0 0.0 50132 1052772
0.026074 359.85977363586426 21 -16.6607530155 16.1084689108 44.595963184 -74.5210111382 151.0 0.0 50132 1102904
0.0221995 376.271719455719 22 -12.1129743153 16.1414910866 40.9557563369 -69.1437662123 151.0 0.0 50132 1153036
0.018561 392.67700958251953 23 -8.28632810328 15.9856287639 29.7609416178 -84.2809059242 151.0 0.0 50132 1203168
0.0140461 409.49606704711914 24 -4.71828370599 17.7622847842 56.5155861545 -79.4748900298 151.0 0.0 50132 1253300
0.0139689 425.8870892524719 25 -0.160263681851 16.996391833 43.9362404463 -64.0432616135 151.0 0.0 50132 1303432
0.0211519 442.1293981075287 26 3.39413040279 17.0561480412 48.7308724988 -57.7461385642 151.0 0.0 50132 1353564
0.018658 458.2996304035187 27 10.1604726403 16.8217040873 59.9784910839 -49.9519879026 151.0 0.0 50132 1403696
0.0138661 474.45201563835144 28 11.70998111 15.5889152149 58.0373833058 -35.9967093066 151.0 0.0 50132 1453828
0.015394 490.58042645454407 29 12.6324075463 15.3203529395 55.5502179713 -39.2071382881 151.0 0.0 50132 1503960
0.0230869 506.73017859458923 30 13.264358341 13.9385575392 68.7895095886 -30.1707473872 151.0 0.0 50132 1554092
0.0315863 522.8333723545074 31 19.1102750828 16.5549311779 66.3593414584 -65.2065182421 151.0 0.0 50132 1604224
0.00591601 538.9353394508362 32 29.6801084263 19.9664266463 75.302475314 -31.1851161586 151.0 0.0 50132 1654356
0.00235279 555.0165634155273 33 34.9358966389 24.1271391133 83.3020348805 -52.312587542 151.0 0.0 50132 1704488
0.0128948 571.0490593910217 34 40.3295941214 25.405476514 97.4659655494 -38.1299446801 151.0 0.0 50132 1754620
0.01702 587.1206603050232 35 45.4784098084 27.4554448471 102.602926817 -52.759024737 151.0 0.0 50132 1804752
0.00378656 603.1493413448334 36 49.8221469528 25.2981552597 98.8910932274 -38.8901626401 151.0 0.0 50132 1854884
0.0171225 619.1681020259857 37 52.6877079053 24.5837952235 104.609912348 -33.7148520043 151.0 0.0 50132 1905016
0.010039 635.2099323272705 38 55.3098856299 25.3984565006 97.458672425 -65.2413630983 151.0 0.0 50132 1955148
0.00850409 651.2657423019409 39 60.3025958911 26.6330333061 112.973069882 -38.7426859855 151.0 0.0 50132 2005280
0.0193669 667.2855832576752 40 66.8110061639 28.1214823154 126.114862883 -48.5702787866 151.0 0.0 50132 2055412
0.00486695 683.2934782505035 41 81.0035491191 26.9163143154 139.81491947 -27.0794689436 151.0 0.0 50132 2105544
0.00273382 699.2686598300934 42 85.7973019526 25.2992905599 135.911682186 -22.1127843511 151.0 0.0 50132 2155676
0.0102085 715.3002729415894 43 88.6107577173 26.5906177042 139.137772247 -19.5933510368 151.0 0.0 50132 2205808
0.00499843 731.3660926818848 44 87.1463180729 29.5942594498 137.851244223 -6.74959245858 151.0 0.0 50132 2255940
0.0277139 747.3739247322083 45 96.4381236548 25.3673564432 152.283941822 -25.3621564773 151.0 0.0 50132 2306072
0.0105407 763.3241529464722 46 100.186901199 24.5900919696 147.788789965 -16.9339933999 151.0 0.0 50132 2356204
0.0196431 779.3152277469635 47 101.332158321 19.2262274975 146.800582961 -23.2174938978 151.0 0.0 50132 2406336
0.00635867 795.3148963451385 48 106.501908075 25.2635106471 156.469493534 -20.7997518503 151.0 0.0 50132 2456468
0.0186458 811.2650392055511 49 107.974479572 25.478450392 153.289817991 -48.7780446696 151.0 0.0 50132 2506600
0.0111561 827.2407681941986 50 112.743347089 27.0198518045 162.030034891 -4.59662986033 151.0 0.0 50132 2556732
0.0205485 843.2783930301666 51 113.635470199 26.2936585283 163.802731554 11.1848641882 151.0 0.0 50132 2606864
0.00409291 859.2426459789276 52 115.34929309 29.343268563 170.435470171 -14.1801635789 151.0 0.0 50132 2656996
0.0106318 875.241016626358 53 116.195418488 31.1474951786 170.72265754 -2.41219586455 151.0 0.0 50132 2707128
0.0295054 891.2339463233948 54 116.788771949 26.0913063734 166.742123953 -4.31214301717 151.0 0.0 50132 2757260
0.0388965 907.2292096614838 55 124.827516243 22.8057192384 163.644975689 22.332689543 151.0 0.0 50132 2807392
0.0466941 923.2126085758209 56 134.472299284 22.9366132876 187.301125596 -23.6476558021 151.0 0.0 50132 2857524
0.0175752 939.1847531795502 57 130.700796534 31.5085679302 184.161942597 -15.3740634266 151.0 0.0 50132 2907656
0.0518835 955.1277551651001 58 132.321929808 34.3173247971 196.362167654 2.769473853 151.0 0.0 50132 2957788
0.0253744 971.2207598686218 59 137.906631678 30.1028870468 195.470557578 -0.326238371907 151.0 0.0 50132 3007920
0.0113437 987.234326839447 60 129.550997372 31.2339428421 179.166674367 0.819867278957 151.0 0.0 50132 3058052
0.0529975 1003.226350069046 61 124.037113408 25.8421159375 178.068351646 -9.49730122666 151.0 0.0 50132 3108184
0.00839074 1019.136340379715 62 142.346288249 28.6768034112 203.388516326 -11.4114011496 151.0 0.0 50132 3158316
0.0129732 1035.1205027103424 63 153.747901709 28.4112073031 203.734756138 -15.0993184067 151.0 0.0 50132 3208448
0.0162256 1051.0646121501923 64 144.399488384 23.7115014096 199.629631592 30.7307788076 151.0 0.0 50132 3258580
0.0649607 1067.0842497348785 65 133.729870392 21.7004689094 183.116755547 53.1446101439 151.0 0.0 50132 3308712
0.0375514 1083.0518975257874 66 148.576177426 17.0274998644 193.10616678 84.30155934 151.0 0.0 50132 3358844
0.0200252 1099.0055470466614 67 158.469828362 18.2338566601 207.738006256 43.6628336487 151.0 0.0 50132 3408976
-0.00331602 1114.9424951076508 68 158.264754595 21.2759091457 207.498901777 30.033308789 151.0 0.0 50132 3459108
0.00842844 1130.846292257309 69 151.0246263 24.2069982233 202.952190252 46.2487258606 151.0 0.0 50132 3509240
0.0422203 1146.7755208015442 70 154.319072567 26.4719532924 210.722339317 10.8817028456 151.0 0.0 50132 3559372
-0.0224658 1162.715653181076 71 160.935713817 27.6417918042 216.054729911 7.33029723716 151.0 0.0 50132 3609504
-0.00407064 1178.625952720642 72 157.529412134 26.4469426963 209.245472196 43.3499755084 151.0 0.0 50132 3659636
0.0312973 1194.5514740943909 73 152.999458578 33.1020789767 227.212425053 -8.93640639293 151.0 0.0 50132 3709768
-0.0102214 1210.4479343891144 74 156.461508201 25.5975478631 213.830874542 30.3287995339 151.0 0.0 50132 3759900
0.0268055 1226.378385066986 75 149.509954537 26.2837276067 211.832369072 -9.17958865547 151.0 0.0 50132 3810032
-0.0407771 1242.2990992069244 76 152.426630167 24.7909814718 208.919701534 10.0289425916 151.0 0.0 50132 3860164
0.311808 1258.2240889072418 77 127.806972193 19.7946826021 178.881778739 26.7269463047 151.0 0.0 50132 3910296
0.0462513 1274.0994503498077 78 153.88212704 20.0611854975 195.76003036 83.1346760376 151.0 0.0 50132 3960428
-0.0130791 1289.971929550171 79 155.123372645 23.9776277466 209.070200201 52.6926327132 151.0 0.0 50132 4010560
-0.0223044 1305.9302129745483 80 153.535803604 29.2030884149 205.026440956 -7.29082282524 151.0 0.0 50132 4060692
0.0300717 1321.8447451591492 81 138.520845632 38.9659452841 207.954292892 15.2333249848 151.0 0.0 50132 4110824
0.116867 1337.782033920288 82 146.044282625 33.6667796301 206.538708981 14.9616932891 151.0 0.0 50132 4160956
0.0752258 1353.6613144874573 83 152.455908379 27.7847639984 211.808457349 45.1002907526 151.0 0.0 50132 4211088
0.021047 1369.5640931129456 84 149.818281883 23.5539854646 212.543887418 31.1640895757 151.0 0.0 50132 4261220
-0.00357469 1385.4445531368256 85 154.807549201 23.7587891107 204.479418675 19.446904557 151.0 0.0 50132 4311352
-0.0831478 1401.3308084011078 86 165.073308438 25.2280173293 218.173918542 52.9616057052 151.0 0.0 50132 4361484
-0.0178012 1417.2557497024536 87 163.589466245 28.1610647173 227.463465479 34.3157710895 151.0 0.0 50132 4411616
0.224079 1433.194581747055 88 150.260310893 24.2015421728 209.349731243 55.8937025004 151.0 0.0 50132 4461748
0.0275891 1449.001769065857 89 124.070375088 23.3194163902 172.906351653 26.7539427537 151.0 0.0 50132 4511880
0.078205 1464.8036906719208 90 116.821878427 32.7447352801 170.800877753 -0.687701316085 151.0 0.0 50132 4562012
0.0712893 1480.580598115921 91 135.932953929 15.9698102843 185.040708545 85.733918891 151.0 0.0 50132 4612144
-0.271885 1496.284797668457 92 151.015589544 15.6587658302 190.302083375 102.404421608 151.0 0.0 50132 4662276
-0.295132 1512.0198142528534 93 149.186562335 16.7787220378 201.86569471 102.520195047 151.0 0.0 50132 4712408
-0.140751 1527.711982011795 94 148.141625711 23.4508556444 191.278971452 -31.6761775448 151.0 0.0 50132 4762540
0.0352029 1543.3818192481995 95 159.413002958 22.951530397 214.428208655 28.0422936655 151.0 0.0 50132 4812672
0.177194 1559.0737857818604 96 160.453372131 23.6332118447 213.81337905 48.6882801387 151.0 0.0 50132 4862804
0.105576 1574.8353633880615 97 166.211329149 27.5100657993 224.660564422 38.5172514877 151.0 0.0 50132 4912936
-0.0260659 1590.5823757648468 98 159.964576546 29.481163381 229.273469784 -0.00698520156626 151.0 0.0 50132 4963068
-0.0762075 1606.3994948863983 99 152.15949126 25.9411095777 212.084252709 56.2887050775 151.0 0.0 50132 5013200
================================================
FILE: hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/41/params.json
================================================
{"animate" : false,
"env_name" : "HalfCheetah-v1",
"exp_name" : "HalfCheetah_b50000_rtg_na_25bl",
"gamma" : 0.9,
"learning_rate" : 0.025,
"logdir" : "data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/41",
"max_path_length" : 150.0,
"min_timesteps_per_batch" : 50000,
"n_iter" : 100,
"n_layers" : 1,
"nn_baseline" : true,
"normalize_advantages" : true,
"reward_to_go" : true,
"seed" : 41,
"size" : 32}
================================================
FILE: hw2/data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/1/log.txt
================================================
LossDelta Time Iteration AverageReturn StdReturn MaxReturn MinReturn EpLenMean EpLenStd TimestepsThisBatch TimestepsSoFar
0.602091 6.76475715637207 0 9.03571428571 5.57085619771 32.0 3.0 9.03571428571 5.57085619771 1012 1012
0.0882509 7.067959547042847 1 19.0555555556 9.58860563506 55.0 6.0 19.0555555556 9.58860563506 1029 2041
0.00134369 7.353352069854736 2 27.3513513514 11.7501379318 70.0 13.0 27.3513513514 11.7501379318 1012 3053
0.00994612 7.644820690155029 3 26.7631578947 13.8268126451 58.0 8.0 26.7631578947 13.8268126451 1017 4070
0.0226749 7.9196436405181885 4 28.25 13.0987170703 58.0 12.0 28.25 13.0987170703 1017 5087
0.016652 8.210463523864746 5 40.8076923077 24.5717461744 105.0 12.0 40.8076923077 24.5717461744 1061 6148
0.0273083 8.482748031616211 6 32.875 18.3042857003 81.0 12.0 32.875 18.3042857003 1052 7200
0.0178182 8.766393184661865 7 58.3333333333 43.4281015012 195.0 17.0 58.3333333333 43.4281015012 1050 8250
0.0218356 9.023099660873413 8 67.5333333333 52.6128839312 248.0 16.0 67.5333333333 52.6128839312 1013 9263
0.00636999 9.28016448020935 9 91.3636363636 51.793774156 203.0 39.0 91.3636363636 51.793774156 1005 10268
0.00664996 9.550451517105103 10 88.1666666667 27.8652511602 157.0 49.0 88.1666666667 27.8652511602 1058 11326
0.0220992 9.80831265449524 11 100.2 22.8814335215 134.0 62.0 100.2 22.8814335215 1002 12328
0.0100588 10.088838338851929 12 98.5454545455 18.941842266 137.0 70.0 98.5454545455 18.941842266 1084 13412
0.000466488 10.362831354141235 13 80.8461538462 11.4679899241 97.0 62.0 80.8461538462 11.4679899241 1051 14463
0.0059433 10.631492137908936 14 89.75 17.5647611237 120.0 62.0 89.75 17.5647611237 1077 15540
0.000191879 10.909849882125854 15 121.333333333 54.5832697201 244.0 32.0 121.333333333 54.5832697201 1092 16632
0.0298251 11.174779176712036 16 105.2 27.9134376242 151.0 72.0 105.2 27.9134376242 1052 17684
0.0120746 11.471116542816162 17 106.090909091 50.1949916833 208.0 30.0 106.090909091 50.1949916833 1167 18851
-0.00228642 11.737281322479248 18 68.3333333333 46.1557748885 192.0 20.0 68.3333333333 46.1557748885 1025 19876
0.0210015 12.016618490219116 19 48.5 29.9161707564 144.0 13.0 48.5 29.9161707564 1067 20943
0.0249124 12.29704737663269 20 72.1333333333 60.1452316388 203.0 12.0 72.1333333333 60.1452316388 1082 22025
0.0107623 12.561517477035522 21 114.0 49.9510871865 219.0 61.0 114.0 49.9510871865 1026 23051
0.0379473 12.825831413269043 22 149.428571429 49.1204267747 219.0 61.0 149.428571429 49.1204267747 1046 24097
0.0146601 13.269243240356445 23 857.0 143.0 1000.0 714.0 857.0 143.0 1714 25811
0.00387304 13.550962209701538 24 520.5 479.5 1000.0 41.0 520.5 479.5 1041 26852
-0.00445846 13.936067581176758 25 748.0 202.0 950.0 546.0 748.0 202.0 1496 28348
-0.00309289 14.287321329116821 26 343.5 259.08927033 720.0 47.0 343.5 259.08927033 1374 29722
0.00437352 14.56696891784668 27 120.777777778 30.89688146 167.0 73.0 120.777777778 30.89688146 1087 30809
0.013809 14.830055713653564 28 151.142857143 98.735474261 332.0 40.0 151.142857143 98.735474261 1058 31867
0.00959165 15.085086584091187 29 144.857142857 35.6708307061 189.0 98.0 144.857142857 35.6708307061 1014 32881
0.00861404 15.364528179168701 30 268.75 144.210566534 402.0 52.0 268.75 144.210566534 1075 33956
0.00630407 15.767969608306885 31 522.333333333 148.755578794 631.0 312.0 522.333333333 148.755578794 1567 35523
-0.00306702 16.293739795684814 32 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 37523
0.000587873 16.816981077194214 33 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 39523
0.0119693 17.338316917419434 34 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 41523
-0.00308531 17.867908716201782 35 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 43523
0.00772547 18.39204430580139 36 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 45523
0.00597157 18.91360378265381 37 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 47523
-0.00474364 19.187209367752075 38 348.333333333 165.387088036 571.0 175.0 348.333333333 165.387088036 1045 48568
0.00159111 19.456584692001343 39 513.5 132.5 646.0 381.0 513.5 132.5 1027 49595
0.000936993 19.9298152923584 40 307.0 314.23451964 903.0 26.0 307.0 314.23451964 1842 51437
0.0111146 20.18559980392456 41 145.428571429 103.453035781 272.0 5.0 145.428571429 103.453035781 1018 52455
0.00332006 20.538946390151978 42 442.333333333 357.172538449 904.0 34.0 442.333333333 357.172538449 1327 53782
0.00513766 20.93329095840454 43 505.0 355.404933374 1000.0 182.0 505.0 355.404933374 1515 55297
0.00254112 21.45427918434143 44 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 57297
0.00570995 21.979018688201904 45 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 59297
-0.00294264 22.50430154800415 46 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 61297
-0.000595535 23.031405925750732 47 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 63297
0.000307902 23.549055337905884 48 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 65297
0.002502 24.067930698394775 49 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 67297
0.00701737 24.5829815864563 50 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 69297
-7.87489e-05 25.10943102836609 51 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 71297
0.00197969 25.636685609817505 52 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 73297
0.00185306 26.166483879089355 53 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 75297
0.00235633 26.693531274795532 54 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 77297
0.003282 27.220266580581665 55 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 79297
0.00744414 27.7472984790802 56 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 81297
0.00106317 28.268240928649902 57 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 83297
0.00464073 28.79481077194214 58 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 85297
0.000234872 29.32617974281311 59 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 87297
-0.00017265 29.84268832206726 60 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 89297
-0.000419483 30.367334127426147 61 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 91297
-0.000131996 30.885836601257324 62 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 93297
0.00541081 31.39881730079651 63 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 95297
0.0027751 31.913252353668213 64 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 97297
0.00705485 32.43810844421387 65 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 99297
0.00756834 32.84289884567261 66 388.5 205.808770464 631.0 184.0 388.5 205.808770464 1554 100851
0.0282454 33.221885204315186 67 734.0 266.0 1000.0 468.0 734.0 266.0 1468 102319
0.00228373 33.74599099159241 68 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 104319
0.00151597 34.28067183494568 69 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 106319
0.00324522 34.82058072090149 70 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 108319
0.000135124 35.35584092140198 71 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 110319
0.00073885 35.906853914260864 72 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 112319
0.000151843 36.456477880477905 73 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 114319
0.00253202 37.01272749900818 74 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 116319
0.00139823 37.56826639175415 75 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 118319
0.00708318 38.12512946128845 76 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 120319
0.000988686 38.69096875190735 77 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 122319
0.00213949 39.30945086479187 78 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 124319
-0.000421284 39.91579341888428 79 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 126319
0.000149263 40.31728267669678 80 756.0 244.0 1000.0 512.0 756.0 244.0 1512 127831
-0.000102263 40.83227825164795 81 545.666666667 336.715837993 1000.0 195.0 545.666666667 336.715837993 1637 129468
0.00492216 41.186660289764404 82 436.0 399.462972836 1000.0 126.0 436.0 399.462972836 1308 130776
0.00174772 41.59078574180603 83 771.0 229.0 1000.0 542.0 771.0 229.0 1542 132318
0.000779479 42.13102340698242 84 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 134318
0.00434309 42.662580490112305 85 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 136318
0.00265961 43.18075108528137 86 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 138318
0.00101361 43.70223069190979 87 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 140318
0.00352905 44.22879147529602 88 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 142318
0.0036558 44.75788187980652 89 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 144318
0.00685947 45.286428451538086 90 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 146318
0.00302239 45.807966232299805 91 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 148318
0.0148254 46.31892967224121 92 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 150318
-0.002683 46.83810997009277 93 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 152318
0.00536365 47.38642716407776 94 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 154318
-0.00564025 47.920905351638794 95 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 156318
4.73112e-05 48.40345025062561 96 623.0 267.156633207 1000.0 413.0 623.0 267.156633207 1869 158187
0.0013827 48.83704614639282 97 833.0 167.0 1000.0 666.0 833.0 167.0 1666 159853
0.00276896 49.29080605506897 98 591.666666667 313.796466236 1000.0 237.0 591.666666667 313.796466236 1775 161628
-0.000452832 49.81399321556091 99 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 163628
================================================
FILE: hw2/data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/1/params.json
================================================
{"animate" : false,
"env_name" : "InvertedPendulum-v1",
"exp_name" : "InvertedPendulum_sb_rtg_na_0.02",
"gamma" : 1.0,
"learning_rate" : 0.02,
"logdir" : "data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/1",
"max_path_length" : null,
"min_timesteps_per_batch" : 1000,
"n_iter" : 100,
"n_layers" : 2,
"nn_baseline" : false,
"normalize_advantages" : true,
"reward_to_go" : true,
"seed" : 1,
"size" : 32}
================================================
FILE: hw2/data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/11/log.txt
================================================
LossDelta Time Iteration AverageReturn StdReturn MaxReturn MinReturn EpLenMean EpLenStd TimestepsThisBatch TimestepsSoFar
9.84485 7.2229578495025635 0 8.41666666667 2.47515431618 20.0 5.0 8.41666666667 2.47515431618 1010 1010
1.50212 7.51533055305481 1 20.22 3.79626131872 30.0 15.0 20.22 3.79626131872 1011 2021
1.25737 7.7772440910339355 2 57.0555555556 13.8342257633 98.0 37.0 57.0555555556 13.8342257633 1027 3048
0.535736 8.046581268310547 3 180.5 94.2368470044 376.0 96.0 180.5 94.2368470044 1083 4131
0.400931 8.321382999420166 4 83.2307692308 30.3724415729 161.0 53.0 83.2307692308 30.3724415729 1082 5213
-0.0160533 8.57603931427002 5 50.6 13.7164135254 85.0 25.0 50.6 13.7164135254 1012 6225
0.041017 8.83631682395935 6 38.6923076923 14.5675330016 68.0 13.0 38.6923076923 14.5675330016 1006 7231
0.0517083 9.095884084701538 7 40.8 10.4957134107 59.0 15.0 40.8 10.4957134107 1020 8251
0.0212874 9.370235204696655 8 45.5909090909 10.5729461402 82.0 32.0 45.5909090909 10.5729461402 1003 9254
0.0239413 9.634948968887329 9 44.3043478261 8.91230113558 64.0 31.0 44.3043478261 8.91230113558 1019 10273
0.2061 9.910567045211792 10 51.9 15.9715371834 102.0 32.0 51.9 15.9715371834 1038 11311
0.492431 10.165862798690796 11 53.2631578947 13.6337124931 80.0 37.0 53.2631578947 13.6337124931 1012 12323
0.0759064 10.429330110549927 12 69.4666666667 20.9089029416 136.0 45.0 69.4666666667 20.9089029416 1042 13365
-0.0115777 10.688966989517212 13 103.9 27.9086008248 159.0 68.0 103.9 27.9086008248 1039 14404
0.10276 10.948424816131592 14 116.111111111 27.5012906543 172.0 74.0 116.111111111 27.5012906543 1045 15449
0.107211 11.213691711425781 15 207.2 35.9410628669 261.0 165.0 207.2 35.9410628669 1036 16485
0.254836 11.547999382019043 16 342.5 94.7800084406 455.0 238.0 342.5 94.7800084406 1370 17855
0.180811 11.81119704246521 17 257.0 19.7610728454 287.0 232.0 257.0 19.7610728454 1028 18883
0.0363615 12.093258142471313 18 155.857142857 45.1772021249 242.0 113.0 155.857142857 45.1772021249 1091 19974
0.0222054 12.344907283782959 19 101.2 17.9710878914 139.0 68.0 101.2 17.9710878914 1012 20986
0.00786898 12.595535516738892 20 116.222222222 31.12261692 165.0 66.0 116.222222222 31.12261692 1046 22032
-0.00338071 12.849928379058838 21 129.0 35.2526594741 181.0 88.0 129.0 35.2526594741 1032 23064
0.0334179 13.097781896591187 22 170.333333333 18.3636113611 198.0 145.0 170.333333333 18.3636113611 1022 24086
0.0154595 13.49902081489563 23 326.4 166.928248059 639.0 185.0 326.4 166.928248059 1632 25718
0.0102083 13.915255784988403 24 550.666666667 289.196972475 877.0 174.0 550.666666667 289.196972475 1652 27370
0.00266306 14.426665544509888 25 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 29370
0.00419491 14.956173658370972 26 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 31370
0.000947851 15.548860311508179 27 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 33370
0.0491228 16.1032931804657 28 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 35370
0.000598127 16.35001277923584 29 201.4 17.9398996653 236.0 185.0 201.4 17.9398996653 1007 36377
0.205319 16.642978191375732 30 193.666666667 22.6175939382 236.0 159.0 193.666666667 22.6175939382 1162 37539
0.0578096 17.157249212265015 31 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 39539
-0.00536724 17.665299654006958 32 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 41539
0.00977675 18.185539722442627 33 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 43539
-0.0024191 18.71018099784851 34 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 45539
0.000597108 19.224491596221924 35 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 47539
-0.000200507 19.740996599197388 36 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 49539
0.00456004 20.31839609146118 37 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 51539
0.00168774 20.86813521385193 38 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 53539
0.0102562 21.422646522521973 39 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 55539
-0.000356285 21.972156763076782 40 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 57539
0.00434321 22.51257061958313 41 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 59539
0.00314676 23.06043553352356 42 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 61539
0.0119144 23.60994815826416 43 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 63539
0.00132382 24.147279977798462 44 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 65539
-0.00246824 24.692411184310913 45 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 67539
0.00578189 25.244147539138794 46 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 69539
0.00142292 25.791747093200684 47 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 71539
0.00720477 26.34340000152588 48 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 73539
-0.000640061 26.88925313949585 49 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 75539
-0.000631195 27.199026823043823 50 395.0 73.9729680356 491.0 311.0 395.0 73.9729680356 1185 76724
0.00845176 27.46857786178589 51 352.666666667 46.6357040141 414.0 301.0 352.666666667 46.6357040141 1058 77782
0.0327231 27.9066219329834 52 561.0 164.17673404 775.0 376.0 561.0 164.17673404 1683 79465
0.0178538 28.435946941375732 53 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 81465
-0.00175977 29.014880657196045 54 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 83465
-0.00183104 29.539748668670654 55 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 85465
0.0011446 30.06399655342102 56 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 87465
0.00530291 30.591025352478027 57 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 89465
0.00815219 31.11577296257019 58 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 91465
0.00294679 31.638994693756104 59 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 93465
0.00234366 32.16615128517151 60 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 95465
0.00537469 32.686542987823486 61 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 97465
0.00254938 33.2045464515686 62 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 99465
0.00851973 33.71687150001526 63 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 101465
-0.00166347 34.24772596359253 64 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 103465
0.00554468 34.76952028274536 65 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 105465
-0.00221764 35.286675453186035 66 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 107465
-0.000778406 35.80536460876465 67 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 109465
0.00339717 36.20728349685669 68 786.0 214.0 1000.0 572.0 786.0 214.0 1572 111037
0.0537435 36.46434426307678 69 144.857142857 64.0143478815 246.0 72.0 144.857142857 64.0143478815 1014 112051
0.0154534 36.77367043495178 70 608.0 231.0 839.0 377.0 608.0 231.0 1216 113267
0.00536814 37.2880973815918 71 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 115267
0.000375593 37.80867886543274 72 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 117267
0.00246562 38.328699827194214 73 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 119267
0.0235415 38.859750509262085 74 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 121267
0.00377482 39.377413511276245 75 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 123267
-0.00155859 39.93961238861084 76 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 125267
-0.000933135 40.46783971786499 77 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 127267
0.00555035 40.987693071365356 78 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 129267
0.0253226 41.50291180610657 79 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 131267
0.0373722 42.02027869224548 80 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 133267
-0.00281248 42.53746962547302 81 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 135267
0.0538512 43.04693007469177 82 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 137267
-0.0150499 43.56194829940796 83 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 139267
0.022479 44.06363844871521 84 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 141267
0.0127346 44.571359395980835 85 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 143267
0.0459498 45.08292746543884 86 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 145267
-0.0156565 45.60513257980347 87 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 147267
0.0348157 46.12567448616028 88 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 149267
-0.0149153 46.52139449119568 89 768.5 231.5 1000.0 537.0 768.5 231.5 1537 150804
0.0471378 46.77190947532654 90 169.5 90.4778978536 350.0 97.0 169.5 90.4778978536 1017 151821
0.00488421 47.03598880767822 91 74.7857142857 34.4926049454 131.0 34.0 74.7857142857 34.4926049454 1047 152868
0.00946848 47.2924861907959 92 86.25 31.5888297768 145.0 40.0 86.25 31.5888297768 1035 153903
0.0110376 47.55975818634033 93 82.0769230769 25.0920199955 129.0 41.0 82.0769230769 25.0920199955 1067 154970
0.0196664 47.811049461364746 94 114.0 49.8352842427 206.0 41.0 114.0 49.8352842427 1026 155996
0.0104369 48.06812262535095 95 147.428571429 21.2861936667 177.0 112.0 147.428571429 21.2861936667 1032 157028
0.0215529 48.36423707008362 96 242.2 23.7267781209 276.0 203.0 242.2 23.7267781209 1211 158239
-0.00103157 48.64035892486572 97 262.75 42.3755530937 332.0 218.0 262.75 42.3755530937 1051 159290
0.00117358 49.18579912185669 98 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 161290
0.00522744 49.70541977882385 99 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 163290
================================================
FILE: hw2/data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/11/params.json
================================================
{"animate" : false,
"env_name" : "InvertedPendulum-v1",
"exp_name" : "InvertedPendulum_sb_rtg_na_0.02",
"gamma" : 1.0,
"learning_rate" : 0.02,
"logdir" : "data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/11",
"max_path_length" : null,
"min_timesteps_per_batch" : 1000,
"n_iter" : 100,
"n_layers" : 2,
"nn_baseline" : false,
"normalize_advantages" : true,
"reward_to_go" : true,
"seed" : 11,
"size" : 32}
================================================
FILE: hw2/data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/21/log.txt
================================================
LossDelta Time Iteration AverageReturn StdReturn MaxReturn MinReturn EpLenMean EpLenStd TimestepsThisBatch TimestepsSoFar
0.350545 7.623704195022583 0 8.73043478261 5.11149044737 30.0 3.0 8.73043478261 5.11149044737 1004 1004
0.152718 7.933123350143433 1 19.0740740741 11.3379922644 55.0 4.0 19.0740740741 11.3379922644 1030 2034
0.0429787 8.199016094207764 2 37.6296296296 23.3906409713 129.0 10.0 37.6296296296 23.3906409713 1016 3050
0.0148822 8.450114250183105 3 37.2962962963 21.1831778381 80.0 6.0 37.2962962963 21.1831778381 1007 4057
0.0104328 8.721006631851196 4 28.2162162162 20.3962571224 121.0 9.0 28.2162162162 20.3962571224 1044 5101
0.018495 8.987627506256104 5 29.5588235294 16.2339353651 68.0 8.0 29.5588235294 16.2339353651 1005 6106
0.00639337 9.252012729644775 6 30.6060606061 23.7179632652 106.0 8.0 30.6060606061 23.7179632652 1010 7116
0.0222873 9.539709329605103 7 39.7777777778 21.0631033086 90.0 12.0 39.7777777778 21.0631033086 1074 8190
0.0089298 9.825225114822388 8 47.1818181818 43.3481008954 206.0 9.0 47.1818181818 43.3481008954 1038 9228
0.0281346 10.153348445892334 9 53.35 19.5378478856 82.0 11.0 53.35 19.5378478856 1067 10295
-0.0016746 10.516669750213623 10 77.3571428571 12.2801066937 97.0 57.0 77.3571428571 12.2801066937 1083 11378
0.01356 10.803219079971313 11 86.0 16.5025250593 108.0 42.0 86.0 16.5025250593 1032 12410
0.0040417 11.082040071487427 12 72.5 19.452322667 88.0 7.0 72.5 19.452322667 1015 13425
0.0136989 11.35108470916748 13 77.2857142857 6.45233702322 88.0 68.0 77.2857142857 6.45233702322 1082 14507
0.0160684 11.614316701889038 14 74.1428571429 5.42292939889 82.0 66.0 74.1428571429 5.42292939889 1038 15545
0.0569116 11.87187647819519 15 86.75 9.9676560267 99.0 67.0 86.75 9.9676560267 1041 16586
0.0433088 12.153593301773071 16 89.3333333333 46.7552730241 137.0 6.0 89.3333333333 46.7552730241 1072 17658
0.0112266 12.42675232887268 17 68.1875 30.6251913259 100.0 9.0 68.1875 30.6251913259 1091 18749
0.0558085 12.747072696685791 18 79.5333333333 70.9684123787 253.0 10.0 79.5333333333 70.9684123787 1193 19942
0.007598 13.030377864837646 19 167.857142857 16.0394920781 190.0 140.0 167.857142857 16.0394920781 1175 21117
-0.00639769 13.296810626983643 20 134.125 10.6587698634 155.0 121.0 134.125 10.6587698634 1073 22190
0.003167 13.558030605316162 21 117.666666667 6.41179468722 127.0 107.0 117.666666667 6.41179468722 1059 23249
0.00376649 13.822790145874023 22 118.555555556 9.2269064409 138.0 104.0 118.555555556 9.2269064409 1067 24316
0.0175271 14.091765403747559 23 116.111111111 6.11817773242 129.0 109.0 116.111111111 6.11817773242 1045 25361
-0.000405248 14.353772640228271 24 104.6 4.82078831728 112.0 98.0 104.6 4.82078831728 1046 26407
-0.000334972 14.619886875152588 25 97.3636363636 4.55816404972 105.0 89.0 97.3636363636 4.55816404972 1071 27478
0.00266277 14.882812738418579 26 95.5454545455 5.03377025451 105.0 89.0 95.5454545455 5.03377025451 1051 28529
0.0051884 15.143433570861816 27 98.1818181818 3.29788311768 103.0 89.0 98.1818181818 3.29788311768 1080 29609
0.00461172 15.427077531814575 28 102.5 5.76628129734 110.0 90.0 102.5 5.76628129734 1025 30634
0.0143375 15.703834772109985 29 108.9 8.75728268357 125.0 95.0 108.9 8.75728268357 1089 31723
0.0164663 16.047930002212524 30 123.111111111 10.0933910659 142.0 113.0 123.111111111 10.0933910659 1108 32831
0.0734041 16.371893882751465 31 160.142857143 25.4301765144 216.0 131.0 160.142857143 25.4301765144 1121 33952
0.0209195 16.675029754638672 32 315.75 40.8495716012 360.0 249.0 315.75 40.8495716012 1263 35215
0.0045359 16.970834493637085 33 301.0 33.6229088569 351.0 266.0 301.0 33.6229088569 1204 36419
-0.0162555 17.29827642440796 34 199.666666667 28.2823069938 229.0 140.0 199.666666667 28.2823069938 1198 37617
0.00934858 17.58111262321472 35 127.111111111 64.2986857382 193.0 24.0 127.111111111 64.2986857382 1144 38761
0.00314428 17.835707187652588 36 176.666666667 12.3782964184 198.0 164.0 176.666666667 12.3782964184 1060 39821
0.011018 18.11056923866272 37 157.857142857 23.4668676553 177.0 103.0 157.857142857 23.4668676553 1105 40926
0.00261322 18.374802827835083 38 152.714285714 33.779597755 190.0 88.0 152.714285714 33.779597755 1069 41995
0.00184643 18.63509750366211 39 175.166666667 19.8948625418 200.0 140.0 175.166666667 19.8948625418 1051 43046
0.00787114 18.91007685661316 40 110.0 55.0036362434 197.0 36.0 110.0 55.0036362434 1100 44146
0.0023341 19.169974327087402 41 147.571428571 38.1981942088 186.0 60.0 147.571428571 38.1981942088 1033 45179
-0.000996647 19.444185972213745 42 183.5 10.6105293616 202.0 172.0 183.5 10.6105293616 1101 46280
0.0361872 19.763829469680786 43 196.166666667 5.98377435701 207.0 188.0 196.166666667 5.98377435701 1177 47457
-0.00198973 20.01742458343506 44 263.0 6.204836823 273.0 256.0 263.0 6.204836823 1052 48509
0.0185856 20.284472942352295 45 364.0 24.1246761636 388.0 331.0 364.0 24.1246761636 1092 49601
0.00496694 20.546913623809814 46 115.444444444 91.4975544435 317.0 39.0 115.444444444 91.4975544435 1039 50640
0.0330575 20.873517274856567 47 169.857142857 99.9220104044 272.0 10.0 169.857142857 99.9220104044 1189 51829
0.0344604 21.179991006851196 48 167.428571429 70.6275080826 257.0 30.0 167.428571429 70.6275080826 1172 53001
0.0205699 21.45786952972412 49 175.833333333 15.464116169 206.0 155.0 175.833333333 15.464116169 1055 54056
-0.00851114 21.756019592285156 50 188.333333333 13.1613913483 205.0 171.0 188.333333333 13.1613913483 1130 55186
0.00929796 22.021973848342896 51 203.2 23.0599219426 234.0 175.0 203.2 23.0599219426 1016 56202
0.00810973 22.326077699661255 52 290.0 33.3016516107 335.0 243.0 290.0 33.3016516107 1160 57362
0.0143014 22.624312162399292 53 378.333333333 61.4780900448 465.0 329.0 378.333333333 61.4780900448 1135 58497
0.0230448 22.979187488555908 54 665.5 93.5 759.0 572.0 665.5 93.5 1331 59828
0.0323814 23.37579083442688 55 506.333333333 98.4084458887 605.0 372.0 506.333333333 98.4084458887 1519 61347
0.00398331 23.916831016540527 56 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 63347
-0.00126615 24.45587158203125 57 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 65347
0.00829495 24.990619659423828 58 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 67347
0.00470978 25.51140570640564 59 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 69347
0.00532265 26.039437294006348 60 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 71347
0.00514702 26.562456130981445 61 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 73347
0.00158479 27.10531449317932 62 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 75347
0.014562 27.638699293136597 63 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 77347
0.00113166 28.16949987411499 64 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 79347
0.00471543 28.56187129020691 65 757.0 243.0 1000.0 514.0 757.0 243.0 1514 80861
0.0417658 28.829689025878906 66 93.6363636364 115.90613187 439.0 9.0 93.6363636364 115.90613187 1030 81891
0.0360292 29.110421895980835 67 185.333333333 156.388050126 401.0 14.0 185.333333333 156.388050126 1112 83003
0.00307429 29.627554655075073 68 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 85003
-0.00208698 30.15225338935852 69 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 87003
0.000237605 30.684929609298706 70 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 89003
-0.00365509 31.202202558517456 71 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 91003
0.00270619 31.72530221939087 72 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 93003
-0.00141715 32.236183643341064 73 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 95003
0.00174803 32.75935506820679 74 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 97003
0.00294137 33.27057981491089 75 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 99003
0.00612107 33.54245471954346 76 515.0 485.0 1000.0 30.0 515.0 485.0 1030 100033
0.000486661 33.83180022239685 77 514.5 485.5 1000.0 29.0 514.5 485.5 1029 101062
0.00178058 34.346622467041016 78 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 103062
0.00217812 34.86988806724548 79 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 105062
0.00209597 35.203787326812744 80 640.0 360.0 1000.0 280.0 640.0 360.0 1280 106342
0.0025814 35.72733783721924 81 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 108342
0.0004908 36.24655270576477 82 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 110342
0.000522666 36.767661333084106 83 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 112342
0.000994201 37.29701209068298 84 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 114342
0.00246152 37.8122718334198 85 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 116342
-5.16772e-05 38.32043981552124 86 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 118342
0.00437572 38.847431659698486 87 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 120342
-8.2776e-06 39.36740565299988 88 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 122342
0.00191971 39.879302740097046 89 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 124342
0.00125631 40.395965576171875 90 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 126342
0.00755961 40.91072130203247 91 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 128342
-0.000780614 41.42177677154541 92 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 130342
0.00119386 41.950058460235596 93 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 132342
0.0029334 42.470048666000366 94 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 134342
0.00528921 42.79389667510986 95 584.5 415.5 1000.0 169.0 584.5 415.5 1169 135511
-0.00271168 43.321409463882446 96 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 137511
0.000188325 43.83671522140503 97 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 139511
0.0047191 44.36561918258667 98 1000.0 0.0 1000.0 1000.0 1000.0 0.0 2000 141511
-0.000626978 44.703309297561646 99 623.0 377.0 1000.0 246.0 623.0 377.0 1246 142757
================================================
FILE: hw2/data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/21/params.json
================================================
{"animate" : false,
"env_name" : "InvertedPendulum-v1",
"exp_name" : "InvertedPendulum_sb_rtg_na_0.02",
"gamma" : 1.0,
"learning_rate" : 0.02,
"logdir" : "data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/21",
"max_path_length" : null,
"min_timesteps_per_batch" : 1000,
"n_iter" : 100,
"n_layers" : 2,
"nn_baseline" : false,
"normalize_advantages" : true,
"reward_to_go" : true,
"seed" : 21,
"size" : 32}
================================================
FILE: hw2/data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/31/log.txt
================================================
LossDelta Time Iteration AverageReturn StdReturn MaxReturn MinReturn EpLenMean EpLenStd TimestepsThisBatch TimestepsSoFar
0.282001 7.312387466430664 0 5.01 2.78206757646 18.0 2.0 5.01 2.78206757646 1002 1002
0.172524 7.62332010269165 1 8.59829059829 4.49322344281 24.0 2.0 8.59829059829 4.49322344281 1006 2008
0.0445865 7.896857023239136 2 17.5614035088 14.9596152777 66.0 2.0 17.5614035088 14.9596152777 1001 3009
-0.000305377 8.180015563964844 3 16.9830508475 9.81436430058 49.0 4.0 16.9830508475 9.81436430058 1002 4011
-0.00812558 8.459576606750488 4 13.012987013 9.04748441251 56.0 3.0 13.012987013 9.04748441251 1002 5013
0.00731168 8.742552042007446 5 12.8333333333 8.00867585116 42.0 4.0 12.8333333333 8.00867585116 1001 6014
0.0139483 9.034139156341553 6 13.3866666667 8.82102538761 48.0 3.0 13.3866666667 8.82102538761 1004 7018
0.0345713 9.342700242996216 7 15.2575757576 11.9218549071 64.0 3.0 15.2575757576 11.9218549071 1007 8025
0.0259883 9.668273448944092 8 20.9791666667 13.0391870646 63.0 4.0 20.9791666667 13.0391870646 1007 9032
0.0146477 9.962856769561768 9 26.9473684211 19.7136706086 84.0 4.0 26.9473684211 19.7136706086 1024 10056
0.00605139 10.227054357528687 10 26.6315789474 19.1358317144 72.0 3.0 26.6315789474 19.1358317144 1012 11068
0.0055303 10.511155366897583 11 35.7586206897 27.0741703759 123.0 2.0 35.7586206897 27.0741703759 1037 12105
0.00416327 10.784699201583862 12 43.6956521739 25.6488575601 98.0 12.0 43.6956521739 25.6488575601 1005 13110
0.00450304 11.054237604141235 13 33.7666666667 17.0287665111 75.0 6.0 33.7666666667 17.0287665111 1013 14123
0.00688669 11.369384288787842 14 38.2222222222 22.7959277282 87.0 2.0 38.2222222222 22.7959277282 1032 15155
0.0123785 11.676459550857544 15 46.9090909091 26.1375889041 92.0 4.0 46.9090909091 26.1375889041 1032 16187
0.0229735 12.007859468460083 16 57.7222222222 24.4217697865 112.0 13.0 57.7222222222 24.4217697865 1039 17226
9.02265e-05 12.395692825317383 17 67.8666666667 28.7468645633 141.0 33.0 67.8666666667 28.7468645633 1018 18244
0.0
gitextract_ql1s9b61/
├── README.md
├── hw1/
│ ├── .idea/
│ │ ├── hw1.iml
│ │ ├── misc.xml
│ │ ├── modules.xml
│ │ └── workspace.xml
│ ├── BehavioralCloning.py
│ ├── DAgger.py
│ ├── README.md
│ ├── data/
│ │ ├── Ant-v1.train.npz
│ │ ├── HalfCheetah-v1.train.npz
│ │ ├── Hopper-v1.train.npz
│ │ ├── Humanoid-v1.train.npz
│ │ ├── Reacher-v1.train.npz
│ │ └── Walker2d-v1.train.npz
│ ├── demo.bash
│ ├── experts/
│ │ ├── Ant-v1.pkl
│ │ ├── HalfCheetah-v1.pkl
│ │ ├── Hopper-v1.pkl
│ │ ├── Humanoid-v1.pkl
│ │ ├── Reacher-v1.pkl
│ │ └── Walker2d-v1.pkl
│ ├── load_policy.py
│ ├── log/
│ │ ├── Ant-v1_BC_30-01-2018_10-32-45/
│ │ │ └── log.txt
│ │ ├── Ant-v1_DA_30-01-2018_10-51-03/
│ │ │ └── log.txt
│ │ ├── HalfCheetah-v1_BC_30-01-2018_10-51-19/
│ │ │ └── log.txt
│ │ ├── HalfCheetah-v1_DA_30-01-2018_11-11-33/
│ │ │ └── log.txt
│ │ ├── Hopper-v1_BC_30-01-2018_10-55-39/
│ │ │ └── log.txt
│ │ ├── Hopper-v1_BCbig_30-01-2018_11-02-29/
│ │ │ └── log.txt
│ │ ├── Hopper-v1_DA_30-01-2018_11-24-58/
│ │ │ └── log.txt
│ │ ├── Humanoid-v1_BC_30-01-2018_10-28-53/
│ │ │ └── log.txt
│ │ ├── Humanoid-v1_DA_30-01-2018_10-31-26/
│ │ │ └── log.txt
│ │ ├── Reacher-v1_BC_30-01-2018_10-57-25/
│ │ │ └── log.txt
│ │ ├── Reacher-v1_DA_30-01-2018_11-27-44/
│ │ │ └── log.txt
│ │ ├── Walker2d-v1_BC_30-01-2018_10-58-02/
│ │ │ └── log.txt
│ │ └── Walker2d-v1_DA_30-01-2018_11-28-50/
│ │ └── log.txt
│ ├── logz.py
│ ├── plot.py
│ ├── run_expert.py
│ └── tf_util.py
├── hw2/
│ ├── .idea/
│ │ ├── hw2.iml
│ │ ├── misc.xml
│ │ ├── modules.xml
│ │ └── workspace.xml
│ ├── data/
│ │ ├── HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── InvertedPendulum_sb_rtg_na_bl_0.02_InvertedPendulum-v1_02-02-2018_10-42-44/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── lb_no_rtg_dna_CartPole-v0_24-01-2018_09-28-29/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── lb_rtg_dna_CartPole-v0_24-01-2018_09-20-37/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── lb_rtg_na_CartPole-v0_24-01-2018_09-11-55/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── sb_no_rtg_dna_CartPole-v0_24-01-2018_09-00-15/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── sb_rtg_dna_CartPole-v0_24-01-2018_09-04-19/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── sb_rtg_na_CartPole-v0_24-01-2018_09-08-49/
│ │ │ ├── 1/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 11/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 21/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ ├── 31/
│ │ │ │ ├── log.txt
│ │ │ │ ├── params.json
│ │ │ │ └── vars.pkl
│ │ │ └── 41/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ └── sb_rtg_na_l2_CartPole-v0_25-01-2018_09-22-07/
│ │ ├── 1/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── 11/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── 21/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ ├── 31/
│ │ │ ├── log.txt
│ │ │ ├── params.json
│ │ │ └── vars.pkl
│ │ └── 41/
│ │ ├── log.txt
│ │ ├── params.json
│ │ └── vars.pkl
│ ├── logz.py
│ ├── plot.py
│ └── train_pg.py
├── hw3/
│ ├── .idea/
│ │ ├── hw3.iml
│ │ ├── misc.xml
│ │ ├── modules.xml
│ │ └── workspace.xml
│ ├── README
│ ├── atari_wrappers.py
│ ├── dqn.py
│ ├── dqn_utils.py
│ ├── log/
│ │ ├── _RAM_30-01-2018_15-20-56/
│ │ │ └── log.txt
│ │ ├── _RAM_30-01-2018_22-29-12/
│ │ │ └── log.txt
│ │ └── _RAM_31-01-2018_08-28-28/
│ │ └── log.txt
│ ├── logz.py
│ ├── plot.py
│ ├── run_dqn_atari.py
│ └── run_dqn_ram.py
└── hw4/
├── .idea/
│ ├── hw4.iml
│ ├── misc.xml
│ ├── modules.xml
│ └── workspace.xml
├── cheetah_env.py
├── controllers.py
├── cost_functions.py
├── data/
│ ├── mb_mpc_HalfCheetah-v1_28-01-2018_16-06-09/
│ │ └── log.txt
│ └── mb_mpc_HalfCheetah-v1_30-01-2018_09-57-32/
│ └── log.txt
├── dynamics.py
├── logz.py
├── main.py
└── plot.py
SYMBOL INDEX (231 symbols across 24 files)
FILE: hw1/BehavioralCloning.py
class Config (line 10) | class Config(object):
class NN (line 24) | class NN(object):
method add_placeholders (line 25) | def add_placeholders(self):
method create_feed_dict (line 31) | def create_feed_dict(self, inputs_batch, labels_batch=None, dropout=1,...
method add_prediction_op (line 40) | def add_prediction_op(self):
method add_loss_op (line 57) | def add_loss_op(self, pred):
method add_training_op (line 62) | def add_training_op(self, loss):
method train_on_batch (line 69) | def train_on_batch(self, sess, inputs_batch, labels_batch, merged, tra...
method __init__ (line 75) | def __init__(self, config):
method fit (line 79) | def fit(self, sess, train_x, train_y):
method build (line 82) | def build(self):
method get_pred (line 92) | def get_pred(self, sess, inputs_batch):
function load (line 97) | def load(path):
function main (line 105) | def main():
FILE: hw1/DAgger.py
class Config (line 11) | class Config(object):
class NN (line 25) | class NN(object):
method add_placeholders (line 26) | def add_placeholders(self):
method create_feed_dict (line 32) | def create_feed_dict(self, inputs_batch, labels_batch=None, dropout=1,...
method add_prediction_op (line 41) | def add_prediction_op(self):
method add_loss_op (line 57) | def add_loss_op(self, pred):
method add_training_op (line 62) | def add_training_op(self, loss):
method train_on_batch (line 69) | def train_on_batch(self, sess, inputs_batch, labels_batch, merged, tra...
method __init__ (line 75) | def __init__(self, config):
method fit (line 79) | def fit(self, sess, train_x, train_y):
method build (line 82) | def build(self):
method get_pred (line 92) | def get_pred(self, sess, inputs_batch):
function load (line 97) | def load(path):
function run_env (line 104) | def run_env(env, nn,session):
function shuffle (line 122) | def shuffle(X_train, y_train):
function main (line 130) | def main():
FILE: hw1/load_policy.py
function load_policy (line 3) | def load_policy(filename):
FILE: hw1/logz.py
function colorize (line 34) | def colorize(string, color, bold=False, highlight=False):
class G (line 42) | class G:
function configure_output_dir (line 49) | def configure_output_dir(d=None):
function log_tabular (line 62) | def log_tabular(key, val):
function save_params (line 74) | def save_params(params):
function pickle_tf_vars (line 78) | def pickle_tf_vars():
function dump_tabular (line 88) | def dump_tabular():
FILE: hw1/plot.py
function plot_data (line 51) | def plot_data(data, value="AverageReturn"):
function get_datasets (line 60) | def get_datasets(fpath, condition=None):
function main (line 91) | def main():
FILE: hw1/run_expert.py
function main (line 21) | def main():
FILE: hw1/tf_util.py
function sum (line 18) | def sum(x, axis=None, keepdims=False):
function mean (line 20) | def mean(x, axis=None, keepdims=False):
function var (line 22) | def var(x, axis=None, keepdims=False):
function std (line 25) | def std(x, axis=None, keepdims=False):
function max (line 27) | def max(x, axis=None, keepdims=False):
function min (line 29) | def min(x, axis=None, keepdims=False):
function concatenate (line 31) | def concatenate(arrs, axis=0):
function argmax (line 33) | def argmax(x, axis=None):
function switch (line 36) | def switch(condition, then_expression, else_expression):
function l2loss (line 55) | def l2loss(params):
function lrelu (line 60) | def lrelu(x, leak=0.2):
function categorical_sample_logits (line 64) | def categorical_sample_logits(X):
function get_session (line 73) | def get_session():
function single_threaded_session (line 76) | def single_threaded_session():
function make_session (line 82) | def make_session(num_cpu):
function initialize (line 90) | def initialize():
function eval (line 96) | def eval(expr, feed_dict=None):
function set_value (line 100) | def set_value(v, val):
function load_state (line 103) | def load_state(fname):
function save_state (line 107) | def save_state(fname):
function normc_initializer (line 117) | def normc_initializer(std=1.0):
function conv2d (line 125) | def conv2d(x, num_filters, name, filter_size=(3, 3), stride=(1, 1), pad=...
function dense (line 155) | def dense(x, size, name, weight_init=None, bias=True):
function wndense (line 164) | def wndense(x, size, name, init_scale=1.0):
function densenobias (line 175) | def densenobias(x, size, name, weight_init=None):
function dropout (line 178) | def dropout(x, pkeep, phase=None, mask=None):
function batchnorm (line 185) | def batchnorm(x, name, phase, updates, gamma=0.96):
function function (line 213) | def function(inputs, outputs, updates=None, givens=None):
class _Function (line 223) | class _Function(object):
method __init__ (line 224) | def __init__(self, inputs, outputs, updates, givens, check_nan=False):
method __call__ (line 232) | def __call__(self, *inputvals):
function mem_friendly_function (line 242) | def mem_friendly_function(nondata_inputs, data_inputs, outputs, batch_si...
class _MemFriendlyFunction (line 249) | class _MemFriendlyFunction(object):
method __init__ (line 250) | def __init__(self, nondata_inputs, data_inputs, outputs, batch_size):
method __call__ (line 255) | def __call__(self, *inputvals):
class Module (line 281) | class Module(object):
method __init__ (line 282) | def __init__(self, name):
method __call__ (line 287) | def __call__(self, *args):
method _call (line 303) | def _call(self, *args):
method trainable_variables (line 307) | def trainable_variables(self):
method variables (line 312) | def variables(self):
function module (line 317) | def module(name):
function get_parents (line 333) | def get_parents(node):
function topsorted (line 336) | def topsorted(outputs):
function var_shape (line 377) | def var_shape(x):
function numel (line 383) | def numel(x):
function intprod (line 386) | def intprod(x):
function flatgrad (line 389) | def flatgrad(loss, var_list):
class SetFromFlat (line 394) | class SetFromFlat(object):
method __init__ (line 395) | def __init__(self, var_list, dtype=tf.float32):
method __call__ (line 408) | def __call__(self, theta):
class GetFlat (line 411) | class GetFlat(object):
method __init__ (line 412) | def __init__(self, var_list):
method __call__ (line 414) | def __call__(self):
function fancy_slice_2d (line 422) | def fancy_slice_2d(X, inds0, inds1):
function scope_vars (line 435) | def scope_vars(scope, trainable_only):
function lengths_to_mask (line 445) | def lengths_to_mask(lengths_b, max_length):
function in_session (line 463) | def in_session(f):
function get_placeholder (line 472) | def get_placeholder(name, dtype, shape):
function get_placeholder_cached (line 482) | def get_placeholder_cached(name):
function flattenallbut0 (line 485) | def flattenallbut0(x):
function reset (line 488) | def reset():
FILE: hw2/logz.py
function colorize (line 34) | def colorize(string, color, bold=False, highlight=False):
class G (line 42) | class G:
function configure_output_dir (line 49) | def configure_output_dir(d=None):
function log_tabular (line 60) | def log_tabular(key, val):
function save_params (line 72) | def save_params(params):
function pickle_tf_vars (line 76) | def pickle_tf_vars():
function dump_tabular (line 86) | def dump_tabular():
FILE: hw2/plot.py
function plot_data (line 52) | def plot_data(data, value="AverageReturn"):
function get_datasets (line 62) | def get_datasets(fpath, condition=None):
function main (line 91) | def main():
FILE: hw2/train_pg.py
function build_mlp (line 15) | def build_mlp(
function pathlength (line 44) | def pathlength(path):
function train_PG (line 53) | def train_PG(exp_name='',
function main (line 434) | def main():
FILE: hw3/atari_wrappers.py
class NoopResetEnv (line 8) | class NoopResetEnv(gym.Wrapper):
method __init__ (line 9) | def __init__(self, env=None, noop_max=30):
method _reset (line 17) | def _reset(self):
class FireResetEnv (line 25) | class FireResetEnv(gym.Wrapper):
method __init__ (line 26) | def __init__(self, env=None):
method _reset (line 32) | def _reset(self):
class EpisodicLifeEnv (line 38) | class EpisodicLifeEnv(gym.Wrapper):
method __init__ (line 39) | def __init__(self, env=None):
method _step (line 48) | def _step(self, action):
method _reset (line 62) | def _reset(self):
class MaxAndSkipEnv (line 77) | class MaxAndSkipEnv(gym.Wrapper):
method __init__ (line 78) | def __init__(self, env=None, skip=4):
method _step (line 85) | def _step(self, action):
method _reset (line 99) | def _reset(self):
function _process_frame84 (line 106) | def _process_frame84(frame):
class ProcessFrame84 (line 114) | class ProcessFrame84(gym.Wrapper):
method __init__ (line 115) | def __init__(self, env=None):
method _step (line 119) | def _step(self, action):
method _reset (line 123) | def _reset(self):
class ClippedRewardsWrapper (line 126) | class ClippedRewardsWrapper(gym.Wrapper):
method _step (line 127) | def _step(self, action):
function wrap_deepmind_ram (line 131) | def wrap_deepmind_ram(env):
function wrap_deepmind (line 140) | def wrap_deepmind(env):
FILE: hw3/dqn.py
function learn (line 14) | def learn(env,
FILE: hw3/dqn_utils.py
function huber_loss (line 8) | def huber_loss(x, delta=1.0):
function sample_n_unique (line 16) | def sample_n_unique(sampling_f, n):
class Schedule (line 27) | class Schedule(object):
method value (line 28) | def value(self, t):
class ConstantSchedule (line 32) | class ConstantSchedule(object):
method __init__ (line 33) | def __init__(self, value):
method value (line 42) | def value(self, t):
function linear_interpolation (line 46) | def linear_interpolation(l, r, alpha):
class PiecewiseSchedule (line 49) | class PiecewiseSchedule(object):
method __init__ (line 50) | def __init__(self, endpoints, interpolation=linear_interpolation, outs...
method value (line 74) | def value(self, t):
class LinearSchedule (line 85) | class LinearSchedule(object):
method __init__ (line 86) | def __init__(self, schedule_timesteps, final_p, initial_p=1.0):
method value (line 104) | def value(self, t):
function compute_exponential_averages (line 109) | def compute_exponential_averages(variables, decay):
function minimize_and_clip (line 130) | def minimize_and_clip(optimizer, objective, var_list, clip_val=10):
function initialize_interdependent_variables (line 141) | def initialize_interdependent_variables(session, vars_list, feed_dict):
function get_wrapper_by_name (line 164) | def get_wrapper_by_name(env, classname):
class ReplayBuffer (line 174) | class ReplayBuffer(object):
method __init__ (line 175) | def __init__(self, size, frame_history_len):
method can_sample (line 212) | def can_sample(self, batch_size):
method _encode_sample (line 216) | def _encode_sample(self, idxes):
method sample (line 226) | def sample(self, batch_size):
method encode_recent_observation (line 263) | def encode_recent_observation(self):
method _encode_observation (line 276) | def _encode_observation(self, idx):
method store_frame (line 302) | def store_frame(self, frame):
method store_effect (line 330) | def store_effect(self, idx, action, reward, done):
FILE: hw3/logz.py
function colorize (line 34) | def colorize(string, color, bold=False, highlight=False):
class G (line 42) | class G:
function configure_output_dir (line 49) | def configure_output_dir(d=None):
function log_tabular (line 60) | def log_tabular(key, val):
function save_params (line 72) | def save_params(params):
function pickle_tf_vars (line 76) | def pickle_tf_vars():
function dump_tabular (line 86) | def dump_tabular():
FILE: hw3/plot.py
function plot_data (line 51) | def plot_data(data, value="MeanReward"):
function get_datasets (line 61) | def get_datasets(fpath, condition=None):
function main (line 90) | def main():
FILE: hw3/run_dqn_atari.py
function atari_model (line 18) | def atari_model(img_in, num_actions, scope, reuse=False):
function atari_learn (line 34) | def atari_learn(env,
function get_available_gpus (line 84) | def get_available_gpus():
function set_global_seeds (line 89) | def set_global_seeds(i):
function get_session (line 99) | def get_session():
function get_env (line 108) | def get_env(task, seed):
function main (line 122) | def main():
FILE: hw3/run_dqn_ram.py
function atari_model (line 17) | def atari_model(ram_in, num_actions, scope, reuse=False):
function atari_learn (line 29) | def atari_learn(env,
function get_available_gpus (line 79) | def get_available_gpus():
function set_global_seeds (line 84) | def set_global_seeds(i):
function get_session (line 94) | def get_session():
function get_env (line 103) | def get_env(seed):
function main (line 115) | def main():
FILE: hw4/cheetah_env.py
class HalfCheetahEnvNew (line 5) | class HalfCheetahEnvNew(mujoco_env.MujocoEnv, utils.EzPickle):
method __init__ (line 6) | def __init__(self):
method _step (line 10) | def _step(self, action):
method _get_obs (line 21) | def _get_obs(self):
method reset_model (line 29) | def reset_model(self):
method viewer_setup (line 35) | def viewer_setup(self):
FILE: hw4/controllers.py
class Controller (line 6) | class Controller():
method __init__ (line 7) | def __init__(self):
method get_action (line 11) | def get_action(self, state):
class RandomController (line 15) | class RandomController(Controller):
method __init__ (line 16) | def __init__(self, env):
method get_action (line 20) | def get_action(self, state):
class MPCcontroller (line 26) | class MPCcontroller(Controller):
method __init__ (line 28) | def __init__(self,
method get_action (line 41) | def get_action(self, state):
FILE: hw4/cost_functions.py
function cheetah_cost_fn (line 9) | def cheetah_cost_fn(state, action, next_state):
function trajectory_cost_fn (line 58) | def trajectory_cost_fn(cost_fn, states, actions, next_states):
FILE: hw4/dynamics.py
function build_mlp (line 5) | def build_mlp(input_placeholder,
class NNDynamicsModel (line 20) | class NNDynamicsModel():
method __init__ (line 21) | def __init__(self,
method fit (line 51) | def fit(self, data):
method predict (line 82) | def predict(self, states, actions):
FILE: hw4/logz.py
function colorize (line 34) | def colorize(string, color, bold=False, highlight=False):
class G (line 42) | class G:
function configure_output_dir (line 49) | def configure_output_dir(d=None):
function log_tabular (line 62) | def log_tabular(key, val):
function save_params (line 74) | def save_params(params):
function pickle_tf_vars (line 78) | def pickle_tf_vars():
function dump_tabular (line 88) | def dump_tabular():
FILE: hw4/main.py
function sample (line 15) | def sample(env,
function path_cost (line 51) | def path_cost(cost_fn, path):
function compute_normalization (line 54) | def compute_normalization(data):
function plot_comparison (line 76) | def plot_comparison(env, dyn_model):
function train (line 84) | def train(env,
function main (line 227) | def main():
FILE: hw4/plot.py
function plot_data (line 51) | def plot_data(data, value="AverageReturn"):
function get_datasets (line 60) | def get_datasets(fpath, condition=None):
function main (line 89) | def main():
Condensed preview — 224 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (1,021K chars).
[
{
"path": "README.md",
"chars": 37,
"preview": "# CS294\nhomework for CS294 Fall 2017\n"
},
{
"path": "hw1/.idea/hw1.iml",
"chars": 542,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<module type=\"PYTHON_MODULE\" version=\"4\">\n <component name=\"NewModuleRootManager"
},
{
"path": "hw1/.idea/misc.xml",
"chars": 212,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"ProjectRootManager\" version=\"2\" project-"
},
{
"path": "hw1/.idea/modules.xml",
"chars": 258,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"ProjectModuleManager\">\n <modules>\n "
},
{
"path": "hw1/.idea/workspace.xml",
"chars": 26692,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"ChangeListManager\">\n <list default=\"t"
},
{
"path": "hw1/BehavioralCloning.py",
"chars": 7343,
"preview": "import tensorflow as tf\nimport os\nimport numpy as np\nimport tqdm\nimport gym\nimport logz\nimport time\nimport math\n\nclass C"
},
{
"path": "hw1/DAgger.py",
"chars": 8509,
"preview": "import tensorflow as tf\nimport os\nimport numpy as np\nimport tqdm\nimport gym\nimport load_policy\nimport math\nimport logz\ni"
},
{
"path": "hw1/README.md",
"chars": 719,
"preview": "# CS294-112 HW 1: Imitation Learning\n\nDependencies: TensorFlow, MuJoCo version 1.31, OpenAI Gym\n\n**Note**: MuJoCo versio"
},
{
"path": "hw1/demo.bash",
"chars": 174,
"preview": "#!/bin/bash\nset -eux\nfor e in Hopper-v1 Ant-v1 HalfCheetah-v1 Humanoid-v1 Reacher-v1 Walker2d-v1\ndo\n python run_exper"
},
{
"path": "hw1/load_policy.py",
"chars": 2511,
"preview": "import pickle, tensorflow as tf, tf_util, numpy as np\n\ndef load_policy(filename):\n with open(filename, 'rb') as f:\n "
},
{
"path": "hw1/log/Ant-v1_BC_30-01-2018_10-32-45/log.txt",
"chars": 639,
"preview": "Iteration\tAverageReturn\tStdReturn\n0\t4460.34148867\t504.993919545\n1\t4361.62089192\t787.910341533\n2\t4238.35328327\t1021.28077"
},
{
"path": "hw1/log/Ant-v1_DA_30-01-2018_10-51-03/log.txt",
"chars": 641,
"preview": "Iteration\tAverageReturn\tStdReturn\n0\t4543.92556297\t163.700815617\n1\t4532.86754019\t651.078836035\n2\t4718.40341227\t334.079528"
},
{
"path": "hw1/log/HalfCheetah-v1_BC_30-01-2018_10-51-19/log.txt",
"chars": 638,
"preview": "Iteration\tAverageReturn\tStdReturn\n0\t3817.96495454\t121.424448089\n1\t3936.04442016\t149.995208225\n2\t3852.30571246\t120.093945"
},
{
"path": "hw1/log/HalfCheetah-v1_DA_30-01-2018_11-11-33/log.txt",
"chars": 641,
"preview": "Iteration\tAverageReturn\tStdReturn\n0\t3976.17700506\t114.034047864\n1\t4080.46221353\t84.1893455539\n2\t4133.95250959\t81.0167961"
},
{
"path": "hw1/log/Hopper-v1_BC_30-01-2018_10-55-39/log.txt",
"chars": 635,
"preview": "Iteration\tAverageReturn\tStdReturn\n0\t696.648776946\t27.07925021\n1\t748.47888752\t151.429815664\n2\t700.073941219\t72.9177435482"
},
{
"path": "hw1/log/Hopper-v1_BCbig_30-01-2018_11-02-29/log.txt",
"chars": 639,
"preview": "Iteration\tAverageReturn\tStdReturn\n0\t1138.58195909\t332.371517359\n1\t896.423291123\t254.391048933\n2\t1151.49985307\t695.596874"
},
{
"path": "hw1/log/Hopper-v1_DA_30-01-2018_11-24-58/log.txt",
"chars": 639,
"preview": "Iteration\tAverageReturn\tStdReturn\n0\t1054.7477548\t44.5027933049\n1\t2294.09469315\t467.288023567\n2\t3707.44200499\t260.0300210"
},
{
"path": "hw1/log/Humanoid-v1_BC_30-01-2018_10-28-53/log.txt",
"chars": 641,
"preview": "Iteration\tAverageReturn\tStdReturn\n0\t295.814730365\t88.5813632879\n1\t238.349607493\t76.3277190139\n2\t243.129245237\t81.1495698"
},
{
"path": "hw1/log/Humanoid-v1_DA_30-01-2018_10-31-26/log.txt",
"chars": 639,
"preview": "Iteration\tAverageReturn\tStdReturn\n0\t277.763885265\t74.5699695095\n1\t340.013799005\t83.8174345466\n2\t281.151168831\t58.8467520"
},
{
"path": "hw1/log/Reacher-v1_BC_30-01-2018_10-57-25/log.txt",
"chars": 657,
"preview": "Iteration\tAverageReturn\tStdReturn\n0\t-9.14313732169\t2.1057737525\n1\t-10.2464604937\t2.91303780186\n2\t-9.36505170342\t2.891340"
},
{
"path": "hw1/log/Reacher-v1_DA_30-01-2018_11-27-44/log.txt",
"chars": 660,
"preview": "Iteration\tAverageReturn\tStdReturn\n0\t-9.514077968\t4.67060656967\n1\t-6.8483366701\t2.22806332751\n2\t-7.28919411373\t3.00020247"
},
{
"path": "hw1/log/Walker2d-v1_BC_30-01-2018_10-58-02/log.txt",
"chars": 639,
"preview": "Iteration\tAverageReturn\tStdReturn\n0\t731.896679135\t356.841079416\n1\t245.597115443\t370.464617925\n2\t517.360851842\t524.222617"
},
{
"path": "hw1/log/Walker2d-v1_DA_30-01-2018_11-28-50/log.txt",
"chars": 640,
"preview": "Iteration\tAverageReturn\tStdReturn\n0\t672.000153601\t623.031121462\n1\t1867.64727184\t877.010515332\n2\t5434.69323044\t45.0795878"
},
{
"path": "hw1/logz.py",
"chars": 3435,
"preview": "import json\n\n\"\"\"\n\nSome simple logging functionality, inspired by rllab's logging.\nAssumes that each diagnostic gets logg"
},
{
"path": "hw1/plot.py",
"chars": 3518,
"preview": "import seaborn as sns\nimport pandas as pd\nimport matplotlib.pyplot as plt\nimport json\nimport os\n\n\"\"\"\nUsing the plotter:\n"
},
{
"path": "hw1/run_expert.py",
"chars": 2553,
"preview": "#!/usr/bin/env python\n\n\"\"\"\nCode to load an expert policy and generate roll-out data for behavioral cloning.\nExample usag"
},
{
"path": "hw1/tf_util.py",
"chars": 17788,
"preview": "import numpy as np\nimport tensorflow as tf # pylint: ignore-module\n#import builtins\nimport functools\nimport copy\nimport "
},
{
"path": "hw2/.idea/hw2.iml",
"chars": 459,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<module type=\"PYTHON_MODULE\" version=\"4\">\n <component name=\"NewModuleRootManager"
},
{
"path": "hw2/.idea/misc.xml",
"chars": 212,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"ProjectRootManager\" version=\"2\" project-"
},
{
"path": "hw2/.idea/modules.xml",
"chars": 258,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"ProjectModuleManager\">\n <modules>\n "
},
{
"path": "hw2/.idea/workspace.xml",
"chars": 18957,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"ChangeListManager\">\n <list default=\"t"
},
{
"path": "hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/1/log.txt",
"chars": 11331,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/1/params.json",
"chars": 419,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"HalfCheetah-v1\",\n\"exp_name\"\t:\t\"HalfCheetah_b50000_rtg_na_25bl\",\n\"gamma\"\t:\t0.9,\n\"learni"
},
{
"path": "hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/11/log.txt",
"chars": 11326,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/11/params.json",
"chars": 421,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"HalfCheetah-v1\",\n\"exp_name\"\t:\t\"HalfCheetah_b50000_rtg_na_25bl\",\n\"gamma\"\t:\t0.9,\n\"learni"
},
{
"path": "hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/21/log.txt",
"chars": 11326,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/21/params.json",
"chars": 421,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"HalfCheetah-v1\",\n\"exp_name\"\t:\t\"HalfCheetah_b50000_rtg_na_25bl\",\n\"gamma\"\t:\t0.9,\n\"learni"
},
{
"path": "hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/31/log.txt",
"chars": 11305,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/31/params.json",
"chars": 421,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"HalfCheetah-v1\",\n\"exp_name\"\t:\t\"HalfCheetah_b50000_rtg_na_25bl\",\n\"gamma\"\t:\t0.9,\n\"learni"
},
{
"path": "hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/41/log.txt",
"chars": 11299,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/HalfCheetah_b50000_rtg_na_25bl_HalfCheetah-v1_31-01-2018_19-35-00/41/params.json",
"chars": 421,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"HalfCheetah-v1\",\n\"exp_name\"\t:\t\"HalfCheetah_b50000_rtg_na_25bl\",\n\"gamma\"\t:\t0.9,\n\"learni"
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/1/log.txt",
"chars": 9059,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/1/params.json",
"chars": 429,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"InvertedPendulum-v1\",\n\"exp_name\"\t:\t\"InvertedPendulum_sb_rtg_na_0.02\",\n\"gamma\"\t:\t1.0,\n\""
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/11/log.txt",
"chars": 8923,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/11/params.json",
"chars": 431,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"InvertedPendulum-v1\",\n\"exp_name\"\t:\t\"InvertedPendulum_sb_rtg_na_0.02\",\n\"gamma\"\t:\t1.0,\n\""
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/21/log.txt",
"chars": 9545,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/21/params.json",
"chars": 431,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"InvertedPendulum-v1\",\n\"exp_name\"\t:\t\"InvertedPendulum_sb_rtg_na_0.02\",\n\"gamma\"\t:\t1.0,\n\""
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/31/log.txt",
"chars": 9230,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/31/params.json",
"chars": 431,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"InvertedPendulum-v1\",\n\"exp_name\"\t:\t\"InvertedPendulum_sb_rtg_na_0.02\",\n\"gamma\"\t:\t1.0,\n\""
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/41/log.txt",
"chars": 9929,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_0.02_InvertedPendulum-v1_02-02-2018_10-42-58/41/params.json",
"chars": 431,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"InvertedPendulum-v1\",\n\"exp_name\"\t:\t\"InvertedPendulum_sb_rtg_na_0.02\",\n\"gamma\"\t:\t1.0,\n\""
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_bl_0.02_InvertedPendulum-v1_02-02-2018_10-42-44/1/log.txt",
"chars": 8870,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_bl_0.02_InvertedPendulum-v1_02-02-2018_10-42-44/1/params.json",
"chars": 434,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"InvertedPendulum-v1\",\n\"exp_name\"\t:\t\"InvertedPendulum_sb_rtg_na_bl_0.02\",\n\"gamma\"\t:\t1.0"
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_bl_0.02_InvertedPendulum-v1_02-02-2018_10-42-44/11/log.txt",
"chars": 10447,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_bl_0.02_InvertedPendulum-v1_02-02-2018_10-42-44/11/params.json",
"chars": 436,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"InvertedPendulum-v1\",\n\"exp_name\"\t:\t\"InvertedPendulum_sb_rtg_na_bl_0.02\",\n\"gamma\"\t:\t1.0"
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_bl_0.02_InvertedPendulum-v1_02-02-2018_10-42-44/21/log.txt",
"chars": 8889,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_bl_0.02_InvertedPendulum-v1_02-02-2018_10-42-44/21/params.json",
"chars": 436,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"InvertedPendulum-v1\",\n\"exp_name\"\t:\t\"InvertedPendulum_sb_rtg_na_bl_0.02\",\n\"gamma\"\t:\t1.0"
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_bl_0.02_InvertedPendulum-v1_02-02-2018_10-42-44/31/log.txt",
"chars": 9213,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_bl_0.02_InvertedPendulum-v1_02-02-2018_10-42-44/31/params.json",
"chars": 436,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"InvertedPendulum-v1\",\n\"exp_name\"\t:\t\"InvertedPendulum_sb_rtg_na_bl_0.02\",\n\"gamma\"\t:\t1.0"
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_bl_0.02_InvertedPendulum-v1_02-02-2018_10-42-44/41/log.txt",
"chars": 9914,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/InvertedPendulum_sb_rtg_na_bl_0.02_InvertedPendulum-v1_02-02-2018_10-42-44/41/params.json",
"chars": 436,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"InvertedPendulum-v1\",\n\"exp_name\"\t:\t\"InvertedPendulum_sb_rtg_na_bl_0.02\",\n\"gamma\"\t:\t1.0"
},
{
"path": "hw2/data/lb_no_rtg_dna_CartPole-v0_24-01-2018_09-28-29/1/log.txt",
"chars": 10997,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/lb_no_rtg_dna_CartPole-v0_24-01-2018_09-28-29/1/params.json",
"chars": 380,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"lb_no_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"l"
},
{
"path": "hw2/data/lb_no_rtg_dna_CartPole-v0_24-01-2018_09-28-29/11/log.txt",
"chars": 10687,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/lb_no_rtg_dna_CartPole-v0_24-01-2018_09-28-29/11/params.json",
"chars": 382,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"lb_no_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"l"
},
{
"path": "hw2/data/lb_no_rtg_dna_CartPole-v0_24-01-2018_09-28-29/21/log.txt",
"chars": 11021,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/lb_no_rtg_dna_CartPole-v0_24-01-2018_09-28-29/21/params.json",
"chars": 382,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"lb_no_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"l"
},
{
"path": "hw2/data/lb_no_rtg_dna_CartPole-v0_24-01-2018_09-28-29/31/log.txt",
"chars": 10946,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/lb_no_rtg_dna_CartPole-v0_24-01-2018_09-28-29/31/params.json",
"chars": 382,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"lb_no_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"l"
},
{
"path": "hw2/data/lb_no_rtg_dna_CartPole-v0_24-01-2018_09-28-29/41/log.txt",
"chars": 10658,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/lb_no_rtg_dna_CartPole-v0_24-01-2018_09-28-29/41/params.json",
"chars": 382,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"lb_no_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"l"
},
{
"path": "hw2/data/lb_rtg_dna_CartPole-v0_24-01-2018_09-20-37/1/log.txt",
"chars": 10934,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/lb_rtg_dna_CartPole-v0_24-01-2018_09-20-37/1/params.json",
"chars": 373,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"lb_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logd"
},
{
"path": "hw2/data/lb_rtg_dna_CartPole-v0_24-01-2018_09-20-37/11/log.txt",
"chars": 11035,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/lb_rtg_dna_CartPole-v0_24-01-2018_09-20-37/11/params.json",
"chars": 375,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"lb_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logd"
},
{
"path": "hw2/data/lb_rtg_dna_CartPole-v0_24-01-2018_09-20-37/21/log.txt",
"chars": 10145,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/lb_rtg_dna_CartPole-v0_24-01-2018_09-20-37/21/params.json",
"chars": 375,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"lb_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logd"
},
{
"path": "hw2/data/lb_rtg_dna_CartPole-v0_24-01-2018_09-20-37/31/log.txt",
"chars": 10940,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/lb_rtg_dna_CartPole-v0_24-01-2018_09-20-37/31/params.json",
"chars": 375,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"lb_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logd"
},
{
"path": "hw2/data/lb_rtg_dna_CartPole-v0_24-01-2018_09-20-37/41/log.txt",
"chars": 10649,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/lb_rtg_dna_CartPole-v0_24-01-2018_09-20-37/41/params.json",
"chars": 375,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"lb_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logd"
},
{
"path": "hw2/data/lb_rtg_na_CartPole-v0_24-01-2018_09-11-55/1/log.txt",
"chars": 11059,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/lb_rtg_na_CartPole-v0_24-01-2018_09-11-55/1/params.json",
"chars": 370,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"lb_rtg_na\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logdi"
},
{
"path": "hw2/data/lb_rtg_na_CartPole-v0_24-01-2018_09-11-55/11/log.txt",
"chars": 11183,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/lb_rtg_na_CartPole-v0_24-01-2018_09-11-55/11/params.json",
"chars": 372,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"lb_rtg_na\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logdi"
},
{
"path": "hw2/data/lb_rtg_na_CartPole-v0_24-01-2018_09-11-55/21/log.txt",
"chars": 10971,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/lb_rtg_na_CartPole-v0_24-01-2018_09-11-55/21/params.json",
"chars": 372,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"lb_rtg_na\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logdi"
},
{
"path": "hw2/data/lb_rtg_na_CartPole-v0_24-01-2018_09-11-55/31/log.txt",
"chars": 11145,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/lb_rtg_na_CartPole-v0_24-01-2018_09-11-55/31/params.json",
"chars": 372,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"lb_rtg_na\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logdi"
},
{
"path": "hw2/data/lb_rtg_na_CartPole-v0_24-01-2018_09-11-55/41/log.txt",
"chars": 10964,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/lb_rtg_na_CartPole-v0_24-01-2018_09-11-55/41/params.json",
"chars": 372,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"lb_rtg_na\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logdi"
},
{
"path": "hw2/data/sb_no_rtg_dna_CartPole-v0_24-01-2018_09-00-15/1/log.txt",
"chars": 10581,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_no_rtg_dna_CartPole-v0_24-01-2018_09-00-15/1/params.json",
"chars": 380,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_no_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"l"
},
{
"path": "hw2/data/sb_no_rtg_dna_CartPole-v0_24-01-2018_09-00-15/11/log.txt",
"chars": 10512,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_no_rtg_dna_CartPole-v0_24-01-2018_09-00-15/11/params.json",
"chars": 382,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_no_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"l"
},
{
"path": "hw2/data/sb_no_rtg_dna_CartPole-v0_24-01-2018_09-00-15/21/log.txt",
"chars": 10428,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_no_rtg_dna_CartPole-v0_24-01-2018_09-00-15/21/params.json",
"chars": 382,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_no_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"l"
},
{
"path": "hw2/data/sb_no_rtg_dna_CartPole-v0_24-01-2018_09-00-15/31/log.txt",
"chars": 10500,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_no_rtg_dna_CartPole-v0_24-01-2018_09-00-15/31/params.json",
"chars": 382,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_no_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"l"
},
{
"path": "hw2/data/sb_no_rtg_dna_CartPole-v0_24-01-2018_09-00-15/41/log.txt",
"chars": 10120,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_no_rtg_dna_CartPole-v0_24-01-2018_09-00-15/41/params.json",
"chars": 382,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_no_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"l"
},
{
"path": "hw2/data/sb_rtg_dna_CartPole-v0_24-01-2018_09-04-19/1/log.txt",
"chars": 9755,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_rtg_dna_CartPole-v0_24-01-2018_09-04-19/1/params.json",
"chars": 373,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logd"
},
{
"path": "hw2/data/sb_rtg_dna_CartPole-v0_24-01-2018_09-04-19/11/log.txt",
"chars": 10278,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_rtg_dna_CartPole-v0_24-01-2018_09-04-19/11/params.json",
"chars": 375,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logd"
},
{
"path": "hw2/data/sb_rtg_dna_CartPole-v0_24-01-2018_09-04-19/21/log.txt",
"chars": 9748,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_rtg_dna_CartPole-v0_24-01-2018_09-04-19/21/params.json",
"chars": 375,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logd"
},
{
"path": "hw2/data/sb_rtg_dna_CartPole-v0_24-01-2018_09-04-19/31/log.txt",
"chars": 10558,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_rtg_dna_CartPole-v0_24-01-2018_09-04-19/31/params.json",
"chars": 375,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logd"
},
{
"path": "hw2/data/sb_rtg_dna_CartPole-v0_24-01-2018_09-04-19/41/log.txt",
"chars": 10298,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_rtg_dna_CartPole-v0_24-01-2018_09-04-19/41/params.json",
"chars": 375,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_rtg_dna\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logd"
},
{
"path": "hw2/data/sb_rtg_na_CartPole-v0_24-01-2018_09-08-49/1/log.txt",
"chars": 10330,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_rtg_na_CartPole-v0_24-01-2018_09-08-49/1/params.json",
"chars": 370,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_rtg_na\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logdi"
},
{
"path": "hw2/data/sb_rtg_na_CartPole-v0_24-01-2018_09-08-49/11/log.txt",
"chars": 10614,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_rtg_na_CartPole-v0_24-01-2018_09-08-49/11/params.json",
"chars": 372,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_rtg_na\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logdi"
},
{
"path": "hw2/data/sb_rtg_na_CartPole-v0_24-01-2018_09-08-49/21/log.txt",
"chars": 10102,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_rtg_na_CartPole-v0_24-01-2018_09-08-49/21/params.json",
"chars": 372,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_rtg_na\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logdi"
},
{
"path": "hw2/data/sb_rtg_na_CartPole-v0_24-01-2018_09-08-49/31/log.txt",
"chars": 10312,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_rtg_na_CartPole-v0_24-01-2018_09-08-49/31/params.json",
"chars": 372,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_rtg_na\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logdi"
},
{
"path": "hw2/data/sb_rtg_na_CartPole-v0_24-01-2018_09-08-49/41/log.txt",
"chars": 10448,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_rtg_na_CartPole-v0_24-01-2018_09-08-49/41/params.json",
"chars": 372,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_rtg_na\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"logdi"
},
{
"path": "hw2/data/sb_rtg_na_l2_CartPole-v0_25-01-2018_09-22-07/1/log.txt",
"chars": 9927,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_rtg_na_l2_CartPole-v0_25-01-2018_09-22-07/1/params.json",
"chars": 376,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_rtg_na_l2\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"lo"
},
{
"path": "hw2/data/sb_rtg_na_l2_CartPole-v0_25-01-2018_09-22-07/11/log.txt",
"chars": 10108,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_rtg_na_l2_CartPole-v0_25-01-2018_09-22-07/11/params.json",
"chars": 378,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_rtg_na_l2\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"lo"
},
{
"path": "hw2/data/sb_rtg_na_l2_CartPole-v0_25-01-2018_09-22-07/21/log.txt",
"chars": 9553,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_rtg_na_l2_CartPole-v0_25-01-2018_09-22-07/21/params.json",
"chars": 378,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_rtg_na_l2\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"lo"
},
{
"path": "hw2/data/sb_rtg_na_l2_CartPole-v0_25-01-2018_09-22-07/31/log.txt",
"chars": 9333,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_rtg_na_l2_CartPole-v0_25-01-2018_09-22-07/31/params.json",
"chars": 378,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_rtg_na_l2\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"lo"
},
{
"path": "hw2/data/sb_rtg_na_l2_CartPole-v0_25-01-2018_09-22-07/41/log.txt",
"chars": 9313,
"preview": "LossDelta\tTime\tIteration\tAverageReturn\tStdReturn\tMaxReturn\tMinReturn\tEpLenMean\tEpLenStd\tTimestepsThisBatch\tTimestepsSoFa"
},
{
"path": "hw2/data/sb_rtg_na_l2_CartPole-v0_25-01-2018_09-22-07/41/params.json",
"chars": 378,
"preview": "{\"animate\"\t:\tfalse,\n\"env_name\"\t:\t\"CartPole-v0\",\n\"exp_name\"\t:\t\"sb_rtg_na_l2\",\n\"gamma\"\t:\t1.0,\n\"learning_rate\"\t:\t0.005,\n\"lo"
},
{
"path": "hw2/logz.py",
"chars": 3414,
"preview": "import json\n\n\"\"\"\n\nSome simple logging functionality, inspired by rllab's logging.\nAssumes that each diagnostic gets logg"
},
{
"path": "hw2/plot.py",
"chars": 3468,
"preview": "import seaborn as sns\nimport pandas as pd\nimport matplotlib.pyplot as plt\nimport json\nimport numpy as np\nimport os\n\n\"\"\"\n"
},
{
"path": "hw2/train_pg.py",
"chars": 20984,
"preview": "import numpy as np\nimport tensorflow as tf\nimport gym\nimport logz\nimport scipy.signal\nimport os\nimport time\nimport inspe"
},
{
"path": "hw3/.idea/hw3.iml",
"chars": 459,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<module type=\"PYTHON_MODULE\" version=\"4\">\n <component name=\"NewModuleRootManager"
},
{
"path": "hw3/.idea/misc.xml",
"chars": 212,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"ProjectRootManager\" version=\"2\" project-"
},
{
"path": "hw3/.idea/modules.xml",
"chars": 258,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"ProjectModuleManager\">\n <modules>\n "
},
{
"path": "hw3/.idea/workspace.xml",
"chars": 16580,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"ChangeListManager\">\n <list default=\"t"
},
{
"path": "hw3/README",
"chars": 196,
"preview": "See http://rll.berkeley.edu/deeprlcourse/f17docs/hw3.pdf for instructions\n\nThe starter code was based on an implementati"
},
{
"path": "hw3/atari_wrappers.py",
"chars": 5290,
"preview": "import cv2\nimport numpy as np\nfrom collections import deque\nimport gym\nfrom gym import spaces\n\n\nclass NoopResetEnv(gym.W"
},
{
"path": "hw3/dqn.py",
"chars": 14983,
"preview": "import sys\nimport gym.spaces\nimport itertools\nimport numpy as np\nimport random\nimport tensorflow as tf\nim"
},
{
"path": "hw3/dqn_utils.py",
"chars": 13986,
"preview": "\"\"\"This file includes a collection of utility functions that are useful for\nimplementing DQN.\"\"\"\nimport gym\nimport tenso"
},
{
"path": "hw3/log/_RAM_30-01-2018_15-20-56/log.txt",
"chars": 44756,
"preview": "Timestep\tMeanReward\tBestMeanReward\tepisodes\texploration\tlearning_rate\n60000\t-20.69\t-20.57\t207\t0.194\t0.0001\n70000\t-20.72\t"
},
{
"path": "hw3/log/_RAM_30-01-2018_22-29-12/log.txt",
"chars": 44908,
"preview": "Timestep\tMeanReward\tBestMeanReward\tepisodes\texploration\tlearning_rate\n60000\t-20.73\t-20.57\t208\t0.194\t0.0001\n70000\t-20.8\t-"
},
{
"path": "hw3/log/_RAM_31-01-2018_08-28-28/log.txt",
"chars": 44833,
"preview": "Timestep\tMeanReward\tBestMeanReward\tepisodes\texploration\tlearning_rate\n60000\t-20.72\t-20.57\t207\t0.194\t0.0001\n70000\t-20.74\t"
},
{
"path": "hw3/logz.py",
"chars": 3414,
"preview": "import json\n\n\"\"\"\n\nSome simple logging functionality, inspired by rllab's logging.\nAssumes that each diagnostic gets logg"
},
{
"path": "hw3/plot.py",
"chars": 3502,
"preview": "import seaborn as sns\nimport pandas as pd\nimport matplotlib.pyplot as plt\nimport json\nimport os\n\n\"\"\"\nUsing the plotter:\n"
},
{
"path": "hw3/run_dqn_atari.py",
"chars": 4424,
"preview": "import argparse\nimport gym\nfrom gym import wrappers\nimport os.path as osp\nimport random\nimport numpy as np\nimport tensor"
},
{
"path": "hw3/run_dqn_ram.py",
"chars": 3993,
"preview": "import argparse\nimport gym\nfrom gym import wrappers\nimport os.path as osp\nimport random\nimport numpy as np\nimport tensor"
},
{
"path": "hw4/.idea/hw4.iml",
"chars": 459,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<module type=\"PYTHON_MODULE\" version=\"4\">\n <component name=\"NewModuleRootManager"
},
{
"path": "hw4/.idea/misc.xml",
"chars": 212,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"ProjectRootManager\" version=\"2\" project-"
},
{
"path": "hw4/.idea/modules.xml",
"chars": 258,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"ProjectModuleManager\">\n <modules>\n "
},
{
"path": "hw4/.idea/workspace.xml",
"chars": 16815,
"preview": "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<project version=\"4\">\n <component name=\"ChangeListManager\">\n <list default=\"t"
},
{
"path": "hw4/cheetah_env.py",
"chars": 1333,
"preview": "import numpy as np\nfrom gym import utils\nfrom gym.envs.mujoco import mujoco_env\n\nclass HalfCheetahEnvNew(mujoco_env.Mujo"
},
{
"path": "hw4/controllers.py",
"chars": 2845,
"preview": "import numpy as np\nfrom cost_functions import trajectory_cost_fn\nimport time\n\n\nclass Controller():\n def __init__(self"
},
{
"path": "hw4/cost_functions.py",
"chars": 1677,
"preview": "import numpy as np\n\n\n#========================================================\n# \n# Environment-specific cost functions:"
},
{
"path": "hw4/data/mb_mpc_HalfCheetah-v1_28-01-2018_16-06-09/log.txt",
"chars": 1853,
"preview": "Iteration\tAverageCost\tStdCost\tMinimumCost\tMaximumCost\tAverageReturn\tStdReturn\tMinimumReturn\tMaximumReturn\n0\t-493.5481766"
},
{
"path": "hw4/data/mb_mpc_HalfCheetah-v1_30-01-2018_09-57-32/log.txt",
"chars": 1858,
"preview": "Iteration\tAverageCost\tStdCost\tMinimumCost\tMaximumCost\tAverageReturn\tStdReturn\tMinimumReturn\tMaximumReturn\n0\t4949.7974528"
},
{
"path": "hw4/dynamics.py",
"chars": 3888,
"preview": "import tensorflow as tf\nimport numpy as np\nimport math\n# Predefined function to build a feedforward neural network\ndef b"
},
{
"path": "hw4/logz.py",
"chars": 3435,
"preview": "import json\n\n\"\"\"\n\nSome simple logging functionality, inspired by rllab's logging.\nAssumes that each diagnostic gets logg"
},
{
"path": "hw4/main.py",
"chars": 10698,
"preview": "import numpy as np\nimport tensorflow as tf\nimport gym\nfrom dynamics import NNDynamicsModel\nfrom controllers import MPCco"
},
{
"path": "hw4/plot.py",
"chars": 3437,
"preview": "import seaborn as sns\nimport pandas as pd\nimport matplotlib.pyplot as plt\nimport json\nimport os\n\n\"\"\"\nUsing the plotter:\n"
}
]
// ... and 62 more files (download for full content)
About this extraction
This page contains the full source code of the Observerspy/CS294 GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 224 files (77.1 MB), approximately 442.2k tokens, and a symbol index with 231 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — a free GitHub repo-to-text converter for AI. Built by Nikandr Surkov.