gitextract_jqc__b22/

├── 01. Fundamentals of Reinforcement Learning/
│   ├── .ipynb_checkpoints/
│   │   ├── 1.01. Basic Idea of Reinforcement Learning -checkpoint.ipynb
│   │   ├── 1.02. Key Elements of Reinforcement Learning -checkpoint.ipynb
│   │   ├── 1.03. Reinforcement Learning Algorithm-checkpoint.ipynb
│   │   ├── 1.04. RL agent in the Grid World -checkpoint.ipynb
│   │   ├── 1.05. How RL differs from other ML paradigms?-checkpoint.ipynb
│   │   ├── 1.06. Markov Decision Processes-checkpoint.ipynb
│   │   └── 1.07. Action space, Policy, Episode and Horizon-checkpoint.ipynb
│   ├── 1.01. Key Elements of Reinforcement Learning .ipynb
│   ├── 1.02. Basic Idea of Reinforcement Learning.ipynb
│   ├── 1.03. Reinforcement Learning Algorithm.ipynb
│   ├── 1.04. RL agent in the Grid World .ipynb
│   ├── 1.05. How RL differs from other ML paradigms?.ipynb
│   ├── 1.06. Markov Decision Processes.ipynb
│   ├── 1.07. Action space, Policy, Episode and Horizon.ipynb
│   ├── 1.08.  Return, Discount Factor and Math Essentials.ipynb
│   ├── 1.09 Value function and Q function.ipynb
│   ├── 1.10. Model-Based and Model-Free Learning .ipynb
│   ├── 1.11. Different Types of Environments.ipynb
│   ├── 1.12. Applications of Reinforcement Learning.ipynb
│   └── 1.13. Reinforcement Learning Glossary.ipynb
├── 02. A Guide to the Gym Toolkit/
│   ├── 2.02.  Creating our First Gym Environment.ipynb
│   ├── 2.05. Cart Pole Balancing with Random Policy.ipynb
│   └── README.md
├── 03. Bellman Equation and Dynamic Programming/
│   ├── .ipynb_checkpoints/
│   │   ├── 3.06. Solving the Frozen Lake Problem with Value Iteration-checkpoint.ipynb
│   │   └── 3.08. Solving the Frozen Lake Problem with Policy Iteration-checkpoint.ipynb
│   ├── 3.06. Solving the Frozen Lake Problem with Value Iteration.ipynb
│   ├── 3.08. Solving the Frozen Lake Problem with Policy Iteration.ipynb
│   └── README.md
├── 04. Monte Carlo Methods/
│   ├── .ipynb_checkpoints/
│   │   ├── 4.01. Understanding the Monte Carlo Method-checkpoint.ipynb
│   │   ├── 4.02.  Prediction and control tasks-checkpoint.ipynb
│   │   ├── 4.05. Every-visit MC Prediction with Blackjack Game-checkpoint.ipynb
│   │   ├── 4.06. First-visit MC Prediction with Blackjack Game-checkpoint.ipynb
│   │   └── 4.13. Implementing On-Policy MC Control-checkpoint.ipynb
│   ├── 4.13. Implementing On-Policy MC Control.ipynb
│   └── README.md
├── 05. Understanding Temporal Difference Learning/
│   ├── .ipynb_checkpoints/
│   │   ├── 5.03. Predicting the Value of States in a Frozen Lake Environment-checkpoint.ipynb
│   │   ├── 5.06. Computing Optimal Policy using SARSA-checkpoint.ipynb
│   │   └── 5.08. Computing the Optimal Policy using Q Learning-checkpoint.ipynb
│   ├── 5.03. Predicting the Value of States in a Frozen Lake Environment.ipynb
│   ├── 5.06. Computing Optimal Policy using SARSA.ipynb
│   ├── 5.08. Computing the Optimal Policy using Q Learning.ipynb
│   └── README.md
├── 06. Case Study: The MAB Problem/
│   ├── .ipynb_checkpoints/
│   │   ├── 6.01 .The MAB Problem-checkpoint.ipynb
│   │   ├── 6.04. Implementing epsilon-greedy -checkpoint.ipynb
│   │   ├── 6.06. Implementing Softmax Exploration-checkpoint.ipynb
│   │   ├── 6.08. Implementing UCB-checkpoint.ipynb
│   │   ├── 6.1-checkpoint.ipynb
│   │   ├── 6.10. Implementing Thompson Sampling-checkpoint.ipynb
│   │   └── 6.12. Finding the Best Advertisement Banner using Bandits-checkpoint.ipynb
│   ├── 6.01 .The MAB Problem.ipynb
│   ├── 6.03. Epsilon-Greedy.ipynb
│   ├── 6.04. Implementing epsilon-greedy .ipynb
│   ├── 6.06. Implementing Softmax Exploration.ipynb
│   ├── 6.08. Implementing UCB.ipynb
│   ├── 6.10. Implementing Thompson Sampling.ipynb
│   ├── 6.12. Finding the Best Advertisement Banner using Bandits.ipynb
│   └── README.md
├── 07. Deep learning foundations/
│   ├── .ipynb_checkpoints/
│   │   └── 7.05 Building Neural Network from scratch-checkpoint.ipynb
│   ├── 7.05 Building Neural Network from scratch.ipynb
│   └── README.md
├── 08. A primer on TensorFlow/
│   ├── .ipynb_checkpoints/
│   │   ├── 8.05 Handwritten digits classification using TensorFlow-checkpoint.ipynb
│   │   └── 8.10 MNIST digits classification in TensorFlow 2.0-checkpoint.ipynb
│   ├── 8.05 Handwritten digits classification using TensorFlow.ipynb
│   ├── 8.08 Math operations in TensorFlow.ipynb
│   ├── 8.10 MNIST digits classification in TensorFlow 2.0.ipynb
│   ├── README.md
│   └── graphs/
│       └── events.out.tfevents.1559122983.ml-dev
├── 09.  Deep Q Network and its Variants/
│   ├── .ipynb_checkpoints/
│   │   ├── 7.03. Playing Atari Games using DQN-Copy1-checkpoint.ipynb
│   │   ├── 7.03. Playing Atari Games using DQN-checkpoint.ipynb
│   │   └── 9.03. Playing Atari Games using DQN-checkpoint.ipynb
│   ├── 9.03. Playing Atari Games using DQN.ipynb
│   └── READEME.md
├── 10. Policy Gradient Method/
│   ├── .ipynb_checkpoints/
│   │   ├── 10.01. Why Policy based Methods-checkpoint.ipynb
│   │   ├── 10.02. Policy Gradient Intuition-checkpoint.ipynb
│   │   ├── 10.07. Cart Pole Balancing with Policy Gradient-checkpoint.ipynb
│   │   └── 8.07. Cart Pole Balancing with Policy Gradient-checkpoint.ipynb
│   ├── 10.07. Cart Pole Balancing with Policy Gradient.ipynb
│   └── README.md
├── 11. Actor Critic Methods - A2C and A3C/
│   ├── .ipynb_checkpoints/
│   │   ├── 11.01. Overview of actor critic method-checkpoint.ipynb
│   │   ├── 11.05. Mountain Car Climbing using A3C-checkpoint.ipynb
│   │   └── 9.05. Mountain Car Climbing using A3C-checkpoint.ipynb
│   ├── 11.05. Mountain Car Climbing using A3C.ipynb
│   ├── README.md
│   └── logs/
│       └── events.out.tfevents.1596718791.Sudharsan
├── 12. Learning DDPG, TD3 and SAC/
│   ├── .ipynb_checkpoints/
│   │   ├── 10.02. Swinging Up the Pendulum using DDPG -checkpoint.ipynb
│   │   ├── 12.01. DDPG-checkpoint.ipynb
│   │   ├── 12.02. Swinging Up the Pendulum using DDPG -checkpoint.ipynb
│   │   ├── 12.03. Twin delayed DDPG-checkpoint.ipynb
│   │   └── Swinging up the pendulum using DDPG -checkpoint.ipynb
│   ├── 12.05. Swinging Up the Pendulum using DDPG .ipynb
│   └── README.md
├── 13. TRPO, PPO and ACKTR Methods/
│   ├── .ipynb_checkpoints/
│   │   ├──  Implementing PPO-clipped method-checkpoint.ipynb
│   │   ├── 11.09. Implementing PPO-Clipped Method-checkpoint.ipynb
│   │   ├── 13.01. Trust Region Policy Optimization-checkpoint.ipynb
│   │   └── 13.09. Implementing PPO-Clipped Method-checkpoint.ipynb
│   ├── 13.09. Implementing PPO-Clipped Method.ipynb
│   └── README.md
├── 14. Distributional Reinforcement Learning/
│   ├── .ipynb_checkpoints/
│   │   ├── 12.03. Playing Atari games using Categorical DQN-checkpoint.ipynb
│   │   ├── 14.03. Playing Atari games using Categorical DQN-checkpoint.ipynb
│   │   ├── Playing Atari games using Categorical DQN-checkpoint.ipynb
│   │   └── c51 done-Copy1-checkpoint.ipynb
│   ├── 14.03. Playing Atari games using Categorical DQN.ipynb
│   └── README.md
├── 15. Imitation Learning and Inverse RL/
│   ├── .ipynb_checkpoints/
│   │   ├── 13.01. Supervised Imitation Learning -checkpoint.ipynb
│   │   └── 13.02. DAgger-checkpoint.ipynb
│   ├── 15.02. DAgger.ipynb
│   └── README.md
├── 16. Deep Reinforcement Learning with Stable Baselines/
│   ├── .ipynb_checkpoints/
│   │   ├── 14.01. Creating our First Agent with Baseline-checkpoint.ipynb
│   │   ├── 14.04. Playing Atari games with DQN and its variants-checkpoint.ipynb
│   │   ├── 14.05. Implementing DQN variants-checkpoint.ipynb
│   │   ├── 14.06. Lunar Lander using A2C-checkpoint.ipynb
│   │   ├── 14.07. Creating a custom network-checkpoint.ipynb
│   │   ├── 14.08. Swinging up a pendulum using DDPG-checkpoint.ipynb
│   │   ├── 16.01. Creating our First Agent with Stable Baseline-checkpoint.ipynb
│   │   ├── 16.04. Playing Atari games with DQN and its variants-checkpoint.ipynb
│   │   ├── 16.05. Implementing DQN variants-checkpoint.ipynb
│   │   ├── 16.06. Lunar Lander using A2C-checkpoint.ipynb
│   │   ├── 16.07. Creating a custom network-checkpoint.ipynb
│   │   ├── 16.08. Swinging up a pendulum using DDPG-checkpoint.ipynb
│   │   ├── 16.09. Training an agent to walk using TRPO-checkpoint.ipynb
│   │   ├── 16.10. Training cheetah bot to run using PPO-checkpoint.ipynb
│   │   ├── Creating a custom network-checkpoint.ipynb
│   │   ├── Implementing DQN variants-checkpoint.ipynb
│   │   ├── Lunar Lander using A2C-checkpoint.ipynb
│   │   ├── Playing Atari games with DQN and its variants-checkpoint.ipynb
│   │   ├── Swinging up a pendulum using DDPG-checkpoint.ipynb
│   │   ├── Training an agent to walk using TRPO-checkpoint.ipynb
│   │   ├── Training cheetah bot to run using PPO-checkpoint.ipynb
│   │   └── Untitled-checkpoint.ipynb
│   ├── 16.04. Playing Atari games with DQN and its variants.ipynb
│   ├── 16.05. Implementing DQN variants.ipynb
│   ├── 16.06. Lunar Lander using A2C.ipynb
│   ├── 16.07. Creating a custom network.ipynb
│   ├── 16.08. Swinging up a pendulum using DDPG.ipynb
│   ├── 16.09. Training an agent to walk using TRPO.ipynb
│   ├── 16.10. Training cheetah bot to run using PPO.ipynb
│   ├── README.md
│   └── logs/
│       └── DDPG_1/
│           └── events.out.tfevents.1582974711.Sudharsan
├── 17. Reinforcement Learning Frontiers/
│   ├── .ipynb_checkpoints/
│   │   └── 15.01. Meta Reinforcement Learning-checkpoint.ipynb
│   └── README.md
└── README.md