gitextract_jqc__b22/ ├── 01. Fundamentals of Reinforcement Learning/ │ ├── .ipynb_checkpoints/ │ │ ├── 1.01. Basic Idea of Reinforcement Learning -checkpoint.ipynb │ │ ├── 1.02. Key Elements of Reinforcement Learning -checkpoint.ipynb │ │ ├── 1.03. Reinforcement Learning Algorithm-checkpoint.ipynb │ │ ├── 1.04. RL agent in the Grid World -checkpoint.ipynb │ │ ├── 1.05. How RL differs from other ML paradigms?-checkpoint.ipynb │ │ ├── 1.06. Markov Decision Processes-checkpoint.ipynb │ │ └── 1.07. Action space, Policy, Episode and Horizon-checkpoint.ipynb │ ├── 1.01. Key Elements of Reinforcement Learning .ipynb │ ├── 1.02. Basic Idea of Reinforcement Learning.ipynb │ ├── 1.03. Reinforcement Learning Algorithm.ipynb │ ├── 1.04. RL agent in the Grid World .ipynb │ ├── 1.05. How RL differs from other ML paradigms?.ipynb │ ├── 1.06. Markov Decision Processes.ipynb │ ├── 1.07. Action space, Policy, Episode and Horizon.ipynb │ ├── 1.08. Return, Discount Factor and Math Essentials.ipynb │ ├── 1.09 Value function and Q function.ipynb │ ├── 1.10. Model-Based and Model-Free Learning .ipynb │ ├── 1.11. Different Types of Environments.ipynb │ ├── 1.12. Applications of Reinforcement Learning.ipynb │ └── 1.13. Reinforcement Learning Glossary.ipynb ├── 02. A Guide to the Gym Toolkit/ │ ├── 2.02. Creating our First Gym Environment.ipynb │ ├── 2.05. Cart Pole Balancing with Random Policy.ipynb │ └── README.md ├── 03. Bellman Equation and Dynamic Programming/ │ ├── .ipynb_checkpoints/ │ │ ├── 3.06. Solving the Frozen Lake Problem with Value Iteration-checkpoint.ipynb │ │ └── 3.08. Solving the Frozen Lake Problem with Policy Iteration-checkpoint.ipynb │ ├── 3.06. Solving the Frozen Lake Problem with Value Iteration.ipynb │ ├── 3.08. Solving the Frozen Lake Problem with Policy Iteration.ipynb │ └── README.md ├── 04. Monte Carlo Methods/ │ ├── .ipynb_checkpoints/ │ │ ├── 4.01. Understanding the Monte Carlo Method-checkpoint.ipynb │ │ ├── 4.02. Prediction and control tasks-checkpoint.ipynb │ │ ├── 4.05. Every-visit MC Prediction with Blackjack Game-checkpoint.ipynb │ │ ├── 4.06. First-visit MC Prediction with Blackjack Game-checkpoint.ipynb │ │ └── 4.13. Implementing On-Policy MC Control-checkpoint.ipynb │ ├── 4.13. Implementing On-Policy MC Control.ipynb │ └── README.md ├── 05. Understanding Temporal Difference Learning/ │ ├── .ipynb_checkpoints/ │ │ ├── 5.03. Predicting the Value of States in a Frozen Lake Environment-checkpoint.ipynb │ │ ├── 5.06. Computing Optimal Policy using SARSA-checkpoint.ipynb │ │ └── 5.08. Computing the Optimal Policy using Q Learning-checkpoint.ipynb │ ├── 5.03. Predicting the Value of States in a Frozen Lake Environment.ipynb │ ├── 5.06. Computing Optimal Policy using SARSA.ipynb │ ├── 5.08. Computing the Optimal Policy using Q Learning.ipynb │ └── README.md ├── 06. Case Study: The MAB Problem/ │ ├── .ipynb_checkpoints/ │ │ ├── 6.01 .The MAB Problem-checkpoint.ipynb │ │ ├── 6.04. Implementing epsilon-greedy -checkpoint.ipynb │ │ ├── 6.06. Implementing Softmax Exploration-checkpoint.ipynb │ │ ├── 6.08. Implementing UCB-checkpoint.ipynb │ │ ├── 6.1-checkpoint.ipynb │ │ ├── 6.10. Implementing Thompson Sampling-checkpoint.ipynb │ │ └── 6.12. Finding the Best Advertisement Banner using Bandits-checkpoint.ipynb │ ├── 6.01 .The MAB Problem.ipynb │ ├── 6.03. Epsilon-Greedy.ipynb │ ├── 6.04. Implementing epsilon-greedy .ipynb │ ├── 6.06. Implementing Softmax Exploration.ipynb │ ├── 6.08. Implementing UCB.ipynb │ ├── 6.10. Implementing Thompson Sampling.ipynb │ ├── 6.12. Finding the Best Advertisement Banner using Bandits.ipynb │ └── README.md ├── 07. Deep learning foundations/ │ ├── .ipynb_checkpoints/ │ │ └── 7.05 Building Neural Network from scratch-checkpoint.ipynb │ ├── 7.05 Building Neural Network from scratch.ipynb │ └── README.md ├── 08. A primer on TensorFlow/ │ ├── .ipynb_checkpoints/ │ │ ├── 8.05 Handwritten digits classification using TensorFlow-checkpoint.ipynb │ │ └── 8.10 MNIST digits classification in TensorFlow 2.0-checkpoint.ipynb │ ├── 8.05 Handwritten digits classification using TensorFlow.ipynb │ ├── 8.08 Math operations in TensorFlow.ipynb │ ├── 8.10 MNIST digits classification in TensorFlow 2.0.ipynb │ ├── README.md │ └── graphs/ │ └── events.out.tfevents.1559122983.ml-dev ├── 09. Deep Q Network and its Variants/ │ ├── .ipynb_checkpoints/ │ │ ├── 7.03. Playing Atari Games using DQN-Copy1-checkpoint.ipynb │ │ ├── 7.03. Playing Atari Games using DQN-checkpoint.ipynb │ │ └── 9.03. Playing Atari Games using DQN-checkpoint.ipynb │ ├── 9.03. Playing Atari Games using DQN.ipynb │ └── READEME.md ├── 10. Policy Gradient Method/ │ ├── .ipynb_checkpoints/ │ │ ├── 10.01. Why Policy based Methods-checkpoint.ipynb │ │ ├── 10.02. Policy Gradient Intuition-checkpoint.ipynb │ │ ├── 10.07. Cart Pole Balancing with Policy Gradient-checkpoint.ipynb │ │ └── 8.07. Cart Pole Balancing with Policy Gradient-checkpoint.ipynb │ ├── 10.07. Cart Pole Balancing with Policy Gradient.ipynb │ └── README.md ├── 11. Actor Critic Methods - A2C and A3C/ │ ├── .ipynb_checkpoints/ │ │ ├── 11.01. Overview of actor critic method-checkpoint.ipynb │ │ ├── 11.05. Mountain Car Climbing using A3C-checkpoint.ipynb │ │ └── 9.05. Mountain Car Climbing using A3C-checkpoint.ipynb │ ├── 11.05. Mountain Car Climbing using A3C.ipynb │ ├── README.md │ └── logs/ │ └── events.out.tfevents.1596718791.Sudharsan ├── 12. Learning DDPG, TD3 and SAC/ │ ├── .ipynb_checkpoints/ │ │ ├── 10.02. Swinging Up the Pendulum using DDPG -checkpoint.ipynb │ │ ├── 12.01. DDPG-checkpoint.ipynb │ │ ├── 12.02. Swinging Up the Pendulum using DDPG -checkpoint.ipynb │ │ ├── 12.03. Twin delayed DDPG-checkpoint.ipynb │ │ └── Swinging up the pendulum using DDPG -checkpoint.ipynb │ ├── 12.05. Swinging Up the Pendulum using DDPG .ipynb │ └── README.md ├── 13. TRPO, PPO and ACKTR Methods/ │ ├── .ipynb_checkpoints/ │ │ ├── Implementing PPO-clipped method-checkpoint.ipynb │ │ ├── 11.09. Implementing PPO-Clipped Method-checkpoint.ipynb │ │ ├── 13.01. Trust Region Policy Optimization-checkpoint.ipynb │ │ └── 13.09. Implementing PPO-Clipped Method-checkpoint.ipynb │ ├── 13.09. Implementing PPO-Clipped Method.ipynb │ └── README.md ├── 14. Distributional Reinforcement Learning/ │ ├── .ipynb_checkpoints/ │ │ ├── 12.03. Playing Atari games using Categorical DQN-checkpoint.ipynb │ │ ├── 14.03. Playing Atari games using Categorical DQN-checkpoint.ipynb │ │ ├── Playing Atari games using Categorical DQN-checkpoint.ipynb │ │ └── c51 done-Copy1-checkpoint.ipynb │ ├── 14.03. Playing Atari games using Categorical DQN.ipynb │ └── README.md ├── 15. Imitation Learning and Inverse RL/ │ ├── .ipynb_checkpoints/ │ │ ├── 13.01. Supervised Imitation Learning -checkpoint.ipynb │ │ └── 13.02. DAgger-checkpoint.ipynb │ ├── 15.02. DAgger.ipynb │ └── README.md ├── 16. Deep Reinforcement Learning with Stable Baselines/ │ ├── .ipynb_checkpoints/ │ │ ├── 14.01. Creating our First Agent with Baseline-checkpoint.ipynb │ │ ├── 14.04. Playing Atari games with DQN and its variants-checkpoint.ipynb │ │ ├── 14.05. Implementing DQN variants-checkpoint.ipynb │ │ ├── 14.06. Lunar Lander using A2C-checkpoint.ipynb │ │ ├── 14.07. Creating a custom network-checkpoint.ipynb │ │ ├── 14.08. Swinging up a pendulum using DDPG-checkpoint.ipynb │ │ ├── 16.01. Creating our First Agent with Stable Baseline-checkpoint.ipynb │ │ ├── 16.04. Playing Atari games with DQN and its variants-checkpoint.ipynb │ │ ├── 16.05. Implementing DQN variants-checkpoint.ipynb │ │ ├── 16.06. Lunar Lander using A2C-checkpoint.ipynb │ │ ├── 16.07. Creating a custom network-checkpoint.ipynb │ │ ├── 16.08. Swinging up a pendulum using DDPG-checkpoint.ipynb │ │ ├── 16.09. Training an agent to walk using TRPO-checkpoint.ipynb │ │ ├── 16.10. Training cheetah bot to run using PPO-checkpoint.ipynb │ │ ├── Creating a custom network-checkpoint.ipynb │ │ ├── Implementing DQN variants-checkpoint.ipynb │ │ ├── Lunar Lander using A2C-checkpoint.ipynb │ │ ├── Playing Atari games with DQN and its variants-checkpoint.ipynb │ │ ├── Swinging up a pendulum using DDPG-checkpoint.ipynb │ │ ├── Training an agent to walk using TRPO-checkpoint.ipynb │ │ ├── Training cheetah bot to run using PPO-checkpoint.ipynb │ │ └── Untitled-checkpoint.ipynb │ ├── 16.04. Playing Atari games with DQN and its variants.ipynb │ ├── 16.05. Implementing DQN variants.ipynb │ ├── 16.06. Lunar Lander using A2C.ipynb │ ├── 16.07. Creating a custom network.ipynb │ ├── 16.08. Swinging up a pendulum using DDPG.ipynb │ ├── 16.09. Training an agent to walk using TRPO.ipynb │ ├── 16.10. Training cheetah bot to run using PPO.ipynb │ ├── README.md │ └── logs/ │ └── DDPG_1/ │ └── events.out.tfevents.1582974711.Sudharsan ├── 17. Reinforcement Learning Frontiers/ │ ├── .ipynb_checkpoints/ │ │ └── 15.01. Meta Reinforcement Learning-checkpoint.ipynb │ └── README.md └── README.md