Repository: EmbersArc/gym_rocketLander
Branch: master
Commit: c421e2013014
Files: 6
Total size: 23.7 KB

Directory structure:
gitextract_j9lb45dy/

├── LICENSE
├── README.md
├── gym_rocketlander/
│   ├── __init__.py
│   └── envs/
│       ├── __init__.py
│       └── rocket_lander_env.py
└── setup.py

================================================
FILE CONTENTS
================================================

================================================
FILE: LICENSE
================================================
MIT License

Copyright (c) 2021 Sven Niederberger

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.


================================================
FILE: README.md
================================================
# ![GIF](https://thumbs.gfycat.com/CoarseEmbellishedIsopod-max-14mb.gif)
[Click here for higher quality video](https://gfycat.com/CoarseEmbellishedIsopod)

The objective of this environment is to land a rocket on a ship. The environment is highly customizable and takes discrete or continuous inputs.

### Installation
```
cd gym-rocketlander
pip install -e .
```

### Usage
Once the has been installed, it can be used like any other Gym environment:
```py
env = gym.make("gym_rocketlander:rocketlander-v0")
```

### STATE VARIABLES  
The state consists of the following variables:
  * x position  
  * y position  
  * angle  
  * first leg ground contact indicator  
  * second leg ground contact indicator  
  * throttle  
  * engine gimbal  
  
If VEL_STATE is set to true, the velocities are included:  
  * x velocity  
  * y velocity  
  * angular velocity  
  
### CONTROL INPUTS  
Discrete control inputs are:  
  * gimbal left  
  * gimbal right  
  * throttle up  
  * throttle down  
  * use first control thruster  
  * use second control thruster  
  * no action  
    
Continuous control inputs are:  
  * gimbal (left/right)  
  * throttle (up/down)  
  * control thruster (left/right)  


================================================
FILE: gym_rocketlander/__init__.py
================================================
from gym.envs.registration import register

register(
    id="rocketlander-v0",
    entry_point="gym_rocketlander.envs:RocketLander",
)

================================================
FILE: gym_rocketlander/envs/__init__.py
================================================
from gym_rocketlander.envs.rocket_lander_env import RocketLander


================================================
FILE: gym_rocketlander/envs/rocket_lander_env.py
================================================
import numpy as np
import Box2D
from Box2D.b2 import (
    fixtureDef,
    polygonShape,
    revoluteJointDef,
    distanceJointDef,
    contactListener,
)
import gym
from gym import spaces
from gym.utils import seeding
from gym.envs.classic_control import rendering

"""
The objective of this environment is to land a rocket on a ship.

STATE VARIABLES
The state consists of the following variables:
    - x position
    - y position
    - angle
    - first leg ground contact indicator
    - second leg ground contact indicator
    - throttle
    - engine gimbal
If VEL_STATE is set to true, the velocities are included:
    - x velocity
    - y velocity
    - angular velocity
all state variables are roughly in the range [-1, 1]

CONTROL INPUTS
Discrete control inputs are:
    - gimbal left
    - gimbal right
    - throttle up
    - throttle down
    - use first control thruster
    - use second control thruster
    - no action

Continuous control inputs are:
    - gimbal (left/right)
    - throttle (up/down)
    - control thruster (left/right)
"""

CONTINUOUS = True
VEL_STATE = True  # Add velocity info to state
FPS = 60
SCALE_S = 0.35  # Temporal Scaling, lower is faster - adjust forces appropriately
INITIAL_RANDOM = 0.4  # Random scaling of initial velocity, higher is more difficult

START_HEIGHT = 800.0
START_SPEED = 80.0

# ROCKET
MIN_THROTTLE = 0.4
GIMBAL_THRESHOLD = 0.4
MAIN_ENGINE_POWER = 1600 * SCALE_S
SIDE_ENGINE_POWER = 100 / FPS * SCALE_S

ROCKET_WIDTH = 3.66 * SCALE_S
ROCKET_HEIGHT = ROCKET_WIDTH / 3.7 * 47.9
ENGINE_HEIGHT = ROCKET_WIDTH * 0.5
ENGINE_WIDTH = ENGINE_HEIGHT * 0.7
THRUSTER_HEIGHT = ROCKET_HEIGHT * 0.86

# LEGS
LEG_LENGTH = ROCKET_WIDTH * 2.2
BASE_ANGLE = -0.27
SPRING_ANGLE = 0.27
LEG_AWAY = ROCKET_WIDTH / 2

# SHIP
SHIP_HEIGHT = ROCKET_WIDTH
SHIP_WIDTH = SHIP_HEIGHT * 40

# VIEWPORT
VIEWPORT_H = 720
VIEWPORT_W = 500
H = 1.1 * START_HEIGHT * SCALE_S
W = float(VIEWPORT_W) / VIEWPORT_H * H

# SMOKE FOR VISUALS
MAX_SMOKE_LIFETIME = 2 * FPS


class ContactDetector(contactListener):
    def __init__(self, env):
        contactListener.__init__(self)
        self.env = env

    def BeginContact(self, contact):
        if (
            self.env.water in [contact.fixtureA.body, contact.fixtureB.body]
            or self.env.lander in [contact.fixtureA.body, contact.fixtureB.body]
            or self.env.containers[0] in [contact.fixtureA.body, contact.fixtureB.body]
            or self.env.containers[1] in [contact.fixtureA.body, contact.fixtureB.body]
        ):
            self.env.game_over = True
        else:
            for i in range(2):
                if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
                    self.env.legs[i].ground_contact = True

    def EndContact(self, contact):
        for i in range(2):
            if self.env.legs[i] in [contact.fixtureA.body, contact.fixtureB.body]:
                self.env.legs[i].ground_contact = False


class RocketLander(gym.Env):
    metadata = {"render.modes": ["human", "rgb_array"], "video.frames_per_second": FPS}

    def __init__(self):
        self._seed()
        self.viewer = None
        self.episode_number = 0

        self.world = Box2D.b2World()
        self.water = None
        self.lander = None
        self.engine = None
        self.ship = None
        self.legs = []

        high = np.array([1, 1, 1, 1, 1, 1, 1, np.inf, np.inf, np.inf], dtype=np.float32)
        low = -high
        if not VEL_STATE:
            high = high[0:7]
            low = low[0:7]

        self.observation_space = spaces.Box(low, high, dtype=np.float32)

        if CONTINUOUS:
            self.action_space = spaces.Box(-1.0, +1.0, (3,), dtype=np.float32)
        else:
            self.action_space = spaces.Discrete(7)

        self.reset()

    def _seed(self, seed=None):
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def _destroy(self):
        if not self.water:
            return
        self.world.contactListener = None
        self.world.DestroyBody(self.water)
        self.water = None
        self.world.DestroyBody(self.lander)
        self.lander = None
        self.world.DestroyBody(self.ship)
        self.ship = None
        self.world.DestroyBody(self.legs[0])
        self.world.DestroyBody(self.legs[1])
        self.legs = []
        self.world.DestroyBody(self.containers[0])
        self.world.DestroyBody(self.containers[1])
        self.containers = []

    def reset(self):
        self._destroy()
        self.world.contactListener_keepref = ContactDetector(self)
        self.world.contactListener = self.world.contactListener_keepref
        self.game_over = False
        self.prev_shaping = None
        self.throttle = 0
        self.gimbal = 0.0
        self.landed_ticks = 0
        self.stepnumber = 0
        self.smoke = []

        # self.terrainheigth = self.np_random.uniform(H / 20, H / 10)
        self.terrainheigth = H / 20
        self.shipheight = self.terrainheigth + SHIP_HEIGHT
        # ship_pos = self.np_random.uniform(0, SHIP_WIDTH / SCALE) + SHIP_WIDTH / SCALE
        ship_pos = W / 2
        self.helipad_x1 = ship_pos - SHIP_WIDTH / 2
        self.helipad_x2 = self.helipad_x1 + SHIP_WIDTH
        self.helipad_y = self.terrainheigth + SHIP_HEIGHT

        self.water = self.world.CreateStaticBody(
            fixtures=fixtureDef(
                shape=polygonShape(
                    vertices=(
                        (0, 0),
                        (W, 0),
                        (W, self.terrainheigth),
                        (0, self.terrainheigth),
                    )
                ),
                friction=0.1,
                restitution=0.0,
            )
        )
        self.water.color1 = rgb(70, 96, 176)

        self.ship = self.world.CreateStaticBody(
            fixtures=fixtureDef(
                shape=polygonShape(
                    vertices=(
                        (self.helipad_x1, self.terrainheigth),
                        (self.helipad_x2, self.terrainheigth),
                        (self.helipad_x2, self.terrainheigth + SHIP_HEIGHT),
                        (self.helipad_x1, self.terrainheigth + SHIP_HEIGHT),
                    )
                ),
                friction=0.5,
                restitution=0.0,
            )
        )

        self.containers = []
        for side in [-1, 1]:
            self.containers.append(
                self.world.CreateStaticBody(
                    fixtures=fixtureDef(
                        shape=polygonShape(
                            vertices=(
                                (
                                    ship_pos + side * 0.95 * SHIP_WIDTH / 2,
                                    self.helipad_y,
                                ),
                                (
                                    ship_pos + side * 0.95 * SHIP_WIDTH / 2,
                                    self.helipad_y + SHIP_HEIGHT,
                                ),
                                (
                                    ship_pos
                                    + side * 0.95 * SHIP_WIDTH / 2
                                    - side * SHIP_HEIGHT,
                                    self.helipad_y + SHIP_HEIGHT,
                                ),
                                (
                                    ship_pos
                                    + side * 0.95 * SHIP_WIDTH / 2
                                    - side * SHIP_HEIGHT,
                                    self.helipad_y,
                                ),
                            )
                        ),
                        friction=0.2,
                        restitution=0.0,
                    )
                )
            )
            self.containers[-1].color1 = rgb(206, 206, 2)

        self.ship.color1 = (0.2, 0.2, 0.2)

        initial_x = W / 2 + W * np.random.uniform(-0.3, 0.3)
        initial_y = H * 0.95
        self.lander = self.world.CreateDynamicBody(
            position=(initial_x, initial_y),
            angle=0.0,
            fixtures=fixtureDef(
                shape=polygonShape(
                    vertices=(
                        (-ROCKET_WIDTH / 2, 0),
                        (+ROCKET_WIDTH / 2, 0),
                        (ROCKET_WIDTH / 2, +ROCKET_HEIGHT),
                        (-ROCKET_WIDTH / 2, +ROCKET_HEIGHT),
                    )
                ),
                density=1.0,
                friction=0.5,
                categoryBits=0x0010,
                maskBits=0x001,
                restitution=0.0,
            ),
        )

        self.lander.color1 = rgb(230, 230, 230)

        for i in [-1, +1]:
            leg = self.world.CreateDynamicBody(
                position=(initial_x - i * LEG_AWAY, initial_y + ROCKET_WIDTH * 0.2),
                angle=(i * BASE_ANGLE),
                fixtures=fixtureDef(
                    shape=polygonShape(
                        vertices=(
                            (0, 0),
                            (0, LEG_LENGTH / 25),
                            (i * LEG_LENGTH, 0),
                            (i * LEG_LENGTH, -LEG_LENGTH / 20),
                            (i * LEG_LENGTH / 3, -LEG_LENGTH / 7),
                        )
                    ),
                    density=1,
                    restitution=0.0,
                    friction=0.2,
                    categoryBits=0x0020,
                    maskBits=0x001,
                ),
            )
            leg.ground_contact = False
            leg.color1 = (0.25, 0.25, 0.25)
            rjd = revoluteJointDef(
                bodyA=self.lander,
                bodyB=leg,
                localAnchorA=(i * LEG_AWAY, ROCKET_WIDTH * 0.2),
                localAnchorB=(0, 0),
                enableLimit=True,
                maxMotorTorque=2500.0,
                motorSpeed=-0.05 * i,
                enableMotor=True,
            )
            djd = distanceJointDef(
                bodyA=self.lander,
                bodyB=leg,
                anchorA=(i * LEG_AWAY, ROCKET_HEIGHT / 8),
                anchorB=leg.fixtures[0].body.transform * (i * LEG_LENGTH, 0),
                collideConnected=False,
                frequencyHz=0.01,
                dampingRatio=0.9,
            )
            if i == 1:
                rjd.lowerAngle = -SPRING_ANGLE
                rjd.upperAngle = 0
            else:
                rjd.lowerAngle = 0
                rjd.upperAngle = +SPRING_ANGLE
            leg.joint = self.world.CreateJoint(rjd)
            leg.joint2 = self.world.CreateJoint(djd)

            self.legs.append(leg)

        self.lander.linearVelocity = (
            -self.np_random.uniform(0, INITIAL_RANDOM)
            * START_SPEED
            * (initial_x - W / 2)
            / W,
            -START_SPEED,
        )

        self.lander.angularVelocity = (1 + INITIAL_RANDOM) * np.random.uniform(-1, 1)

        self.drawlist = (
            self.legs + [self.water] + [self.ship] + self.containers + [self.lander]
        )

        if CONTINUOUS:
            return self.step([0, 0, 0])[0]
        else:
            return self.step(6)[0]

    def step(self, action):

        self.force_dir = 0

        if CONTINUOUS:
            np.clip(action, -1, 1)
            self.gimbal += action[0] * 0.15 / FPS
            self.throttle += action[1] * 0.5 / FPS
            if action[2] > 0.5:
                self.force_dir = 1
            elif action[2] < -0.5:
                self.force_dir = -1
        else:
            if action == 0:
                self.gimbal += 0.01
            elif action == 1:
                self.gimbal -= 0.01
            elif action == 2:
                self.throttle += 0.01
            elif action == 3:
                self.throttle -= 0.01
            elif action == 4:  # left
                self.force_dir = -1
            elif action == 5:  # right
                self.force_dir = 1

        self.gimbal = np.clip(self.gimbal, -GIMBAL_THRESHOLD, GIMBAL_THRESHOLD)
        self.throttle = np.clip(self.throttle, 0.0, 1.0)
        self.power = (
            0
            if self.throttle == 0.0
            else MIN_THROTTLE + self.throttle * (1 - MIN_THROTTLE)
        )

        # main engine force
        force_pos = (self.lander.position[0], self.lander.position[1])
        force = (
            -np.sin(self.lander.angle + self.gimbal) * MAIN_ENGINE_POWER * self.power,
            np.cos(self.lander.angle + self.gimbal) * MAIN_ENGINE_POWER * self.power,
        )
        self.lander.ApplyForce(force=force, point=force_pos, wake=False)

        # control thruster force
        force_pos_c = self.lander.position + THRUSTER_HEIGHT * np.array(
            (np.sin(self.lander.angle), np.cos(self.lander.angle))
        )
        force_c = (
            -self.force_dir * np.cos(self.lander.angle) * SIDE_ENGINE_POWER,
            self.force_dir * np.sin(self.lander.angle) * SIDE_ENGINE_POWER,
        )
        self.lander.ApplyLinearImpulse(impulse=force_c, point=force_pos_c, wake=False)

        self.world.Step(1.0 / FPS, 60, 60)

        pos = self.lander.position
        vel_l = np.array(self.lander.linearVelocity) / START_SPEED
        vel_a = self.lander.angularVelocity
        x_distance = (pos.x - W / 2) / W
        y_distance = (pos.y - self.shipheight) / (H - self.shipheight)

        angle = (self.lander.angle / np.pi) % 2
        if angle > 1:
            angle -= 2

        state = [
            2 * x_distance,
            2 * (y_distance - 0.5),
            angle,
            1.0 if self.legs[0].ground_contact else 0.0,
            1.0 if self.legs[1].ground_contact else 0.0,
            2 * (self.throttle - 0.5),
            (self.gimbal / GIMBAL_THRESHOLD),
        ]
        if VEL_STATE:
            state.extend([vel_l[0], vel_l[1], vel_a])

        # REWARD -------------------------------------------------------------------------------------------------------

        # state variables for reward
        distance = np.linalg.norm(
            (3 * x_distance, y_distance)
        )  # weight x position more
        speed = np.linalg.norm(vel_l)
        groundcontact = self.legs[0].ground_contact or self.legs[1].ground_contact
        brokenleg = (
            self.legs[0].joint.angle < 0 or self.legs[1].joint.angle > -0
        ) and groundcontact
        outside = abs(pos.x - W / 2) > W / 2 or pos.y > H
        fuelcost = 0.1 * (0.5 * self.power + abs(self.force_dir)) / FPS
        landed = (
            self.legs[0].ground_contact and self.legs[1].ground_contact and speed < 0.1
        )
        done = False

        reward = -fuelcost

        if outside or brokenleg:
            self.game_over = True

        if self.game_over:
            done = True
        else:
            # reward shaping
            shaping = -0.5 * (distance + speed + abs(angle) ** 2 + abs(vel_a) ** 2)
            shaping += 0.1 * (self.legs[0].ground_contact + self.legs[1].ground_contact)
            if self.prev_shaping is not None:
                reward += shaping - self.prev_shaping
            self.prev_shaping = shaping

            if landed:
                self.landed_ticks += 1
            else:
                self.landed_ticks = 0
            if self.landed_ticks == FPS:
                reward = 1.0
                done = True

        if done:
            reward += max(-1, 0 - 2 * (speed + distance + abs(angle) + abs(vel_a)))

        reward = np.clip(reward, -1, 1)

        # REWARD -------------------------------------------------------------------------------------------------------

        self.stepnumber += 1

        return np.array(state), reward, done, {}

    def render(self, mode="human", close=False):
        if close:
            if self.viewer is not None:
                self.viewer.close()
                self.viewer = None
            return

        if self.viewer is None:

            self.viewer = rendering.Viewer(VIEWPORT_W, VIEWPORT_H)
            self.viewer.set_bounds(0, W, 0, H)

            sky = rendering.FilledPolygon(((0, 0), (0, H), (W, H), (W, 0)))
            self.sky_color = rgb(126, 150, 233)
            sky.set_color(*self.sky_color)
            self.sky_color_half_transparent = (
                np.array((np.array(self.sky_color) + rgb(255, 255, 255))) / 2
            )
            self.viewer.add_geom(sky)

            self.rockettrans = rendering.Transform()

            engine = rendering.FilledPolygon(
                (
                    (0, 0),
                    (ENGINE_WIDTH / 2, -ENGINE_HEIGHT),
                    (-ENGINE_WIDTH / 2, -ENGINE_HEIGHT),
                )
            )
            self.enginetrans = rendering.Transform()
            engine.add_attr(self.enginetrans)
            engine.add_attr(self.rockettrans)
            engine.set_color(0.4, 0.4, 0.4)
            self.viewer.add_geom(engine)

            self.fire = rendering.FilledPolygon(
                (
                    (ENGINE_WIDTH * 0.4, 0),
                    (-ENGINE_WIDTH * 0.4, 0),
                    (-ENGINE_WIDTH * 1.2, -ENGINE_HEIGHT * 5),
                    (0, -ENGINE_HEIGHT * 8),
                    (ENGINE_WIDTH * 1.2, -ENGINE_HEIGHT * 5),
                )
            )
            self.fire.set_color(*rgb(255, 230, 107))
            self.firescale = rendering.Transform(scale=(1, 1))
            self.firetrans = rendering.Transform(translation=(0, -ENGINE_HEIGHT))
            self.fire.add_attr(self.firescale)
            self.fire.add_attr(self.firetrans)
            self.fire.add_attr(self.enginetrans)
            self.fire.add_attr(self.rockettrans)

            smoke = rendering.FilledPolygon(
                (
                    (ROCKET_WIDTH / 2, THRUSTER_HEIGHT * 1),
                    (ROCKET_WIDTH * 3, THRUSTER_HEIGHT * 1.03),
                    (ROCKET_WIDTH * 4, THRUSTER_HEIGHT * 1),
                    (ROCKET_WIDTH * 3, THRUSTER_HEIGHT * 0.97),
                )
            )
            smoke.set_color(*self.sky_color_half_transparent)
            self.smokescale = rendering.Transform(scale=(1, 1))
            smoke.add_attr(self.smokescale)
            smoke.add_attr(self.rockettrans)
            self.viewer.add_geom(smoke)

            self.gridfins = []
            for i in (-1, 1):
                finpoly = (
                    (i * ROCKET_WIDTH * 1.1, THRUSTER_HEIGHT * 1.01),
                    (i * ROCKET_WIDTH * 0.4, THRUSTER_HEIGHT * 1.01),
                    (i * ROCKET_WIDTH * 0.4, THRUSTER_HEIGHT * 0.99),
                    (i * ROCKET_WIDTH * 1.1, THRUSTER_HEIGHT * 0.99),
                )
                gridfin = rendering.FilledPolygon(finpoly)
                gridfin.add_attr(self.rockettrans)
                gridfin.set_color(0.25, 0.25, 0.25)
                self.gridfins.append(gridfin)

        if self.stepnumber % round(FPS / 10) == 0 and self.power > 0:
            s = [
                MAX_SMOKE_LIFETIME * self.power,  # total lifetime
                0,  # current lifetime
                self.power * (1 + 0.2 * np.random.random()),  # size
                np.array(self.lander.position)
                + self.power
                * ROCKET_WIDTH
                * 10
                * np.array(
                    (
                        np.sin(self.lander.angle + self.gimbal),
                        -np.cos(self.lander.angle + self.gimbal),
                    )
                )
                + self.power * 5 * (np.random.random(2) - 0.5),
            ]  # position
            self.smoke.append(s)

        for s in self.smoke:
            s[1] += 1
            if s[1] > s[0]:
                self.smoke.remove(s)
                continue
            t = rendering.Transform(translation=(s[3][0], s[3][1] + H * s[1] / 2000))
            self.viewer.draw_circle(
                radius=0.05 * s[1] + s[2],
                color=self.sky_color
                + (1 - (2 * s[1] / s[0] - 1) ** 2)
                / 3
                * (self.sky_color_half_transparent - self.sky_color),
            ).add_attr(t)

        self.viewer.add_onetime(self.fire)
        for g in self.gridfins:
            self.viewer.add_onetime(g)

        for obj in self.drawlist:
            for f in obj.fixtures:
                trans = f.body.transform
                path = [trans * v for v in f.shape.vertices]
                self.viewer.draw_polygon(path, color=obj.color1)

        for l in zip(self.legs, [-1, 1]):
            path = [
                self.lander.fixtures[0].body.transform
                * (l[1] * ROCKET_WIDTH / 2, ROCKET_HEIGHT / 8),
                l[0].fixtures[0].body.transform * (l[1] * LEG_LENGTH * 0.8, 0),
            ]
            self.viewer.draw_polyline(
                path, color=self.ship.color1, linewidth=1 if START_HEIGHT > 500 else 2
            )

        self.viewer.draw_polyline(
            (
                (self.helipad_x2, self.terrainheigth + SHIP_HEIGHT),
                (self.helipad_x1, self.terrainheigth + SHIP_HEIGHT),
            ),
            color=rgb(206, 206, 2),
            linewidth=1,
        )

        self.rockettrans.set_translation(*self.lander.position)
        self.rockettrans.set_rotation(self.lander.angle)
        self.enginetrans.set_rotation(self.gimbal)
        self.firescale.set_scale(newx=1, newy=self.power * np.random.uniform(1, 1.3))
        self.smokescale.set_scale(newx=self.force_dir, newy=1)

        return self.viewer.render(return_rgb_array=mode == "rgb_array")


def rgb(r, g, b):
    return float(r) / 255, float(g) / 255, float(b) / 255


================================================
FILE: setup.py
================================================
from setuptools import setup

setup(
    name="gym_rocketlander",
    version="0.0.1",
    install_requires=["gym", "Box2D"],
)