Full Code of NathanEpstein/reinforce for AI

master 06a698c91da1 cached
9 files
7.8 KB
2.1k tokens
20 symbols
1 requests
Download .txt
Repository: NathanEpstein/reinforce
Branch: master
Commit: 06a698c91da1
Files: 9
Total size: 7.8 KB

Directory structure:
gitextract_z1jayhda/

├── .gitignore
├── README.md
├── reinforce/
│   ├── __init__.py
│   ├── encoding.py
│   ├── learn.py
│   ├── policy.py
│   ├── rewards.py
│   └── transitions.py
└── setup.py

================================================
FILE CONTENTS
================================================

================================================
FILE: .gitignore
================================================
*.pyc
*.egg-info
dist
.DS_Store


================================================
FILE: README.md
================================================
# reinforce

<img src="./MDP.png">

A 'plug and play' reinforcement learning library in Python.

Infers a Markov Decision Process from data and solves for the optimal policy.

Implementation based on Andrew Ng's <a href="https://web.cs.wpi.edu/~kmlee/cs539/cs229-notes12.pdf">notes.</a>

More information related to this project can be found <a href="https://github.com/NathanEpstein/pydata-reinforce">here.</a>

## Example Usage

```python

observations = [
  { 'state_transitions': [
      { 'state': 'low', 'action': 'climb', 'state_': 'mid' },
      { 'state': 'mid', 'action': 'climb', 'state_': 'high' },
      { 'state': 'high', 'action': 'sink', 'state_': 'mid' },
      { 'state': 'mid', 'action': 'sink', 'state_': 'low' },
      { 'state': 'low', 'action': 'sink', 'state_': 'bottom' }
    ],
    'reward': 0
  },
  { 'state_transitions': [
      { 'state': 'low', 'action': 'climb', 'state_': 'mid' },
      { 'state': 'mid', 'action': 'climb', 'state_': 'high' },
      { 'state': 'high', 'action': 'climb', 'state_': 'top' },
    ],
    'reward': 0
  }
]

trap_states = [
  {
    'state_transitions': [
      { 'state': 'bottom', 'action': 'sink', 'state_': 'bottom' },
      { 'state': 'bottom', 'action': 'climb', 'state_': 'bottom' }
    ],
    'reward': 0
  },
  {
    'state_transitions': [
      { 'state': 'top', 'action': 'sink', 'state_': 'top' },
      { 'state': 'top', 'action': 'climb', 'state_': 'top' },
    ],
    'reward': 1
  },
]

from reinforce import MarkovAgent
mark = MarkovAgent(observations + trap_states)
mark.learn()

print(mark.policy)
# {'high': 'climb', 'top': 'sink', 'bottom': 'sink', 'low': 'climb', 'mid': 'climb'}
# NOTE: policy in top and bottom states is chosen randomly (doesn't affect state)

```


================================================
FILE: reinforce/__init__.py
================================================
from .learn import *

================================================
FILE: reinforce/encoding.py
================================================
class StateActionEncoder:
  """Maps string state/action labels to dense integer ids and back.

  Labels are assigned ids in order of first appearance in the observation
  data. The four lookup structures built here (`state_to_int`,
  `action_to_int`, `int_to_state`, `int_to_action`) are shared by the
  reward/transition/policy parsers, which all operate on int-encoded data.
  """

  def __init__(self, observations):
    # observations: list of episodes, each a dict with a
    # 'state_transitions' list of {'state', 'action', 'state_'} dicts.
    self.observations = observations
    self._parse_states_and_actions()

  def parse_dimensions(self):
    """Return {'state_count', 'action_count'} for the observed MDP."""
    return {
      'state_count': len(self.int_to_state),
      'action_count': len(self.int_to_action)
    }

  def observations_to_int(self):
    """Rewrite every transition IN PLACE, replacing labels with int ids.

    Note: this mutates the caller's observation dicts.
    """
    for observation in self.observations:
      for transition in observation['state_transitions']:
        transition['state'] = self.state_to_int[transition['state']]
        transition['state_'] = self.state_to_int[transition['state_']]
        transition['action'] = self.action_to_int[transition['action']]

  def parse_encoded_policy(self, encoded_policy):
    """Convert an int-encoded policy array to a {state: action} dict.

    `encoded_policy[i]` is the (possibly float, e.g. from a NumPy array)
    id of the best action in state id `i`.
    """
    return {
      self.int_to_state[index]: self.int_to_action[int(encoded_action)]
      for index, encoded_action in enumerate(encoded_policy)
    }

  def _parse_states_and_actions(self):
    # Assign ids in first-appearance order. Successor states ('state_')
    # are registered too: previously a state appearing only as a
    # successor (e.g. a terminal state with no outgoing transitions in
    # the data) caused a KeyError in observations_to_int.
    state_to_int, action_to_int = {}, {}
    int_to_state, int_to_action = [], []

    def register(label, lookup, labels):
      # First sighting of `label` gets the next free id == len(labels).
      if label not in lookup:
        lookup[label] = len(labels)
        labels.append(label)

    for observation in self.observations:
      for transition in observation['state_transitions']:
        register(transition['state'], state_to_int, int_to_state)
        register(transition['state_'], state_to_int, int_to_state)
        register(transition['action'], action_to_int, int_to_action)

    self.state_to_int = state_to_int
    self.action_to_int = action_to_int
    self.int_to_state = int_to_state
    self.int_to_action = int_to_action



================================================
FILE: reinforce/learn.py
================================================
# Explicit relative imports: the original `from encoding import ...` form
# is an implicit relative import that only works on Python 2; the package's
# __init__.py already uses `from .learn import *`, so importing the package
# failed on Python 3 until these were made explicit (PEP 328).
from .encoding import StateActionEncoder
from .rewards import RewardParser
from .transitions import TransitionParser
from .policy import PolicyParser


class MarkovAgent:
  """Infers an MDP from observation episodes and solves for its policy.

  Usage: construct with observation data, call `learn()`, then read the
  `.policy` attribute ({state_label: action_label}).
  """

  def __init__(self, observations):
    # Encode observation data as int values. NOTE: observations_to_int
    # mutates the caller's `observations` in place; the parsers below
    # rely on receiving the int-encoded version.
    self.state_action_encoder = StateActionEncoder(observations)
    self.state_action_encoder.observations_to_int()
    dimensions = self.state_action_encoder.parse_dimensions()

    # Create reward, transition, and policy parsers over the encoded data.
    self.reward_parser = RewardParser(observations, dimensions)
    self.transition_parser = TransitionParser(observations, dimensions)
    self.policy_parser = PolicyParser(dimensions)

  def learn(self):
    """Estimate rewards R and transitions P, then run value iteration.

    Sets `self.policy`, a dict mapping each state label to its best
    action label.
    """
    R = self.reward_parser.rewards()
    P = self.transition_parser.transition_probabilities()

    # Learn int-encoded policy and convert to readable dictionary.
    encoded_policy = self.policy_parser.policy(P, R)
    self.policy = self.state_action_encoder.parse_encoded_policy(encoded_policy)


================================================
FILE: reinforce/policy.py
================================================
import numpy as np

class PolicyParser:
  """Solves for the optimal policy of a finite MDP via value iteration."""

  def __init__(self, dimensions):
    # Sizes of the int-encoded state and action spaces.
    self.state_count = dimensions['state_count']
    self.action_count = dimensions['action_count']

  def policy(self, P, rewards):
    """Run a fixed number of value-iteration sweeps.

    P[s][a][s'] is the probability of landing in s' after taking action
    a in state s; rewards[s] is the expected immediate reward of s.
    Returns an array whose entry s is the id of the best action in s.
    """
    print('COMPUTING POLICY')

    GAMMA = 0.9        # discount factor
    ITERATIONS = 125   # fixed sweep count (no convergence test)

    chosen_actions = np.zeros(self.state_count)
    values = np.zeros(self.state_count)

    for sweep in range(ITERATIONS):
      print ("iteration: {0} / {1}".format(sweep + 1, ITERATIONS))

      for s in range(self.state_count):
        best_value = -float('Inf')

        for a in range(self.action_count):
          # Discounted expected value of the successor state.
          expected = sum(
            P[s][a][t] * values[t] * GAMMA
            for t in range(self.state_count)
          )

          # `>=` means ties resolve to the highest-numbered action,
          # matching the original scan order.
          if expected >= best_value:
            best_value = expected
            chosen_actions[s] = a

        # In-place (Gauss-Seidel style) update: later states in this
        # sweep already see the refreshed value.
        values[s] = rewards[s] + best_value

    return chosen_actions

================================================
FILE: reinforce/rewards.py
================================================
import numpy as np

class RewardParser:
  """Estimates the expected immediate reward of each int-encoded state."""

  def __init__(self, observations, dimensions):
    # observations: episodes already int-encoded by StateActionEncoder.
    self.observations = observations
    self.state_count = dimensions['state_count']

  def rewards(self):
    """Return an array of average per-visit reward for each state.

    Each episode's scalar reward is spread evenly across every state
    visit in that episode, then averaged per state over all episodes.
    States never visited get reward 0.
    """
    print('COMPUTING REWARDS')
    total_state_rewards = np.zeros(self.state_count)
    total_state_visits = np.zeros(self.state_count)

    for observation in self.observations:
      visits = float(len(observation['state_transitions']))
      # Guard: an episode with no transitions carries no reward signal
      # (previously this raised ZeroDivisionError).
      if not visits:
        continue
      reward_per_visit = observation['reward'] / visits

      for state_transition in observation['state_transitions']:
        state = state_transition['state']
        total_state_rewards[state] += reward_per_visit
        total_state_visits[state] += 1

    # Divide only where a state was visited; unvisited states stay 0.
    # This replaces the 0/0 -> nan -> nan_to_num path, which emitted a
    # NumPy RuntimeWarning, with the same end result and no warning.
    return np.divide(
      total_state_rewards,
      total_state_visits,
      out=np.zeros(self.state_count),
      where=total_state_visits > 0,
    )

================================================
FILE: reinforce/transitions.py
================================================
import numpy as np

class TransitionParser:
  """Builds maximum-likelihood transition probabilities from observations."""

  def __init__(self, observations, dimensions):
    # observations: episodes already int-encoded by StateActionEncoder.
    self.observations = observations
    self.state_count = dimensions['state_count']
    self.action_count = dimensions['action_count']

  def transition_probabilities(self):
    """Return P with P[s][a][s'] = empirical Prob(s' | s, a)."""
    print('COMPUTING TRANSITIONS')
    return self._parse_probabilities(self._count_transitions())

  def _count_transitions(self):
    # Tally every observed (state, action, next-state) triple.
    counts = np.zeros((self.state_count, self.action_count, self.state_count))

    for observation in self.observations:
      for move in observation['state_transitions']:
        counts[move['state'], move['action'], move['state_']] += 1

    return counts

  def _parse_probabilities(self, transition_count):
    P = np.zeros((self.state_count, self.action_count, self.state_count))

    for s in range(self.state_count):
      for a in range(self.action_count):
        total = transition_count[s][a].sum()

        if total > 0:
          # Normalize the observed counts into a distribution.
          P[s][a] = transition_count[s][a] / total
        else:
          # Never saw (s, a): fall back to a uniform distribution.
          P[s][a] = 1.0 / self.state_count

    return P

================================================
FILE: setup.py
================================================
from setuptools import setup

# Minimal packaging config: installs the single `reinforce` package.
# No install_requires is declared, although the package imports numpy —
# NOTE(review): consider adding it; left untouched here.
setup(name='reinforce',
      version='0.2.0',
      description='plug and play reinforcement learning',
      url='http://github.com/nathanepstein/reinforce',
      author='Nathan Epstein',
      author_email='ne2210@columbia.edu',
      license='MIT',
      packages=['reinforce'],
      )
Download .txt
gitextract_z1jayhda/

├── .gitignore
├── README.md
├── reinforce/
│   ├── __init__.py
│   ├── encoding.py
│   ├── learn.py
│   ├── policy.py
│   ├── rewards.py
│   └── transitions.py
└── setup.py
Download .txt
SYMBOL INDEX (20 symbols across 5 files)

FILE: reinforce/encoding.py
  class StateActionEncoder (line 1) | class StateActionEncoder:
    method __init__ (line 2) | def __init__(self, observations):
    method parse_dimensions (line 6) | def parse_dimensions(self):
    method observations_to_int (line 12) | def observations_to_int(self):
    method parse_encoded_policy (line 19) | def parse_encoded_policy(self, encoded_policy):
    method _parse_states_and_actions (line 28) | def _parse_states_and_actions(self):

FILE: reinforce/learn.py
  class MarkovAgent (line 6) | class MarkovAgent:
    method __init__ (line 7) | def __init__(self, observations):
    method learn (line 18) | def learn(self):

FILE: reinforce/policy.py
  class PolicyParser (line 3) | class PolicyParser:
    method __init__ (line 4) | def __init__(self, dimensions):
    method policy (line 8) | def policy(self, P, rewards):

FILE: reinforce/rewards.py
  class RewardParser (line 3) | class RewardParser:
    method __init__ (line 4) | def __init__(self, observations, dimensions):
    method rewards (line 8) | def rewards(self):

FILE: reinforce/transitions.py
  class TransitionParser (line 3) | class TransitionParser:
    method __init__ (line 4) | def __init__(self, observations, dimensions):
    method transition_probabilities (line 9) | def transition_probabilities(self):
    method _count_transitions (line 14) | def _count_transitions(self):
    method _parse_probabilities (line 27) | def _parse_probabilities(self, transition_count):
Condensed preview — 9 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (9K chars).
[
  {
    "path": ".gitignore",
    "chars": 32,
    "preview": "*.pyc\n*.egg-info\ndist\n.DS_Store\n"
  },
  {
    "path": "README.md",
    "chars": 1746,
    "preview": "# reinforce\n\n<img src=\"./MDP.png\">\n\nA 'plug and play' reinforcement learning library in Python.\n\nInfers a Markov Decisio"
  },
  {
    "path": "reinforce/__init__.py",
    "chars": 20,
    "preview": "from .learn import *"
  },
  {
    "path": "reinforce/encoding.py",
    "chars": 1674,
    "preview": "class StateActionEncoder:\n  def __init__(self, observations):\n    self.observations = observations\n    self._parse_state"
  },
  {
    "path": "reinforce/learn.py",
    "chars": 980,
    "preview": "from encoding import StateActionEncoder\nfrom rewards import RewardParser\nfrom transitions import TransitionParser\nfrom p"
  },
  {
    "path": "reinforce/policy.py",
    "chars": 971,
    "preview": "import numpy as np\n\nclass PolicyParser:\n  def __init__(self, dimensions):\n    self.state_count = dimensions['state_count"
  },
  {
    "path": "reinforce/rewards.py",
    "chars": 860,
    "preview": "import numpy as np\n\nclass RewardParser:\n  def __init__(self, observations, dimensions):\n    self.observations = observat"
  },
  {
    "path": "reinforce/transitions.py",
    "chars": 1341,
    "preview": "import numpy as np\n\nclass TransitionParser:\n  def __init__(self, observations, dimensions):\n    self.observations = obse"
  },
  {
    "path": "setup.py",
    "chars": 321,
    "preview": "from setuptools import setup\n\nsetup(name='reinforce',\n      version='0.2.0',\n      description='plug and play reinforcem"
  }
]

About this extraction

This page contains the full source code of the NathanEpstein/reinforce GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 9 files (7.8 KB), approximately 2.1k tokens, and a symbol index with 20 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.

Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.

Copied to clipboard!