Repository: NathanEpstein/reinforce
Branch: master
Commit: 06a698c91da1
Files: 9
Total size: 7.8 KB
Directory structure:
gitextract_z1jayhda/
├── .gitignore
├── README.md
├── reinforce/
│ ├── __init__.py
│ ├── encoding.py
│ ├── learn.py
│ ├── policy.py
│ ├── rewards.py
│ └── transitions.py
└── setup.py
================================================
FILE CONTENTS
================================================
================================================
FILE: .gitignore
================================================
*.pyc
*.egg-info
dist
.DS_Store
================================================
FILE: README.md
================================================
# reinforce
<img src="./MDP.png">
A 'plug and play' reinforcement learning library in Python.
Infers a Markov Decision Process from data and solves for the optimal policy.
Implementation based on Andrew Ng's <a href="https://web.cs.wpi.edu/~kmlee/cs539/cs229-notes12.pdf">notes.</a>
More information related to this project can be found <a href="https://github.com/NathanEpstein/pydata-reinforce">here.</a>
## Example Usage
```python
observations = [
{ 'state_transitions': [
{ 'state': 'low', 'action': 'climb', 'state_': 'mid' },
{ 'state': 'mid', 'action': 'climb', 'state_': 'high' },
{ 'state': 'high', 'action': 'sink', 'state_': 'mid' },
{ 'state': 'mid', 'action': 'sink', 'state_': 'low' },
{ 'state': 'low', 'action': 'sink', 'state_': 'bottom' }
],
'reward': 0
},
{ 'state_transitions': [
{ 'state': 'low', 'action': 'climb', 'state_': 'mid' },
{ 'state': 'mid', 'action': 'climb', 'state_': 'high' },
{ 'state': 'high', 'action': 'climb', 'state_': 'top' },
],
'reward': 0
}
]
trap_states = [
{
'state_transitions': [
{ 'state': 'bottom', 'action': 'sink', 'state_': 'bottom' },
{ 'state': 'bottom', 'action': 'climb', 'state_': 'bottom' }
],
'reward': 0
},
{
'state_transitions': [
{ 'state': 'top', 'action': 'sink', 'state_': 'top' },
{ 'state': 'top', 'action': 'climb', 'state_': 'top' },
],
'reward': 1
},
]
from reinforce import MarkovAgent
mark = MarkovAgent(observations + trap_states)
mark.learn()
print(mark.policy)
# {'high': 'climb', 'top': 'sink', 'bottom': 'sink', 'low': 'climb', 'mid': 'climb'}
# NOTE: policy in top and bottom states is chosen randomly (doesn't affect state)
```
================================================
FILE: reinforce/__init__.py
================================================
from .learn import *
================================================
FILE: reinforce/encoding.py
================================================
class StateActionEncoder:
    """Maps raw state/action labels to dense integer codes and back."""

    def __init__(self, observations):
        # observations: list of episodes, each a dict with a
        # 'state_transitions' list of {'state', 'action', 'state_'} dicts
        # whose values are raw, hashable labels.
        self.observations = observations
        self._parse_states_and_actions()

    def parse_dimensions(self):
        """Return the number of distinct states and actions observed."""
        return {
            'state_count': len(self.int_to_state),
            'action_count': len(self.int_to_action)
        }

    def observations_to_int(self):
        """Re-encode every transition in-place with integer codes."""
        for observation in self.observations:
            for transition in observation['state_transitions']:
                transition['state'] = self.state_to_int[transition['state']]
                transition['state_'] = self.state_to_int[transition['state_']]
                transition['action'] = self.action_to_int[transition['action']]

    def parse_encoded_policy(self, encoded_policy):
        """Translate an int-encoded policy sequence into a {state: action} dict.

        encoded_policy is indexable by state code; entries may be floats
        (e.g. a numpy array) and are truncated to int before lookup.
        """
        policy = {}
        for index, encoded_action in enumerate(encoded_policy):
            state = self.int_to_state[index]
            action = self.int_to_action[int(encoded_action)]
            policy[state] = action
        return policy

    def _parse_states_and_actions(self):
        # Assign codes in first-seen order; the int_to_* lists are the exact
        # inverses of the *_to_int dicts by construction.
        state_dict, action_dict = {}, {}
        state_array, action_array = [], []
        for observation in self.observations:
            for transition in observation['state_transitions']:
                # BUG FIX: also register the destination state ('state_') so a
                # terminal state that never appears as a source still gets a
                # code (previously observations_to_int raised KeyError on it).
                for state in (transition['state'], transition['state_']):
                    if state not in state_dict:
                        state_dict[state] = len(state_array)
                        state_array.append(state)
                action = transition['action']
                if action not in action_dict:
                    action_dict[action] = len(action_array)
                    action_array.append(action)
        self.state_to_int = state_dict
        self.action_to_int = action_dict
        self.int_to_state = state_array
        self.int_to_action = action_array
================================================
FILE: reinforce/learn.py
================================================
# Explicit package-relative imports. The previous bare form
# (`from encoding import ...`) relied on Python 2's implicit relative
# imports and fails under Python 3, where the package's
# `from .learn import *` triggers these lines.
from .encoding import StateActionEncoder
from .rewards import RewardParser
from .transitions import TransitionParser
from .policy import PolicyParser
class MarkovAgent:
    """Infers an MDP from raw observations and solves it for a policy."""

    def __init__(self, observations):
        # Translate raw state/action labels into dense integer codes so the
        # downstream parsers can index numpy arrays directly.
        encoder = StateActionEncoder(observations)
        encoder.observations_to_int()
        dimensions = encoder.parse_dimensions()

        self.state_action_encoder = encoder
        self.reward_parser = RewardParser(observations, dimensions)
        self.transition_parser = TransitionParser(observations, dimensions)
        self.policy_parser = PolicyParser(dimensions)

    def learn(self):
        """Estimate rewards/transitions, then store the policy on `self.policy`.

        The stored policy is a readable {state_label: action_label} dict.
        """
        rewards = self.reward_parser.rewards()
        probabilities = self.transition_parser.transition_probabilities()
        encoded = self.policy_parser.policy(probabilities, rewards)
        self.policy = self.state_action_encoder.parse_encoded_policy(encoded)
================================================
FILE: reinforce/policy.py
================================================
import numpy as np
class PolicyParser:
    """Solves for the greedy policy of an MDP via value iteration."""

    def __init__(self, dimensions):
        # dimensions: dict with int-encoded 'state_count' and 'action_count'.
        self.state_count = dimensions['state_count']
        self.action_count = dimensions['action_count']

    def policy(self, P, rewards, gamma=0.9, iterations=125):
        """Return an array mapping state index -> best action index.

        P:          transition probabilities, indexable as P[state][action][state_].
        rewards:    per-state expected reward, indexable by state.
        gamma:      discount factor (default preserves historical behavior).
        iterations: number of value-iteration sweeps (fixed count, no
                    convergence test — matches historical behavior).
        """
        print('COMPUTING POLICY')
        best_policy = np.zeros(self.state_count)
        state_values = np.zeros(self.state_count)

        for i in range(iterations):
            print("iteration: {0} / {1}".format(i + 1, iterations))
            # In-place (Gauss-Seidel) sweep: states later in the order see
            # values already updated during this iteration.
            for state in range(self.state_count):
                state_value = -float('Inf')
                for action in range(self.action_count):
                    # Expected discounted value of taking `action` in `state`;
                    # np.dot replaces the former Python loop over successors.
                    action_value = gamma * np.dot(P[state][action], state_values)
                    # `>=` keeps the historical tie-break: among equal-valued
                    # actions, the highest-indexed one wins.
                    if action_value >= state_value:
                        state_value = action_value
                        best_policy[state] = action
                state_values[state] = rewards[state] + state_value
        return best_policy
================================================
FILE: reinforce/rewards.py
================================================
import numpy as np
class RewardParser:
    """Computes per-state average rewards from observed episodes."""

    def __init__(self, observations, dimensions):
        # observations: episodes with int-encoded 'state_transitions' and an
        # episode-level scalar 'reward'.
        self.observations = observations
        self.state_count = dimensions['state_count']

    def rewards(self):
        """Return an array of average reward per state (0 for unvisited states).

        Each episode's reward is spread evenly across the states it visited,
        then averaged over all visits to each state.
        """
        print('COMPUTING REWARDS')
        total_state_rewards = np.zeros(self.state_count)
        total_state_visits = np.zeros(self.state_count)

        for observation in self.observations:
            transitions = observation['state_transitions']
            if not transitions:
                # Guard: an empty episode has no visits to attribute reward to;
                # the unguarded division previously raised ZeroDivisionError.
                continue
            reward_per_visit = observation['reward'] / float(len(transitions))
            for state_transition in transitions:
                state = state_transition['state']
                total_state_rewards[state] += reward_per_visit
                total_state_visits[state] += 1

        # Divide only where a state was visited; unvisited states stay 0.
        # This avoids the 0/0 RuntimeWarning + nan_to_num of the naive form.
        average_state_rewards = np.zeros(self.state_count)
        np.divide(total_state_rewards, total_state_visits,
                  out=average_state_rewards, where=total_state_visits > 0)
        return average_state_rewards
================================================
FILE: reinforce/transitions.py
================================================
import numpy as np
class TransitionParser:
    """Estimates transition probabilities P[state][action][state_] from data."""

    def __init__(self, observations, dimensions):
        # observations: episodes whose transitions are already int-encoded.
        self.observations = observations
        self.state_count = dimensions['state_count']
        self.action_count = dimensions['action_count']

    def transition_probabilities(self):
        """Return the maximum-likelihood transition probability tensor."""
        print('COMPUTING TRANSITIONS')
        counts = self._count_transitions()
        return self._parse_probabilities(counts)

    def _count_transitions(self):
        # Tally every observed (state, action) -> state_ transition.
        shape = (self.state_count, self.action_count, self.state_count)
        counts = np.zeros(shape)
        all_transitions = (t for obs in self.observations
                           for t in obs['state_transitions'])
        for t in all_transitions:
            counts[t['state']][t['action']][t['state_']] += 1
        return counts

    def _parse_probabilities(self, transition_count):
        # Normalize each (state, action) row to a distribution; pairs that
        # were never observed fall back to a uniform distribution.
        P = np.zeros((self.state_count, self.action_count, self.state_count))
        for state in range(self.state_count):
            for action in range(self.action_count):
                row = transition_count[state][action]
                total_transitions = float(sum(row))
                if total_transitions > 0:
                    P[state][action] = row / total_transitions
                else:
                    P[state][action] = 1.0 / self.state_count
        return P
================================================
FILE: setup.py
================================================
from setuptools import setup

# Packaging metadata for the `reinforce` library; installs the single
# `reinforce` package with no declared runtime dependencies.
setup(name='reinforce',
      version='0.2.0',
      description='plug and play reinforcement learning',
      url='http://github.com/nathanepstein/reinforce',
      author='Nathan Epstein',
      author_email='ne2210@columbia.edu',
      license='MIT',
      packages=['reinforce'],
      )
gitextract_z1jayhda/ ├── .gitignore ├── README.md ├── reinforce/ │ ├── __init__.py │ ├── encoding.py │ ├── learn.py │ ├── policy.py │ ├── rewards.py │ └── transitions.py └── setup.py
SYMBOL INDEX (20 symbols across 5 files)
FILE: reinforce/encoding.py
class StateActionEncoder (line 1) | class StateActionEncoder:
method __init__ (line 2) | def __init__(self, observations):
method parse_dimensions (line 6) | def parse_dimensions(self):
method observations_to_int (line 12) | def observations_to_int(self):
method parse_encoded_policy (line 19) | def parse_encoded_policy(self, encoded_policy):
method _parse_states_and_actions (line 28) | def _parse_states_and_actions(self):
FILE: reinforce/learn.py
class MarkovAgent (line 6) | class MarkovAgent:
method __init__ (line 7) | def __init__(self, observations):
method learn (line 18) | def learn(self):
FILE: reinforce/policy.py
class PolicyParser (line 3) | class PolicyParser:
method __init__ (line 4) | def __init__(self, dimensions):
method policy (line 8) | def policy(self, P, rewards):
FILE: reinforce/rewards.py
class RewardParser (line 3) | class RewardParser:
method __init__ (line 4) | def __init__(self, observations, dimensions):
method rewards (line 8) | def rewards(self):
FILE: reinforce/transitions.py
class TransitionParser (line 3) | class TransitionParser:
method __init__ (line 4) | def __init__(self, observations, dimensions):
method transition_probabilities (line 9) | def transition_probabilities(self):
method _count_transitions (line 14) | def _count_transitions(self):
method _parse_probabilities (line 27) | def _parse_probabilities(self, transition_count):
Condensed preview — 9 files, each showing path, character count, and a content snippet. Download the .json file or copy for the full structured content (9K chars).
[
{
"path": ".gitignore",
"chars": 32,
"preview": "*.pyc\n*.egg-info\ndist\n.DS_Store\n"
},
{
"path": "README.md",
"chars": 1746,
"preview": "# reinforce\n\n<img src=\"./MDP.png\">\n\nA 'plug and play' reinforcement learning library in Python.\n\nInfers a Markov Decisio"
},
{
"path": "reinforce/__init__.py",
"chars": 20,
"preview": "from .learn import *"
},
{
"path": "reinforce/encoding.py",
"chars": 1674,
"preview": "class StateActionEncoder:\n def __init__(self, observations):\n self.observations = observations\n self._parse_state"
},
{
"path": "reinforce/learn.py",
"chars": 980,
"preview": "from encoding import StateActionEncoder\nfrom rewards import RewardParser\nfrom transitions import TransitionParser\nfrom p"
},
{
"path": "reinforce/policy.py",
"chars": 971,
"preview": "import numpy as np\n\nclass PolicyParser:\n def __init__(self, dimensions):\n self.state_count = dimensions['state_count"
},
{
"path": "reinforce/rewards.py",
"chars": 860,
"preview": "import numpy as np\n\nclass RewardParser:\n def __init__(self, observations, dimensions):\n self.observations = observat"
},
{
"path": "reinforce/transitions.py",
"chars": 1341,
"preview": "import numpy as np\n\nclass TransitionParser:\n def __init__(self, observations, dimensions):\n self.observations = obse"
},
{
"path": "setup.py",
"chars": 321,
"preview": "from setuptools import setup\n\nsetup(name='reinforce',\n version='0.2.0',\n description='plug and play reinforcem"
}
]
About this extraction
This page contains the full source code of the NathanEpstein/reinforce GitHub repository, extracted and formatted as plain text for AI agents and large language models (LLMs). The extraction includes 9 files (7.8 KB), approximately 2.1k tokens, and a symbol index with 20 extracted functions, classes, methods, constants, and types. Use this with OpenClaw, Claude, ChatGPT, Cursor, Windsurf, or any other AI tool that accepts text input. You can copy the full output to your clipboard or download it as a .txt file.
Extracted by GitExtract — free GitHub repo to text converter for AI. Built by Nikandr Surkov.