To generate new sentences, run the utility in `gen` mode, using the name specified during the parse operation:
python markov.py gen <name> <count>
* The `name` argument should match the name used with the earlier `parse` command
* The `count` argument is a numeric value indicating how many sentences to generate
For example:
>python markov.py gen hitchhikers_guide 3
Look, I can't speak Vogon! You don't need to touch the water
He frowned, then smiled, then tried to gauge the speed at which they were able to pick up hitch hikers
The hatchway sealed itself tight, and all the streets around it
================================================
FILE: db.py
================================================
class Db:
    """Storage wrapper for word-sequence counts.

    Combines a DB-API style connection with an ``sql`` provider object whose
    ``*_sql`` methods return the statement text to execute. All statements
    run on a single cursor obtained from the connection at construction time.
    """

    # Name under which the chain depth is stored in the parameter table.
    DEPTH_PARAM_NAME = 'depth'

    def __init__(self, conn, sql):
        """Keep the connection, open a cursor on it and remember ``sql``.

        conn -- connection object providing ``cursor()`` and ``commit()``
        sql  -- object providing the ``*_sql`` statement builders used below
        """
        self.conn = conn
        self.cursor = conn.cursor()
        self.sql = sql
        # Lazily loaded by get_depth() unless setup() sets it first.
        self.depth = None

    def setup(self, depth):
        """Create the schema for ``depth`` and store the depth parameter.

        Executes the word-table, index and parameter-table statements from
        the ``sql`` provider, then records ``depth`` under DEPTH_PARAM_NAME.
        Nothing is committed here; call commit() afterwards.
        """
        self.depth = depth
        self.cursor.execute(self.sql.create_word_table_sql(depth))
        self.cursor.execute(self.sql.create_index_sql(depth))
        self.cursor.execute(self.sql.create_param_table_sql())
        self.cursor.execute(
            self.sql.set_param_sql(), (self.DEPTH_PARAM_NAME, depth))

    def _get_word_list_count(self, word_list):
        """Return the stored count for ``word_list``, or 0 if no row matches.

        Raises ValueError when ``word_list`` does not contain exactly
        ``get_depth()`` words.
        """
        depth = self.get_depth()
        if len(word_list) != depth:
            raise ValueError('Expected %s words in list but found %s' % (depth, len(word_list)))
        self.cursor.execute(
            self.sql.select_count_for_words_sql(depth), word_list)
        row = self.cursor.fetchone()
        return row[0] if row else 0

    def get_depth(self):
        """Return the chain depth, reading it from the database on first use.

        Raises ValueError when the parameter query returns no row.
        """
        if self.depth is None:
            self.cursor.execute(
                self.sql.get_param_sql(), (self.DEPTH_PARAM_NAME,))
            row = self.cursor.fetchone()
            if not row:
                raise ValueError('No depth value found in database, db does not seem to have been created by this utility')
            self.depth = int(row[0])
        return self.depth

    def add_word(self, word_list):
        """Record one more occurrence of the word sequence ``word_list``.

        Updates the existing row's count when one is found, otherwise inserts
        a new row with a count of 1. ``word_list`` may be any sequence of
        ``get_depth()`` words (list, tuple, ...).

        Raises ValueError when the sequence has the wrong length.
        """
        # Copy into a list so the parameter lists below can be built by
        # concatenation regardless of the sequence type the caller passed.
        words = list(word_list)
        count = self._get_word_list_count(words)
        depth = self.get_depth()
        if count:
            self.cursor.execute(
                self.sql.update_count_for_words_sql(depth),
                [count + 1] + words)
        else:
            self.cursor.execute(
                self.sql.insert_row_for_words_sql(depth), words + [1])

    def commit(self):
        """Commit pending changes on the underlying connection."""
        self.conn.commit()

    def get_word_count(self, word_list):
        """Return a dict built from the rows selected for ``word_list``.

        Each result row contributes ``row[0] -> row[1]``; the query text comes
        from ``select_words_and_counts_sql`` and ``word_list`` supplies its
        parameters. An empty dict is returned when no rows match.
        """
        sql = self.sql.select_words_and_counts_sql(self.get_depth())
        return {row[0]: row[1] for row in self.cursor.execute(sql, word_list)}
================================================
FILE: gen.py
================================================
from parse import Parser
class Generator:
    """Produces sentences by repeatedly picking a next word from ``db``.

    name -- stored on the instance; not otherwise used by this class
    db   -- object providing ``get_depth()`` and ``get_word_count(words)``
    rnd  -- object providing ``randint(n)``
    """

    def __init__(self, name, db, rnd):
        self.name = name
        self.db = db
        self.rnd = rnd

    def _get_next_word(self, word_list):
        """Pick a word following ``word_list``, weighted by its stored count.

        The candidates and their counts come from ``db.get_word_count``. A
        number is drawn with ``rnd.randint(total)`` and the first candidate
        whose cumulative count reaches that number is returned.

        Raises AssertionError when no candidate is selected (for example when
        there are no candidates, or the drawn number exceeds the total).
        """
        candidate_words = self.db.get_word_count(word_list)
        total_next_words = sum(candidate_words.values())
        # NOTE(review): the `<=` test below presumably expects randint(n) to
        # return an integer in 1..n inclusive -- confirm against rnd.Rnd.
        i = self.rnd.randint(total_next_words)
        running_total = 0
        for word, count in candidate_words.items():
            running_total += count
            if i <= running_total:
                return word
        # Raised explicitly rather than via `assert False`: assert statements
        # are removed under `python -O`, which would let this method return
        # None and leave generate() appending None to the sentence.
        raise AssertionError(
            'No next word selected for %r (picked %r, total count %r)'
            % (word_list, i, total_next_words))

    def generate(self, word_separator):
        """Generate one sentence and return it joined by ``word_separator``.

        The sentence is seeded with ``depth - 1`` start symbols and extended
        one word at a time until its last ``depth - 1`` entries are all end
        symbols. The leading start symbols and trailing end symbols are
        stripped from the returned text.
        """
        depth = self.db.get_depth()
        # NOTE(review): with a depth of 1 both marker lists are empty, so the
        # loop exits immediately and an empty string is returned.
        sentence = [Parser.SENTENCE_START_SYMBOL] * (depth - 1)
        end_symbol = [Parser.SENTENCE_END_SYMBOL] * (depth - 1)

        while True:
            # The last depth-1 words form the lookup key for the next word.
            tail = sentence[(-depth + 1):]
            if tail == end_symbol:
                break
            sentence.append(self._get_next_word(tail))

        # Drop the depth-1 start markers, then the depth-1 end markers.
        return word_separator.join(sentence[depth - 1:][:1 - depth])
================================================
FILE: markov.py
================================================
from db import Db
from gen import Generator
from parse import Parser
from sql import Sql
from rnd import Rnd
import sys
import sqlite3
import codecs
SENTENCE_SEPARATOR = '.'
WORD_SEPARATOR = ' '
if __name__ == '__main__':
args = sys.argv
usage = 'Usage: %s (parse