-
Notifications
You must be signed in to change notification settings - Fork 1
/
generation.py
85 lines (67 loc) · 3.05 KB
/
generation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import random
import re
class MarkovChains:
    """First-order Markov chain model over whitespace-separated tokens.

    Text is cut into chains (sentences) at runs of delimiter characters, and
    each chain is split into tokens on whitespace.  For every token the model
    records how often each other token directly followed it, plus how often
    the token was seen at all.  The difference between the two is the number
    of times the token ended a chain, which is used as the probability of
    ending a generated sentence at that token.

    ``delimiters`` is inserted verbatim into a regular-expression character
    class (``[...]+``), so characters that are special inside a class
    (``]``, ``^``, ``-``, ``\\``) must be escaped by the caller.
    """

    # Sentinel key for the pseudo-state that precedes the first token of a
    # chain.  Tokens are strings, so the integer 0 can never collide with one.
    START_STATE = 0

    def __init__(self, delimiters):
        """Create an empty model that splits sentences on ``delimiters``."""
        self.delimiters = delimiters  # delimits sentences
        # cardinality of each state's sample space: how many times the state
        # was observed (for START_STATE: how many non-empty chains were added)
        self.cardinalities = {MarkovChains.START_STATE: 0}
        # state occurrences: occurrences[prev][next] == times next followed prev
        self.occurrences = {MarkovChains.START_STATE: {}}

    def _add_chain(self, chain):
        """Record the transitions of a single chain (one sentence).

        A chain that contains no tokens leaves the model untouched.
        """
        states = chain.split()
        if not states:
            return

        self.cardinalities[MarkovChains.START_STATE] += 1
        prev_state = MarkovChains.START_STATE
        for state in states:
            successors = self.occurrences.setdefault(prev_state, {})
            successors[state] = successors.get(state, 0) + 1
            # Counted for every token, including the last one of the chain;
            # the last token gets no outgoing transition, which is what makes
            # the end-of-chain share in _get_random_state non-zero.
            self.cardinalities[state] = self.cardinalities.get(state, 0) + 1
            prev_state = state

    def add_text(self, text):
        """Split ``text`` into chains at the delimiters and learn each one."""
        if self.delimiters:
            chains = re.split(r"[" + self.delimiters + "]+", text)
        else:
            # An empty character class is not a valid pattern; with nothing
            # to split on, the whole text is a single chain.
            chains = [text]
        for chain in chains:
            self._add_chain(chain)

    def generate_text(self, sentenceCount):
        """Return ``sentenceCount`` generated sentences as one string.

        Each sentence is its tokens joined by single spaces, followed by
        ``".\\n"``.  A model with no data yields empty sentences (``".\\n"``).
        """
        return "".join(
            " ".join(self._generate_text_sentence()) + ".\n"
            for _ in range(sentenceCount)
        )

    def _generate_text_sentence(self):
        """Random-walk from the start state and return the visited tokens."""
        sentence = []
        state = self._get_random_state(MarkovChains.START_STATE)
        while state is not None:
            sentence.append(state)
            state = self._get_random_state(state)
        return sentence

    def _get_random_state(self, state):
        """Pick a successor of ``state`` at random, or ``None`` to stop.

        Each successor is chosen with probability proportional to the number
        of times it followed ``state``.  The remaining share of the state's
        cardinality (the times ``state`` ended a chain) is the probability of
        returning ``None``.  Without that share, a token that both ends a
        chain and appears mid-chain elsewhere could keep a walk going forever.
        """
        successors = self.occurrences.get(state, {})
        transitions = sum(successors.values())
        # The cardinality is never expected to be below the transition count;
        # max() only guards against externally modified tables.
        total = max(self.cardinalities.get(state, 0), transitions)
        if total <= 0:
            return None

        pick = random.randint(0, total - 1)
        for candidate, count in successors.items():
            if pick < count:
                return candidate
            pick -= count
        return None  # pick landed in the end-of-chain share

    def print_chains(self):
        """Print every transition as ``tail -> head (count/cardinality)``."""
        for tail, pairs in self.occurrences.items():
            for head, occurrence in pairs.items():
                if tail == MarkovChains.START_STATE:
                    print("[Start] -> {0} ({1}/{2})".format(head, occurrence, self.cardinalities[tail]))
                else:
                    print("{0} -> {1} ({2}/{3})".format(tail, head, occurrence, self.cardinalities[tail]))