Updating code

AllenDowney · AllenDowney · commit 86e1a2eceee0 · 2015-07-03T14:44:22.000-04:00
diff --git a/code/Map.py b/code/Map.py
@@ -1,12 +1,17 @@
-"""This module contains code from
-Think Python by Allen B. Downey
-http://thinkpython.com
+"""This module contains a code example related to
 
-Copyright 2012 Allen B. Downey
-License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
+Think Python, 2nd Edition
+by Allen Downey
+http://thinkpython2.com
 
+Copyright 2015 Allen Downey
+
+License: http://creativecommons.org/licenses/by/4.0/
 """
 
+from __future__ import print_function, division
+
+
 class LinearMap(object):
     """A simple implementation of a map using a list of tuples
     where each tuple is a key-value pair."""
@@ -92,7 +97,7 @@ def resize(self):
         self.maps = new_map
 
 
-def main(script):
+def main():
     import string
 
     m = HashMap()
@@ -102,9 +107,8 @@ def main(script):
         m.add(k, v)
 
     for k in range(len(s)):
-        print k, m.get(k)
+        print(k, m.get(k))
 
 
 if __name__ == '__main__':
-    import sys
-    main(*sys.argv)
+    main()
diff --git a/code/Markov.py b/code/Markov.py
@@ -0,0 +1,102 @@
+"""This module contains a code example related to
+
+Think Python, 2nd Edition
+by Allen Downey
+http://thinkpython2.com
+
+Copyright 2015 Allen Downey
+
+License: http://creativecommons.org/licenses/by/4.0/
+"""
+
+from __future__ import print_function, division
+
+
+import sys
+import random
+
+from markov import skip_gutenberg_header, shift
+
+
+class Markov(object):
+    """Encapsulates the statistical summary of a text."""
+
+    def __init__(self):
+        self.suffix_map = {}        # map from prefixes to a list of suffixes
+        self.prefix = ()            # current tuple of words
+
+    def process_file(self, filename, order=2):
+        """Reads a file and performs Markov analysis.
+
+        filename: string
+        order: integer number of words in the prefix
+
+        Returns: None; the results accumulate in self.suffix_map.
+        """
+        # the with statement closes the file even if processing fails
+        with open(filename) as fp:
+            skip_gutenberg_header(fp)
+            for line in fp:
+                for word in line.rstrip().split():
+                    self.process_word(word, order)
+
+    def process_word(self, word, order=2):
+        """Processes each word.
+
+        word: string
+        order: integer number of words in the prefix
+
+        During the first few iterations, all we do is store up the words;
+        after that we start adding entries to the dictionary.
+        """
+        if len(self.prefix) < order:
+            # still collecting the first prefix; nothing to record yet
+            self.prefix += (word,)
+            return
+
+        # record word as a suffix of the current prefix, making a new
+        # entry if there is no entry for this prefix
+        self.suffix_map.setdefault(self.prefix, []).append(word)
+
+        # slide the prefix window forward by one word
+        self.prefix = shift(self.prefix, word)
+
+    def random_text(self, n=100):
+        """Generates random words from the analyzed text.
+
+        Starts with a random prefix from the dictionary.
+
+        n: number of words to generate
+        """
+        # list() is needed because dict views can't be indexed in Python 3
+        prefixes = list(self.suffix_map)
+        # choose a random prefix (not weighted by frequency)
+        start = random.choice(prefixes)
+
+        for _ in range(n):
+            suffixes = self.suffix_map.get(start)
+            if suffixes is None:
+                # reached the end of the original text; start again
+                start = random.choice(prefixes)
+                suffixes = self.suffix_map[start]
+
+            word = random.choice(suffixes)
+            print(word, end=' ')
+            start = shift(start, word)
+
+
+def main(script, filename='emma.txt', n=100, order=2):
+    """Prints n random words based on filename; called with *sys.argv."""
+    try:
+        n, order = int(n), int(order)
+    except ValueError:
+        print('Usage: %s filename [# of words] [prefix length]' % script)
+    else:
+        markov = Markov()
+        markov.process_file(filename, order)
+        markov.random_text(n)
+
+
+if __name__ == '__main__':
+    main(*sys.argv)
+
diff --git a/code/markov.py b/code/markov.py
@@ -1,12 +1,16 @@
-"""This module contains code from
-Think Python by Allen B. Downey
-http://thinkpython.com
+"""This module contains a code example related to
 
-Copyright 2012 Allen B. Downey
-License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html
+Think Python, 2nd Edition
+by Allen Downey
+http://thinkpython2.com
 
+Copyright 2015 Allen Downey
+
+License: http://creativecommons.org/licenses/by/4.0/
 """
 
+from __future__ import print_function, division
+
 import sys
 import string
 import random
@@ -22,7 +26,7 @@ def process_file(filename, order=2):
     filename: string
     order: integer number of words in the prefix
 
-    Returns: map from prefix to list of possible suffixes.
+    returns: map from prefix to list of possible suffixes.
     """
     fp = open(filename)
     skip_gutenberg_header(fp)
@@ -85,7 +89,7 @@ def random_text(n=100):
 
         # choose a random suffix
         word = random.choice(suffixes)
-        print word,
+        print(word, end=' ')
         start = shift(start, word)
 
 
@@ -100,15 +104,16 @@ def shift(t, word):
     return t[1:] + (word,)
 
 
-def main(name, filename='', n=100, order=2, *args):
+def main(script, filename='emma.txt', n=100, order=2):
     try:
         n = int(n)
         order = int(order)
-    except:
-        print 'Usage: randomtext.py filename [# of words] [prefix length]'
+    except ValueError:
+        print('Usage: %s filename [# of words] [prefix length]' % script)
     else: 
         process_file(filename, order)
         random_text(n)
+        print()
 
 
 if __name__ == '__main__':