forked from AllenDowney/ThinkPython2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyze_book3.py
57 lines (37 loc) · 1.24 KB
/
analyze_book3.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
"""This module contains a code example related to
Think Python, 2nd Edition
by Allen Downey
http://thinkpython2.com
Copyright 2015 Allen Downey
License: http://creativecommons.org/licenses/by/4.0/
"""
from __future__ import print_function, division
import random
from bisect import bisect
from analyze_book1 import process_file
def random_word(hist):
"""Chooses a random word from a histogram.
The probability of each word is proportional to its frequency.
hist: map from word to frequency
"""
# TODO: This could be made faster by computing the cumulative
# frequencies once and reusing them.
words = []
freqs = []
total_freq = 0
# make a list of words and a list of cumulative frequencies
for word, freq in hist.items():
total_freq += freq
words.append(word)
freqs.append(total_freq)
# choose a random value and find its location in the cumulative list
x = random.randint(0, total_freq-1)
index = bisect(freqs, x)
return words[index]
def main():
hist = process_file('158-0.txt', skip_header=True)
print("\n\nHere are some random words from the book")
for i in range(100):
print(random_word(hist), end=' ')
if __name__ == '__main__':
main()