Skip to content

Commit

Permalink
initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
dabeaz committed Apr 8, 2013
1 parent c706ea1 commit 24ff777
Show file tree
Hide file tree
Showing 364 changed files with 25,786 additions and 0 deletions.
25 changes: 25 additions & 0 deletions src/1/calculating_with_dictionaries/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# example.py
#
# Example of calculating with dictionaries

# Share prices keyed by stock symbol.
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75,
}

# Find min and max price.
# zip() pairs every value with its key, producing (price, name) tuples.
# Tuples compare element by element, so min()/max() rank on the price first
# and only fall back to the name when two prices are equal.
min_price = min(zip(prices.values(), prices.keys()))
max_price = max(zip(prices.values(), prices.keys()))

print('min price:', min_price)
print('max price:', max_price)

# The same (price, name) pairing gives a listing ordered by price.
print('sorted prices:')
prices_sorted = sorted(zip(prices.values(), prices.keys()))
for price, name in prices_sorted:
    print(' ', name, price)


25 changes: 25 additions & 0 deletions src/1/determine_the_top_n_items_occurring_in_a_list/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# example.py
#
# Determine the most common words in a list

# Sample text, one word per list element.
words = [
    'look', 'into', 'my', 'eyes',
    'look', 'into', 'my', 'eyes',
    'the', 'eyes', 'the', 'eyes', 'the', 'eyes',
    'not', 'around', 'the', 'eyes',
    "don't", 'look', 'around', 'the', 'eyes',
    'look', 'into', 'my', 'eyes',
    "you're", 'under',
]

from collections import Counter

# Start from an empty tally and feed the words in; the three most frequent
# words are then read back with most_common().
word_counts = Counter()
word_counts.update(words)
top_three = word_counts.most_common(3)
print(top_three)
# outputs [('eyes', 8), ('the', 5), ('look', 4)]

# Example of merging in more words: update() adds to the existing counts
# rather than replacing them.
morewords = ['why', 'are', 'you', 'not', 'looking', 'in', 'my', 'eyes']
word_counts.update(morewords)
print(word_counts.most_common(3))



23 changes: 23 additions & 0 deletions src/1/extracting_a_subset_of_a_dictionary/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# example of extracting a subset from a dictionary
from pprint import pprint

# Share prices keyed by ticker symbol.
prices = {
    'ACME': 45.23,
    'AAPL': 612.78,
    'IBM': 205.55,
    'HPQ': 37.20,
    'FB': 10.75,
}

# Subset 1: keep only the entries whose price is above 200.
p1 = {symbol: price for symbol, price in prices.items() if price > 200}

print("All prices over 200")
pprint(p1)

# Subset 2: keep only the symbols that appear in a set of tech names.
# Names in the set that have no price entry ('MSFT') are simply absent
# from the result.
tech_names = {'AAPL', 'IBM', 'HPQ', 'MSFT'}
p2 = {symbol: prices[symbol] for symbol in prices if symbol in tech_names}

print("All techs")
pprint(p2)
43 changes: 43 additions & 0 deletions src/1/filtering_list_elements/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Examples of different ways to filter data

mylist = [1, 4, -5, 10, -7, 2, 3, -1]

# Keep only the strictly positive values.
pos = [value for value in mylist if value > 0]
print(pos)

# Keep only the strictly negative values.
neg = [value for value in mylist if value < 0]
print(neg)

# Replace every negative value with 0 (the list keeps its length).
neg_clip = [max(value, 0) for value in mylist]
print(neg_clip)

# Replace every positive value with 0 (the list keeps its length).
pos_clip = [min(value, 0) for value in mylist]
print(pos_clip)

# Compressing example: select from one sequence using a parallel
# sequence of booleans derived from another.

addresses = [
    '5412 N CLARK',
    '5148 N CLARK',
    '5800 E 58TH',
    '2122 N CLARK',
    '5645 N RAVENSWOOD',
    '1060 W ADDISON',
    '4801 N BROADWAY',
    '1039 W GRANVILLE',
]

counts = [0, 3, 10, 4, 1, 7, 6, 1]

from itertools import compress

# One True/False flag per address: True where the matching count exceeds 5.
more5 = [tally > 5 for tally in counts]
a = [*compress(addresses, more5)]
print(a)



20 changes: 20 additions & 0 deletions src/1/finding_out_what_two_dictionaries_have_in_common/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# example.py
#
# Find out what two dictionaries have in common

# Two small dictionaries that share some keys and one (key, value) pair.
a = dict(x=1, y=2, z=3)

b = dict(w=10, x=11, y=2)

# Key views and item views support set operations directly, so no
# conversion to set() is needed before intersecting or subtracting.
print('Common keys:', a.keys() & b.keys())
print('Keys in a not in b:', a.keys() - b.keys())
print('(key,value) pairs in common:', a.items() & b.items())

20 changes: 20 additions & 0 deletions src/1/finding_the_largest_or_smallest_n_items/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# example.py
#
# Example of using heapq to find the N smallest or largest items

import heapq

# Each holding is a dict with 'name', 'shares' and 'price' entries.
portfolio = [
    dict(name='IBM', shares=100, price=91.1),
    dict(name='AAPL', shares=50, price=543.22),
    dict(name='FB', shares=200, price=21.09),
    dict(name='HPQ', shares=35, price=31.75),
    dict(name='YHOO', shares=45, price=16.35),
    dict(name='ACME', shares=75, price=115.65),
]

# Rank the holdings by price and take three from each end.
cheap = heapq.nsmallest(3, portfolio, key=lambda holding: holding['price'])
expensive = heapq.nlargest(3, portfolio, key=lambda holding: holding['price'])

print(cheap)
print(expensive)
33 changes: 33 additions & 0 deletions src/1/grouping-records-together-based-on-a-field/grouping.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Records to be grouped by their 'date' field.
rows = [
    {'address': '5412 N CLARK', 'date': '07/01/2012'},
    {'address': '5148 N CLARK', 'date': '07/04/2012'},
    {'address': '5800 E 58TH', 'date': '07/02/2012'},
    {'address': '2122 N CLARK', 'date': '07/03/2012'},
    {'address': '5645 N RAVENSWOOD', 'date': '07/02/2012'},
    {'address': '1060 W ADDISON', 'date': '07/02/2012'},
    {'address': '4801 N BROADWAY', 'date': '07/01/2012'},
    {'address': '1039 W GRANVILLE', 'date': '07/04/2012'},
]

from itertools import groupby

# groupby() only merges *consecutive* records that share a key, so the rows
# are sorted (in place) on the same key before grouping.
rows.sort(key=lambda r: r['date'])
for date, items in groupby(rows, key=lambda r: r['date']):
    print(date)
    for i in items:
        print(' ', i)

# Example of building a multidict: one list of rows per date, which allows
# direct lookup by date without any sorting.
from collections import defaultdict
rows_by_date = defaultdict(list)
for row in rows:
    rows_by_date[row['date']].append(row)

for r in rows_by_date['07/01/2012']:
    print(r)






35 changes: 35 additions & 0 deletions src/1/implementing_a_priority_queue/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# example.py
#
# Example of a priority queue

import heapq

class PriorityQueue:
    """Queue that always pops the item with the highest priority.

    Items pushed with the same priority are popped in the order in which
    they were pushed.
    """

    def __init__(self):
        self._queue = []   # heap of (-priority, index, item) entries
        self._index = 0    # running push counter, used as a tie-breaker

    def push(self, item, priority):
        """Add *item* to the queue with the given *priority*."""
        # heapq keeps the smallest entry first, so the priority is negated
        # to make the largest priority pop first.  The index orders entries
        # of equal priority by insertion and, being unique, means the items
        # themselves are never compared.
        heapq.heappush(self._queue, (-priority, self._index, item))
        self._index += 1

    def pop(self):
        """Remove and return the highest-priority item.

        Raises IndexError if the queue is empty.
        """
        return heapq.heappop(self._queue)[-1]

# Example use
class Item:
    """Minimal named object; defines no ordering of its own."""

    def __init__(self, name):
        self.name = name

    def __repr__(self):
        return 'Item({!r})'.format(self.name)

q = PriorityQueue()
# Push four items; 'foo' and 'grok' share the lowest priority.
for name, priority in (('foo', 1), ('bar', 5), ('spam', 4), ('grok', 1)):
    q.push(Item(name), priority)

# Items come back highest priority first; the two priority-1 items come
# back in the order they were pushed.
print("Should be bar:", q.pop())
print("Should be spam:", q.pop())
print("Should be foo:", q.pop())
print("Should be grok:", q.pop())
17 changes: 17 additions & 0 deletions src/1/keeping_the_last_n_items/example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from collections import deque

def search(lines, pattern, history=5):
    """Yield each line containing *pattern* together with its preceding lines.

    Args:
        lines: iterable of strings to scan.
        pattern: substring looked for with the ``in`` operator.
        history: maximum number of preceding lines kept as context.

    Yields:
        ``(line, previous_lines)`` pairs, where ``previous_lines`` is a deque
        holding up to *history* lines seen before ``line``.

    The same deque object is yielded every time and keeps being updated as
    the scan continues, so copy it (e.g. ``list(previous_lines)``) if the
    context has to outlive the current iteration step.
    """
    previous_lines = deque(maxlen=history)
    for line in lines:
        if pattern in line:
            yield line, previous_lines
        # Appended after the check so a matching line is not part of its own
        # context; maxlen makes the deque drop its oldest entry when full.
        previous_lines.append(line)

# Example use on a file
if __name__ == '__main__':
    with open('somefile.txt') as f:
        # Print every line mentioning 'python', preceded by up to five lines
        # of context and followed by a separator.
        for line, prevlines in search(f, 'python', 5):
            for pline in prevlines:
                # Lines read from the file keep their own newline.
                print(pline, end='')
            print(line, end='')
            print('-'*20)
86 changes: 86 additions & 0 deletions src/1/keeping_the_last_n_items/somefile.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
=== Keeping the Last N Items

==== Problem

You want to keep a limited history of the last few items seen
during iteration or during some other kind of processing.

==== Solution

Keeping a limited history is a perfect use for a `collections.deque`.
For example, the following code performs a simple text match on a
sequence of lines and prints the matching line along with the previous
N lines of context when found:

[source,python]
----
from collections import deque

def search(lines, pattern, history=5):
    previous_lines = deque(maxlen=history)
    for line in lines:
        if pattern in line:
            for pline in previous_lines:
                print(pline, end='')
            print(line, end='')
            print()
        previous_lines.append(line)

# Example use on a file
if __name__ == '__main__':
    with open('somefile.txt') as f:
        search(f, 'python', 5)
----

==== Discussion

Using `deque(maxlen=N)` creates a fixed-size queue. When new items
are added and the queue is full, the oldest item is automatically
removed. For example:

[source,pycon]
----
>>> q = deque(maxlen=3)
>>> q.append(1)
>>> q.append(2)
>>> q.append(3)
>>> q
deque([1, 2, 3], maxlen=3)
>>> q.append(4)
>>> q
deque([2, 3, 4], maxlen=3)
>>> q.append(5)
>>> q
deque([3, 4, 5], maxlen=3)
----

Although you could manually perform such operations on a list (e.g.,
appending, deleting, etc.), the queue solution is far more elegant and
runs a lot faster.

More generally, a `deque` can be used whenever you need a simple queue
structure. If you don't give it a maximum size, you get an unbounded
queue that lets you append and pop items on either end. For example:

[source,pycon]
----
>>> q = deque()
>>> q.append(1)
>>> q.append(2)
>>> q.append(3)
>>> q
deque([1, 2, 3])
>>> q.appendleft(4)
>>> q
deque([4, 1, 2, 3])
>>> q.pop()
3
>>> q
deque([4, 1, 2])
>>> q.popleft()
4
----

Adding or popping items from either end of a queue has O(1)
complexity. This is unlike a list where inserting or removing
items from the front of the list is O(N).
22 changes: 22 additions & 0 deletions src/1/mapping_names_to_sequence_elements/example1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# example.py

from collections import namedtuple

# Lightweight record type giving names to the three positions of a stock row.
Stock = namedtuple('Stock', ['name', 'shares', 'price'])

def compute_cost(records):
    """Return the total cost (shares * price) summed over *records*.

    Each record is a 3-item sequence in ``(name, shares, price)`` order; it
    is unpacked into a ``Stock`` so the fields are read by name instead of
    by position.  An empty *records* gives ``0.0``.
    """
    total = 0.0
    for rec in records:
        s = Stock(*rec)
        total += s.shares * s.price
    return total

# Some Data
# Rows in (name, shares, price) order.
records = [
    ('GOOG', 100, 490.1),
    ('ACME', 100, 123.45),
    ('IBM', 50, 91.15),
]

# Report the combined cost of all rows.
print(compute_cost(records))

Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# example.py
#
# Remove duplicate entries from a sequence while keeping order

def dedupe(items):
    """Yield the items of *items* in order, skipping any already yielded.

    The items are tracked in a set, so they must be hashable.
    """
    seen = set()
    for item in items:
        if item not in seen:
            yield item
            seen.add(item)

if __name__ == '__main__':
    a = [1, 5, 2, 1, 9, 1, 5, 10]
    # Show the input, then the same values with repeats removed.
    print(a)
    print(list(dedupe(a)))
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# example2.py
#
# Remove duplicate entries from a sequence while keeping order

def dedupe(items, key=None):
    """Yield the items of *items* in order, skipping duplicates.

    Args:
        items: iterable to filter.
        key: optional one-argument function that turns an item into the
            value used to detect duplicates.  When omitted, the item itself
            is used.

    The value used for duplicate detection is stored in a set, so it must
    be hashable; the items themselves need not be when *key* is given.
    """
    seen = set()
    for item in items:
        val = item if key is None else key(item)
        if val not in seen:
            yield item
            seen.add(val)

if __name__ == '__main__':
    a = [
        {'x': 2, 'y': 3},
        {'x': 1, 'y': 4},
        {'x': 2, 'y': 3},
        {'x': 2, 'y': 3},
        {'x': 10, 'y': 15},
    ]
    print(a)
    # Dicts cannot be stored in a set, so duplicates are detected on an
    # (x, y) tuple built from each dict instead.
    print(list(dedupe(a, key=lambda d: (d['x'], d['y']))))

Loading

0 comments on commit 24ff777

Please sign in to comment.