update class 20 materials

AkhilSagar · Oct 23, 2015 · e68fa0e · e68fa0e
1 parent a1623fb
commit e68fa0e
Show file tree

Hide file tree

Showing 3 changed files with 933 additions and 72 deletions.
diff --git a/README.md b/README.md
@@ -555,7 +555,7 @@ Tuesday | Thursday
     * [Baltimore homicide data](data/homicides.txt)
     * [Regular expressions 101](https://regex101.com/#python): real-time testing of regular expressions
     * [Reference guide](code/20_regex_reference.py)
-    * Exercise
+    * [Exercise](code/20_regex_exercise.py)
 
 **Homework:**
 * Your final project is due next week!

diff --git a/code/20_regex_exercise.py b/code/20_regex_exercise.py
@@ -0,0 +1,61 @@
+'''
+EXERCISE: Regular Expressions
+'''
+
+# read the file into a list with one string per line (line endings are kept)
+# NOTE: plain 'r' is used instead of 'rU' because the 'U' flag is rejected by
+# Python 3.11+, and text mode on Python 3 already handles universal newlines
+with open('homicides.txt', mode='r') as f:
+    data = f.readlines()
+
+
+'''
+Create a list of ages
+'''
+
+import re
+
+# pull the whole "<number> year(s) old" phrase out of every row;
+# rows with no such phrase get the placeholder '0' so positions stay aligned
+ages = []
+for row in data:
+    match = re.search(r'\d+ years? old', row)
+    ages.append(match.group() if match else '0')
+
+# the number is the first whitespace-separated token of each phrase
+ages = [int(phrase.split()[0]) for phrase in ages]
+
+# average age (rows without an age contribute 0 to this figure)
+sum(ages) / float(len(ages))
+
+# sanity check: exactly one age was recorded per row of 'data'
+assert len(data) == len(ages)
+
+
+'''
+Create a list of ages (using match groups)
+'''
+
+# group 1 captures only the digits, so no string splitting is needed;
+# rows without an age are recorded as 0
+ages = []
+for row in data:
+    match = re.search(r'(\d+)( years? old)', row)
+    ages.append(int(match.group(1)) if match else 0)
+
+
+'''
+Create a list of causes
+'''
+
+# capture the text between 'Cause: ' and the next '<' (non-greedy), lowercased;
+# rows without a cause are recorded as 'unknown'
+causes = []
+for row in data:
+    match = re.search(r'Cause: (.+?)<', row)
+    causes.append(match.group(1).lower() if match else 'unknown')
+
+# count how many times each cause appears
+from collections import Counter
+Counter(causes)