Skip to content

Commit

Permalink
update class 4 code file with exercise solutions
Browse files Browse the repository at this point in the history
  • Loading branch information
justmarkham committed Aug 28, 2015
1 parent b1713df commit 68f73f1
Showing 1 changed file with 24 additions and 0 deletions.
24 changes: 24 additions & 0 deletions code/04_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,23 +64,41 @@
'''

# read drinks.csv into a DataFrame called 'drinks'
# (two equivalent spellings are shown; the second assignment simply replaces the first)
drinks = pd.read_table('drinks.csv', sep=',')
drinks = pd.read_csv('drinks.csv') # read_csv defaults to a comma separator, so 'sep' can be omitted

# print the head and the tail
# (bare expressions: the result is only displayed when run interactively)
drinks.head()
drinks.tail()

# examine the default index, data types, and shape
# (these are attributes, not methods, so there are no parentheses)
drinks.index
drinks.dtypes
drinks.shape

# print the 'beer_servings' Series
# (bracket notation and attribute notation select the same column)
drinks['beer_servings']
drinks.beer_servings

# calculate the mean 'beer_servings' for the entire dataset
# (three options, from broadest summary to the single requested number)
drinks.describe() # summarize all numeric columns
drinks.beer_servings.describe() # summarize only the 'beer_servings' Series
drinks.beer_servings.mean() # only calculate the mean

# count the number of occurrences of each 'continent' value and see if it looks correct
drinks.continent.value_counts()

# BONUS: display only the number of rows of the 'users' DataFrame
# NOTE(review): 'users' is read here before the assignment at the end of this
# section; presumably it was already created earlier in the file -- confirm
users.shape[0] # first element of the shape tuple

# BONUS: display the 3 most frequent occupations in 'users'
# (two equivalent ways to keep the first 3 entries of the counts)
users.occupation.value_counts().head(3)
users.occupation.value_counts()[:3]

# BONUS: create the 'users' DataFrame from the u.user_original file (which lacks a header row)
# Hint: read the pandas.read_table documentation
# header=None: do not treat the first line of the file as column names
# names=user_cols: supply the column names explicitly instead
# index_col='user_id': use the 'user_id' column as the index
user_cols = ['user_id', 'age', 'gender', 'occupation', 'zip_code']
users = pd.read_table('u.user_original', sep='|', header=None, names=user_cols, index_col='user_id')

'''
Filtering and Sorting
Expand All @@ -107,17 +125,23 @@
'''

# filter 'drinks' to only include European countries
# (boolean-mask indexing: only rows where the comparison is True are kept)
drinks[drinks.continent=='EU']

# filter 'drinks' to only include European countries with wine_servings > 300
# (each condition needs its own parentheses because & binds tighter than == and >)
drinks[(drinks.continent=='EU') & (drinks.wine_servings > 300)]

# calculate the mean 'beer_servings' for all of Europe
drinks[drinks.continent=='EU'].beer_servings.mean()

# determine which 10 countries have the highest total_litres_of_pure_alcohol
# (sort ascending, then take the last 10 rows)
# DataFrame.sort() was removed from pandas; sort_values() (pandas >= 0.17) is
# the replacement and likewise returns a new, sorted DataFrame
drinks.sort_values('total_litres_of_pure_alcohol').tail(10)

# BONUS: sort 'users' by 'occupation' and then by 'age' (in a single command)
# (a list of column names sorts by the first column, breaking ties with the next)
users.sort_values(['occupation', 'age'])

# BONUS: filter 'users' to only include doctors and lawyers without using a |
# Hint: read the pandas.Series.isin documentation
users[users.occupation.isin(['doctor', 'lawyer'])]

'''
Renaming, Adding, and Removing Columns
Expand Down

0 comments on commit 68f73f1

Please sign in to comment.