Skip to content

Commit

Permalink
change code from jupyter notebook to .py file
Browse files Browse the repository at this point in the history
  • Loading branch information
wanghezhi authored and wanghezhi committed Apr 26, 2017
1 parent 1526f4b commit bc64004
Show file tree
Hide file tree
Showing 12 changed files with 135,041 additions and 160 deletions.
Binary file modified .DS_Store
Binary file not shown.
35,533 changes: 35,533 additions & 0 deletions output/weather2010-2011.csv

Large diffs are not rendered by default.

37,415 changes: 37,415 additions & 0 deletions output/weather2012-2013.csv

Large diffs are not rendered by default.

38,362 changes: 38,362 additions & 0 deletions output/weather2014-2015.csv

Large diffs are not rendered by default.

23,607 changes: 23,607 additions & 0 deletions output/weather2016-2017.csv

Large diffs are not rendered by default.

Binary file modified plots/cor1.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified plots/cor2.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added src/.DS_Store
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@
"fig.tight_layout()\n",
"\n",
"plt.title('Correalation between Average Temperature per Month and Number of days with extremely high complaints per Month')\n",
"plt.savefig('../plots/cor2.png')"
"plt.savefig('../plots/cor1.png')"
]
},
{
Expand Down
54 changes: 54 additions & 0 deletions src/col1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Plot daily average temperature against daily complaint counts.

Reads the per-day complaint counts and the weather CSV files from
``../output``, saves a twin-axis line chart to ``../plots/cor1.png`` and
prints the Pearson correlation between the two daily series.
"""
import matplotlib.pyplot as plt
from datetime import datetime
import numpy as np
import pandas as pd
from scipy.stats import pearsonr

# Each non-empty line of count_day.out is "<YYYY-MM-DD>\t<count>".
xx = []
yy = []
with open('../output/count_day.out', 'r') as f:
    for line in f:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        fields = line.split('\t')
        xx.append(fields[0])
        yy.append(int(fields[1]))

# Not used further down; kept because strptime raises ValueError on a
# malformed date, which makes bad input fail early.
date = [datetime.strptime(x, '%Y-%m-%d') for x in xx]

# Drop every 2009 day, keeping each date paired with its own count.
# (Previously the counts were taken as the last 2651 entries, which only
# lined up if the file held exactly 2651 non-2009 rows after the 2009 ones.)
kept = [(day, count) for day, count in zip(xx, yy)
        if day.split('-')[0] != '2009']
xx1 = [day for day, _ in kept]
yy1 = [count for _, count in kept]

weather_files = [
    '../output/weather2010-2011.csv',
    '../output/weather2012-2013.csv',
    '../output/weather2014-2015.csv',
    '../output/weather2016-2017.csv',
]
weather = pd.concat([pd.read_csv(path) for path in weather_files])

# Mean TEMP per YEARMODA value; groupby returns the groups in sorted key order.
# NOTE(review): the temperatures are attached to the complaint dates purely
# by position, so this presumably relies on the weather files covering exactly
# the same days, in the same order, as xx1 - confirm. pandas raises ValueError
# below if the two lengths differ.
daily_temp = np.asarray(weather.groupby(['YEARMODA'])['TEMP'].mean())

# 'Temperatue' (sic) is kept as the column key.
df = pd.DataFrame(
    {'Temperatue': daily_temp, 'Complaints': yy1},
    index=xx1,
    columns=['Temperatue', 'Complaints'],
)

fig, ax1 = plt.subplots(figsize=(18, 12))
# 1-based running day index, sized from the data instead of a fixed 2651.
t = np.arange(1, len(df) + 1)
ax1.plot(t, df['Temperatue'], 'b', label='Average Temperature per Day')
ax1.set_xlabel('Date Index')
ax1.set_ylabel('Average Temperature per Day', color='b')
ax1.tick_params('y', colors='b')

# Second y-axis sharing the same x-axis for the complaint counts.
ax2 = ax1.twinx()
ax2.plot(t, df['Complaints'], 'g')
ax2.set_ylabel('Number of Complaints per Day', color='g')
ax2.tick_params('y', colors='g')

# The title now describes this per-day figure, and is set before
# tight_layout() so the layout pass can make room for it.
plt.title('Correlation between Average Temperature per Day and Number of Complaints per Day')
fig.tight_layout()
plt.savefig('../plots/cor1.png')

print(pearsonr(df['Temperatue'], df['Complaints']))
69 changes: 69 additions & 0 deletions src/col2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
"""Plot monthly average temperature against extreme-complaint day counts.

Reads the per-day complaint counts and the weather CSV files from
``../output``. For each calendar month it computes the mean temperature and
the number of days that rank among the 100 highest complaint counts, saves
a twin-axis line chart to ``../plots/cor2.png`` and prints the Pearson
correlation between the two 12-element series.
"""
import matplotlib.pyplot as plt
from collections import Counter
from datetime import datetime
import numpy as np
import pandas as pd
from scipy.stats import pearsonr

# Each non-empty line of count_day.out is "<YYYY-MM-DD>\t<count>".
xx = []
yy = []
with open('../output/count_day.out', 'r') as f:
    for line in f:
        if not line.strip():
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            continue
        fields = line.split('\t')
        xx.append(fields[0])
        yy.append(int(fields[1]))

# Not used further down; kept because strptime raises ValueError on a
# malformed date, which makes bad input fail early.
date = [datetime.strptime(x, '%Y-%m-%d') for x in xx]

# Dates outside 2009; the temperature series is attached to these.
xx1 = [day for day in xx if day.split('-')[0] != '2009']

# The 100 days with the highest complaint counts. sorted() is stable, so
# ties keep their file order.
# NOTE(review): this ranking is taken over all days, 2009 included, while
# the temperature averages below exclude 2009 - confirm that is intended.
ranked = sorted(zip(xx, yy), key=lambda pair: pair[1], reverse=True)[:100]
ld = [day for day, _ in ranked]

# Number of those top-100 days falling in each calendar month (Jan..Dec).
month_hits = Counter(int(day.split('-')[1]) for day in ld)
p1 = [month_hits[month] for month in range(1, 13)]

weather_files = [
    '../output/weather2010-2011.csv',
    '../output/weather2012-2013.csv',
    '../output/weather2014-2015.csv',
    '../output/weather2016-2017.csv',
]
weather = pd.concat([pd.read_csv(path) for path in weather_files])

# Mean TEMP per YEARMODA value; groupby returns the groups in sorted key order.
# NOTE(review): the temperatures are attached to the complaint dates purely
# by position, so this presumably relies on the weather files covering exactly
# the same days, in the same order, as xx1 - confirm. pandas raises ValueError
# below if the two lengths differ.
daily_temp = np.asarray(weather.groupby(['YEARMODA'])['TEMP'].mean())

# Key each daily temperature by its integer month. Using int keys makes the
# grouped result independent of whether months are written "1" or "01"; the
# previous hard-coded relabelling [1, 10, 11, 12, 2, ...] was only right for
# non-padded month strings.
months = [int(day.split('-')[1]) for day in xx1]
df1 = pd.Series(daily_temp, index=months).groupby(level=0).mean()

# .loc replaces the removed .ix indexer; it raises KeyError if a month has
# no data at all.
tem = [df1.loc[month] for month in range(1, 13)]

fig, ax1 = plt.subplots(figsize=(12, 8))
t = np.arange(1, 13)
ax1.plot(t, tem, 'b', label='Average Temperature per Month')
ax1.set_xlabel('Month')
ax1.set_ylabel('Average Temperature per Day', size='xx-large', color='b')
ax1.tick_params('y', colors='b')

# Second y-axis sharing the same x-axis for the extreme-day counts.
ax2 = ax1.twinx()
ax2.plot(t, p1, 'g')
ax2.set_ylabel('Number of days with extremely high complaints per Month', size='xx-large', color='g')
ax2.tick_params('y', colors='g')

# Title is set before tight_layout() so the layout pass can make room for it.
plt.title('Correalation between Average Temperature per Month and Number of days with extremely high complaints per Month')
fig.tight_layout()
plt.savefig('../plots/cor2.png')

print(pearsonr(tem, p1))

This file was deleted.

0 comments on commit bc64004

Please sign in to comment.