Commit

Merge pull request briandalessandro#34 from leora-dot/master
just little things I noticed
briandalessandro authored Dec 20, 2020
2 parents bd5509f + fa26ab8 commit c0cd5a6
Showing 6 changed files with 34 additions and 21 deletions.
4 changes: 3 additions & 1 deletion ipython/python35/Lecture_Binning_NonLinear_3.ipynb
@@ -74,7 +74,7 @@
" dummies = pd.get_dummies(d['g'], prefix='bin')\n",
" d_m = pd.merge(d, dummies, left_index=True, right_index=True, how='inner')\n",
" del d_m['g'] #we don't need this\n",
" del d_m['bin_0'] #we don't this either\n",
" del d_m['bin_0'] #we don't need this either\n",
" return d_m\n",
" \n",
"\n"
@@ -282,6 +282,8 @@
"d50=makeBin(dat, 20); g50=getBinName(d50.columns.values, 'bin')\n",
"PlotLinBin(d50[g50][:sp], d50['y'][:sp],d50[g50][sp:], d50['y'][sp:], 3, 'Bins=50', d50['x'][:sp], d50['x'][sp:])\n",
"\n",
"plt.tight_layout()\n",
"\n",
"plt.show()"
]
},
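
The changes above fix a comment typo and add a plt.tight_layout() call so the labels in the stacked bin-comparison subplots no longer overlap. For context, a minimal sketch of the dummy-binning idiom that the edited makeBin helper relies on; the toy data and the column names 'x', 'y', 'g' are assumptions for illustration, not the notebook's exact dataset:

import numpy as np
import pandas as pd

# Toy data standing in for the notebook's DataFrame
d = pd.DataFrame({'x': np.random.uniform(-1, 1, 200)})
d['y'] = np.sin(3 * d['x']) + np.random.normal(0, 0.2, 200)

d['g'] = pd.cut(d['x'], bins=20, labels=False)      # bin membership as an integer label
dummies = pd.get_dummies(d['g'], prefix='bin')      # one indicator column per bin
d_m = pd.merge(d, dummies, left_index=True, right_index=True, how='inner')
del d_m['g']        # the raw bin label is no longer needed
del d_m['bin_0']    # drop one bin as the reference category to avoid collinearity
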
14 changes: 10 additions & 4 deletions ipython/python35/Lecture_Clustering_3.ipynb
@@ -231,20 +231,26 @@
"source": [
"from matplotlib import pyplot as plt\n",
"\n",
"#Get correlation and 0 out the diagonal (for plotting purposes)\n",
"#Get correlation\n",
"c_mat = dpro.corr()\n",
"for i in range(c_mat.shape[0]):\n",
" c_mat.iloc[i,i] = 0\n",
"\n",
"#Mask diagnol & duplicates (for plotting purposes)\n",
"plot_data = c_mat.values\n",
"mask = np.triu(np.ones_like(c_mat, dtype=bool))\n",
"plot_data = np.ma.masked_where(np.asarray(mask), plot_data)\n",
"\n",
"fig, ax = plt.subplots()\n",
"heatmap = ax.pcolor(c_mat, cmap=plt.cm.RdBu)\n",
"ax.pcolormesh(plot_data, cmap=plt.cm.RdBu)\n",
"\n",
"#Set the tick labels and center them\n",
"ax.set_xticks(np.arange(c_mat.shape[0])+0.5, minor=False)\n",
"ax.set_yticks(np.arange(c_mat.shape[1])+0.5, minor=False)\n",
"ax.set_xticklabels(c_mat.index.values, minor=False)\n",
"ax.set_yticklabels(c_mat.index.values, minor=False)\n",
"\n",
"plt.title(\"Category Correlation\")\n",
"plt.show()\n",
"\n",
"c_mat"
]
},
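
The rewritten cell above stops zeroing out the correlation matrix's diagonal in place and instead masks the diagonal and the duplicate upper-triangle cells before plotting with pcolormesh. A self-contained sketch of that idiom, using a small random DataFrame as a stand-in for the notebook's dpro (an assumption for illustration only):

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

dpro = pd.DataFrame(np.random.rand(100, 5), columns=list('abcde'))

c_mat = dpro.corr()
mask = np.triu(np.ones_like(c_mat, dtype=bool))       # True on and above the diagonal
plot_data = np.ma.masked_where(mask, c_mat.values)    # hide the diagonal and duplicate cells

fig, ax = plt.subplots()
ax.pcolormesh(plot_data, cmap=plt.cm.RdBu)            # masked cells are left blank
ax.set_xticks(np.arange(c_mat.shape[0]) + 0.5, minor=False)
ax.set_yticks(np.arange(c_mat.shape[1]) + 0.5, minor=False)
ax.set_xticklabels(c_mat.index.values, minor=False)
ax.set_yticklabels(c_mat.index.values, minor=False)
plt.title("Category Correlation")
plt.show()
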
21 changes: 13 additions & 8 deletions ipython/python35/Lecture_MultiArmedBandit.ipynb
@@ -189,7 +189,7 @@
" #Add a small tie breaker\n",
" probs = [p + np.random.random() / 100000 for p in probs]\n",
" \n",
" best_i = _chooseBestPolicy(probs)\n",
" best_i = self._chooseBestPolicy(probs)\n",
" \n",
" if np.random.random() > epsilon:\n",
" return best_i\n",
@@ -233,7 +233,7 @@
" \n",
" policy = self._thompsonSamp()\n",
" \n",
" else if self.strategy = 'UCB':\n",
" elif self.strategy == 'UCB':\n",
" policy = self._ucbSamp()\n",
" \n",
" else:\n",
@@ -306,31 +306,36 @@
" return np.random.random()\n",
"\n",
"def plotMabSim(mabsim, colors = None):\n",
"\n",
" \n",
" res = pd.DataFrame(mabsim.results_snapshot)\n",
" policies = list(set(res['policy'].values))\n",
"\n",
" if colors == None:\n",
" colors = [(r(), r(), r()) for i in policies]\n",
" \n",
" fig = plt.figure(figsize = (15, 8))\n",
" plt.title('MAB Sequences Over Time')\n",
" plt.suptitle(\"MAB Sequences Over Time\")\n",
"\n",
" ax1 = fig.add_subplot(211)\n",
" ax1 = plt.subplot(2, 1, 1)\n",
" for i in policies:\n",
" res_i = res[(res.policy == i)]\n",
" plt.plot(res_i['round'], res_i.betaProbs, color = colors[i], markersize = 4)\n",
" ax1.set_ylim([0.05, 0.55])\n",
" plt.plot(res_i['round'], mabsim.p_truths[i] * np.ones(len(res_i['round'])), 'g--', markersize = 2)\n",
" \n",
" ax1.set_ylim([0.05, 0.55])\n",
" ax1.set_xlabel('Round')\n",
" \n",
" ax2 = fig.add_subplot(212)\n",
" ax2 = plt.subplot(2, 1, 2)\n",
"\n",
" for i in policies:\n",
" res_i = res[(res.policy == i)]\n",
" plt.plot(res_i['round'], res_i.draws, color = colors[i])\n",
" \n",
" ax2.set_xlabel('Round')\n",
" ax2.set_ylabel('Number Draws')\n",
" ax1.set_ylabel('MAP Prob Estimate')\n"
" ax1.set_ylabel('MAP Prob Estimate')\n",
" \n",
" plt.subplots_adjust(top=0.95)"
]
},
{
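
Two of the fixes above are a missing self. on the _chooseBestPolicy call and an invalid "else if ... =" that becomes "elif ... ==". A hedged, self-contained sketch of how those corrected pieces fit together in an epsilon-greedy / strategy-dispatch bandit class; the class name, attributes, and the Thompson/UCB helpers here are illustrative assumptions, not the notebook's exact implementation:

import numpy as np

class MabSketch:
    def __init__(self, n_arms, strategy='epsilon'):
        self.strategy = strategy
        self.successes = np.ones(n_arms)   # Beta prior pseudo-counts
        self.failures = np.ones(n_arms)

    def _chooseBestPolicy(self, probs):
        return int(np.argmax(probs))

    def _thompsonSamp(self):
        # one draw per arm from its Beta posterior, keep the best draw
        return int(np.argmax(np.random.beta(self.successes, self.failures)))

    def _ucbSamp(self):
        n = self.successes + self.failures
        bonus = np.sqrt(2 * np.log(n.sum()) / n)
        return int(np.argmax(self.successes / n + bonus))

    def _epsilonGreedy(self, probs, epsilon=0.1):
        probs = [p + np.random.random() / 100000 for p in probs]  # small tie breaker
        best_i = self._chooseBestPolicy(probs)    # needs self. -- the bug the commit fixes
        if np.random.random() > epsilon:
            return best_i
        return np.random.randint(len(probs))

    def choosePolicy(self, probs, epsilon=0.1):
        if self.strategy == 'Thompson':
            policy = self._thompsonSamp()
        elif self.strategy == 'UCB':              # 'elif' and '==', not 'else if' and '='
            policy = self._ucbSamp()
        else:
            policy = self._epsilonGreedy(probs, epsilon)
        return policy

mab = MabSketch(3, strategy='UCB')
print(mab.choosePolicy([0.1, 0.2, 0.3]))
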
2 changes: 1 addition & 1 deletion ipython/python35/Lecture_PandasIntro_3.ipynb
@@ -172,7 +172,7 @@
}
],
"source": [
"#To retrieve a row, you can index it like a list, or use the actual row index name using the .ix method\n",
"#To retrieve a row, you can index it like a list, or use the actual row index name using the .iloc method\n",
"frame[1:2], frame.iloc[1]"
]
},
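
The only change above is in the comment: the long-deprecated .ix accessor is replaced by .iloc in the description of row retrieval. A tiny sketch of the options that comment refers to; the frame contents here are made up for illustration:

import pandas as pd

frame = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['r0', 'r1', 'r2'])

frame[1:2]       # list-style slicing: a one-row DataFrame
frame.iloc[1]    # positional lookup: the same row as a Series
frame.loc['r1']  # label-based lookup, the modern counterpart to the removed .ix
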
6 changes: 3 additions & 3 deletions ipython/python35/Lecture_Regularization_3.ipynb
@@ -290,10 +290,10 @@
"L1 = {}\n",
"\n",
"for i in np.arange(-5, 5, 0.5):\n",
" LR2 = linear_model.LogisticRegression(C=10**i, penalty = 'l2')\n",
" LR2 = linear_model.LogisticRegression(C=10**i, penalty = 'l2', solver = 'liblinear')\n",
" LR2.fit(X_train, Y_train)\n",
" L2[i] = LR2.coef_[0]\n",
" LR1 = linear_model.LogisticRegression(C=10**i, penalty = 'l1')\n",
" LR1 = linear_model.LogisticRegression(C=10**i, penalty = 'l1', solver = 'liblinear')\n",
" LR1.fit(X_train, Y_train)\n",
" L1[i] = LR1.coef_[0]\n",
"\n",
@@ -506,7 +506,7 @@
" \n",
" for c in cs:\n",
" for norm in [1,2]:\n",
" lr = linear_model.LogisticRegression(C=c, penalty='l{}'.format(norm))\n",
" lr = linear_model.LogisticRegression(C=c, penalty='l{}'.format(norm), solver = 'liblinear')\n",
" lr.fit(X_tr_f,Y_tr_f)\n",
" met = roc_auc_score(Y_va_f, lr.predict_proba(X_va_f)[:,1])\n",
"\n",
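
The additions above pass solver='liblinear' explicitly. In current scikit-learn releases the default solver is 'lbfgs', which raises an error for penalty='l1', so the notebook's L1-vs-L2 coefficient sweep needs a solver that supports both penalties. A minimal sketch of that sweep on synthetic data; the data generation here is an assumption for illustration:

import numpy as np
from sklearn import linear_model
from sklearn.model_selection import train_test_split

# Synthetic stand-in for the notebook's training data
X = np.random.randn(500, 10)
Y = (X[:, 0] + 0.5 * X[:, 1] + np.random.randn(500) > 0).astype(int)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

L1, L2 = {}, {}
for i in np.arange(-5, 5, 0.5):
    LR2 = linear_model.LogisticRegression(C=10**i, penalty='l2', solver='liblinear')
    LR2.fit(X_train, Y_train)
    L2[i] = LR2.coef_[0]
    LR1 = linear_model.LogisticRegression(C=10**i, penalty='l1', solver='liblinear')
    LR1.fit(X_train, Y_train)
    L1[i] = LR1.coef_[0]
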
8 changes: 4 additions & 4 deletions ipython/python35/Lecture_TextMining_3.ipynb
@@ -511,7 +511,7 @@
"model = LogisticRegression()\n",
"model.fit(X_train_binary, Y_train)\n",
"\n",
"print(f'Area under the ROC curve on the test data = {round(metrics.roc_auc_score(model.predict(X_test_binary), Y_test), 3)}')"
"print(f'Area under the ROC curve on the test data = {round(metrics.roc_auc_score(Y_test, model.predict_proba(X_test_binary)[:,1]), 4)}')"
]
},
{
@@ -586,7 +586,7 @@
"model = LogisticRegression(max_iter=1500)\n",
"model.fit(X_train_counts, Y_train)\n",
"\n",
"print(f'Area under the ROC curve on the test data = {round(metrics.roc_auc_score(model.predict(X_test_counts), Y_test), 3)}')"
"print(f'Area under the ROC curve on the test data = {round(metrics.roc_auc_score(Y_test, model.predict_proba(X_test_counts)[:,1]), 4)}')"
]
},
{
@@ -661,7 +661,7 @@
"model = LogisticRegression()\n",
"model.fit(X_train_tfidf, Y_train)\n",
"\n",
"print(f'Area under the ROC curve on the test data = {round(metrics.roc_auc_score(model.predict(X_test_counts), Y_test), 3)}')"
"print(f'Area under the ROC curve on the test data = {round(metrics.roc_auc_score(Y_test, model.predict_proba(X_test_tfidf)[:,1]), 4)}')"
]
},
{
@@ -798,7 +798,7 @@
}
],
"source": [
"print(f'AUC on the count data = {round(metrics.roc_auc_score(model.predict(X_test_tfidf), Y_test), 3)}')"
"print(f'AUC on the tfidf data = {round(metrics.roc_auc_score(Y_test, model.predict_proba(X_test_tfidf)[:,1]), 4)}')"
]
},
{
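
The edits above correct the roc_auc_score calls in all four evaluation cells: the function expects (y_true, y_score), and the score should be a predicted probability rather than a hard class label (the last cell's label also changes from "count" to "tfidf" to match the features actually scored). A small sketch of the corrected pattern on toy data; the arrays here are assumptions for illustration:

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

# Toy features and labels standing in for the notebook's text features
X_train, X_test = np.random.randn(200, 5), np.random.randn(100, 5)
Y_train = (X_train[:, 0] > 0).astype(int)
Y_test = (X_test[:, 0] > 0).astype(int)

model = LogisticRegression()
model.fit(X_train, Y_train)

# (y_true, y_score) order, with the positive-class probability as the score
auc = metrics.roc_auc_score(Y_test, model.predict_proba(X_test)[:, 1])
print(f'Area under the ROC curve on the test data = {round(auc, 4)}')
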
