Commit

Merge pull request briandalessandro#34 from leora-dot/master
just little things I noticed
briandalessandro authored Dec 20, 2020
2 parents bd5509f + fa26ab8 commit c0cd5a6
Showing 6 changed files with 34 additions and 21 deletions.
4 changes: 3 additions & 1 deletion ipython/python35/Lecture_Binning_NonLinear_3.ipynb
@@ -74,7 +74,7 @@
" dummies = pd.get_dummies(d['g'], prefix='bin')\n",
" d_m = pd.merge(d, dummies, left_index=True, right_index=True, how='inner')\n",
" del d_m['g'] #we don't need this\n",
" del d_m['bin_0'] #we don't this either\n",
" del d_m['bin_0'] #we don't need this either\n",
" return d_m\n",
" \n",
"\n"
@@ -282,6 +282,8 @@
"d50=makeBin(dat, 20); g50=getBinName(d50.columns.values, 'bin')\n",
"PlotLinBin(d50[g50][:sp], d50['y'][:sp],d50[g50][sp:], d50['y'][sp:], 3, 'Bins=50', d50['x'][:sp], d50['x'][sp:])\n",
"\n",
"plt.tight_layout()\n",
"\n",
"plt.show()"
]
},
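
The changes above fix a comment typo and add a plt.tight_layout() call so the labels in the stacked bin-comparison subplots no longer overlap. For context, a minimal sketch of the dummy-binning idiom that the edited makeBin helper relies on; the toy data and the column names 'x', 'y', 'g' are assumptions for illustration, not the notebook's exact dataset:

import numpy as np
import pandas as pd

# Toy data standing in for the notebook's DataFrame
d = pd.DataFrame({'x': np.random.uniform(-1, 1, 200)})
d['y'] = np.sin(3 * d['x']) + np.random.normal(0, 0.2, 200)

d['g'] = pd.cut(d['x'], bins=20, labels=False)      # bin membership as an integer label
dummies = pd.get_dummies(d['g'], prefix='bin')      # one indicator column per bin
d_m = pd.merge(d, dummies, left_index=True, right_index=True, how='inner')
del d_m['g']        # the raw bin label is no longer needed
del d_m['bin_0']    # drop one bin as the reference category to avoid collinearity
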
14 changes: 10 additions & 4 deletions ipython/python35/Lecture_Clustering_3.ipynb
@@ -231,20 +231,26 @@
"source": [
"from matplotlib import pyplot as plt\n",
"\n",
"#Get correlation and 0 out the diagonal (for plotting purposes)\n",
"#Get correlation\n",
"c_mat = dpro.corr()\n",
"for i in range(c_mat.shape[0]):\n",
" c_mat.iloc[i,i] = 0\n",
"\n",
"#Mask diagnol & duplicates (for plotting purposes)\n",
"plot_data = c_mat.values\n",
"mask = np.triu(np.ones_like(c_mat, dtype=bool))\n",
"plot_data = np.ma.masked_where(np.asarray(mask), plot_data)\n",
"\n",
"fig, ax = plt.subplots()\n",
"heatmap = ax.pcolor(c_mat, cmap=plt.cm.RdBu)\n",
"ax.pcolormesh(plot_data, cmap=plt.cm.RdBu)\n",
"\n",
"#Set the tick labels and center them\n",
"ax.set_xticks(np.arange(c_mat.shape[0])+0.5, minor=False)\n",
"ax.set_yticks(np.arange(c_mat.shape[1])+0.5, minor=False)\n",
"ax.set_xticklabels(c_mat.index.values, minor=False)\n",
"ax.set_yticklabels(c_mat.index.values, minor=False)\n",
"\n",
"plt.title(\"Category Correlation\")\n",
"plt.show()\n",
"\n",
"c_mat"
]
},
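
The rewritten cell above stops zeroing out the correlation matrix's diagonal in place and instead masks the diagonal and the duplicate upper-triangle cells before plotting with pcolormesh. A self-contained sketch of that idiom, using a small random DataFrame as a stand-in for the notebook's dpro (an assumption for illustration only):

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

dpro = pd.DataFrame(np.random.rand(100, 5), columns=list('abcde'))

c_mat = dpro.corr()
mask = np.triu(np.ones_like(c_mat, dtype=bool))       # True on and above the diagonal
plot_data = np.ma.masked_where(mask, c_mat.values)    # hide the diagonal and duplicate cells

fig, ax = plt.subplots()
ax.pcolormesh(plot_data, cmap=plt.cm.RdBu)            # masked cells are left blank
ax.set_xticks(np.arange(c_mat.shape[0]) + 0.5, minor=False)
ax.set_yticks(np.arange(c_mat.shape[1]) + 0.5, minor=False)
ax.set_xticklabels(c_mat.index.values, minor=False)
ax.set_yticklabels(c_mat.index.values, minor=False)
plt.title("Category Correlation")
plt.show()
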
21 changes: 13 additions & 8 deletions ipython/python35/Lecture_MultiArmedBandit.ipynb
@@ -189,7 +189,7 @@
" #Add a small tie breaker\n",
" probs = [p + np.random.random() / 100000 for p in probs]\n",
" \n",
" best_i = _chooseBestPolicy(probs)\n",
" best_i = self._chooseBestPolicy(probs)\n",
" \n",
" if np.random.random() > epsilon:\n",
" return best_i\n",
@@ -233,7 +233,7 @@
" \n",
" policy = self._thompsonSamp()\n",
" \n",
" else if self.strategy = 'UCB':\n",
" elif self.strategy == 'UCB':\n",
" policy = self._ucbSamp()\n",
" \n",
" else:\n",
@@ -306,31 +306,36 @@
" return np.random.random()\n",
"\n",
"def plotMabSim(mabsim, colors = None):\n",
"\n",
" \n",
" res = pd.DataFrame(mabsim.results_snapshot)\n",
" policies = list(set(res['policy'].values))\n",
"\n",
" if colors == None:\n",
" colors = [(r(), r(), r()) for i in policies]\n",
" \n",
" fig = plt.figure(figsize = (15, 8))\n",
" plt.title('MAB Sequences Over Time')\n",
" plt.suptitle(\"MAB Sequences Over Time\")\n",
"\n",
" ax1 = fig.add_subplot(211)\n",
" ax1 = plt.subplot(2, 1, 1)\n",
" for i in policies:\n",
" res_i = res[(res.policy == i)]\n",
" plt.plot(res_i['round'], res_i.betaProbs, color = colors[i], markersize = 4)\n",
" ax1.set_ylim([0.05, 0.55])\n",
" plt.plot(res_i['round'], mabsim.p_truths[i] * np.ones(len(res_i['round'])), 'g--', markersize = 2)\n",
" \n",
" ax1.set_ylim([0.05, 0.55])\n",
" ax1.set_xlabel('Round')\n",
" \n",
" ax2 = fig.add_subplot(212)\n",
" ax2 = plt.subplot(2, 1, 2)\n",
"\n",
" for i in policies:\n",
" res_i = res[(res.policy == i)]\n",
" plt.plot(res_i['round'], res_i.draws, color = colors[i])\n",
" \n",
" ax2.set_xlabel('Round')\n",
" ax2.set_ylabel('Number Draws')\n",
" ax1.set_ylabel('MAP Prob Estimate')\n"
" ax1.set_ylabel('MAP Prob Estimate')\n",
" \n",
" plt.subplots_adjust(top=0.95)"
]
},
{
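
Two of the fixes above are a missing self. on the _chooseBestPolicy call and an invalid "else if ... =" that becomes "elif ... ==". A hedged, self-contained sketch of how those corrected pieces fit together in an epsilon-greedy / strategy-dispatch bandit class; the class name, attributes, and the Thompson/UCB helpers here are illustrative assumptions, not the notebook's exact implementation:

import numpy as np

class MabSketch:
    def __init__(self, n_arms, strategy='epsilon'):
        self.strategy = strategy
        self.successes = np.ones(n_arms)   # Beta prior pseudo-counts
        self.failures = np.ones(n_arms)

    def _chooseBestPolicy(self, probs):
        return int(np.argmax(probs))

    def _thompsonSamp(self):
        # one draw per arm from its Beta posterior, keep the best draw
        return int(np.argmax(np.random.beta(self.successes, self.failures)))

    def _ucbSamp(self):
        n = self.successes + self.failures
        bonus = np.sqrt(2 * np.log(n.sum()) / n)
        return int(np.argmax(self.successes / n + bonus))

    def _epsilonGreedy(self, probs, epsilon=0.1):
        probs = [p + np.random.random() / 100000 for p in probs]  # small tie breaker
        best_i = self._chooseBestPolicy(probs)    # needs self. -- the bug the commit fixes
        if np.random.random() > epsilon:
            return best_i
        return np.random.randint(len(probs))

    def choosePolicy(self, probs, epsilon=0.1):
        if self.strategy == 'Thompson':
            policy = self._thompsonSamp()
        elif self.strategy == 'UCB':              # 'elif' and '==', not 'else if' and '='
            policy = self._ucbSamp()
        else:
            policy = self._epsilonGreedy(probs, epsilon)
        return policy

mab = MabSketch(3, strategy='UCB')
print(mab.choosePolicy([0.1, 0.2, 0.3]))
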
2 changes: 1 addition & 1 deletion ipython/python35/Lecture_PandasIntro_3.ipynb
@@ -172,7 +172,7 @@
}
],
"source": [
"#To retrieve a row, you can index it like a list, or use the actual row index name using the .ix method\n",
"#To retrieve a row, you can index it like a list, or use the actual row index name using the .iloc method\n",
"frame[1:2], frame.iloc[1]"
]
},
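
The only change above is in the comment: the long-deprecated .ix accessor is replaced by .iloc in the description of row retrieval. A tiny sketch of the options that comment refers to; the frame contents here are made up for illustration:

import pandas as pd

frame = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]}, index=['r0', 'r1', 'r2'])

frame[1:2]       # list-style slicing: a one-row DataFrame
frame.iloc[1]    # positional lookup: the same row as a Series
frame.loc['r1']  # label-based lookup, the modern counterpart to the removed .ix
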
6 changes: 3 additions & 3 deletions ipython/python35/Lecture_Regularization_3.ipynb
@@ -290,10 +290,10 @@
"L1 = {}\n",
"\n",
"for i in np.arange(-5, 5, 0.5):\n",
" LR2 = linear_model.LogisticRegression(C=10**i, penalty = 'l2')\n",
" LR2 = linear_model.LogisticRegression(C=10**i, penalty = 'l2', solver = 'liblinear')\n",
" LR2.fit(X_train, Y_train)\n",
" L2[i] = LR2.coef_[0]\n",
" LR1 = linear_model.LogisticRegression(C=10**i, penalty = 'l1')\n",
" LR1 = linear_model.LogisticRegression(C=10**i, penalty = 'l1', solver = 'liblinear')\n",
" LR1.fit(X_train, Y_train)\n",
" L1[i] = LR1.coef_[0]\n",
"\n",
@@ -506,7 +506,7 @@
" \n",
" for c in cs:\n",
" for norm in [1,2]:\n",
" lr = linear_model.LogisticRegression(C=c, penalty='l{}'.format(norm))\n",
" lr = linear_model.LogisticRegression(C=c, penalty='l{}'.format(norm), solver = 'liblinear')\n",
" lr.fit(X_tr_f,Y_tr_f)\n",
" met = roc_auc_score(Y_va_f, lr.predict_proba(X_va_f)[:,1])\n",
"\n",
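
The additions above pass solver='liblinear' explicitly. In current scikit-learn releases the default solver is 'lbfgs', which raises an error for penalty='l1', so the notebook's L1-vs-L2 coefficient sweep needs a solver that supports both penalties. A minimal sketch of that sweep on synthetic data; the data generation here is an assumption for illustration:

import numpy as np
from sklearn import linear_model
from sklearn.model_selection import train_test_split

# Synthetic stand-in for the notebook's training data
X = np.random.randn(500, 10)
Y = (X[:, 0] + 0.5 * X[:, 1] + np.random.randn(500) > 0).astype(int)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)

L1, L2 = {}, {}
for i in np.arange(-5, 5, 0.5):
    LR2 = linear_model.LogisticRegression(C=10**i, penalty='l2', solver='liblinear')
    LR2.fit(X_train, Y_train)
    L2[i] = LR2.coef_[0]
    LR1 = linear_model.LogisticRegression(C=10**i, penalty='l1', solver='liblinear')
    LR1.fit(X_train, Y_train)
    L1[i] = LR1.coef_[0]
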
8 changes: 4 additions & 4 deletions ipython/python35/Lecture_TextMining_3.ipynb
@@ -511,7 +511,7 @@
"model = LogisticRegression()\n",
"model.fit(X_train_binary, Y_train)\n",
"\n",
"print(f'Area under the ROC curve on the test data = {round(metrics.roc_auc_score(model.predict(X_test_binary), Y_test), 3)}')"
"print(f'Area under the ROC curve on the test data = {round(metrics.roc_auc_score(Y_test, model.predict_proba(X_test_binary)[:,1]), 4)}')"
]
},
{
@@ -586,7 +586,7 @@
"model = LogisticRegression(max_iter=1500)\n",
"model.fit(X_train_counts, Y_train)\n",
"\n",
"print(f'Area under the ROC curve on the test data = {round(metrics.roc_auc_score(model.predict(X_test_counts), Y_test), 3)}')"
"print(f'Area under the ROC curve on the test data = {round(metrics.roc_auc_score(Y_test, model.predict_proba(X_test_counts)[:,1]), 4)}')"
]
},
{
@@ -661,7 +661,7 @@
"model = LogisticRegression()\n",
"model.fit(X_train_tfidf, Y_train)\n",
"\n",
"print(f'Area under the ROC curve on the test data = {round(metrics.roc_auc_score(model.predict(X_test_counts), Y_test), 3)}')"
"print(f'Area under the ROC curve on the test data = {round(metrics.roc_auc_score(Y_test, model.predict_proba(X_test_tfidf)[:,1]), 4)}')"
]
},
{
@@ -798,7 +798,7 @@
}
],
"source": [
"print(f'AUC on the count data = {round(metrics.roc_auc_score(model.predict(X_test_tfidf), Y_test), 3)}')"
"print(f'AUC on the tfidf data = {round(metrics.roc_auc_score(Y_test, model.predict_proba(X_test_tfidf)[:,1]), 4)}')"
]
},
{
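
The edits above correct the roc_auc_score calls in all four evaluation cells: the function expects (y_true, y_score), and the score should be a predicted probability rather than a hard class label (the last cell's label also changes from "count" to "tfidf" to match the features actually scored). A small sketch of the corrected pattern on toy data; the arrays here are assumptions for illustration:

import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn import metrics

# Toy features and labels standing in for the notebook's text features
X_train, X_test = np.random.randn(200, 5), np.random.randn(100, 5)
Y_train = (X_train[:, 0] > 0).astype(int)
Y_test = (X_test[:, 0] > 0).astype(int)

model = LogisticRegression()
model.fit(X_train, Y_train)

# (y_true, y_score) order, with the positive-class probability as the score
auc = metrics.roc_auc_score(Y_test, model.predict_proba(X_test)[:, 1])
print(f'Area under the ROC curve on the test data = {round(auc, 4)}')
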
