Skip to content

Commit

Permalink
Update reading with no na_filter
Browse files Browse the repository at this point in the history
  • Loading branch information
CrafterKolyan committed Mar 19, 2020
1 parent 72d2d95 commit fa4b754
Showing 1 changed file with 16 additions and 18 deletions.
34 changes: 16 additions & 18 deletions Assignment.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
"source": [
"columns = [\"text\", \"parent_text\", \"score\"]\n",
"df = pd.concat([\n",
- " pd.read_csv(\"comments_positive.csv\", usecols=columns),\n",
- " pd.read_csv(\"comments_negative.csv\", usecols=columns)\n",
+ " pd.read_csv(\"comments_positive.csv\", usecols=columns, na_filter=False),\n",
+ " pd.read_csv(\"comments_negative.csv\", usecols=columns, na_filter=False)\n",
"], ignore_index=True)"
]
},
Expand All @@ -33,15 +33,6 @@
"execution_count": 3,
"metadata": {},
"outputs": [],
- "source": [
- "df.dropna(inplace=True)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
"source": [
"y = df['score']\n",
"df.drop(columns='score', inplace=True)\n",
Expand All @@ -50,20 +41,27 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Train shape: (3799931, 2)\n",
- "Test shape: (199997, 2)\n"
+ "Train shape: (3800000, 2)\n",
+ "Test shape: (200000, 2)\n"
]
}
],
"source": [
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.05, random_state=SEED)\n",
+ "\n",
+ "# To be sure we don't use indices to predict something\n",
+ "X_train.reset_index(drop=True, inplace=True)\n",
+ "X_test.reset_index(drop=True, inplace=True)\n",
+ "y_train.reset_index(drop=True, inplace=True)\n",
+ "y_test.reset_index(drop=True, inplace=True)\n",
+ "\n",
"print(\"Train shape: {}\".format(X_train.shape))\n",
"print(\"Test shape: {}\".format(X_test.shape))"
]
Expand All @@ -77,7 +75,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -93,16 +91,16 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "44283.49363114471"
+ "44239.11158943143"
]
},
- "execution_count": 12,
+ "execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
Expand Down

0 comments on commit fa4b754

Please sign in to comment.