Skip to content

Commit

Permalink
Data-integration file
Browse files Browse the repository at this point in the history
  • Loading branch information
whopriyam committed Jul 13, 2020
1 parent 870f708 commit a0c4ec9
Show file tree
Hide file tree
Showing 5 changed files with 338 additions and 0 deletions.
Binary file modified .DS_Store
Binary file not shown.
Binary file modified Code/.DS_Store
Binary file not shown.
338 changes: 338 additions & 0 deletions Code/dataset_integration.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,338 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Base table; the annotation columns are attached to it further down.\n",
"BASE_CSV = \"datasetv3.csv\"\n",
"data = pd.read_csv(BASE_CSV)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>tweet_id</th>\n",
" <th>text</th>\n",
" <th>missing_text</th>\n",
" <th>Text_Only_Informative</th>\n",
" <th>Image_Only_Informative</th>\n",
" <th>Directed_Hate</th>\n",
" <th>Generalized_Hate</th>\n",
" <th>Sarcasm</th>\n",
" <th>Allegation</th>\n",
" <th>Justification</th>\n",
" <th>Refutation</th>\n",
" <th>Support</th>\n",
" <th>Oppose</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1.052240e+18</td>\n",
" <td>New post (Domestic Violence Awareness Hasn't C...</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>1.052210e+18</td>\n",
" <td>Domestic Violence Awareness Hasn’t Caught Up W...</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1.052180e+18</td>\n",
" <td>Mother Nature’s #MeToo https://t.co/nN3hbEHz85\\n</td>\n",
" <td>0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>1.052160e+18</td>\n",
" <td>ption - no:2\\n</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>1.052100e+18</td>\n",
" <td>It is 'high time' #MeToo named and shamed men ...</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>1.052050e+18</td>\n",
" <td>“There's this idea that persistence involves s...</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>1.052010e+18</td>\n",
" <td>ption - no:2\\n</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>7</td>\n",
" <td>7</td>\n",
" <td>1.051990e+18</td>\n",
" <td>Meredith’s #MeToo moment on #TheOffice https:/...</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>1.051980e+18</td>\n",
" <td>Interesting analysis by @TheEconomist about th...</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <td>9</td>\n",
" <td>9</td>\n",
" <td>1.051940e+18</td>\n",
" <td>What a shock hey, Old Bill is in the clear &amp;am...</td>\n",
" <td>0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID tweet_id text \\\n",
"0 0 1.052240e+18 New post (Domestic Violence Awareness Hasn't C... \n",
"1 1 1.052210e+18 Domestic Violence Awareness Hasn’t Caught Up W... \n",
"2 2 1.052180e+18 Mother Nature’s #MeToo https://t.co/nN3hbEHz85\\n \n",
"3 3 1.052160e+18 ption - no:2\\n \n",
"4 4 1.052100e+18 It is 'high time' #MeToo named and shamed men ... \n",
"5 5 1.052050e+18 “There's this idea that persistence involves s... \n",
"6 6 1.052010e+18 ption - no:2\\n \n",
"7 7 1.051990e+18 Meredith’s #MeToo moment on #TheOffice https:/... \n",
"8 8 1.051980e+18 Interesting analysis by @TheEconomist about th... \n",
"9 9 1.051940e+18 What a shock hey, Old Bill is in the clear &am... \n",
"\n",
" missing_text Text_Only_Informative Image_Only_Informative Directed_Hate \\\n",
"0 0 1.0 1.0 0.0 \n",
"1 0 1.0 1.0 0.0 \n",
"2 0 0.0 1.0 0.0 \n",
"3 0 1.0 0.0 1.0 \n",
"4 0 1.0 1.0 0.0 \n",
"5 0 1.0 1.0 0.0 \n",
"6 0 1.0 1.0 0.0 \n",
"7 0 1.0 0.0 0.0 \n",
"8 0 1.0 1.0 0.0 \n",
"9 0 1.0 1.0 1.0 \n",
"\n",
" Generalized_Hate Sarcasm Allegation Justification Refutation Support \\\n",
"0 0.0 0.0 0.0 1.0 0.0 1.0 \n",
"1 0.0 0.0 0.0 0.0 0.0 1.0 \n",
"2 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 1.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 1.0 0.0 0.0 1.0 \n",
"5 0.0 0.0 0.0 1.0 0.0 1.0 \n",
"6 0.0 0.0 1.0 1.0 0.0 0.0 \n",
"7 0.0 0.0 0.0 0.0 0.0 0.0 \n",
"8 0.0 0.0 0.0 1.0 0.0 1.0 \n",
"9 0.0 0.0 1.0 1.0 0.0 0.0 \n",
"\n",
" Oppose \n",
"0 0.0 \n",
"1 0.0 \n",
"2 0.0 \n",
"3 1.0 \n",
"4 0.0 \n",
"5 0.0 \n",
"6 1.0 \n",
"7 0.0 \n",
"8 0.0 \n",
"9 0.0 "
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data2 = pd.read_csv(\"MeTooMMD_train.csv\")\n",
"data[\"Text_Only_Informative\"] = data2[\"Text_Only_Informative\"]\n",
"data[\"Image_Only_Informative\"] = data2[\"Image_Only_Informative\"]\n",
"data[\"Directed_Hate\"] = data2[\"Directed_Hate\"]\n",
"data[\"Generalized_Hate\"] = data2[\"Generalized_Hate\"]\n",
"data[\"Sarcasm\"] = data2[\"Sarcasm\"]\n",
"data[\"Allegation\"] = data2[\"Allegation\"]\n",
"data[\"Justification\"] = data2[\"Justification\"]\n",
"data[\"Refutation\"] = data2[\"Refutation\"]\n",
"data[\"Support\"] = data2[\"Support\"]\n",
"data[\"Oppose\"] = data2[\"Oppose\"]\n",
"data.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
"# Write the combined frame to a CSV in the working directory.\n",
"# NOTE(review): with pandas' default settings the row index is written as well,\n",
"# as an unnamed leading column -- confirm that downstream readers expect it.\n",
"OUTPUT_CSV = 'final_dataset.csv'\n",
"data.to_csv(OUTPUT_CSV)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.7"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Binary file added Shashank Data/.DS_Store
Binary file not shown.
Binary file added Shashank Data/Data/.DS_Store
Binary file not shown.

0 comments on commit a0c4ec9

Please sign in to comment.