Add files via upload

ElenaCasiraghi · Oct 22, 2020 · dcdb2f5 · dcdb2f5
1 parent 9a438d5
commit dcdb2f5
Showing 1 changed file with 195 additions and 0 deletions.
diff --git a/Features rilevanti con RFE.ipynb b/Features rilevanti con RFE.ipynb
@@ -0,0 +1,195 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "\n",
+    "from sklearn import datasets\n",
+    "import matplotlib.pyplot as plt\n",
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "from sklearn.svm import SVR\n",
+    "from sklearn.preprocessing import normalize\n",
+    "from sklearn.metrics import accuracy_score\n",
+    "from sklearn.model_selection import StratifiedKFold\n",
+    "from sklearn.feature_selection import RFE\n",
+    "    "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "40001\n",
+      "100001\n",
+      "1\n",
+      "40002\n",
+      "100002\n",
+      "2\n",
+      "40003\n",
+      "100003\n",
+      "3\n",
+      "40004\n",
+      "100004\n",
+      "4\n",
+      "40005\n",
+      "100005\n",
+      "5\n",
+      "40006\n",
+      "100006\n",
+      "6\n",
+      "40007\n",
+      "100007\n",
+      "7\n",
+      "40008\n",
+      "100008\n",
+      "8\n",
+      "40009\n",
+      "100009\n",
+      "9\n",
+      "40010\n",
+      "100010\n",
+      "10\n",
+      "\n",
+      "CAT.Fever          10\n",
+      "CAT.Cough          10\n",
+      "CAT.Dyspnea          0\n",
+      "CAT.IR          0\n",
+      "CAT.Myalgias          0\n",
+      "CAT.Other          10\n",
+      "CAT.Syncope          10\n",
+      "CAT.Asthenia          10\n",
+      "CAT.Vomiting.Nausea          0\n",
+      "CAT.Diarrhea          10\n",
+      "CAT.Headache          0\n",
+      "CAT.Pharingeal.pain          10\n",
+      "INT.No.Symptoms          10\n",
+      "CAT.Pneumo.asthma          10\n",
+      "CAT.Pneumo.BPCO          0\n",
+      "CAT.Neoplasia.last.5.years          10\n",
+      "CAT.Smoke          0\n",
+      "CAT.Arterial.hypertension          0\n",
+      "CAT.Cardiovascular.pathologies          10\n",
+      "CAT.Diabetes          10\n",
+      "CAT.Obesity          10\n",
+      "CAT.Celebral.stroke          10\n",
+      "INT.No.Comorbidities          10\n",
+      "CAT.Sex          0\n",
+      "INT.Age          0\n",
+      "INT.Symptoms.No.days          0\n",
+      "INT.usa.radio.score.MAX          10\n",
+      "INT.radio.SCORE          0\n",
+      "NUM.GEO.extent.score          0\n",
+      "NUM.OPC.extent.score          10\n",
+      "INT.PaO2.PF          0\n",
+      "INT.SpO2.in.FA          10\n",
+      "INT.ALT          0\n",
+      "INT.Platelets          0\n",
+      "NUM.White.blood.cells          0\n",
+      "NUM.Red.blood.cells          10\n",
+      "NUM.Lymphocyte          0\n",
+      "NUM.perc.Lymphocyte          0\n",
+      "NUM.CRP          0\n",
+      "NUM.Haemoglobin          10\n",
+      "NUM.Haematocrit          0\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Cross-validated feature selection: run RFE inside each stratified fold and count how\n",
+    "# often every feature ends up among the selected ones.\n",
+    "\n",
+    "# NOTE(review): absolute, machine-specific path; consider a path relative to a configurable data directory.\n",
+    "DATA_PATH = r'C:\\Users\\Utente\\anaconda3\\Lib\\site-packages\\pandas\\io\\data_covnet_score-imputed_missRF_increasing_1.txt'\n",
+    "N_SPLITS = 10              # number of stratified folds\n",
+    "N_FEATURES_TO_SELECT = 20  # features kept by RFE in every fold\n",
+    "\n",
+    "# Load the dataset.\n",
+    "data = pd.read_csv(DATA_PATH)\n",
+    "\n",
+    "# Names of the features selected in each fold, accumulated over all folds.\n",
+    "parzial_features = list()\n",
+    "\n",
+    "# Every column except the LABEL target is a candidate feature.\n",
+    "features = [f for f in data.columns if f not in ['LABEL']]\n",
+    "X = data[features].values\n",
+    "y = data['LABEL'].values.ravel()\n",
+    "\n",
+    "# All-zero array shaped like y; placeholder for per-sample predictions (not filled in here).\n",
+    "y_pred = np.zeros_like(y)\n",
+    "\n",
+    "# Estimator that ranks the features inside RFE.\n",
+    "# NOTE(review): SVR is a regressor although the folds are stratified on LABEL as classes;\n",
+    "# confirm that a linear classifier is not what was intended.\n",
+    "estimator = SVR(kernel=\"linear\")\n",
+    "selector = RFE(estimator, n_features_to_select=N_FEATURES_TO_SELECT, step=1)\n",
+    "\n",
+    "# Splitter that cuts its input into N_SPLITS parts preserving the class proportions.\n",
+    "skf = StratifiedKFold(n_splits=N_SPLITS)\n",
+    "\n",
+    "# The numbers printed inside the loop are only progress markers, because the run is slow:\n",
+    "# 40000+fold = fold started, 100000+fold = RFE fitted, fold = fold results stored.\n",
+    "for fold, (train_index, test_index) in enumerate(skf.split(X, y), start=1):\n",
+    "    print(40000 + fold)\n",
+    "\n",
+    "    X_train, y_train = X[train_index, :], y[train_index]\n",
+    "\n",
+    "    # Fit on the training part of the fold only. Fitting on the full (X, y) would make\n",
+    "    # every fold select exactly the same features, so each count would be 0 or N_SPLITS.\n",
+    "    selector = selector.fit(X_train, y_train)\n",
+    "\n",
+    "    print(100000 + fold)\n",
+    "\n",
+    "    # support_ is a boolean mask aligned with `features`; keep the names flagged True.\n",
+    "    parzial_features.extend(name for name, kept in zip(features, selector.support_) if kept)\n",
+    "\n",
+    "    print(fold)\n",
+    "\n",
+    "# Report, for every feature, in how many folds it was selected.\n",
+    "print()\n",
+    "for i in features:\n",
+    "    c = parzial_features.count(i)\n",
+    "    print(i,'        ', c)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.8.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}