From 70a5483d99c0b91e9bef004af57ae5004f28b501 Mon Sep 17 00:00:00 2001 From: Michael Waskom Date: Sat, 30 May 2020 17:07:18 -0400 Subject: [PATCH] Add geyser dataset --- geyser.csv | 273 ++++++++++++++++++++++++++++++++++++++++++++++ process/geyser.py | 15 +++ raw/geyser.csv | 273 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 561 insertions(+) create mode 100644 geyser.csv create mode 100644 process/geyser.py create mode 100644 raw/geyser.csv diff --git a/geyser.csv b/geyser.csv new file mode 100644 index 0000000..bc7cafc --- /dev/null +++ b/geyser.csv @@ -0,0 +1,273 @@ +duration,waiting,kind +3.6,79,long +1.8,54,short +3.333,74,long +2.283,62,short +4.533,85,long +2.883,55,short +4.7,88,long +3.6,85,long +1.95,51,short +4.35,85,long +1.8330000000000002,54,short +3.917,84,long +4.2,78,long +1.75,47,short +4.7,83,long +2.167,52,short +1.75,62,short +4.8,84,long +1.6,52,short +4.25,79,long +1.8,51,short +1.75,47,short +3.45,78,long +3.0669999999999997,69,long +4.533,74,long +3.6,83,long +1.9669999999999999,55,short +4.083,76,long +3.85,78,long +4.433,79,long +4.3,73,long +4.467,77,long +3.367,66,short +4.033,80,long +3.833,74,long +2.017,52,short +1.867,48,short +4.833,80,long +1.8330000000000002,59,short +4.783,90,long +4.35,80,long +1.883,58,short +4.567,84,long +1.75,58,short +4.533,73,long +3.3169999999999997,83,long +3.833,64,short +2.1,53,short +4.633,82,long +2.0,59,short +4.8,75,long +4.716,90,long +1.8330000000000002,54,short +4.833,80,long +1.733,54,short +4.883,83,long +3.717,71,long +1.6669999999999998,64,short +4.567,77,long +4.317,81,long +2.233,59,short +4.5,84,long +1.75,48,short +4.8,82,long +1.817,60,short +4.4,92,long +4.167,78,long +4.7,78,long +2.0669999999999997,65,short +4.7,73,long +4.033,82,long +1.9669999999999999,56,short +4.5,79,long +4.0,71,long +1.983,62,short +5.067,76,long +2.017,60,short +4.567,78,long +3.883,76,long +3.6,83,long +4.133,75,long +4.333,82,long +4.1,70,long +2.633,65,short +4.067,73,long +4.933,88,long +3.95,76,long +4.5169999999999995,80,long +2.167,48,short +4.0,86,long +2.2,60,short +4.333,90,long +1.867,50,short +4.817,78,long +1.8330000000000002,63,short +4.3,72,long +4.667,84,long +3.75,75,long +1.867,51,short +4.9,82,long +2.483,62,short +4.367,88,long +2.1,49,short +4.5,83,long +4.05,81,long +1.867,47,short +4.7,84,long +1.7830000000000001,52,short +4.85,86,long +3.6830000000000003,81,long +4.7330000000000005,75,long +2.3,59,short +4.9,89,long +4.417,79,long +1.7,59,short +4.633,81,long +2.3169999999999997,50,short +4.6,85,long +1.817,59,short +4.417,87,long +2.617,53,short +4.067,69,long +4.25,77,long +1.9669999999999999,56,short +4.6,88,long +3.767,81,long +1.9169999999999998,45,short +4.5,82,long +2.267,55,short +4.65,90,long +1.867,45,short +4.167,83,long +2.8,56,short +4.333,89,long +1.8330000000000002,46,short +4.383,82,long +1.883,51,short +4.933,86,long +2.033,53,short +3.733,79,long +4.2330000000000005,81,long +2.233,60,short +4.533,82,long +4.817,77,long +4.333,76,long +1.983,59,short +4.633,80,long +2.017,49,short +5.1,96,long +1.8,53,short +5.033,77,long +4.0,77,long +2.4,65,short +4.6,81,long +3.5669999999999997,71,long +4.0,70,long +4.5,81,long +4.083,93,long +1.8,53,short +3.967,89,long +2.2,45,short +4.15,86,long +2.0,58,short +3.833,78,long +3.5,66,short +4.583,76,long +2.367,63,short +5.0,88,long +1.933,52,short +4.617,93,long +1.9169999999999998,49,short +2.083,57,short +4.583,77,long +3.333,68,long +4.167,81,long +4.333,81,long +4.5,73,long +2.417,50,short +4.0,85,long +4.167,74,long +1.883,55,short +4.583,77,long +4.25,83,long +3.767,83,long +2.033,51,short +4.433,78,long +4.083,84,long +1.8330000000000002,46,short +4.417,83,long +2.1830000000000003,55,short +4.8,81,long +1.8330000000000002,57,short +4.8,76,long +4.1,84,long +3.966,77,long +4.2330000000000005,81,long +3.5,87,long +4.3660000000000005,77,long +2.25,51,short +4.667,78,long +2.1,60,short +4.35,82,long +4.133,91,long +1.867,53,short +4.6,78,long +1.7830000000000001,46,short +4.367,77,long +3.85,84,long +1.933,49,short +4.5,83,long +2.383,71,long +4.7,80,long +1.867,49,short +3.833,75,long +3.417,64,short +4.2330000000000005,76,long +2.4,53,short +4.8,94,long +2.0,55,short +4.15,76,long +1.867,50,short +4.2669999999999995,82,long +1.75,54,short +4.4830000000000005,75,long +4.0,78,long +4.117,79,long +4.083,78,long +4.2669999999999995,78,long +3.917,70,long +4.55,79,long +4.083,70,long +2.417,54,short +4.183,86,long +2.217,50,short +4.45,90,long +1.883,54,short +1.85,54,short +4.283,77,long +3.95,79,long +2.333,64,short +4.15,75,long +2.35,47,short +4.933,86,long +2.9,63,short +4.583,85,long +3.833,82,long +2.083,57,short +4.367,82,long +2.133,67,short +4.35,74,long +2.2,54,short +4.45,83,long +3.5669999999999997,73,long +4.5,73,long +4.15,88,long +3.8169999999999997,80,long +3.917,71,long +4.45,83,long +2.0,56,short +4.283,79,long +4.7669999999999995,78,long +4.533,84,long +1.85,58,short +4.25,83,long +1.983,43,short +2.25,60,short +4.75,75,long +4.117,81,long +2.15,46,short +4.417,90,long +1.817,46,short +4.467,74,long diff --git a/process/geyser.py b/process/geyser.py new file mode 100644 index 0000000..3aaf439 --- /dev/null +++ b/process/geyser.py @@ -0,0 +1,15 @@ +import numpy as np +import pandas as pd +from scipy.cluster.vq import kmeans2 + +if __name__ == "__main__": + + np.random.seed(0) + + df = pd.read_csv("raw/geyser.csv") + df.columns = ["duration", "waiting"] + + _, z = kmeans2(df, 2) + df["kind"] = np.where(z, "long", "short") + + df.to_csv("geyser.csv", index=False) diff --git a/raw/geyser.csv b/raw/geyser.csv new file mode 100644 index 0000000..2f23dc9 --- /dev/null +++ b/raw/geyser.csv @@ -0,0 +1,273 @@ +"eruptions","waiting" +3.6,79 +1.8,54 +3.333,74 +2.283,62 +4.533,85 +2.883,55 +4.7,88 +3.6,85 +1.95,51 +4.35,85 +1.833,54 +3.917,84 +4.2,78 +1.75,47 +4.7,83 +2.167,52 +1.75,62 +4.8,84 +1.6,52 +4.25,79 +1.8,51 +1.75,47 +3.45,78 +3.067,69 +4.533,74 +3.6,83 +1.967,55 +4.083,76 +3.85,78 +4.433,79 +4.3,73 +4.467,77 +3.367,66 +4.033,80 +3.833,74 +2.017,52 +1.867,48 +4.833,80 +1.833,59 +4.783,90 +4.35,80 +1.883,58 +4.567,84 +1.75,58 +4.533,73 +3.317,83 +3.833,64 +2.1,53 +4.633,82 +2,59 +4.8,75 +4.716,90 +1.833,54 +4.833,80 +1.733,54 +4.883,83 +3.717,71 +1.667,64 +4.567,77 +4.317,81 +2.233,59 +4.5,84 +1.75,48 +4.8,82 +1.817,60 +4.4,92 +4.167,78 +4.7,78 +2.067,65 +4.7,73 +4.033,82 +1.967,56 +4.5,79 +4,71 +1.983,62 +5.067,76 +2.017,60 +4.567,78 +3.883,76 +3.6,83 +4.133,75 +4.333,82 +4.1,70 +2.633,65 +4.067,73 +4.933,88 +3.95,76 +4.517,80 +2.167,48 +4,86 +2.2,60 +4.333,90 +1.867,50 +4.817,78 +1.833,63 +4.3,72 +4.667,84 +3.75,75 +1.867,51 +4.9,82 +2.483,62 +4.367,88 +2.1,49 +4.5,83 +4.05,81 +1.867,47 +4.7,84 +1.783,52 +4.85,86 +3.683,81 +4.733,75 +2.3,59 +4.9,89 +4.417,79 +1.7,59 +4.633,81 +2.317,50 +4.6,85 +1.817,59 +4.417,87 +2.617,53 +4.067,69 +4.25,77 +1.967,56 +4.6,88 +3.767,81 +1.917,45 +4.5,82 +2.267,55 +4.65,90 +1.867,45 +4.167,83 +2.8,56 +4.333,89 +1.833,46 +4.383,82 +1.883,51 +4.933,86 +2.033,53 +3.733,79 +4.233,81 +2.233,60 +4.533,82 +4.817,77 +4.333,76 +1.983,59 +4.633,80 +2.017,49 +5.1,96 +1.8,53 +5.033,77 +4,77 +2.4,65 +4.6,81 +3.567,71 +4,70 +4.5,81 +4.083,93 +1.8,53 +3.967,89 +2.2,45 +4.15,86 +2,58 +3.833,78 +3.5,66 +4.583,76 +2.367,63 +5,88 +1.933,52 +4.617,93 +1.917,49 +2.083,57 +4.583,77 +3.333,68 +4.167,81 +4.333,81 +4.5,73 +2.417,50 +4,85 +4.167,74 +1.883,55 +4.583,77 +4.25,83 +3.767,83 +2.033,51 +4.433,78 +4.083,84 +1.833,46 +4.417,83 +2.183,55 +4.8,81 +1.833,57 +4.8,76 +4.1,84 +3.966,77 +4.233,81 +3.5,87 +4.366,77 +2.25,51 +4.667,78 +2.1,60 +4.35,82 +4.133,91 +1.867,53 +4.6,78 +1.783,46 +4.367,77 +3.85,84 +1.933,49 +4.5,83 +2.383,71 +4.7,80 +1.867,49 +3.833,75 +3.417,64 +4.233,76 +2.4,53 +4.8,94 +2,55 +4.15,76 +1.867,50 +4.267,82 +1.75,54 +4.483,75 +4,78 +4.117,79 +4.083,78 +4.267,78 +3.917,70 +4.55,79 +4.083,70 +2.417,54 +4.183,86 +2.217,50 +4.45,90 +1.883,54 +1.85,54 +4.283,77 +3.95,79 +2.333,64 +4.15,75 +2.35,47 +4.933,86 +2.9,63 +4.583,85 +3.833,82 +2.083,57 +4.367,82 +2.133,67 +4.35,74 +2.2,54 +4.45,83 +3.567,73 +4.5,73 +4.15,88 +3.817,80 +3.917,71 +4.45,83 +2,56 +4.283,79 +4.767,78 +4.533,84 +1.85,58 +4.25,83 +1.983,43 +2.25,60 +4.75,75 +4.117,81 +2.15,46 +4.417,90 +1.817,46 +4.467,74