forked from achingachris/datasciencelearninghub
-
Notifications
You must be signed in to change notification settings - Fork 0
/
financialdataset.py
82 lines (55 loc) · 1.57 KB
/
financialdataset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# -*- coding: utf-8 -*-
"""financialDataSet.ipynb
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/drive/1e7JJ8Awj36KmsM0QYTBMcdhft_XMJzBM
"""
# Import Libraries we use for analysis
import pandas as pd
# Load the financial dataset from its short-link CSV source (needs network access).
myDataSet = pd.read_csv('https://bit.ly/FinancialDataset')

# NOTE: results are printed explicitly throughout this section. A bare
# expression such as `myDataSet.head()` only renders as notebook cell output
# and is silently discarded when this file is run as a plain script.

# Preview the first five and the last ten observations.
print(myDataSet.head())
print(myDataSet.tail(10))

# Per-column overview; DataFrame.info() writes its report to stdout itself.
myDataSet.info()

# Summary statistics as produced by DataFrame.describe() with its defaults.
print(myDataSet.describe())

# Dimensions, column labels and column dtypes of the frame.
print(myDataSet.shape)
print(myDataSet.columns)
print(myDataSet.dtypes)

# Distinct values found in selected columns. The labels are used exactly as
# they appear in the CSV header (including the "Educuation" spelling), so they
# must not be "corrected" here.
for column_name in (
    'year',
    'marital_status',
    'Cell Phone Access',
    'Type of Job',
    'gender_of_respondent',
    'uniqueid',
    'Level of Educuation',
):
    print(column_name, myDataSet[column_name].unique())
#import libraries for graphs
import matplotlib.pyplot as plt
import seaborn as sns
# Drop every row that contains at least one missing value, in place, so the
# plots and statistics below operate on complete records only.
myDataSet.dropna(inplace=True)

# Histogram of respondent ages with a kernel-density curve overlaid.
# sns.distplot is deprecated (since seaborn 0.11) and scheduled for removal;
# histplot(kde=True) is the documented replacement. It plots counts by
# default, which is what the 'Frequency' y-axis label describes.
plt.figure(figsize=(8, 5))
sns.histplot(myDataSet['Respondent Age'], kde=True, color='green')
plt.title('Distribution for Respondent Age')
# The x-axis shows ages, so label it as such rather than just 'Respondent'.
plt.xlabel('Respondent Age')
plt.ylabel('Frequency')
plt.show()

# Mean respondent age; printed because a bare expression is discarded when
# this file runs as a script instead of a notebook cell.
print(myDataSet['Respondent Age'].mean())
# Bar chart counting the observations per country.
plt.figure(figsize=(12, 8))
# Name the column through x= / data=: recent seaborn releases accept only
# `data` positionally, so passing the Series as the first positional argument
# is deprecated (0.11) and misinterpreted or rejected in later versions.
sns.countplot(x='country', data=myDataSet)
plt.title('Country Chart', color='green')
plt.xlabel('Country')
plt.ylabel('Frequency')
plt.show()
# Bar chart counting the observations per marital status.
plt.figure(figsize=(12, 8))
# Name the column through x= / data=: recent seaborn releases accept only
# `data` positionally, so a positional Series is deprecated or rejected.
sns.countplot(x='marital_status', data=myDataSet)
# Render the figure explicitly, as the other plots in this file do; without
# this call nothing is displayed when the file runs as a script.
plt.show()