-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathCOVID_data_exploratory_analysis.sql
259 lines (219 loc) · 7.06 KB
/
COVID_data_exploratory_analysis.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
--preview of a table
select * from cdeath;
--altering data type
ALTER TABLE cdeath
ALTER COLUMN population type bigint
USING population::bigint;
-- united states %death=total cases/total deaths;
select location,date, total_deaths,total_cases,
(total_deaths/total_cases*100::float) as percent_death
from cdeath
where location ilike '%states'
order by date
;
--percent infection from the total population
select location,date, population,total_cases,
(total_cases/population*100::float) as percent_infection
from cdeath
where location ilike '%states'
order by date
;
-- sum of deaths in United Sates per year
select extract(year from date) as yr, location, sum(total_deaths) as yearly_death,
sum(total_cases) as yearly_cases,(sum(total_deaths)/sum(total_cases)*100::float) as percent_death
from cdeath
where location ilike '% States'
group by yr,location
--order by yr,location
;
-- sum of deaths in United Sates for year 2022 on monthly basis
select extract(year from date) as yr, extract(month from date) as mon, location,
sum(total_deaths) as monthly_death, sum(total_cases) as monthly_cases,
(sum(total_deaths)/sum(total_cases)*100::float) as percent_death
from cdeath
where location ilike '% States' and extract(year from date)='2022'
group by yr, mon, location
--order by yr,location
;
-- sum of deaths in United Sates on monthly basis
select extract(year from date) as yr, extract(month from date) as mon, location,
sum(total_deaths) as monthly_death, sum(total_cases) as monthly_cases,
(sum(total_deaths)/sum(total_cases)*100::float) as percent_death
from cdeath
where location ilike '% States'
group by yr, mon, location
--order by yr,location
;
--infection rate per location
select location, population, total_cases,
(total_cases/population)*100::float as inf_rate
from cdeath;
-----infection rate aggregated per country
select location, avg(population) as pop, avg(total_cases) as avg_cases,
--(total_cases/population)*100::float as inf_rate
avg(total_cases/population)*100::float as inf_rate
from cdeath
group by location
order by location
;
-----death rate aggregated per country
select location,avg(total_cases) as tot_cases,avg(total_deaths),
avg(total_deaths)/avg(total_cases)*100::float as death_rate
from cdeath
where location ilike '%States'
group by location
--order by location
;
--highest infection rate location vs pop
select location, population, max(total_cases) as max_case,
max(total_cases/population)*100::float as inf_rate
from cdeath
--where location ilike '%states'
group by location, population
--order by inf_rate desc
;
--highest death rate location
select location, population, max(total_deaths) as max_death
--max(total_deaths/total_cases)*100::float as death_rate
from cdeath
where continent is not null or total_deaths is not null
group by location, population
order by location
--max_death desc
;
--stat by continent
--1. total death by contineny
select location, max(total_cases) as max_inf
--avg(total_cases/population)*100 as inf_rate
from cdeath
where continent is null and location in ('Low income','High income')
group by location
order by location
;
-- 2. death rate by continent
select location, max(total_cases) as max_inf,
avg(total_cases/population)*100 as inf_rate
from cdeath
where continent is null
group by location
order by location
;
-- 2. death rate by continent broken down by date
select location, extract(year from date) as yr, max(total_cases) as max_inf,
avg(total_cases/population)*100 as inf_rate
from cdeath
where continent is null
group by location, yr
order by location
;
--Global infection rate
select location, avg(population) as total_pop,max(total_cases) as max_inf, avg(total_cases/population)*100 as inf_rate
from cdeath
where continent is null and location='World'
group by location
--order by location
;
--Global infection rate by year
select location, extract(year from date) as yr,
max(total_cases) as max_inf, avg(total_cases/population)*100 as inf_rate
from cdeath
where continent is null and location='World'
group by location,yr
order by yr
;
--global total cases
select location, sum(new_cases) as total_new_cases,
avg(population) as total_pop
from cdeath
where continent is null and location='World'
group by location
--order by location
;
--join the two tables
--select * from cvax;
select *
from cdeath
join cvax
on cdeath.location=cvax.location
and cdeath.date=cvax.date
;
--looking at population vs vaxs
select cdeath.continent,cdeath.location, cdeath.date, cdeath.population,
total_vaccinations, new_vaccinations,
sum(cvax.new_vaccinations) over (partition by cdeath.location order by cdeath.location,cdeath.date) as cummulative_vax,
cummulative_vax/cdeath.population as vax_rate
from cdeath
join cvax
on cdeath.location=cvax.location
and cdeath.date=cvax.date
--where cdeath.location='Albenia'
;
---creating a new table from results???
create table table1
--with new_tab(continent,location, date, population,total_vaccinations, new_vaccinations,cummulative_vax)
as(
select cdeath.continent,cdeath.location, cdeath.date, cdeath.population,
total_vaccinations, new_vaccinations,
sum(cvax.new_vaccinations) over (partition by cdeath.location order by cdeath.location,cdeath.date) as cummulative_vax
from cdeath
join cvax
on cdeath.location=cvax.location
and cdeath.date=cvax.date
)
;
select *, cummulative_vax/population as vax_rate
from table1
--new_tab
;
select * from table1;
--creating view to store data for later visulizations
create view percentpopulationvaccinated as
(select cdeath.continent,cdeath.location, cdeath.date, cdeath.population,
total_vaccinations, new_vaccinations,
sum(cvax.new_vaccinations) over (partition by cdeath.location order by cdeath.location,cdeath.date) as cummulative_vax
from cdeath
join cvax
on cdeath.location=cvax.location
and cdeath.date=cvax.date
)
;
--Global infection rate by year
create view GlobalInfectionRatebyYear as
(select location, extract(year from date) as yr,
max(total_cases) as max_inf, avg(total_cases/population)*100 as inf_rate
from cdeath
where continent is null and location='World'
group by location,yr
order by yr)
;
--infection rate by continent broken down by year
create view continental_infection_rate_per_year as
(
select location, extract(year from date) as yr, max(total_cases) as max_inf,
avg(total_cases/population)*100 as inf_rate
from cdeath
where continent is null and location in ('Africa','Asia','Europe','North America','Oceania','South America')
group by location, yr
order by location
)
;
-----death rate aggregated per country
create view deathratepercountry
as
(select location,avg(total_cases) as tot_cases,avg(total_deaths),
avg(total_deaths)/avg(total_cases)*100::float as death_rate
from cdeath
where continent is not null --and location not in ('Africa','Asia','Europe','North America','Oceania','South America')
--where location ilike '%States'
group by location
--order by location
)
;
--total death count per continent
create view totaldeathcount as
select location, sum(new_deaths)as totaldeathcount
from cdeath
where continent is null and location in ('Africa','Asia','Europe','North America','Oceania','South America')
group by location
order by totaldeathcount desc
;