forked from ReinZhang233/WFH
-
Notifications
You must be signed in to change notification settings - Fork 0
/
05-results.Rmd
614 lines (505 loc) · 30.7 KB
/
05-results.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
# Results
## Part I: WFH Employees before and after COVID-19
```{r libs and df, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
library(tidyr)
library(readxl)
library(readr)
library(stringr)
library(ggplot2)
library(ggmosaic)
library(zoo)
library(RColorBrewer)
before <- read_csv("./data/before.csv")
after <- read_csv("./data/after.csv")
```
```{r,warning=F,message=F,echo=FALSE}
# using stacked bar plot to show the numbers of WFH people in each sector
before <- before[,-c(3,4)]
num_bef <- before%>%gather("Sector","Population",-c(Year,WFH_only))
wfh <- num_bef%>%filter(WFH_only==T)%>%select(Year,Sector,Population)
wfh$Year<- lubridate::ymd(wfh$Year, truncated = 2L)
ggplot(data = wfh,aes(x = Year,y = Population,fill = Sector))+
geom_bar(stat = "identity")+
scale_fill_brewer(palette="Set4")+
ggtitle("Time Series Analysis on Telework Population for different Sectors")
```
Overall, the general population of telework employees tend to increase over the last 10 years. Among all the sectors being investigated, `Service` and `Management,Business and Financial` Sector seemed contribute to the increase in work-from-home employees' population the most.
```{r,warning=F,message=F,echo=FALSE}
#create line plots representing the change in WFH employees over total employed percentages before COVID
#calculate percentage table
totals <- seq(1,21,2)
wfh <- totals+1
before_perc <- before[wfh,]/before[totals,]
before_perc <- before_perc[,-ncol(before)]
before_perc$Year <- before$Year[totals]
#calculate percentage change
prev <- seq(1:10)
later <- prev+1
perc_change <- (before_perc[later,]-before_perc[prev,])/before_perc[prev,]
row.names(perc_change) <- before$Year[seq(3,22,2)]
perc_change <- perc_change[,-1]
new_perc_change <- gather(perc_change, columnNames, change)
colnames(new_perc_change)[1]<-"Sector"
Year <- rep(seq(2010,2019,1),8)
new_perc_change <- data.frame(new_perc_change,Year = lubridate::ymd(Year, truncated = 2L))
#construct plot
ggplot(new_perc_change,aes(x = Year,y = change))+
geom_point(size=1, color="#69b3a2") +
geom_line(color="#69b3a2")+
geom_hline(yintercept=0,alpha = 0.3,color = "#69b3a2")+
ggtitle('Time Series Analysis on the Growth Rate of \nWFH population in each sector before COVID \nper Year') +
xlab("Year") +
ylab("Growth Rate") +
geom_area(fill = "#69b3a2",alpha = 0.4)+
facet_wrap(~Sector)+
theme(plot.title = element_text(size = 20, face = "bold"))
```
The plot above shows the fluctuation of growth rate with WFH population within selected Sectors over years. We can see that some Sectors, such as `Construction`, `Production`, `Office and Administration`, `Services` and `Transportation` seem to have high volatility in the changes of WFH population percentages, with fluctuation ranging between -0.5% to 1%. `Management, Business and Financial` and `Professional` Sectors seem to have less fluctuation in the change of rate.
```{r,warning=F,message=F,echo=FALSE}
#change of WFH workers proportion over years
num_bef <- before%>%gather("Sector","Population",-c(Year,WFH_only))%>%group_by(Year,WFH_only)%>%summarise(Population = sum(Population))
totalemp_bef<-num_bef%>%filter(WFH_only==F)%>%select(Year,totalPop = Population)
wfh_bef<-num_bef%>%filter(WFH_only==T)%>%select(wfhPop = Population)
rate_bef <- data.frame(totalemp_bef$Year,perc = wfh_bef$wfhPop/totalemp_bef$totalPop)
num_aft <- after%>%gather("Sector","Population",-c(Year,WFH_only))%>%group_by(Year,WFH_only)%>%summarise(Population = sum(Population))
totalemp_aft<-num_aft%>%filter(WFH_only==F)%>%select(Time = Year,totalPop = Population)
wfh_aft<-num_aft%>%filter(WFH_only==T)%>%select(wfhPop = Population)
wfh_aft$Original <- totalemp_aft$totalPop-wfh_aft$wfhPop
wfh_aft<-wfh_aft%>%gather("Status","Population",-Year)
#stack bar plot for increase of wfh population due to covid
ggplot(data = wfh_aft,aes(x = Year,y = Population,fill = Status))+
geom_bar(stat = "identity")+
theme(axis.text.x=element_text(angle = 45)) +
ggtitle("Time Series Analysis on \nthe Increase of Telework population due to COVID")
```
In general, we can see that the increase of telework population due to COVID has a greater portion among the number of all employed workers starting from May 2020 and its proportion started to decrease as time pass by. Even though we could not get the data before May 2020, this trend may suggest that at the beginning of 2020, when COVID had just begun, the ratio of COVID-lead WFH population over the total employed population would be even higher. Another trend we can see from the graph is that the total employed workers' number is gradually increasing.
## Part II: Productivity
### Labor Productivity
```{r,echo=FALSE}
library(dplyr)
# Import productivity data
business_pro <- read.csv("./data/business_productivity.csv", header=T,stringsAsFactors = F)
durable_goods_pro <- read.csv("./data/durable_goods_productivity.csv", header=T,stringsAsFactors = F)
non_durable_goods_pro <- read.csv("./data/non_durable_goods_productivity.csv", header=T,stringsAsFactors = F)
manufacturing_pro <- read.csv("./data/manufacturing_productivity.csv", header=T,stringsAsFactors = F)
nonfarm_business_pro <- read.csv("./data/nonfarm_business_productivity.csv", header=T,stringsAsFactors = F)
non_financial_corporations_pro <- read.csv("./data/non_financial_corporations_productivity.csv", header=T,stringsAsFactors = F)
```
```{r,echo=FALSE}
# Transform data and combine their rows into one productivity data frame
business_pro$Sector <- c("Business")
business_pro <- business_pro[,4:6]
nonfarm_business_pro$Sector <- c("Nonfarm Business")
nonfarm_business_pro <- nonfarm_business_pro[,4:6]
durable_goods_pro$Sector <- c("Durable Goods")
durable_goods_pro <- durable_goods_pro[,4:6]
non_durable_goods_pro$Sector <- c("Non Durable Goods")
non_durable_goods_pro <- non_durable_goods_pro[,4:6]
manufacturing_pro$Sector <- c("Manufacturing")
manufacturing_pro <- manufacturing_pro[,4:6]
non_financial_corporations_pro$Sector <- c("Non Financial Corporations")
non_financial_corporations_pro <- non_financial_corporations_pro[,4:6]
productivity <- rbind(business_pro,nonfarm_business_pro,durable_goods_pro,non_durable_goods_pro,manufacturing_pro,non_financial_corporations_pro)
```
```{r,echo=FALSE}
# Create Time series plot of productivity from 2018 to 2020
plot_df <- productivity
plot_df$Sector <- substr(plot_df$Sector,1,8)
ggplot(plot_df,aes(x = Label, y = Value,group = as.factor(Sector)))+
geom_line()+
facet_grid(as.factor(Sector)~.) +
theme(axis.text.x=element_text(angle = 45)) +
ggtitle("Labor productivity from 2018 to 2020") +
xlab("Time") + ylab("Output (per hour)") +
theme(
strip.text.x = element_text(
size = 8, color = "blue"),
strip.text.y = element_text(
size = 8, color = "blue"),
strip.background = element_rect(
color="lightblue", fill="lightblue", size=3, linetype="solid"
))
```
From the time series subplots, we can see that the output has a abrupt change at 2020 first and second quarter. `Manufacturing`, `Durable Goods` and `Non-Durable Goods` sectors have a sudden decrease and other sectors had a sudden increase. The increasing number of Work From Home employees might be the reason that causes this change.
### Work hours
```{r,echo=FALSE}
# Import weekly average work hours data
business_wh <- read.csv("./data/Index_business_avghours.csv", header=T,stringsAsFactors = F)
durable_goods_wh <- read.csv("./data/Index_durable_goods_avghours.csv", header=T,stringsAsFactors = F)
non_durable_goods_wh <- read.csv("./data/Index_non_durable_goods_avghours.csv", header=T,stringsAsFactors = F)
manufacturing_wh <- read.csv("./data/Index_manufacturing_avghours.csv", header=T,stringsAsFactors = F)
nonfarm_business_wh <- read.csv("./data/Index_nonfarm_business_avghours.csv", header=T,stringsAsFactors = F)
non_financial_corporations_wh <- read.csv("./data/Index_nonfinancial_avghours.csv", header=T,stringsAsFactors = F)
```
```{r,echo=FALSE}
# Transform data and combine their rows into one work_hours data frame
business_wh$Sector <- c("Business")
business_wh <- business_wh[,4:6]
nonfarm_business_wh$Sector <- c("Nonfarm Business")
nonfarm_business_wh <- nonfarm_business_wh[,4:6]
durable_goods_wh$Sector <- c("Durable Goods")
durable_goods_wh <- durable_goods_wh[,4:6]
non_durable_goods_wh$Sector <- c("Non Durable Goods")
non_durable_goods_wh <- non_durable_goods_wh[,4:6]
manufacturing_wh$Sector <- c("Manufacturing")
manufacturing_wh <- manufacturing_wh[,4:6]
non_financial_corporations_wh$Sector <- c("Non Financial Corporations")
non_financial_corporations_wh <- non_financial_corporations_wh[,4:6]
work_hours <- rbind(business_wh,nonfarm_business_wh,durable_goods_wh,non_durable_goods_wh,manufacturing_wh,non_financial_corporations_wh)
```
```{r,echo=FALSE}
# Create Time series plot of weekly average work hours from 2018 to 2020
ggplot(data = work_hours, aes(x = Label, y = Value,group = as.factor(Sector), color = Sector)) +
geom_line() +
theme(axis.text.x=element_text(angle = 45)) +
ggtitle("Weekly Average Work Hours from 2018 to 2020") +
xlab("Time") + ylab("Hours Worked")
```
Then we check work hours from 2018-2020. Work hours have apparent decrease for all sectors in the first quarter of 2020. This shows an opposite trend with productivity.
### Unit labor cost
```{r,echo=FALSE}
# Import labor costs data
business_cost <- read.csv("./data/business_cost.csv", header=T,stringsAsFactors = F)
durable_goods_cost <- read.csv("./data/durable_goods_cost.csv", header=T,stringsAsFactors = F)
non_durable_goods_cost <- read.csv("./data/non_durable_goods_cost.csv", header=T,stringsAsFactors = F)
manufacturing_cost <- read.csv("./data/manufacturing_cost.csv", header=T,stringsAsFactors = F)
nonfarm_business_cost <- read.csv("./data/nonfarm_business_cost.csv", header=T,stringsAsFactors = F)
non_financial_corporations_cost <- read.csv("./data/nonfinancial_cost.csv", header=T,stringsAsFactors = F)
```
```{r,echo=FALSE}
# Transform data and combine their rows into one costs data frame
business_cost$Sector <- c("Business")
business_cost <- business_cost[,4:6]
nonfarm_business_cost$Sector <- c("Nonfarm Business")
nonfarm_business_cost <- nonfarm_business_cost[,4:6]
durable_goods_cost$Sector <- c("Durable Goods")
durable_goods_cost <- durable_goods_cost[,4:6]
non_durable_goods_cost$Sector <- c("Non Durable Goods")
non_durable_goods_cost <- non_durable_goods_cost[,4:6]
manufacturing_cost$Sector <- c("Manufacturing")
manufacturing_cost <- manufacturing_cost[,4:6]
non_financial_corporations_cost$Sector <- c("Non Financial Corporations")
non_financial_corporations_cost <- non_financial_corporations_cost[,4:6]
costs <- rbind(business_cost,nonfarm_business_cost,durable_goods_cost,non_durable_goods_cost,manufacturing_cost,non_financial_corporations_cost)
```
```{r,echo=FALSE}
# Create Time series plot of unit labor costs from 2018 to 2020
ggplot(data = costs, aes(x = Label, y = Value,group = as.factor(Sector), color = Sector)) +
geom_line() +
theme(axis.text.x=element_text(angle = 45)) +
ggtitle("Labor Costs from 2018 to 2020") +
xlab("Time") + ylab("Unit Labor Cost")
```
We check unit labor cost from 2018-2020 and find the costs increase a lot in 2020 first quarter. Since three time series plots all show that there is a sudden change at 2020 first quarter, the pandemic could be the cause.
### Relationship between Producitivity and Work From Home employees
```{r,echo=FALSE}
WFH_business <- read.csv("./data/WFH_business.csv", header=T,stringsAsFactors = F)
WFH_manufacturing <- read.csv("./data/WFH_manufacturing.csv", header=T,stringsAsFactors = F)
WFH_durable_goods <- read.csv("./data/WFH_durable_goods.csv", header=T,stringsAsFactors = F)
WFH_non_durable_goods <- read.csv("./data/WFH_non_durable_goods.csv", header=T,stringsAsFactors = F)
WFH_non_financial <- read.csv("./data/WFH_non_financial.csv", header=T,stringsAsFactors = F)
WFH_nonfarm <- read.csv("./data/WFH_nonfarm_business.csv", header=T,stringsAsFactors = F)
```
```{r,echo=FALSE}
# Calculate work from home percentage by month.
WFH_business$WFM_pct <- WFH_business$WFM/WFH_business$Total
WFH_manufacturing$WFM_pct <- WFH_manufacturing$WFM/WFH_manufacturing$Total
WFH_durable_goods$WFM_pct <- WFH_durable_goods$WFM/WFH_durable_goods$Total
WFH_non_durable_goods$WFM_pct <- WFH_non_durable_goods$WFM/WFH_non_durable_goods$Total
WFH_non_financial$WFM_pct <- WFH_non_financial$WFM/WFH_non_financial$Total
WFH_nonfarm$WFM_pct <- WFH_nonfarm$WFM/WFH_nonfarm$Total
WFH_employee <- rbind(WFH_business,WFH_durable_goods,WFH_manufacturing,WFH_non_durable_goods,WFH_non_financial,WFH_nonfarm)
WFH_employee$Time <- as.Date(paste(WFH_employee$Time,"01"),"%Y %b %d")
WFH_employee$Time <- substr(WFH_employee$Time,6,7)
```
```{r,echo=FALSE}
# Time series plot of WFH employees percentage by month
ggplot(data = WFH_employee, aes(x = Time, y = WFM_pct,group = as.factor(Sector), color = Sector)) +
geom_line() +
ggtitle("WFH employees in 2020") +
xlab("Month") + ylab("Percent of WFH Employees")
```
The graph represents the percentage of Work From Home Employees change due to the pandemic. There is a decreasing trend from May to October and lines slightly back up in November and December.
```{r,echo=FALSE}
# Compute data to quarterly based for WFH employees
WFH_2020 <- WFH_employee %>% group_by(Sector,Label) %>%
summarize(mean_WFM = mean(WFM, na.rm = TRUE),mean_total = mean(Total, na.rm = TRUE), .groups = 'drop')
WFH_2020 <- WFH_2020[-15, ]
WFH_2020$WFM_qt_pct <- WFH_2020$mean_WFM/WFH_2020$mean_total
WFH_2020 <- WFH_2020[,c("Sector","Label","WFM_qt_pct")]
# Select 2020 data from producitivity
qts <- WFH_business$Label
prod_2020 <- productivity %>% filter(Label=="2020 Qtr2" | Label=="2020 Qtr3" | Label=="2020 Qtr4")
prod_2020 <- prod_2020[order(prod_2020$Sector, prod_2020$Label),]
# Column bind two dfs together
WFH_prod_df <- cbind(WFH_2020,prod_2020)
WFH_prod_df <- WFH_prod_df[,c("Sector","Label","WFM_qt_pct","Value")]
```
```{r,echo=FALSE}
# Check scatterplot for the relationship between productivity and WFH employees
ggplot(data = WFH_prod_df, aes(x = WFM_qt_pct, y = Value)) +
geom_point()+
geom_smooth(method=lm, se=FALSE) +
ggtitle("Scatterplot of Productivity and WFH employees") +
xlab("Percent of WFH Employees") + ylab("Productivity")
```
From the previous sections, we find that Work hours decrease and production increase in first and sector quarter of 2020. This means that the efficiency has increased, which might be caused by teleworking. Hence, we make a scatter-plot to illustrate the relationship between Productivity and WFH employees. The scatter-plot indicates that there is a positive correlation between two variables. As the percentage of WFH employees increase, the Productivity tends to increase.
## Part III: Employment and earnings
```{r,echo=FALSE}
# select data from 2011 to 2021
data_processing <- function(path) {
df_raw <- read_excel(path, range = "A13:M24")
df <- data.frame(matrix(ncol = 4, nrow = 0))
colnames(df) <- c('Year', 'Month', 'Label', 'Value')
k=1
for (i in 1:11){
for (j in 2:13){
df[k, 'Value'] = df_raw[i,j]
df[k, 'Year'] = df_raw[i,1]
df[k, 'Month'] = names(df_raw)[j]
df[k, 'Label'] = paste(df_raw[i,1], names(df_raw)[j])
k <- k+1
}
}
df <- df[1:123,]
df$Label <- as.Date(paste(df$Label,"01"),"%Y %b %d")
return(df)
}
```
### Employee numbers
```{r,echo=FALSE}
# Import and transform employees data
EmployeesTotalPrivate <- data_processing("data/EmployeesTotalPrivate.xlsx")
EmployeesGoodsProducing <- data_processing("data/EmployeesGoodsProducing.xlsx")
EmployeesGPMiningLogging <- data_processing("data/EmployeesGPMiningLogging.xlsx")
EmployeesGPConstruction <- data_processing("data/EmployeesGPConstruction.xlsx")
EmployeesGPManufacturing <- data_processing("data/EmployeesGPManufacturing.xlsx")
EmployeesGPMDurableGoods <- data_processing("data/EmployeesGPMDurableGoods.xlsx")
EmployeesGPMNondurableGoods <- data_processing("data/EmployeesGPMNondurableGoods.xlsx")
EmployeesServiceProviding <- data_processing("data/EmployeesServiceProviding.xlsx")
EmployeesSPEducationHealthServices <- data_processing("data/EmployeesSPEducationHealthServices.xlsx")
EmployeesSPFinancialActivities <- data_processing("data/EmployeesSPFinancialActivities.xlsx")
EmployeesSPInformation <- data_processing("data/EmployeesSPInformation.xlsx")
EmployeesSPLeisureHospitality <- data_processing("data/EmployeesSPLeisureHospitality.xlsx")
EmployeesSPOtherServices <- data_processing("data/EmployeesSPOtherServices.xlsx")
EmployeesSPProfessionalBusinessService <- data_processing("data/EmployeesSPProfessionalBusinessService.xlsx")
EmployeesSPTradeTransportationUtilities <- data_processing("data/EmployeesSPTradeTransportationUtilities.xlsx")
EmployeesSPTTURetail <- data_processing("data/EmployeesSPTTURetail.xlsx")
EmployeesSPTTUTransportationWarehousing <- data_processing("data/EmployeesSPTTUTransportationWarehousing.xlsx")
EmployeesSPTTUUtilities <- data_processing("data/EmployeesSPTTUUtilities.xlsx")
EmployeesSPTTUWholesale <- data_processing("data/EmployeesSPTTUWholesale.xlsx")
```
```{r,echo=FALSE}
# assign sectors
EmployeesTotalPrivate$Industry <- c("Total Private")
EmployeesGoodsProducing$Industry <- c("Goods-producing")
EmployeesGPMiningLogging$Industry <- c("Mining and Logging")
EmployeesGPConstruction$Industry <- c("Construction")
EmployeesGPManufacturing$Industry <- c("Manufacturing")
EmployeesGPMDurableGoods$Industry <- c("Durable Goods")
EmployeesGPMNondurableGoods$Industry <- c("Nondurable Goods")
EmployeesServiceProviding$Industry <- c("Private Service Providing")
EmployeesSPEducationHealthServices$Industry <- c("Education and Health Services")
EmployeesSPFinancialActivities$Industry <- c("Financial Activities")
EmployeesSPInformation$Industry <- c("Information")
EmployeesSPLeisureHospitality$Industry <- c("Leisure and Hospitality")
EmployeesSPOtherServices$Industry <- c("Other Services")
EmployeesSPProfessionalBusinessService$Industry <- c("Professional and Business Service")
EmployeesSPTradeTransportationUtilities$Industry <- c("Trade Transportation and Utilities")
EmployeesSPTTURetail$Industry <- c("Retail Trade")
EmployeesSPTTUTransportationWarehousing$Industry <- c("Transportation and Warehousing")
EmployeesSPTTUUtilities$Industry <- c("Utilities")
EmployeesSPTTUWholesale$Industry <- c("Wholesale Trade")
```
```{r,echo=FALSE}
# combine tables
employees_total <- rbind(EmployeesGoodsProducing, EmployeesServiceProviding)
employees_gp <- rbind(EmployeesGPMiningLogging, EmployeesGPConstruction,
EmployeesGPManufacturing)
employees_gp_manu <- rbind(EmployeesGPMDurableGoods, EmployeesGPMNondurableGoods)
employees_sp <- rbind(EmployeesSPEducationHealthServices, EmployeesSPFinancialActivities, EmployeesSPInformation, EmployeesSPLeisureHospitality, EmployeesSPProfessionalBusinessService, EmployeesSPTradeTransportationUtilities, EmployeesSPOtherServices)
employees_sp_ttu <- rbind(EmployeesSPTTURetail, EmployeesSPTTUTransportationWarehousing, EmployeesSPTTUUtilities, EmployeesSPTTUWholesale)
employees_sectors <- rbind(EmployeesGPMiningLogging,
EmployeesGPConstruction,
EmployeesGPManufacturing,
EmployeesSPEducationHealthServices,
EmployeesSPFinancialActivities,
EmployeesSPInformation,
EmployeesSPLeisureHospitality,
EmployeesSPProfessionalBusinessService,
EmployeesSPTradeTransportationUtilities,
EmployeesSPOtherServices)
```
```{r, echo=FALSE}
ggplot(data = employees_sectors, aes(x = Label, y = Value,
group = as.factor(Industry), color = Industry)) +
geom_line() +
theme(axis.text.x=element_text(angle = 45)) +
ggtitle("Employment from 2011 to 2021 by subsectors") +
xlab("Time") + ylab("Employees (in thousands)")
ggplot(data = employees_sp_ttu, aes(x = Label, y = Value,
group = as.factor(Industry), color = Industry)) +
geom_line() +
theme(axis.text.x=element_text(angle = 45)) +
ggtitle("Employment from 2011 to 2021 in Trade Transportation and Utilities") +
xlab("Time") + ylab("Employees (in thousands)")
```
From the time series plot for different sub-sectors, we can see that for all the industries, there was an abrupt drop in employees number at the beginning of 2020, around the time of March when the pandemic started spreading across the US. The only industry that did not fluctuate much is Utilities. This shows that this industry provides stable jobs.
### Average weekly working hours and overtime hours
```{r,echo=FALSE}
# Import and transform working hour data
WklyHrTotalPrivate <- data_processing("data/WklyHrTotalPrivate.xlsx")
WklyHrGoodsProducing <- data_processing("data/WklyHrGoodsProducing.xlsx")
WklyHrGPMiningLogging <- data_processing("data/WklyHrGPMiningLogging.xlsx")
WklyHrGPConstruction <- data_processing("data/WklyHrGPConstruction.xlsx")
WklyHrGPManufacturing <- data_processing("data/WklyHrGPManufacturing.xlsx")
WklyHrGPMDurableGoods <- data_processing("data/WklyHrGPMDurableGoods.xlsx")
WklyHrGPMNondurableGoods <- data_processing("data/WklyHrGPMNondurableGoods.xlsx")
WklyHrServiceProviding <- data_processing("data/WklyHrServiceProviding.xlsx")
WklyHrSPEducationHealthServices <- data_processing("data/WklyHrSPEducationHealthServices.xlsx")
WklyHrSPFinancialActivities <- data_processing("data/WklyHrSPFinancialActivities.xlsx")
WklyHrSPInformation <- data_processing("data/WklyHrSPInformation.xlsx")
WklyHrSPLeisureHospitality <- data_processing("data/WklyHrSPLeisureHospitality.xlsx")
WklyHrSPOtherServices <- data_processing("data/WklyHrSPOtherServices.xlsx")
WklyHrSPProfessionalBusinessService <- data_processing("data/WklyHrSPProfessionalBusinessServices.xlsx")
WklyHrSPTradeTransportationUtilities <- data_processing("data/WklyHrSPTradeTransportationUtilities.xlsx")
WklyHrSPTTURetail <- data_processing("data/WklyHrSPTTURetail.xlsx")
WklyHrSPTTUTransportationWarehousing <- data_processing("data/WklyHrSPTTUTransportationWarehousing.xlsx")
WklyHrSPTTUUtilities <- data_processing("data/WklyHrSPTTUUtilities.xlsx")
WklyHrSPTTUWholesale <- data_processing("data/WklyHrSPTTUWholesale.xlsx")
```
```{r,echo=FALSE}
# assign sectors
WklyHrTotalPrivate$Industry <- c("Total Private")
WklyHrGoodsProducing$Industry <- c("Goods-producing")
WklyHrGPMiningLogging$Industry <- c("Mining and Logging")
WklyHrGPConstruction$Industry <- c("Construction")
WklyHrGPManufacturing$Industry <- c("Manufacturing")
WklyHrGPMDurableGoods$Industry <- c("Durable Goods")
WklyHrGPMNondurableGoods$Industry <- c("Nondurable Goods")
WklyHrServiceProviding$Industry <- c("Private Service Providing")
WklyHrSPEducationHealthServices$Industry <- c("Education and Health Services")
WklyHrSPFinancialActivities$Industry <- c("Financial Activities")
WklyHrSPInformation$Industry <- c("Information")
WklyHrSPLeisureHospitality$Industry <- c("Leisure and Hospitality")
WklyHrSPOtherServices$Industry <- c("Other Services")
WklyHrSPProfessionalBusinessService$Industry <- c("Professional and Business Service")
WklyHrSPTradeTransportationUtilities$Industry <- c("Trade Transportation and Utilities")
WklyHrSPTTURetail$Industry <- c("Retail Trade")
WklyHrSPTTUTransportationWarehousing$Industry <- c("Transportation and Warehousing")
WklyHrSPTTUUtilities$Industry <- c("Utilities")
WklyHrSPTTUWholesale$Industry <- c("Wholesale Trade")
```
```{r,echo=FALSE}
# combine tables
WklyHr_total <- rbind(WklyHrGoodsProducing, WklyHrServiceProviding)
WklyHr_gp <- rbind(WklyHrGPMiningLogging, WklyHrGPConstruction,
WklyHrGPManufacturing)
WklyHr_gp_manu <- rbind(WklyHrGPMDurableGoods, WklyHrGPMNondurableGoods)
WklyHr_sp <- rbind(WklyHrSPEducationHealthServices, WklyHrSPFinancialActivities, WklyHrSPInformation, WklyHrSPLeisureHospitality, WklyHrSPOtherServices, WklyHrSPProfessionalBusinessService, WklyHrSPTradeTransportationUtilities)
WklyHr_sp_ttu <- rbind(WklyHrSPTTURetail, WklyHrSPTTUTransportationWarehousing, WklyHrSPTTUUtilities, WklyHrSPTTUWholesale)
WklyHr_sectors <- rbind(WklyHr_gp, WklyHr_sp)
```
```{r, echo=FALSE}
ggplot(data = WklyHrTotalPrivate, aes(x = Label, y = Value,)) +
geom_line() +
theme(axis.text.x=element_text(angle = 45)) +
ggtitle("Average weekly working hours from 2011 to 2021") +
xlab("Year") + ylab("Average hours worked per week")
ggplot(data = WklyHr_sectors, aes(x = Label, y = Value,
group = as.factor(Industry), color = Industry)) +
geom_line() +
theme(axis.text.x=element_text(angle = 45)) +
ggtitle("Avg Weekly working hours from 2011 to 2021 in subsectors") +
xlab("Year") + ylab("Average hours worked per week")
ggplot(data = WklyHr_sp_ttu, aes(x = Label, y = Value,
group = as.factor(Industry), color = Industry)) +
geom_line() +
theme(axis.text.x=element_text(angle = 45)) +
ggtitle("Avg Weekly working hours from 2011 to 2021 in Trade Transportation and Utilities industry") +
xlab("Year") + ylab("Average hours worked per week")
```
Looking at the first graph, we can see that in general there's a decrease in weekly working hours at the beginning of pandemic, then the hours started to increase to a new high point. Then by checking the the subsectors, it's clear that the weekly hours for industries in goods-producing sector dropped while that for industries in private service providing sector increased since the pandemic started. When looking at the hours worked for Utilities, we can see a gradual increase. This may be the result of more people working from home causing higher needs for maintenance.
### Average hourly earnings
```{r,echo=FALSE}
# Import and transform hourly earnings data
HrlyEarningsTotalPrivate <- data_processing("data/HrlyEarningsTotalPrivate.xlsx")
HrlyEarningsGoodsProducing <- data_processing("data/HrlyEarningsGoodsProducing.xlsx")
HrlyEarningsGPMiningLogging <- data_processing("data/HrlyEarningsGPMiningLogging.xlsx")
HrlyEarningsGPConstruction <- data_processing("data/HrlyEarningsGPConstruction.xlsx")
HrlyEarningsGPManufacturing <- data_processing("data/HrlyEarningsGPManufacturing.xlsx")
HrlyEarningsGPMDurableGoods <- data_processing("data/HrlyEarningsGPMDurableGoods.xlsx")
HrlyEarningsGPMNondurableGoods <- data_processing("data/HrlyEarningsGPMNondurableGoods.xlsx")
HrlyEarningsServiceProviding <- data_processing("data/HrlyEarningsServiceProviding.xlsx")
HrlyEarningsSPEducationHealthServices <- data_processing("data/HrlyEarningsSPEducationHealthServices.xlsx")
HrlyEarningsSPFinancialActivities <- data_processing("data/HrlyEarningsSPFinancialActivities.xlsx")
HrlyEarningsSPInformation <- data_processing("data/HrlyEarningsSPInformation.xlsx")
HrlyEarningsSPLeisureHospitality <- data_processing("data/HrlyEarningsSPLeisureHospitality.xlsx")
HrlyEarningsSPOtherServices <- data_processing("data/HrlyEarningsSPOtherServices.xlsx")
HrlyEarningsSPProfessionalBusinessService <- data_processing("data/HrlyEarningsSPProfessionalBusinessServices.xlsx")
HrlyEarningsSPTradeTransportationUtilities <- data_processing("data/HrlyEarningsSPTradeTransportationUtilities.xlsx")
HrlyEarningsSPTTURetail <- data_processing("data/HrlyEarningsSPTTURetail.xlsx")
HrlyEarningsSPTTUTransportationWarehousing <- data_processing("data/HrlyEarningsSPTTUTransportationWarehousing.xlsx")
HrlyEarningsSPTTUUtilities <- data_processing("data/HrlyEarningsSPTTUUtilities.xlsx")
HrlyEarningsSPTTUWholesale <- data_processing("data/HrlyEarningsSPTTUWholesale.xlsx")
```
```{r,echo=FALSE}
# assign sectors
HrlyEarningsTotalPrivate$Industry <- c("Total Private")
HrlyEarningsGoodsProducing$Industry <- c("Goods-producing")
HrlyEarningsGPMiningLogging$Industry <- c("Mining and Logging")
HrlyEarningsGPConstruction$Industry <- c("Construction")
HrlyEarningsGPManufacturing$Industry <- c("Manufacturing")
HrlyEarningsGPMDurableGoods$Industry <- c("Durable Goods")
HrlyEarningsGPMNondurableGoods$Industry <- c("Nondurable Goods")
HrlyEarningsServiceProviding$Industry <- c("Private Service Providing")
HrlyEarningsSPEducationHealthServices$Industry <- c("Education and Health Services")
HrlyEarningsSPFinancialActivities$Industry <- c("Financial Activities")
HrlyEarningsSPInformation$Industry <- c("Information")
HrlyEarningsSPLeisureHospitality$Industry <- c("Leisure and Hospitality")
HrlyEarningsSPOtherServices$Industry <- c("Other Services")
HrlyEarningsSPProfessionalBusinessService$Industry <- c("Professional and Business Service")
HrlyEarningsSPTradeTransportationUtilities$Industry <- c("Trade Transportation and Utilities")
HrlyEarningsSPTTURetail$Industry <- c("Retail Trade")
HrlyEarningsSPTTUTransportationWarehousing$Industry <- c("Transportation and Warehousing")
HrlyEarningsSPTTUUtilities$Industry <- c("Utilities")
HrlyEarningsSPTTUWholesale$Industry <- c("Wholesale Trade")
```
```{r,echo=FALSE}
# combine tables
HrlyEarnings_total <- rbind(HrlyEarningsGoodsProducing,
HrlyEarningsServiceProviding)
HrlyEarnings_gp <- rbind(HrlyEarningsGPMiningLogging,
HrlyEarningsGPConstruction,
HrlyEarningsGPManufacturing)
HrlyEarnings_gp_manu <- rbind(HrlyEarningsGPMDurableGoods,
HrlyEarningsGPMNondurableGoods)
HrlyEarnings_sp <- rbind(HrlyEarningsSPEducationHealthServices,
HrlyEarningsSPFinancialActivities,
HrlyEarningsSPInformation,
HrlyEarningsSPLeisureHospitality,
HrlyEarningsSPOtherServices,
HrlyEarningsSPProfessionalBusinessService,
HrlyEarningsSPTradeTransportationUtilities)
HrlyEarnings_sp_ttu <- rbind(HrlyEarningsSPTTURetail,
HrlyEarningsSPTTUTransportationWarehousing,
HrlyEarningsSPTTUUtilities,
HrlyEarningsSPTTUWholesale)
```
```{r,echo=FALSE}
ggplot(data = HrlyEarnings_gp, aes(x = Label, y = Value,
group = as.factor(Industry), color = Industry)) +
geom_line() +
theme(axis.text.x=element_text(angle = 45)) +
ggtitle("Hourly Earning from 2011 to 2021 in Goods-producing sector") +
xlab("Time") + ylab("Earnings per hour ($)")
ggplot(data = HrlyEarnings_sp, aes(x = Label, y = Value,
group = as.factor(Industry), color = Industry)) +
geom_line() +
theme(axis.text.x=element_text(angle = 45)) +
ggtitle("Hourly Earning from 2011 to 2021 in Private Service Providing sector") +
xlab("Time") + ylab("Earnings per hour ($)")
```
From these two graphs, it's apparent that the there's an inflation in earnings per hour after the pandemic started, and it's more obvious in the Private Service Providing sector. Recall that the average weekly working hours in goods-producing sector decreased while that in Private Service Providing sector, we can conclude that the employees in the Private Service Providing sector get higher payrolls after Covid-19 started spreading. Recall again from part 1 that the proportion of WFH increased since Covid-19, it's possible that there is a positive relationship between the proportion of WFH and payrolls in the Private Service Providing sector.