forked from rdpeng/RepData_PeerAssessment1
-
Notifications
You must be signed in to change notification settings - Fork 0
/
PA1_template.Rmd
97 lines (60 loc) · 2.29 KB
/
PA1_template.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# Reproducible Research: Peer Assessment 1
## Loading and preprocessing the data
```{r echo=TRUE}
library(knitr)
library(plyr)
library(zoo)
library(lattice)
# Switch every locale category to "C" so that locale-dependent output, such as
# the day names returned by weekdays() further down, is produced in English.
Sys.setlocale(category = "LC_ALL", locale = "C")
# Read activity.csv directly from activity.zip in the working directory.
data <- read.csv(
  unz("activity.zip", "activity.csv")
)
# dataf holds only the rows whose step count was actually recorded.
has_steps <- !is.na(data$steps)
dataf <- data[has_steps, ]
```
## What is mean total number of steps taken per day?
Histogram:
```{r echo=TRUE}
# Sum the steps of every date in dataf; one row per date, total in count.steps.
steps_count_days <- ddply(
  .data = dataf,
  .variables = "date",
  .fun = summarise,
  count.steps = sum(steps)
)
# Histogram of the daily totals.
hist(steps_count_days$count.steps)
```
Means, Medians:
```{r echo=TRUE}
# Mean and median of the total number of steps taken per day, based on the
# daily totals computed for the histogram (steps_count_days$count.steps).
mean(steps_count_days$count.steps)
median(steps_count_days$count.steps)
# Per-date mean and median of the individual interval step counts in dataf.
steps_stats_days <- ddply(
  dataf, "date", summarise,
  mean.steps = mean(steps),
  median.steps = median(steps)
)
steps_stats_days
```
## What is the average daily activity pattern?
Plot:
```{r echo= TRUE}
# Average the step counts of each interval over all dates present in dataf.
steps_mean_intervals <- ddply(
  .data = dataf,
  .variables = "interval",
  .fun = summarise,
  mean.interval = mean(steps)
)
# Line plot of the mean step count against the interval identifier.
plot(
  steps_mean_intervals$mean.interval ~ steps_mean_intervals$interval,
  type = "l"
)
```
Interval with the maximum average number of steps:
```{r echo=TRUE}
# Print the row of steps_mean_intervals with the largest mean.interval.
# which.max() returns the index of the first maximum, so no full sort of the
# data frame is needed and ties resolve to the earliest row, as before.
peak_row <- which.max(steps_mean_intervals$mean.interval)
steps_mean_intervals[peak_row, ]
```
## Imputing missing values
The missing values will be replaced by the average number of steps for the given time interval, as the variation within a day is much higher than the variation between days.
```{r echo=TRUE}
# Work on a copy so the raw data (with its NAs) stays untouched.
data2 <- data
# Replace each missing step count by the mean of the observed step counts
# sharing the same interval value (na.aggregate's default FUN is mean).
data2[["steps"]] <- na.aggregate(
  data2[["steps"]],
  by = data2[["interval"]]
)
```
Histogram with replaced missing values:
```{r echo=TRUE}
# Daily step totals recomputed from data2, i.e. after the NAs were filled in.
steps_count_days2 <- ddply(
  .data = data2,
  .variables = "date",
  .fun = summarise,
  count.steps = sum(steps)
)
# Histogram of the daily totals including imputed values.
hist(steps_count_days2$count.steps)
```
## Are there differences in activity patterns between weekdays and weekends?
```{r echo=TRUE}
# Label every observation as "weekday" or "weekend" and compare the average
# per-interval step profile of the two groups in a two-panel plot.
obs_date <- as.Date(data2$date, format = "%Y-%m-%d")
data2$day <- weekdays(obs_date)
# POSIXlt stores the day of the week as a number (0 = Sunday, 6 = Saturday),
# so the weekend test does not rely on locale-specific day names.
is_weekend <- as.POSIXlt(obs_date)$wday %in% c(0, 6)
# Address the column by name instead of by position.
data2$working_day <- ifelse(is_weekend, "weekend", "weekday")
# Mean steps per interval within each day type, computed in one grouped pass
# instead of splitting, summarising twice and binding the results.
data2_mean <- ddply(
  data2, c("working_day", "interval"), summarise,
  mean.steps = mean(steps)
)
# One panel per day type, stacked vertically.
xyplot(
  mean.steps ~ interval | working_day,
  data = data2_mean,
  type = "l",
  layout = c(1, 2)
)
```