forked from rdpeng/RepData_PeerAssessment1
-
Notifications
You must be signed in to change notification settings - Fork 0
/
PA1_template.Rmd
104 lines (75 loc) · 2.29 KB
/
PA1_template.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# Reproducible Research: Peer Assessment 1
```{r,echo=TRUE}
# Attach the plotting (ggplot2) and calendar (timeDate) packages used below.
# library() stops with an error when a package is not installed, whereas
# require() only warns and returns FALSE, deferring the failure to first use.
library(ggplot2)
library(timeDate)
```
## Loading and preprocessing the data
```{r}
# Read the activity log; later chunks use its steps, date and interval columns.
data <- read.csv("activity.csv")
# Factor copy of the interval identifier, used as a grouping key.
data$intfact <- as.factor(data$interval)

# Second copy of the data with real Date values and a weekday flag.
data3 <- data
parsed_dates <- strptime(data3$date, "%Y-%m-%d")
data3$date <- as.Date(parsed_dates)
# wday = 1:5 marks Monday through Friday as weekdays.
data3$weekdays <- isWeekday(data3$date, wday = 1:5)
```
## What is mean total number of steps taken per day?
### mean:
```{r,echo=TRUE}
# Mean of the recorded step counts for each date. A date with any missing
# step count yields NA because mean() is called without na.rm.
y <- aggregate(data$steps, by = list(data$date), FUN = mean)
y$x
# One bar per date, with the bar height supplied through the weight aesthetic.
# The dates are discrete labels here, so a bar geom is used: a histogram geom
# bins a continuous variable and current ggplot2 rejects a discrete x for it.
qplot(Group.1, data = y, weight = x, geom = "bar",
      ylab = "mean", xlab = "date")
```
### median:
```{r,echo=TRUE}
# Median of the recorded step counts for each date, ignoring missing values.
# Extra arguments to aggregate() are forwarded to FUN, and median() drops NAs
# through na.rm; an na.action argument has no effect on it.
y <- aggregate(data$steps, by = list(data$date), FUN = median, na.rm = TRUE)
y$x
# One bar per date, with the bar height supplied through the weight aesthetic.
# The dates are discrete labels here, so a bar geom is used: a histogram geom
# bins a continuous variable and current ggplot2 rejects a discrete x for it.
qplot(Group.1, data = y, weight = x, geom = "bar",
      ylab = "median", xlab = "date")
```
## What is the average daily activity pattern?
### daily activity:
```{r,echo=TRUE}
# Average number of steps in each interval across all days, using only the
# rows where a step count was recorded.
data2 <- data[!is.na(data$steps), ]
y <- aggregate(data2$steps, by = list(data2$intfact), FUN = mean)
y$x
# Group.1 is a factor, and plotting a numeric response against a factor
# draws box plots rather than a line. Plot against the numeric interval
# labels instead; y itself is left unchanged for later use.
interval_id <- as.numeric(as.character(y$Group.1))
plot(interval_id, y$x, type = "l", ylab = "mean", xlab = "intervals")
```
### interval with maximum number of steps:
```{r,echo=TRUE}
# Report the interval(s) whose average step count equals the overall maximum.
peak_steps <- max(y$x)
y$Group.1[which(y$x == peak_steps)]
```
## Imputing missing values
### number of missing rows:
```{r,echo=TRUE}
# Number of rows whose step count is missing.
sum(is.na(data$steps))
```
### fix data:
Replace every missing `steps` value with the overall mean number of steps per interval (computed with the NAs excluded).
```{r,echo=TRUE}
# Fill each missing step count with the mean of the recorded step counts.
data$steps <- replace(
  data$steps,
  is.na(data$steps),
  mean(data$steps, na.rm = TRUE)
)
```
### calculate mean and median again:
### mean:
```{r,echo=TRUE}
# Mean of the step counts for each date, recomputed from the current contents
# of data.
y <- aggregate(data$steps, by = list(data$date), FUN = mean)
y$x
# One bar per date, with the bar height supplied through the weight aesthetic.
# The dates are discrete labels here, so a bar geom is used: a histogram geom
# bins a continuous variable and current ggplot2 rejects a discrete x for it.
qplot(Group.1, data = y, weight = x, geom = "bar",
      ylab = "mean", xlab = "date")
```
### median:
```{r,echo=TRUE}
# Median of the step counts for each date, recomputed from the current
# contents of data. Extra arguments to aggregate() are forwarded to FUN, and
# median() drops NAs through na.rm; an na.action argument has no effect on it.
y <- aggregate(data$steps, by = list(data$date), FUN = median, na.rm = TRUE)
y$x
# One bar per date, with the bar height supplied through the weight aesthetic.
# The dates are discrete labels here, so a bar geom is used: a histogram geom
# bins a continuous variable and current ggplot2 rejects a discrete x for it.
qplot(Group.1, data = y, weight = x, geom = "bar",
      ylab = "median", xlab = "date")
```
The mean and median values differ slightly from those of the original data set.
## Are there differences in activity patterns between weekdays and weekends?
TRUE = weekday, FALSE = weekend
```{r,echo=TRUE}
# Compare the average per-interval step count on weekdays with weekends.
# Rows without a recorded step count are dropped before averaging.
data3 <- data3[!is.na(data3$steps), ]
on_weekday <- data3$weekdays

# Mean steps per interval, weekday rows only.
y <- aggregate(data3$steps[on_weekday],
               by = list(data3$intfact[on_weekday]), FUN = mean)
names(y) <- c("interval", "steps")
y$weekdays <- TRUE

# Mean steps per interval, weekend rows only.
z <- aggregate(data3$steps[!on_weekday],
               by = list(data3$intfact[!on_weekday]), FUN = mean)
names(z) <- c("interval", "steps")
z$weekdays <- FALSE

# Both summaries have the same columns, so stack them instead of joining.
w <- rbind(y, z)
# The grouping key is a factor; convert it back to its numeric label so the
# x axis is continuous and each panel can be drawn as a line.
w$interval <- as.numeric(as.character(w$interval))
qplot(interval, steps, data = w, geom = "line", facets = weekdays ~ .)
```