-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrun_analysis.R
66 lines (60 loc) · 3.26 KB
/
run_analysis.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# run_analysis.R ----
# Builds a tidy summary of the "UCI HAR Dataset" folder found in the working
# directory:
#   1. combines the training and test splits into one data set,
#   2. keeps only the mean() and std() measurements,
#   3. attaches the descriptive activity labels,
#   4. gives the measurement columns descriptive names,
#   5. averages every measurement per subject and activity and writes the
#      result to "tidy.txt".

# Read one split of the data set and bind its three files column-wise.
#
# split:    name of the split sub-folder, "train" or "test".
# data_dir: folder that contains the unpacked data set.
#
# Returns a data frame with the columns ID (row number within the split, kept
# as a double), subject_id, activity_id, followed by the measurement columns
# under read.table()'s default names (V1, V2, ...).
#
# Stops if the three files do not have the same number of rows: the rows are
# matched purely by position, so a mismatch would misalign the observations.
read_har_split <- function(split, data_dir = "UCI HAR Dataset") {
  split_file <- function(prefix) {
    file.path(data_dir, split, paste0(prefix, "_", split, ".txt"))
  }

  subject <- read.table(split_file("subject"), col.names = "subject_id")
  activity <- read.table(split_file("y"), col.names = "activity_id")
  measurements <- read.table(split_file("X"))

  stopifnot(
    nrow(subject) == nrow(measurements),
    nrow(activity) == nrow(measurements)
  )

  data.frame(
    ID = as.numeric(seq_len(nrow(measurements))),
    subject,
    activity,
    measurements
  )
}

# Step 1: merge the training and the test sets to create one data set. ----
train <- read_har_split("train")
test <- read_har_split("test")
data1 <- rbind(train, test)

# Step 2: keep only the mean and standard deviation measurements. ----
features <- read.table(
  "UCI HAR Dataset/features.txt",
  col.names = c("feature_id", "feature_label")
)
# Only names containing the literal "mean()" or "std()" are selected.
is_mean_or_std <- grepl("mean\\(\\)|std\\(\\)", features$feature_label)
selected_features <- features[is_mean_or_std, ]
# Feature n lives in column "Vn" of the measurement table.
measurement_columns <- paste0("V", selected_features$feature_id)
data2 <- data1[, c("ID", "subject_id", "activity_id", measurement_columns)]

# Step 3: use descriptive activity names to name the activities. ----
activity_labels <- read.table(
  "UCI HAR Dataset/activity_labels.txt",
  col.names = c("activity_id", "activity_label")
)
# Joined on the shared activity_id column.
data3 <- merge(data2, activity_labels)

# Step 4: label the data set with descriptive variable names. ----
# "tBodyAcc-mean()-X" becomes "tBodyAcc.mean.X".
selected_features$feature_label <- gsub(
  "\\(\\)", "", selected_features$feature_label
)
selected_features$feature_label <- gsub(
  "-", ".", selected_features$feature_label
)
# Rename by column name rather than by position, so the result does not depend
# on where merge() placed the key column.
names(data3)[match(measurement_columns, names(data3))] <-
  selected_features$feature_label
data4 <- data3

# Step 5: average each variable for each activity and each subject. ----
# ID and activity_label are not numeric measurements, so they are removed
# before averaging; the label is re-attached afterwards.
drops <- c("ID", "activity_label")
data5 <- data4[, !(names(data4) %in% drops)]
aggdata <- aggregate(
  data5,
  by = list(subject = data5$subject_id, activity = data5$activity_id),
  FUN = mean,
  na.rm = TRUE
)
# subject_id and activity_id were averaged along with the measurements (they
# are constant within a group), so the extra grouping columns are redundant.
drops <- c("subject", "activity")
aggdata <- aggdata[, !(names(aggdata) %in% drops)]
aggdata <- merge(aggdata, activity_labels)
# NOTE: despite the .txt extension the file is written in CSV format,
# including the row-name column.
write.csv(aggdata, file = "tidy.txt")