This file describes the variables, the data, and any transformations or work that I performed to clean up the data.
First, set the working directory and read in the data files.
# Work from inside the unpacked dataset folder so that the relative paths
# below resolve.
setwd("~/Desktop/JHU/Getting_and_Cleanning_data/UCI HAR Dataset")

# Lookup tables shipped with the dataset.
features <- read.table(file = "features.txt")
activity_labels <- read.table(file = "activity_labels.txt")

# Training split: subject ids, activity codes and measurements.
subject_train <- read.table(file = "train/subject_train.txt")
y_train <- read.table(file = "train/y_train.txt")
x_train <- read.table(file = "train/X_train.txt")

# Test split: same three pieces.
subject_test <- read.table(file = "test/subject_test.txt")
y_test <- read.table(file = "test/y_test.txt")
x_test <- read.table(file = "test/X_test.txt")
Combine the subject identifiers, the activity codes, and the measurements column-wise, separately for the test set and the training set.
# Put subject, activity and measurement columns side by side for each split.
# check.names = FALSE leaves the incoming column names exactly as they are,
# which is what cbind() does when it is handed data frames.
testdata <- data.frame(subject_test, y_test, x_test, check.names = FALSE)
traindata <- data.frame(subject_train, y_train, x_train, check.names = FALSE)
Merge the test data with the training data by stacking their rows.
# Stack the two splits into a single table, test rows first.
data <- do.call(rbind, list(testdata, traindata))
Assign column names to the merged data set.
# Header for the merged table: the two id columns first, then the feature
# names stored in the second column of `features`.
colname <- c("Subject", "Activity", as.character(features[[2]]))
names(data) <- colname
Extract the measurement columns whose names contain "mean" or "std", together with the Subject and Activity columns.
# Positions of the measurement columns to keep: every column whose name
# contains "mean" or "std". This is a plain substring match, so any name that
# merely contains "mean" (for example a "meanFreq" feature) is selected too.
# The index vectors are deliberately not called `mean` / `sd`: variables with
# those names would mask base::mean() and stats::sd() in the workspace.
mean_cols <- grep("mean", colname)
std_cols <- grep("std", colname)

# Columns 1 and 2 are Subject and Activity; keep them in front of the
# selected measurements.
extract <- c(1, 2, mean_cols, std_cols)
subdata <- data[, extract]
Use descriptive activity names to label the activities in the data set.
dim(subdata)
## [1] 10299 81

# Replace the numeric activity codes 1-6 with descriptive labels using one
# vectorised lookup rather than a row-by-row loop (which rewrites the data
# frame once per row and fails outright on an empty table).
# The position of each label in this vector is the code it stands for.
activity_names <- c(
  "WALKING", "WALKING_UPSTAIRS", "WALKING_DOWNSTAIRS",
  "SITTING", "STANDING", "LAYING"
)
activity_codes <- subdata$Activity
label_index <- match(activity_codes, seq_along(activity_names))
has_label <- !is.na(label_index)

# Values that are not one of the six codes (including labels that are already
# in place when this is re-run) are kept unchanged, only coerced to character.
activity_text <- as.character(activity_codes)
activity_text[has_label] <- activity_names[label_index[has_label]]
subdata$Activity <- activity_text
Compute the average of each variable for each activity.
# Mean of every measurement column (everything after Subject and Activity)
# within each activity. The result has one row per activity, named after the
# activity, and one column per measurement. Column positions and the number
# of groups are derived from `subdata` instead of being hard-coded.
measure_idx <- seq_len(ncol(subdata))[-(1:2)]
a <- lapply(subdata[measure_idx], function(values) {
  # base::mean is spelled out so that a workspace variable that happens to be
  # called `mean` can never be picked up in its place.
  group_means <- tapply(values, subdata$Activity, base::mean)
  # Flatten tapply()'s one-dimensional array into a plain named vector.
  setNames(as.vector(group_means), names(group_means))
})

# Bind all per-column results in one step; the group names become row names.
a <- as.data.frame(do.call(cbind, a))
colnames(a) <- names(subdata)[measure_idx]
Compute the average of each variable for each subject.
# Mean of every measurement column (everything after Subject and Activity)
# for each subject. The result has one row per subject, named after the
# subject id, and one column per measurement. Column positions and the number
# of subjects are derived from `subdata` instead of being hard-coded.
measure_idx <- seq_len(ncol(subdata))[-(1:2)]
s <- lapply(subdata[measure_idx], function(values) {
  # base::mean is spelled out so that a workspace variable that happens to be
  # called `mean` can never be picked up in its place.
  group_means <- tapply(values, subdata$Subject, base::mean)
  # Flatten tapply()'s one-dimensional array into a plain named vector.
  setNames(as.vector(group_means), names(group_means))
})

# Bind all per-column results in one step; the subject ids become row names.
s <- as.data.frame(do.call(cbind, s))
colnames(s) <- names(subdata)[measure_idx]

# Final table: the per-activity rows (a) followed by the per-subject rows (s).
activity_subject <- rbind(a, s)
Write out the final data.
# Save the combined table with write.table()'s defaults spelled out: fields
# are separated by a single space and row names are written, so the file is
# space-delimited text despite its .csv extension.
write.table(
  x = activity_subject,
  file = "tidy_mean_for_each_activity_and_subject.csv",
  sep = " ",
  row.names = TRUE
)