# Packages used: data.table, plyr, dplyr, reshape
# Load the feature list plus the train and test measurement tables, then stack
# the two measurement tables into a single data frame.

# fread() comes from data.table; no library() call is visible in the script,
# so attach the package here to let the script run in a fresh session.
library(data.table)

# NOTE(review): this absolute Windows path ties the script to one machine.
# Every relative path below (including the output file) resolves against it.
setwd("D:\\Data specialist\\Getting and Clearing Data\\Week4")

# fread() returns a data.table, so `Feture[i]` later selects rows, not columns.
Feture <- fread("./getdata%2Fprojectfiles%2FUCI HAR Dataset/UCI HAR Dataset/features.txt")
Test <- read.table("./getdata%2Fprojectfiles%2FUCI HAR Dataset/UCI HAR Dataset/test/X_test.txt")
Train <- read.table("./getdata%2Fprojectfiles%2FUCI HAR Dataset/UCI HAR Dataset/train/X_train.txt")

# Row order is train first, then test. Any table stacked separately and
# attached to these rows later has to use the same order.
Merge <- rbind(Train, Test)
# The code below finds the columns to extract and builds the names that will be assigned to those columns.
# Positions of the features whose name contains "mean" or "std"
# (the match is case-sensitive).
FeturesWanted <- grep(".*mean.*|.*std.*", Feture$V2)

# Build the cleaned column names in one pass over the matching entries:
#   1. "-mean" becomes "Mean"
#   2. "-std"  becomes "Std"
#   3. any remaining dashes and parentheses are removed
FeturesWanted.names <- gsub(
  "[-()]", "",
  gsub(
    "-std", "Std",
    gsub("-mean", "Mean", Feture$V2[FeturesWanted])
  )
)
# Extract the required columns from the merged data, then assign the column names.
# Keep only the selected measurement columns and label them with the cleaned
# feature names in a single step.
Data <- setNames(Merge[, FeturesWanted], FeturesWanted.names)
# Read the activity codes for both sets, translate them into descriptive
# labels, and attach them to Data as the Activity column.
# The working directory set at the top of the script is still in effect, so it
# is not set again here.
TestY <- read.table("./getdata%2Fprojectfiles%2FUCI HAR Dataset/UCI HAR Dataset/test/y_test.txt")
TrainY <- read.table("./getdata%2Fprojectfiles%2FUCI HAR Dataset/UCI HAR Dataset/train/y_train.txt")
ActLabel <- read.table("./getdata%2Fprojectfiles%2FUCI HAR Dataset/UCI HAR Dataset/activity_labels.txt")

# Train first, then test: the same order used to build Merge.
MergeY <- rbind(TrainY, TestY)

# Take the code-to-label mapping from activity_labels.txt instead of repeating
# the six labels by hand; base factor() does the recoding, so plyr::mapvalues()
# is no longer needed.
# Assumes V1 holds the numeric code and V2 the label, matching the hard-coded
# mapping this replaces (1 = WALKING ... 6 = LAYING) - TODO confirm.
MergeY$V1 <- factor(
  MergeY$V1,
  levels = ActLabel$V1,
  labels = as.character(ActLabel$V2)
)
Data$Activity <- MergeY$V1
# Read the subject data set, then convert it to a factor so it can be analyzed more clearly.
# Read the subject identifiers for both sets and attach them to Data as a
# factor column.
# The working directory set at the top of the script is still in effect, so it
# is not set again here.
TestZ <- read.table("./getdata%2Fprojectfiles%2FUCI HAR Dataset/UCI HAR Dataset/test/subject_test.txt")
TrainZ <- read.table("./getdata%2Fprojectfiles%2FUCI HAR Dataset/UCI HAR Dataset/train/subject_train.txt")

# Stack train first, then test. The measurement rows (Merge) and the activity
# codes (MergeY) are both built in that order; stacking test first, as this
# code did before, paired every row with the wrong subject.
MergeZ <- rbind(TrainZ, TestZ)
MergeZ$V1 <- as.factor(MergeZ$V1)
Data$Subject <- MergeZ$V1
# This code builds the tidy data set showing the mean of each variable for each activity and each subject.
# Build the tidy summary: one row per Subject/Activity pair holding the mean of
# every measurement column, then write it to tidy.txt.
#
# This uses base aggregate() rather than melt()/dcast(): dcast() is not
# exported by the reshape package named in the header, and data.table's
# melt()/dcast() are written for data.tables whereas Data is a data.frame.
IdCols <- c("Subject", "Activity")
Data.mean <- aggregate(
  Data[, setdiff(names(Data), IdCols), drop = FALSE],
  by = list(Subject = Data$Subject, Activity = Data$Activity),
  FUN = mean
)

# aggregate() varies the first grouping column fastest; sort by Subject, then
# Activity so the rows follow the layout of the `Subject + Activity` cast.
Data.mean <- Data.mean[order(Data.mean$Subject, Data.mean$Activity), ]
rownames(Data.mean) <- NULL

write.table(Data.mean, "tidy.txt", row.names = FALSE)