Description

This project is the Coursera Getting and Cleaning Data course project. It transforms the raw variable names into tidy, descriptive names that are easier to read and understand.

Download and load the datasets

# Fetch and unpack the UCI HAR dataset.
# Every path is derived from one data directory so that the existence checks,
# the download target and the unzip target all refer to the same place.
dataDir <- "Courseradata"
if (!dir.exists(dataDir)) {
    dir.create(dataDir, recursive = TRUE)
}
fileUrl <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
zipPath <- file.path(dataDir, "dataset.zip")
dataPath <- file.path(dataDir, "UCI HAR Dataset")

# Only touch the network / disk when the extracted dataset is missing.
if (!dir.exists(dataPath)) {
    if (!file.exists(zipPath)) {
        # mode = "wb" keeps the archive intact on platforms that distinguish
        # text and binary writes.
        download.file(fileUrl, destfile = zipPath, mode = "wb")
    }
    unzip(zipfile = zipPath, exdir = dataDir)
}

Read the datasets

# Read one whitespace-delimited file located under the dataset root.
readHarTable <- function(...) {
    read.table(file.path(dataPath, ...))
}

# Lookup tables: activity code -> label, and feature index -> feature name.
activityLabels <- readHarTable("activity_labels.txt")
features <- readHarTable("features.txt")
# The feature names become column names later, so hold them as character.
features$V2 <- as.character(features$V2)

# Training split: subject ids, measurements, activity codes.
subjectTrain <- readHarTable("train", "subject_train.txt")
featuresTrain <- readHarTable("train", "X_train.txt")
activityTrain <- readHarTable("train", "y_train.txt")

# Test split, same three files.
subjectTest <- readHarTable("test", "subject_test.txt")
featuresTest <- readHarTable("test", "X_test.txt")
activityTest <- readHarTable("test", "y_test.txt")

Merge the training and the test sets to create one data set

# Column order for both splits: subject | measurements | activity.
columnLabels <- c("subject", features$V2, "activity")

# Assemble each split side by side, then stack training rows above test rows.
trainData <- cbind(subjectTrain, featuresTrain, activityTrain)
testData <- cbind(subjectTest, featuresTest, activityTest)
mergeData <- rbind(trainData, testData)
names(mergeData) <- columnLabels
# names(mergeData)     # check

Keep only the measurements whose names contain "mean" or "std" (standard deviation)

# Keep the two id columns plus every measurement whose name contains
# "mean" or "std" (the pattern also matches the meanFreq() variables).
keepPattern <- "subject|mean|std|activity"
selectedColumns <- grepl(keepPattern, names(mergeData))
mergeData <- mergeData[, selectedColumns]
names(mergeData)    # check
##  [1] "subject"                         "tBodyAcc-mean()-X"              
##  [3] "tBodyAcc-mean()-Y"               "tBodyAcc-mean()-Z"              
##  [5] "tBodyAcc-std()-X"                "tBodyAcc-std()-Y"               
##  [7] "tBodyAcc-std()-Z"                "tGravityAcc-mean()-X"           
##  [9] "tGravityAcc-mean()-Y"            "tGravityAcc-mean()-Z"           
## [11] "tGravityAcc-std()-X"             "tGravityAcc-std()-Y"            
## [13] "tGravityAcc-std()-Z"             "tBodyAccJerk-mean()-X"          
## [15] "tBodyAccJerk-mean()-Y"           "tBodyAccJerk-mean()-Z"          
## [17] "tBodyAccJerk-std()-X"            "tBodyAccJerk-std()-Y"           
## [19] "tBodyAccJerk-std()-Z"            "tBodyGyro-mean()-X"             
## [21] "tBodyGyro-mean()-Y"              "tBodyGyro-mean()-Z"             
## [23] "tBodyGyro-std()-X"               "tBodyGyro-std()-Y"              
## [25] "tBodyGyro-std()-Z"               "tBodyGyroJerk-mean()-X"         
## [27] "tBodyGyroJerk-mean()-Y"          "tBodyGyroJerk-mean()-Z"         
## [29] "tBodyGyroJerk-std()-X"           "tBodyGyroJerk-std()-Y"          
## [31] "tBodyGyroJerk-std()-Z"           "tBodyAccMag-mean()"             
## [33] "tBodyAccMag-std()"               "tGravityAccMag-mean()"          
## [35] "tGravityAccMag-std()"            "tBodyAccJerkMag-mean()"         
## [37] "tBodyAccJerkMag-std()"           "tBodyGyroMag-mean()"            
## [39] "tBodyGyroMag-std()"              "tBodyGyroJerkMag-mean()"        
## [41] "tBodyGyroJerkMag-std()"          "fBodyAcc-mean()-X"              
## [43] "fBodyAcc-mean()-Y"               "fBodyAcc-mean()-Z"              
## [45] "fBodyAcc-std()-X"                "fBodyAcc-std()-Y"               
## [47] "fBodyAcc-std()-Z"                "fBodyAcc-meanFreq()-X"          
## [49] "fBodyAcc-meanFreq()-Y"           "fBodyAcc-meanFreq()-Z"          
## [51] "fBodyAccJerk-mean()-X"           "fBodyAccJerk-mean()-Y"          
## [53] "fBodyAccJerk-mean()-Z"           "fBodyAccJerk-std()-X"           
## [55] "fBodyAccJerk-std()-Y"            "fBodyAccJerk-std()-Z"           
## [57] "fBodyAccJerk-meanFreq()-X"       "fBodyAccJerk-meanFreq()-Y"      
## [59] "fBodyAccJerk-meanFreq()-Z"       "fBodyGyro-mean()-X"             
## [61] "fBodyGyro-mean()-Y"              "fBodyGyro-mean()-Z"             
## [63] "fBodyGyro-std()-X"               "fBodyGyro-std()-Y"              
## [65] "fBodyGyro-std()-Z"               "fBodyGyro-meanFreq()-X"         
## [67] "fBodyGyro-meanFreq()-Y"          "fBodyGyro-meanFreq()-Z"         
## [69] "fBodyAccMag-mean()"              "fBodyAccMag-std()"              
## [71] "fBodyAccMag-meanFreq()"          "fBodyBodyAccJerkMag-mean()"     
## [73] "fBodyBodyAccJerkMag-std()"       "fBodyBodyAccJerkMag-meanFreq()" 
## [75] "fBodyBodyGyroMag-mean()"         "fBodyBodyGyroMag-std()"         
## [77] "fBodyBodyGyroMag-meanFreq()"     "fBodyBodyGyroJerkMag-mean()"    
## [79] "fBodyBodyGyroJerkMag-std()"      "fBodyBodyGyroJerkMag-meanFreq()"
## [81] "activity"

Name the activities with descriptive names

# Subjects are identifiers rather than quantities, so store them as a factor.
mergeData$subject <- as.factor(mergeData$subject)
# Replace the numeric activity codes with the labels read from
# activity_labels.txt (column 1 = code, column 2 = label).
activityCodes <- activityLabels[[1]]
activityNames <- activityLabels[[2]]
mergeData$activity <- factor(mergeData$activity, levels = activityCodes, labels = activityNames)
head(mergeData$activity)     #check
## [1] STANDING STANDING STANDING STANDING STANDING STANDING
## 6 Levels: WALKING WALKING_UPSTAIRS WALKING_DOWNSTAIRS ... LAYING

Clean the names to appropriate descriptive variable names

# Ordered (pattern, replacement) pairs; they are applied from top to bottom,
# so the punctuation is stripped before the prefixes and abbreviations expand.
renameRules <- rbind(
    c("[-()]", ""),
    c("^t", "time"),
    c("^f", "frequency"),
    c("Acc", "Accelerometer"),
    c("Gyro", "Gyroscope"),
    c("Mag", "Magnitude"),
    c("mean", "Mean"),
    c("std", "Std"),
    c("BodyBody", "Body")
)
cleanNames <- colnames(mergeData)
for (i in seq_len(nrow(renameRules))) {
    cleanNames <- gsub(renameRules[i, 1], renameRules[i, 2], cleanNames)
}
colnames(mergeData) <- cleanNames
names(mergeData)       # check
##  [1] "subject"                                        
##  [2] "timeBodyAccelerometerMeanX"                     
##  [3] "timeBodyAccelerometerMeanY"                     
##  [4] "timeBodyAccelerometerMeanZ"                     
##  [5] "timeBodyAccelerometerStdX"                      
##  [6] "timeBodyAccelerometerStdY"                      
##  [7] "timeBodyAccelerometerStdZ"                      
##  [8] "timeGravityAccelerometerMeanX"                  
##  [9] "timeGravityAccelerometerMeanY"                  
## [10] "timeGravityAccelerometerMeanZ"                  
## [11] "timeGravityAccelerometerStdX"                   
## [12] "timeGravityAccelerometerStdY"                   
## [13] "timeGravityAccelerometerStdZ"                   
## [14] "timeBodyAccelerometerJerkMeanX"                 
## [15] "timeBodyAccelerometerJerkMeanY"                 
## [16] "timeBodyAccelerometerJerkMeanZ"                 
## [17] "timeBodyAccelerometerJerkStdX"                  
## [18] "timeBodyAccelerometerJerkStdY"                  
## [19] "timeBodyAccelerometerJerkStdZ"                  
## [20] "timeBodyGyroscopeMeanX"                         
## [21] "timeBodyGyroscopeMeanY"                         
## [22] "timeBodyGyroscopeMeanZ"                         
## [23] "timeBodyGyroscopeStdX"                          
## [24] "timeBodyGyroscopeStdY"                          
## [25] "timeBodyGyroscopeStdZ"                          
## [26] "timeBodyGyroscopeJerkMeanX"                     
## [27] "timeBodyGyroscopeJerkMeanY"                     
## [28] "timeBodyGyroscopeJerkMeanZ"                     
## [29] "timeBodyGyroscopeJerkStdX"                      
## [30] "timeBodyGyroscopeJerkStdY"                      
## [31] "timeBodyGyroscopeJerkStdZ"                      
## [32] "timeBodyAccelerometerMagnitudeMean"             
## [33] "timeBodyAccelerometerMagnitudeStd"              
## [34] "timeGravityAccelerometerMagnitudeMean"          
## [35] "timeGravityAccelerometerMagnitudeStd"           
## [36] "timeBodyAccelerometerJerkMagnitudeMean"         
## [37] "timeBodyAccelerometerJerkMagnitudeStd"          
## [38] "timeBodyGyroscopeMagnitudeMean"                 
## [39] "timeBodyGyroscopeMagnitudeStd"                  
## [40] "timeBodyGyroscopeJerkMagnitudeMean"             
## [41] "timeBodyGyroscopeJerkMagnitudeStd"              
## [42] "frequencyBodyAccelerometerMeanX"                
## [43] "frequencyBodyAccelerometerMeanY"                
## [44] "frequencyBodyAccelerometerMeanZ"                
## [45] "frequencyBodyAccelerometerStdX"                 
## [46] "frequencyBodyAccelerometerStdY"                 
## [47] "frequencyBodyAccelerometerStdZ"                 
## [48] "frequencyBodyAccelerometerMeanFreqX"            
## [49] "frequencyBodyAccelerometerMeanFreqY"            
## [50] "frequencyBodyAccelerometerMeanFreqZ"            
## [51] "frequencyBodyAccelerometerJerkMeanX"            
## [52] "frequencyBodyAccelerometerJerkMeanY"            
## [53] "frequencyBodyAccelerometerJerkMeanZ"            
## [54] "frequencyBodyAccelerometerJerkStdX"             
## [55] "frequencyBodyAccelerometerJerkStdY"             
## [56] "frequencyBodyAccelerometerJerkStdZ"             
## [57] "frequencyBodyAccelerometerJerkMeanFreqX"        
## [58] "frequencyBodyAccelerometerJerkMeanFreqY"        
## [59] "frequencyBodyAccelerometerJerkMeanFreqZ"        
## [60] "frequencyBodyGyroscopeMeanX"                    
## [61] "frequencyBodyGyroscopeMeanY"                    
## [62] "frequencyBodyGyroscopeMeanZ"                    
## [63] "frequencyBodyGyroscopeStdX"                     
## [64] "frequencyBodyGyroscopeStdY"                     
## [65] "frequencyBodyGyroscopeStdZ"                     
## [66] "frequencyBodyGyroscopeMeanFreqX"                
## [67] "frequencyBodyGyroscopeMeanFreqY"                
## [68] "frequencyBodyGyroscopeMeanFreqZ"                
## [69] "frequencyBodyAccelerometerMagnitudeMean"        
## [70] "frequencyBodyAccelerometerMagnitudeStd"         
## [71] "frequencyBodyAccelerometerMagnitudeMeanFreq"    
## [72] "frequencyBodyAccelerometerJerkMagnitudeMean"    
## [73] "frequencyBodyAccelerometerJerkMagnitudeStd"     
## [74] "frequencyBodyAccelerometerJerkMagnitudeMeanFreq"
## [75] "frequencyBodyGyroscopeMagnitudeMean"            
## [76] "frequencyBodyGyroscopeMagnitudeStd"             
## [77] "frequencyBodyGyroscopeMagnitudeMeanFreq"        
## [78] "frequencyBodyGyroscopeJerkMagnitudeMean"        
## [79] "frequencyBodyGyroscopeJerkMagnitudeStd"         
## [80] "frequencyBodyGyroscopeJerkMagnitudeMeanFreq"    
## [81] "activity"

Create a second independent tidy data set with the average of each variable for each activity and each subject

library(dplyr)
# Average every measurement within each subject/activity pair.
# summarise(across(...)) replaces the deprecated summarise_each(funs(...))
# interface; grouping columns are never selected by across().
# .groups = "drop" returns an ungrouped table instead of one still grouped
# by subject.
secondData <- mergeData %>%
    group_by(subject, activity) %>%
    summarise(across(everything(), mean), .groups = "drop")
# Plain-text export without row names or quoting.
write.table(secondData, "tidydata.txt", row.names = FALSE, quote = FALSE)