# warningsSorted.R.R

# Use the author's scratch directory "Temp" as the working directory when it
# exists, so that downloading and unzipping can be tested there.
# On a machine without that directory setwd() would abort the whole script,
# so in that case the current working directory is kept instead.
scratch_dir <- "/Users/tina/desktop/Temp"
if (dir.exists(scratch_dir)) {
  setwd(scratch_dir)
} else {
  message("Directory ", scratch_dir, " not found; staying in ", getwd())
}

# Load the packages required by the script (they must already be installed).
# packages <- c("RCurl", "downloader", "plyr", "dplyr", "data.table", "rapport", "tidyr")
packages <- c("RCurl", "downloader", "data.table", "rapport", "tidyr")
# require() only returns FALSE for a package it cannot load, so collect the
# results and stop here instead of failing later on an undefined function.
loaded <- vapply(packages, require, logical(1),
                 character.only = TRUE, quietly = TRUE)
if (!all(loaded)) {
  stop("Required package(s) not available: ",
       paste(packages[!loaded], collapse = ", "), call. = FALSE)
}
loaded

##      RCurl downloader data.table    rapport      tidyr 
##       TRUE       TRUE       TRUE       TRUE       TRUE

# Download the zipped UCI HAR data set and unpack it into the working
# directory. Nothing is fetched when the unpacked data is already present,
# and the archive is only downloaded when it is not already on disk.
dataFile <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
# features.txt is one of the files read further down, so it serves as the
# marker of a usable extraction (an empty "UCI HAR Dataset" directory may be
# left over from an earlier failed run).
if (!file.exists("./UCI HAR Dataset/features.txt")) {
  if (!file.exists("UCI-HAR-Dataset.zip")) {
    # download.file() reports failure through a non-zero return code
    status <- download.file(dataFile, "UCI-HAR-Dataset.zip", method = "curl")
    if (status != 0) {
      # do not leave a partial archive behind for the next run to trip over
      unlink("UCI-HAR-Dataset.zip")
      stop("Download of the UCI HAR data set failed (status ", status, ")",
           call. = FALSE)
    }
  }
  # unzip() creates the directories contained in the archive, so the data
  # directory does not have to be created beforehand
  unzip("./UCI-HAR-Dataset.zip")
}

# Work out the column classes from a 100-row sample to speed up read.table.
# The sample only needs to be read once and the classes computed once.
initial_test <- read.table("./UCI HAR Dataset/test/X_test.txt", nrows = 100)
# vapply() guarantees a character vector with one class per column
test_classes <- vapply(initial_test, class, character(1))

# Read the test and train measurement tables with the column classes found
# in the initial investigation, then stack them (test rows first).
measurement_files <- c(
  test = "./UCI HAR Dataset/test/X_test.txt",
  train = "./UCI HAR Dataset/train/X_train.txt"
)
measurement_sets <- lapply(
  measurement_files,
  read.table,
  quote = "", fill = TRUE, colClasses = test_classes
)
XtestData <- measurement_sets[["test"]]
XtrainData <- measurement_sets[["train"]]
# CombinedTestTrain is the required output for part 1 of the project:
# "Merges the training and the test sets to create one data set."
CombinedTestTrain <- rbind(XtestData, XtrainData)

# Find the measurements whose feature name mentions a mean or a standard
# deviation; only these columns are kept from the combined data set.
features <- read.table("./UCI HAR Dataset/features.txt")
mean_std_pattern <- "(.*)[Mm]ean(.*)|(.*)[Ss]td(.*)"
# positions of the matching features within features.txt
MeanAndStd <- grep(mean_std_pattern, features[, 2], perl = TRUE, value = FALSE)
# the matching feature names themselves, as a character vector
MeanAndStdNames <- as.character(features[[2]][MeanAndStd])

# syntactically valid versions of the names, to be used as column names
make.names(MeanAndStdNames)

##  [1] "tBodyAcc.mean...X"                   
##  [2] "tBodyAcc.mean...Y"                   
##  [3] "tBodyAcc.mean...Z"                   
##  [4] "tBodyAcc.std...X"                    
##  [5] "tBodyAcc.std...Y"                    
##  [6] "tBodyAcc.std...Z"                    
##  [7] "tGravityAcc.mean...X"                
##  [8] "tGravityAcc.mean...Y"                
##  [9] "tGravityAcc.mean...Z"                
## [10] "tGravityAcc.std...X"                 
## [11] "tGravityAcc.std...Y"                 
## [12] "tGravityAcc.std...Z"                 
## [13] "tBodyAccJerk.mean...X"               
## [14] "tBodyAccJerk.mean...Y"               
## [15] "tBodyAccJerk.mean...Z"               
## [16] "tBodyAccJerk.std...X"                
## [17] "tBodyAccJerk.std...Y"                
## [18] "tBodyAccJerk.std...Z"                
## [19] "tBodyGyro.mean...X"                  
## [20] "tBodyGyro.mean...Y"                  
## [21] "tBodyGyro.mean...Z"                  
## [22] "tBodyGyro.std...X"                   
## [23] "tBodyGyro.std...Y"                   
## [24] "tBodyGyro.std...Z"                   
## [25] "tBodyGyroJerk.mean...X"              
## [26] "tBodyGyroJerk.mean...Y"              
## [27] "tBodyGyroJerk.mean...Z"              
## [28] "tBodyGyroJerk.std...X"               
## [29] "tBodyGyroJerk.std...Y"               
## [30] "tBodyGyroJerk.std...Z"               
## [31] "tBodyAccMag.mean.."                  
## [32] "tBodyAccMag.std.."                   
## [33] "tGravityAccMag.mean.."               
## [34] "tGravityAccMag.std.."                
## [35] "tBodyAccJerkMag.mean.."              
## [36] "tBodyAccJerkMag.std.."               
## [37] "tBodyGyroMag.mean.."                 
## [38] "tBodyGyroMag.std.."                  
## [39] "tBodyGyroJerkMag.mean.."             
## [40] "tBodyGyroJerkMag.std.."              
## [41] "fBodyAcc.mean...X"                   
## [42] "fBodyAcc.mean...Y"                   
## [43] "fBodyAcc.mean...Z"                   
## [44] "fBodyAcc.std...X"                    
## [45] "fBodyAcc.std...Y"                    
## [46] "fBodyAcc.std...Z"                    
## [47] "fBodyAcc.meanFreq...X"               
## [48] "fBodyAcc.meanFreq...Y"               
## [49] "fBodyAcc.meanFreq...Z"               
## [50] "fBodyAccJerk.mean...X"               
## [51] "fBodyAccJerk.mean...Y"               
## [52] "fBodyAccJerk.mean...Z"               
## [53] "fBodyAccJerk.std...X"                
## [54] "fBodyAccJerk.std...Y"                
## [55] "fBodyAccJerk.std...Z"                
## [56] "fBodyAccJerk.meanFreq...X"           
## [57] "fBodyAccJerk.meanFreq...Y"           
## [58] "fBodyAccJerk.meanFreq...Z"           
## [59] "fBodyGyro.mean...X"                  
## [60] "fBodyGyro.mean...Y"                  
## [61] "fBodyGyro.mean...Z"                  
## [62] "fBodyGyro.std...X"                   
## [63] "fBodyGyro.std...Y"                   
## [64] "fBodyGyro.std...Z"                   
## [65] "fBodyGyro.meanFreq...X"              
## [66] "fBodyGyro.meanFreq...Y"              
## [67] "fBodyGyro.meanFreq...Z"              
## [68] "fBodyAccMag.mean.."                  
## [69] "fBodyAccMag.std.."                   
## [70] "fBodyAccMag.meanFreq.."              
## [71] "fBodyBodyAccJerkMag.mean.."          
## [72] "fBodyBodyAccJerkMag.std.."           
## [73] "fBodyBodyAccJerkMag.meanFreq.."      
## [74] "fBodyBodyGyroMag.mean.."             
## [75] "fBodyBodyGyroMag.std.."              
## [76] "fBodyBodyGyroMag.meanFreq.."         
## [77] "fBodyBodyGyroJerkMag.mean.."         
## [78] "fBodyBodyGyroJerkMag.std.."          
## [79] "fBodyBodyGyroJerkMag.meanFreq.."     
## [80] "angle.tBodyAccMean.gravity."         
## [81] "angle.tBodyAccJerkMean..gravityMean."
## [82] "angle.tBodyGyroMean.gravityMean."    
## [83] "angle.tBodyGyroJerkMean.gravityMean."
## [84] "angle.X.gravityMean."                
## [85] "angle.Y.gravityMean."                
## [86] "angle.Z.gravityMean."

# Keep only the mean and std measurement columns of the combined test and
# train data, and label them with the (syntactically valid) feature names
# taken from the downloaded features.txt file.
DataNeeded <- setNames(
  CombinedTestTrain[, MeanAndStd],
  make.names(MeanAndStdNames)
)

# Reads a one-column text file into a data frame whose column is `column`.
read_label_column <- function(path, column) {
  read.table(path, quote = "", fill = TRUE, col.names = column)
}

# Subject IDs: test first, then train, so the rows stay in the same order
# as the combined measurements.
XtestID <- read_label_column("./UCI HAR Dataset/test/subject_test.txt", "subject_ID")
XtrainID <- read_label_column("./UCI HAR Dataset/train/subject_train.txt", "subject_ID")
IDs <- rbind(XtestID, XtrainID)

# Activity codes, bound in the same test-then-train order.
# rbind() of two data frames already yields a data frame.
XtestActivity <- read_label_column("./UCI HAR Dataset/test/y_test.txt", "activity")
XtrainActivity <- read_label_column("./UCI HAR Dataset/train/y_train.txt", "activity")
activity <- rbind(XtestActivity, XtrainActivity)

# Read the activity labels and turn them into human readable labels:
# take the second column as lower-case text, then camel-case it by
# splitting on underscores.
Activity_Lables <- read.table(
  "./UCI HAR Dataset/activity_labels.txt",
  header = FALSE, quote = "", fill = TRUE
)
Activity_Lables <- tolower(as.character(Activity_Lables[, 2]))
ActivityLables <- tocamel(Activity_Lables, delim = "\\_", upper = TRUE, sep = "")

# Put the activity codes, the subject IDs and the measurements side by side
activity_codes <- activity[[1]]
AllData <- cbind(activity, IDs, DataNeeded)

# Replace each activity code in the first column by the label stored at
# that position in ActivityLables
AllData[[1]] <- ActivityLables[activity_codes]

# Give the variables more descriptive names.
# Step 1: convert every column name to CamelCase, splitting on "." and "_".
VariableNames <- tocamel(names(AllData), delim = "\\.|\\_", upper = TRUE, sep = "")

# Step 2: substitute abbreviations with full words. The replacements are
# applied one after another in the order listed here.
abbreviations <- c(
  Acc = "Acceleration",
  Mag = "Magnitude",
  Gyro = "Gyroscope",
  BodyBody = "Body",
  Std = "StandardDeviation",
  Freq = "Frequency"
)
VariableNames6 <- VariableNames
for (short_form in names(abbreviations)) {
  VariableNames6 <- gsub(short_form, abbreviations[[short_form]], VariableNames6)
}

# Step 3: use the human readable labels as the column names of the data set
names(AllData) <- VariableNames6

# Create a tidy data set containing the average of every measured variable
# for each activity and subject.
# Only the measurement columns are averaged. The Activity and SubjectID
# columns are used purely for grouping: taking mean() of the character
# Activity column only produces NA plus a warning per group, and hiding that
# behind suppressWarnings() would hide any genuine warning as well.
grouping_columns <- c("SubjectID", "Activity")
is_measurement <- !(colnames(AllData) %in% grouping_columns)
AveragedAllData <- aggregate(
  AllData[, is_measurement, drop = FALSE],
  by = list(Activities = AllData$Activity, Subject = AllData$SubjectID),
  FUN = mean
)
# The result holds the two grouping columns (Activities, Subject) followed
# by the averaged measurements; keep it under the name written out below.
AveragedAllData2 <- AveragedAllData

# Write the tidy data set to average.txt in the working directory, with a
# header line made of the data set's own column names and no row names.
write.table(
  AveragedAllData2,
  file = "./average.txt",
  row.names = FALSE,
  col.names = TRUE
)

# To read the tidy data set back into R, use check.names = FALSE;
# it is there to remove the "X." R adds to the column names.
# tidy <- read.table("./average.txt", quote = "", header = TRUE, fill = TRUE, check.names = FALSE)

# warningsSorted.R.R

# tina

# Sun Aug 16 12:41:02 2015