# Work from the project directory so that the downloaded archive, the unzipped
# data set and the output files all live in one place.
# NOTE(review): this absolute path only exists on the original author's
# machine; adjust it before running the script anywhere else.
setwd("/Users/tina/desktop/GaCD_Project")
# Attach the packages required by the script.
# library() stops immediately when a package is not installed, whereas
# require() merely returns FALSE and lets the script fail much later with a
# confusing "could not find function" error.
# packages <- c("RCurl", "downloader", "plyr", "dplyr", "data.table", "rapport", "tidyr")
packages <- c("RCurl", "downloader", "data.table", "rapport", "tidyr")
for (pkg in packages) {
  library(pkg, character.only = TRUE, quietly = TRUE)
}
# Fetch the zipped UCI HAR data set and unpack it into the working directory.
# Nothing is downloaded or unpacked again when the data is already present, so
# the script can be re-run offline.
dataFile <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
zip_path <- "./UCI-HAR-Dataset.zip"
# features.txt is one of the files read further down; its presence is used as
# the sign that the archive has already been unpacked.
marker_file <- "./UCI HAR Dataset/features.txt"
if (!file.exists(marker_file)) {
  if (!file.exists(zip_path)) {
    # download.file() returns 0 on success and a non-zero code on failure.
    download_status <- download.file(dataFile, zip_path, method = "curl")
    if (download_status != 0) {
      # Do not leave a partial archive behind for the next run to trip over.
      unlink(zip_path)
      stop("Download of ", dataFile, " failed with status ", download_status,
           call. = FALSE)
    }
  }
  # unzip() creates the "UCI HAR Dataset" directory itself.
  unzip(zip_path)
  if (!file.exists(marker_file)) {
    stop("Unzipping ", zip_path, " did not produce ", marker_file,
         call. = FALSE)
  }
}
# Work out the column classes from the first 100 rows of the test set so that
# the full reads below can skip type detection (speeds up read.table).
initial_test <- read.table("./UCI HAR Dataset/test/X_test.txt", nrows = 100)
# vapply() guarantees one class name per column. The previous version also
# read the same 100 rows a second time into an unused variable
# (initial_subject_id) and recomputed test_classes; both were redundant.
test_classes <- vapply(initial_test, class, character(1))
# Read the test and train measurements using the column classes found above.
XtestData <- read.table("./UCI HAR Dataset/test/X_test.txt", quote = "",
                        fill = TRUE, colClasses = test_classes)
XtrainData <- read.table("./UCI HAR Dataset/train/X_train.txt", quote = "",
                         fill = TRUE, colClasses = test_classes)
# Test rows first, then train rows; the subject and activity tables are bound
# in the same order so that rows stay aligned.
CombinedTestTrain <- rbind(XtestData, XtrainData)
# CombinedTestTrain is the required output for part 1 of the project:
# "Merges the training and the test sets to create one data set."
# Identify the measurements to keep: only the features whose name contains
# "-mean()" or "-std()".
features <- read.table("./UCI HAR Dataset/features.txt")
# Positions of the wanted features; these are the column indices into the
# combined measurement table.
MeanAndStd <- grep("-mean\\(\\)|-std\\(\\)", features[, 2], perl = TRUE)
# The matching feature names themselves, taken from the same positions.
MeanAndStdNames <- as.character(features[, 2])[MeanAndStd]
# Syntactically valid versions of those names, later used as column names.
# Shown here for reference (sample output follows).
make.names(MeanAndStdNames)
## [1] "tBodyAcc.mean...X" "tBodyAcc.mean...Y"
## [3] "tBodyAcc.mean...Z" "tBodyAcc.std...X"
## [5] "tBodyAcc.std...Y" "tBodyAcc.std...Z"
## [7] "tGravityAcc.mean...X" "tGravityAcc.mean...Y"
## [9] "tGravityAcc.mean...Z" "tGravityAcc.std...X"
## [11] "tGravityAcc.std...Y" "tGravityAcc.std...Z"
## [13] "tBodyAccJerk.mean...X" "tBodyAccJerk.mean...Y"
## [15] "tBodyAccJerk.mean...Z" "tBodyAccJerk.std...X"
## [17] "tBodyAccJerk.std...Y" "tBodyAccJerk.std...Z"
## [19] "tBodyGyro.mean...X" "tBodyGyro.mean...Y"
## [21] "tBodyGyro.mean...Z" "tBodyGyro.std...X"
## [23] "tBodyGyro.std...Y" "tBodyGyro.std...Z"
## [25] "tBodyGyroJerk.mean...X" "tBodyGyroJerk.mean...Y"
## [27] "tBodyGyroJerk.mean...Z" "tBodyGyroJerk.std...X"
## [29] "tBodyGyroJerk.std...Y" "tBodyGyroJerk.std...Z"
## [31] "tBodyAccMag.mean.." "tBodyAccMag.std.."
## [33] "tGravityAccMag.mean.." "tGravityAccMag.std.."
## [35] "tBodyAccJerkMag.mean.." "tBodyAccJerkMag.std.."
## [37] "tBodyGyroMag.mean.." "tBodyGyroMag.std.."
## [39] "tBodyGyroJerkMag.mean.." "tBodyGyroJerkMag.std.."
## [41] "fBodyAcc.mean...X" "fBodyAcc.mean...Y"
## [43] "fBodyAcc.mean...Z" "fBodyAcc.std...X"
## [45] "fBodyAcc.std...Y" "fBodyAcc.std...Z"
## [47] "fBodyAccJerk.mean...X" "fBodyAccJerk.mean...Y"
## [49] "fBodyAccJerk.mean...Z" "fBodyAccJerk.std...X"
## [51] "fBodyAccJerk.std...Y" "fBodyAccJerk.std...Z"
## [53] "fBodyGyro.mean...X" "fBodyGyro.mean...Y"
## [55] "fBodyGyro.mean...Z" "fBodyGyro.std...X"
## [57] "fBodyGyro.std...Y" "fBodyGyro.std...Z"
## [59] "fBodyAccMag.mean.." "fBodyAccMag.std.."
## [61] "fBodyBodyAccJerkMag.mean.." "fBodyBodyAccJerkMag.std.."
## [63] "fBodyBodyGyroMag.mean.." "fBodyBodyGyroMag.std.."
## [65] "fBodyBodyGyroJerkMag.mean.." "fBodyBodyGyroJerkMag.std.."
# Keep only the mean and standard deviation columns of the combined test and
# train data, and label them with the syntactically valid feature names.
DataNeeded <- setNames(CombinedTestTrain[, MeanAndStd],
                       make.names(MeanAndStdNames))
# DataNeeded now holds every test and train observation, restricted to the
# mean and std measurements.
# Its column names come directly from the downloaded features.txt file.
# Read the subject IDs and bind them in the same order as the measurements
# (test rows first, then train rows) so that rows stay aligned.
XtestID <- read.table("./UCI HAR Dataset/test/subject_test.txt", quote = "", fill = TRUE, col.names = "subject_ID")
XtrainID <- read.table("./UCI HAR Dataset/train/subject_train.txt", quote = "", fill = TRUE, col.names = "subject_ID")
IDs <- rbind(XtestID, XtrainID)
# Read the activity codes and bind them in the same test-then-train order.
# rbind() of two data frames is already a data frame, so no conversion is
# needed afterwards.
XtestActivity <- read.table("./UCI HAR Dataset/test/y_test.txt", quote = "", fill = TRUE, col.names = "activity")
XtrainActivity <- read.table("./UCI HAR Dataset/train/y_train.txt", quote = "", fill = TRUE, col.names = "activity")
activity <- rbind(XtestActivity, XtrainActivity)
# Read the activity labels (column 1: activity code, column 2: label) and
# convert the labels to human readable CamelCase.
activity_label_table <- read.table("./UCI HAR Dataset/activity_labels.txt", header = FALSE, quote = "", fill = TRUE)
Activity_Lables <- tolower(as.character(activity_label_table[, 2]))
ActivityLables <- tocamel(Activity_Lables, delim = "\\_", upper = TRUE, sep = "")
# Bind the activities, IDs and all the measurements together.
AllData <- cbind(activity, IDs, DataNeeded)
# Replace each activity code by its label. The lookup goes through the code
# column of the label file rather than assuming that the labels are stored in
# row order 1, 2, 3, ...
AllData[, 1] <- ActivityLables[match(AllData[, 1], activity_label_table[, 1])]
# Give the variables more descriptive names.
# First convert every column name to CamelCase.
VariableNames <- tocamel(names(AllData), delim = "\\.|\\_", upper = TRUE, sep = "")
# Then expand the abbreviations. The replacements are applied in the order
# listed, one table entry at a time, instead of through a chain of numbered
# temporaries.
abbreviations <- c(
  Acc = "Acceleration",
  Mag = "Magnitude",
  Gyro = "Gyroscope",
  BodyBody = "Body",
  Std = "StandardDeviation",
  Freq = "Frequency"
)
VariableNames6 <- VariableNames
for (abbreviation in names(abbreviations)) {
  # fixed = TRUE: the abbreviations are literal text, not regular expressions.
  VariableNames6 <- gsub(abbreviation, abbreviations[[abbreviation]],
                         VariableNames6, fixed = TRUE)
}
# Use the human readable names as the column names of the data set.
names(AllData) <- VariableNames6
# write.table(names(AllData), "./VariableNames.txt", row.names = FALSE)
# Create a tidy data set containing:
# the average of the observations for each subject and activity
# for each of the variables measured.
# Only the measurement columns are averaged. Averaging the character Activity
# column as well (as was done before) yields nothing but NA plus warnings that
# then had to be suppressed, which could also hide genuine problems.
group_columns <- c("Activity", "SubjectID")
is_measurement <- !(names(AllData) %in% group_columns)
AveragedAllData <- aggregate(
  AllData[, is_measurement, drop = FALSE],
  by = list(Activities = AllData$Activity, SubjectId = AllData$SubjectID),
  FUN = mean
)
# The grouping columns of AllData are no longer part of the aggregate, so there
# is nothing left to drop; the second name is kept for existing users.
AveragedAllData2 <- AveragedAllData
#Write the tidy data set named average.txt as a table to the working directory.
write.table(AveragedAllData2, "./average.txt", row.names = FALSE, col.names = names(AveragedAllData2))
#start of variable description for codebook.
# Names of the columns of the tidy data set, in column order.
VariablePrefixDescrip <- c(
  "Activities", "SubjectId",
  "TBodyAccelerationMeanX", "TBodyAccelerationMeanY",
  "TBodyAccelerationMeanZ",
  "TBodyAccelerationStandardDeviationX",
  "TBodyAccelerationStandardDeviationY",
  "TBodyAccelerationStandardDeviationZ",
  "TGravityAccelerationMeanX", "TGravityAccelerationMeanY",
  "TGravityAccelerationMeanZ",
  "TGravityAccelerationStandardDeviationX",
  "TGravityAccelerationStandardDeviationY",
  "TGravityAccelerationStandardDeviationZ",
  "TBodyAccelerationJerkMeanX", "TBodyAccelerationJerkMeanY",
  "TBodyAccelerationJerkMeanZ",
  "TBodyAccelerationJerkStandardDeviationX",
  "TBodyAccelerationJerkStandardDeviationY",
  "TBodyAccelerationJerkStandardDeviationZ",
  "TBodyGyroscopeMeanX", "TBodyGyroscopeMeanY", "TBodyGyroscopeMeanZ",
  "TBodyGyroscopeStandardDeviationX",
  "TBodyGyroscopeStandardDeviationY",
  "TBodyGyroscopeStandardDeviationZ",
  "TBodyGyroscopeJerkMeanX", "TBodyGyroscopeJerkMeanY",
  "TBodyGyroscopeJerkMeanZ",
  "TBodyGyroscopeJerkStandardDeviationX",
  "TBodyGyroscopeJerkStandardDeviationY",
  "TBodyGyroscopeJerkStandardDeviationZ",
  "TBodyAccelerationMagnitudeMean",
  "TBodyAccelerationMagnitudeStandardDeviation",
  "TGravityAccelerationMagnitudeMean",
  "TGravityAccelerationMagnitudeStandardDeviation",
  "TBodyAccelerationJerkMagnitudeMean",
  "TBodyAccelerationJerkMagnitudeStandardDeviation",
  "TBodyGyroscopeMagnitudeMean",
  "TBodyGyroscopeMagnitudeStandardDeviation",
  "TBodyGyroscopeJerkMagnitudeMean",
  "TBodyGyroscopeJerkMagnitudeStandardDeviation",
  "FBodyAccelerationMeanX", "FBodyAccelerationMeanY",
  "FBodyAccelerationMeanZ",
  "FBodyAccelerationStandardDeviationX",
  "FBodyAccelerationStandardDeviationY",
  "FBodyAccelerationStandardDeviationZ",
  "FBodyAccelerationJerkMeanX", "FBodyAccelerationJerkMeanY",
  "FBodyAccelerationJerkMeanZ",
  "FBodyAccelerationJerkStandardDeviationX",
  "FBodyAccelerationJerkStandardDeviationY",
  "FBodyAccelerationJerkStandardDeviationZ",
  "FBodyGyroscopeMeanX", "FBodyGyroscopeMeanY", "FBodyGyroscopeMeanZ",
  "FBodyGyroscopeStandardDeviationX",
  "FBodyGyroscopeStandardDeviationY",
  "FBodyGyroscopeStandardDeviationZ",
  "FBodyAccelerationMagnitudeMean",
  "FBodyAccelerationMagnitudeStandardDeviation",
  "FBodyAccelerationJerkMagnitudeMean",
  "FBodyAccelerationJerkMagnitudeStandardDeviation",
  "FBodyGyroscopeMagnitudeMean",
  "FBodyGyroscopeMagnitudeStandardDeviation",
  "FBodyGyroscopeJerkMagnitudeMean",
  "FBodyGyroscopeJerkMagnitudeStandardDeviation"
)

# Build the codebook sentence for each variable name.
#
# A measurement name is made of a domain letter (T = time, F = frequency), a
# signal (e.g. BodyAcceleration), an optional "Magnitude", a statistic (Mean or
# StandardDeviation) and, for non-magnitude signals, an axis (X, Y or Z). The
# sentence is assembled from those parts, so every variable of the same shape
# is described consistently. The previous one-gsub-per-variable chain left
# three gyroscope magnitude variables undescribed (their patterns were
# copy-pasted from the acceleration ones) and described FBodyAccelerationMeanZ
# as a time-domain variable.
#
# variable_names: character vector of tidy data set column names.
# Returns an unnamed character vector of the same length. Names that cannot be
# described are returned unchanged and reported with a warning.
describe_variables <- function(variable_names) {
  fixed_descriptions <- c(
    Activities = "The activity that was performed by the subject while the measurements were taken",
    SubjectId = "The unique identity number assigned to subject who took part in the experiment"
  )
  # Wording used for per-axis signals.
  axis_signals <- c(
    BodyAcceleration = "acceleration of the subject's body",
    GravityAcceleration = "acceleration in gravity",
    BodyAccelerationJerk = "jerk in acceleration of the subject's body",
    BodyGyroscope = "gyroscope measurement of the subject's body",
    BodyGyroscopeJerk = "jerk of the gyroscope measurement of the subject's body"
  )
  # Wording used for magnitude signals (no axis).
  magnitude_signals <- c(
    BodyAcceleration = "magnitude of the acceleration of the subject's body",
    GravityAcceleration = "magnitude of the gravity measurement",
    BodyAccelerationJerk = "magnitude of the jerk of the acceleration of the subject's body",
    BodyGyroscope = "magnitude of the gyroscope measurement on the subject's body",
    BodyGyroscopeJerk = "magnitude of the jerk of the gyroscope measurement on the subject's body"
  )

  # A describable name is either <T|F><signal>Magnitude<statistic> or
  # <T|F><signal><statistic><axis>.
  well_formed <- grepl(
    "^[TF][A-Za-z]+(Magnitude(Mean|StandardDeviation)|(Mean|StandardDeviation)[XYZ])$",
    variable_names
  )
  is_magnitude <- grepl("Magnitude(Mean|StandardDeviation)$", variable_names)
  statistic <- ifelse(
    grepl("StandardDeviation", variable_names, fixed = TRUE),
    "standard deviation",
    "mean"
  )
  domain <- ifelse(
    substring(variable_names, 1, 1) == "T",
    "in time",
    "in the frequency domain"
  )
  # Last character; only meaningful (X, Y or Z) for well-formed axis names.
  axis <- substring(variable_names, nchar(variable_names))
  # Strip the domain letter and everything from "Magnitude"/the statistic on.
  signal <- sub("(Magnitude)?(Mean|StandardDeviation)[XYZ]?$", "",
                substring(variable_names, 2))

  signal_known <- ifelse(
    is_magnitude,
    signal %in% names(magnitude_signals),
    signal %in% names(axis_signals)
  )
  described <- well_formed & signal_known

  magnitude_text <- paste("The", statistic, "of the",
                          magnitude_signals[signal], domain)
  axis_text <- paste("The", statistic, "of the", axis_signals[signal],
                     "in the", axis, "direction", domain)

  # Start from the names themselves so that unknown names pass through.
  descriptions <- variable_names
  descriptions[described] <- ifelse(is_magnitude, magnitude_text, axis_text)[described]

  is_fixed <- variable_names %in% names(fixed_descriptions)
  descriptions[is_fixed] <- fixed_descriptions[variable_names[is_fixed]]

  undescribed <- !(described | is_fixed)
  if (any(undescribed)) {
    warning("No codebook description for: ",
            paste(variable_names[undescribed], collapse = ", "),
            call. = FALSE)
  }
  unname(descriptions)
}

explanation <- describe_variables(VariablePrefixDescrip)
# One codebook entry per variable: a bullet with the name, then its explanation.
VariablePrefixCodebook <- paste("* ", VariablePrefixDescrip,"\n", explanation,"\n")
write.table(VariablePrefixCodebook, "./VariableCodebook.txt", row.names = FALSE)
#end of variable descriptions for codebook
#Reading the tidy data set back into R using check.names = FALSE
# The following code can be used to read the file back into R
# tidy <- read.table("./average.txt", quote = "", header = TRUE, fill = TRUE, check.names = FALSE)
# the following code can be used to determine the dimensions of the data set:
# dim(tidy)
# the values that can be expected are:
# [1] 180 68