The purpose of this exercise is to demonstrate your ability to collect, work with, and clean a data set. The goal is to prepare tidy data that can be used for later analysis.
Creates a second, independent tidy data set with the average of each variable for each activity and each subject.
Check whether the archive already exists locally; if it does, skip the download. Otherwise, download it from the given URL.
# Download the UCI HAR archive only when it is not already cached locally.
fileName <- "dataset.zip"
if (!file.exists(fileName)) {
  fileURL <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
  # mode = "wb" keeps the zip intact on Windows; the default download method
  # is used so the script does not depend on an external curl binary.
  download.file(fileURL, fileName, mode = "wb")
}

# Extract the archive only when the working folder is missing, then rename the
# extracted folder to the short name used by the rest of the script.
folderName <- "dataset"
if (!file.exists(folderName)) {
  unzip(fileName)
  # The archive's top-level folder is "UCI HAR Dataset" (mixed case); the
  # exact spelling is required on case-sensitive file systems.
  extractedName <- "UCI HAR Dataset"
  if (!file.rename(extractedName, folderName)) {
    stop("Could not rename '", extractedName, "' to '", folderName, "'",
         call. = FALSE)
  }
}
## [1] FALSE
# Read the training measurements, activity codes and subject identifiers.
# The files are whitespace-delimited (sep = "") and have no header row.
# The activity file is named "y_train.txt" with a lowercase "y"; the exact
# case matters on case-sensitive file systems.
x_train <- read.csv("dataset/train/X_train.txt", sep = "", header = FALSE)
y_train <- read.csv("dataset/train/y_train.txt", sep = "", header = FALSE)
subject_train <- read.csv("dataset/train/subject_train.txt", sep = "", header = FALSE)
# Check the file
head(subject_train)
## V1
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
# Create training set: measurement columns first, then the activity code,
# then the subject identifier.
training_Set <- cbind(x_train, y_train, subject_train)

# Read the test split. The activity file is named "y_test.txt" with a
# lowercase "y"; the exact case matters on case-sensitive file systems.
x_test <- read.csv("dataset/test/X_test.txt", sep = "", header = FALSE)
y_test <- read.csv("dataset/test/y_test.txt", sep = "", header = FALSE)
subject_test <- read.csv("dataset/test/subject_test.txt", sep = "", header = FALSE)

# Create testing set with the same column order as the training set
test_Set <- cbind(x_test, y_test, subject_test)

# Stack both splits into a single data set (training rows first)
dataset <- rbind(training_Set, test_Set)
# Load the feature table (V1 = feature index, V2 = feature name) and store
# the names as plain character strings.
features <- read.csv(
  "dataset/features.txt",
  header = FALSE,
  sep = ""
)
features[["V2"]] <- as.character(features[["V2"]])
# Preview the first few rows
head(features)
## V1 V2
## 1 1 tBodyAcc-mean()-X
## 2 2 tBodyAcc-mean()-Y
## 3 3 tBodyAcc-mean()-Z
## 4 4 tBodyAcc-std()-X
## 5 5 tBodyAcc-std()-Y
## 6 6 tBodyAcc-std()-Z
Next, add labels for the two extra columns, “activity” and “subject”, and rename the columns of the data set as follows: * Use the names from the “features” data. * Replace “-mean” with “Mean”. * Replace “-std” with “Std”.
# Append label rows for the two columns bound after the 561 measurements:
# column 562 holds the activity code and column 563 the subject identifier.
features <- rbind(features, c(562, "activity"))
features <- rbind(features, c(563, "subject"))
# Use the feature names as column names, rewriting "-mean" as "Mean" and
# "-std" as "Std" in a single pass.
names(dataset) <- gsub("-std", "Std", gsub("-mean", "Mean", features$V2))
Finally, retrieve all the data with mean, std, subject and activity from the data set.
# Keep only the mean/std measurement columns plus the activity and subject
# columns.
final_dataset <- dataset[, grepl("Mean|Std|activity|subject", names(dataset))]

# Replace the numeric activity codes with their descriptive labels.
activity_labels <- read.csv("dataset/activity_labels.txt", sep = "", header = FALSE)
activity_labels$V2 <- as.character(activity_labels$V2)
# Look each label up by activity id (column V1) rather than by row position,
# so the result does not depend on the row order of the labels file.
final_dataset$activity <- as.factor(
  activity_labels$V2[match(final_dataset$activity, activity_labels$V1)]
)
final_dataset$subject <- as.factor(final_dataset$subject)

# Strip "-", "(" and ")" from the column names, then make whatever remains
# syntactically valid (e.g. "," becomes ".").
names(final_dataset) <- gsub("[-()]", "", names(final_dataset))
names(final_dataset) <- make.names(names(final_dataset))
head(final_dataset)
## tBodyAccMeanX tBodyAccMeanY tBodyAccMeanZ tBodyAccStdX tBodyAccStdY
## 1 0.2885845 -0.02029417 -0.1329051 -0.9952786 -0.9831106
## 2 0.2784188 -0.01641057 -0.1235202 -0.9982453 -0.9753002
## 3 0.2796531 -0.01946716 -0.1134617 -0.9953796 -0.9671870
## 4 0.2791739 -0.02620065 -0.1232826 -0.9960915 -0.9834027
## 5 0.2766288 -0.01656965 -0.1153619 -0.9981386 -0.9808173
## 6 0.2771988 -0.01009785 -0.1051373 -0.9973350 -0.9904868
## tBodyAccStdZ tGravityAccMeanX tGravityAccMeanY tGravityAccMeanZ
## 1 -0.9135264 0.9633961 -0.1408397 0.11537494
## 2 -0.9603220 0.9665611 -0.1415513 0.10937881
## 3 -0.9789440 0.9668781 -0.1420098 0.10188392
## 4 -0.9906751 0.9676152 -0.1439765 0.09985014
## 5 -0.9904816 0.9682244 -0.1487502 0.09448590
## 6 -0.9954200 0.9679482 -0.1482100 0.09190972
## tGravityAccStdX tGravityAccStdY tGravityAccStdZ tBodyAccJerkMeanX
## 1 -0.9852497 -0.9817084 -0.8776250 0.07799634
## 2 -0.9974113 -0.9894474 -0.9316387 0.07400671
## 3 -0.9995740 -0.9928658 -0.9929172 0.07363596
## 4 -0.9966456 -0.9813928 -0.9784764 0.07732061
## 5 -0.9984293 -0.9880982 -0.9787449 0.07344436
## 6 -0.9989793 -0.9867539 -0.9973064 0.07793244
## tBodyAccJerkMeanY tBodyAccJerkMeanZ tBodyAccJerkStdX tBodyAccJerkStdY
## 1 0.005000803 -0.067830808 -0.9935191 -0.9883600
## 2 0.005771104 0.029376633 -0.9955481 -0.9810636
## 3 0.003104037 -0.009045631 -0.9907428 -0.9809556
## 4 0.020057642 -0.009864772 -0.9926974 -0.9875527
## 5 0.019121574 0.016779979 -0.9964202 -0.9883587
## 6 0.018684046 0.009344434 -0.9948136 -0.9887145
## tBodyAccJerkStdZ tBodyGyroMeanX tBodyGyroMeanY tBodyGyroMeanZ
## 1 -0.9935750 -0.006100849 -0.03136479 0.10772540
## 2 -0.9918457 -0.016111620 -0.08389378 0.10058429
## 3 -0.9896866 -0.031698294 -0.10233542 0.09612688
## 4 -0.9934976 -0.043409983 -0.09138618 0.08553770
## 5 -0.9924549 -0.033960416 -0.07470803 0.07739203
## 6 -0.9922663 -0.028775508 -0.07039311 0.07901214
## tBodyGyroStdX tBodyGyroStdY tBodyGyroStdZ tBodyGyroJerkMeanX
## 1 -0.9853103 -0.9766234 -0.9922053 -0.09916740
## 2 -0.9831200 -0.9890458 -0.9891212 -0.11050283
## 3 -0.9762921 -0.9935518 -0.9863787 -0.10848567
## 4 -0.9913848 -0.9924073 -0.9875542 -0.09116989
## 5 -0.9851836 -0.9923781 -0.9874019 -0.09077010
## 6 -0.9851808 -0.9921175 -0.9830768 -0.09424758
## tBodyGyroJerkMeanY tBodyGyroJerkMeanZ tBodyGyroJerkStdX
## 1 -0.05551737 -0.06198580 -0.9921107
## 2 -0.04481873 -0.05924282 -0.9898726
## 3 -0.04241031 -0.05582883 -0.9884618
## 4 -0.03633262 -0.06046466 -0.9911194
## 5 -0.03763253 -0.05828932 -0.9913545
## 6 -0.04335526 -0.04193600 -0.9916216
## tBodyGyroJerkStdY tBodyGyroJerkStdZ tBodyAccMagMean tBodyAccMagStd
## 1 -0.9925193 -0.9920553 -0.9594339 -0.9505515
## 2 -0.9972926 -0.9938510 -0.9792892 -0.9760571
## 3 -0.9956321 -0.9915318 -0.9837031 -0.9880196
## 4 -0.9966410 -0.9933289 -0.9865418 -0.9864213
## 5 -0.9964730 -0.9945110 -0.9928271 -0.9912754
## 6 -0.9960147 -0.9930906 -0.9942950 -0.9952490
## tGravityAccMagMean tGravityAccMagStd tBodyAccJerkMagMean
## 1 -0.9594339 -0.9505515 -0.9933059
## 2 -0.9792892 -0.9760571 -0.9912535
## 3 -0.9837031 -0.9880196 -0.9885313
## 4 -0.9865418 -0.9864213 -0.9930780
## 5 -0.9928271 -0.9912754 -0.9934800
## 6 -0.9942950 -0.9952490 -0.9930177
## tBodyAccJerkMagStd tBodyGyroMagMean tBodyGyroMagStd tBodyGyroJerkMagMean
## 1 -0.9943364 -0.9689591 -0.9643352 -0.9942478
## 2 -0.9916944 -0.9806831 -0.9837542 -0.9951232
## 3 -0.9903969 -0.9763171 -0.9860515 -0.9934032
## 4 -0.9933808 -0.9820599 -0.9873511 -0.9955022
## 5 -0.9958537 -0.9852037 -0.9890626 -0.9958076
## 6 -0.9954243 -0.9858944 -0.9864403 -0.9952748
## tBodyGyroJerkMagStd fBodyAccMeanX fBodyAccMeanY fBodyAccMeanZ
## 1 -0.9913676 -0.9947832 -0.9829841 -0.9392687
## 2 -0.9961016 -0.9974507 -0.9768517 -0.9735227
## 3 -0.9950910 -0.9935941 -0.9725115 -0.9833040
## 4 -0.9952666 -0.9954906 -0.9835697 -0.9910798
## 5 -0.9952580 -0.9972859 -0.9823010 -0.9883694
## 6 -0.9952050 -0.9966567 -0.9869395 -0.9927386
## fBodyAccStdX fBodyAccStdY fBodyAccStdZ fBodyAccMeanFreqX
## 1 -0.9954217 -0.9831330 -0.9061650 0.25248290
## 2 -0.9986803 -0.9749298 -0.9554381 0.27130855
## 3 -0.9963128 -0.9655059 -0.9770493 0.12453124
## 4 -0.9963121 -0.9832444 -0.9902291 0.02904438
## 5 -0.9986065 -0.9801295 -0.9919150 0.18108977
## 6 -0.9976438 -0.9922637 -0.9970459 0.15738377
## fBodyAccMeanFreqY fBodyAccMeanFreqZ fBodyAccJerkMeanX fBodyAccJerkMeanY
## 1 0.13183575 -0.05205025 -0.9923325 -0.9871699
## 2 0.04286364 -0.01430976 -0.9950322 -0.9813115
## 3 -0.06461056 0.08267692 -0.9909937 -0.9816423
## 4 0.08030227 0.18569468 -0.9944466 -0.9887272
## 5 0.05798789 0.55978632 -0.9962920 -0.9887900
## 6 0.31883523 0.60559943 -0.9948507 -0.9882443
## fBodyAccJerkMeanZ fBodyAccJerkStdX fBodyAccJerkStdY fBodyAccJerkStdZ
## 1 -0.9896961 -0.9958207 -0.9909363 -0.9970517
## 2 -0.9897398 -0.9966523 -0.9820839 -0.9926268
## 3 -0.9875663 -0.9912488 -0.9814148 -0.9904159
## 4 -0.9913542 -0.9913783 -0.9869269 -0.9943908
## 5 -0.9906244 -0.9969025 -0.9886067 -0.9929065
## 6 -0.9901575 -0.9952180 -0.9901788 -0.9930667
## fBodyAccJerkMeanFreqX fBodyAccJerkMeanFreqY fBodyAccJerkMeanFreqZ
## 1 0.87038451 0.21069700 0.26370789
## 2 0.60851352 -0.05367561 0.06314827
## 3 0.11543400 -0.19343634 0.03825433
## 4 0.03579805 -0.09303585 0.16809523
## 5 0.27335020 0.07913538 0.29238418
## 6 0.32883589 0.05477140 0.32094497
## fBodyGyroMeanX fBodyGyroMeanY fBodyGyroMeanZ fBodyGyroStdX fBodyGyroStdY
## 1 -0.9865744 -0.9817615 -0.9895148 -0.9850326 -0.9738861
## 2 -0.9773867 -0.9925300 -0.9896058 -0.9849043 -0.9871681
## 3 -0.9754332 -0.9937147 -0.9867557 -0.9766422 -0.9933990
## 4 -0.9871096 -0.9936015 -0.9871913 -0.9928104 -0.9916460
## 5 -0.9824465 -0.9929838 -0.9886664 -0.9859818 -0.9919558
## 6 -0.9848902 -0.9927862 -0.9807784 -0.9852871 -0.9916595
## fBodyGyroStdZ fBodyGyroMeanFreqX fBodyGyroMeanFreqY fBodyGyroMeanFreqZ
## 1 -0.9940349 -0.25754888 0.09794711 0.54715105
## 2 -0.9897847 -0.04816744 -0.40160791 -0.06817833
## 3 -0.9873282 -0.21668507 -0.01726417 -0.11072029
## 4 -0.9886776 0.21686246 -0.13524536 -0.04972798
## 5 -0.9879443 -0.15334258 -0.08840273 -0.16223039
## 6 -0.9853661 -0.36303968 -0.13323831 0.19483324
## fBodyAccMagMean fBodyAccMagStd fBodyAccMagMeanFreq
## 1 -0.9521547 -0.9561340 -0.08843612
## 2 -0.9808566 -0.9758658 -0.04414989
## 3 -0.9877948 -0.9890155 0.25789914
## 4 -0.9875187 -0.9867420 0.07358150
## 5 -0.9935909 -0.9900635 0.39431033
## 6 -0.9948360 -0.9952833 0.43796212
## fBodyBodyAccJerkMagMean fBodyBodyAccJerkMagStd
## 1 -0.9937257 -0.9937550
## 2 -0.9903355 -0.9919603
## 3 -0.9892801 -0.9908667
## 4 -0.9927689 -0.9916998
## 5 -0.9955228 -0.9943890
## 6 -0.9947329 -0.9951562
## fBodyBodyAccJerkMagMeanFreq fBodyBodyGyroMagMean fBodyBodyGyroMagStd
## 1 0.3469885 -0.9801349 -0.9613094
## 2 0.5320605 -0.9882956 -0.9833219
## 3 0.6607950 -0.9892548 -0.9860277
## 4 0.6789213 -0.9894128 -0.9878358
## 5 0.5590577 -0.9914330 -0.9890594
## 6 0.2469096 -0.9905000 -0.9858609
## fBodyBodyGyroMagMeanFreq fBodyBodyGyroJerkMagMean
## 1 -0.1289889 -0.9919904
## 2 -0.2719585 -0.9958539
## 3 -0.2127279 -0.9950305
## 4 -0.0356842 -0.9952207
## 5 -0.2735820 -0.9950928
## 6 -0.2973291 -0.9951433
## fBodyBodyGyroJerkMagStd fBodyBodyGyroJerkMagMeanFreq
## 1 -0.9906975 -0.07432303
## 2 -0.9963995 0.15807454
## 3 -0.9951274 0.41450281
## 4 -0.9952369 0.40457253
## 5 -0.9954648 0.08775301
## 6 -0.9952387 0.01995331
## angletBodyAccMean.gravity angletBodyAccJerkMean.gravityMean
## 1 -0.11275434 0.030400372
## 2 0.05347695 -0.007434566
## 3 -0.11855926 0.177899480
## 4 -0.03678797 -0.012892494
## 5 0.12332005 0.122541960
## 6 0.08263215 -0.143439010
## angletBodyGyroMean.gravityMean angletBodyGyroJerkMean.gravityMean
## 1 -0.4647614 -0.01844588
## 2 -0.7326262 0.70351059
## 3 0.1006992 0.80852908
## 4 0.6400110 -0.48536645
## 5 0.6935783 -0.61597061
## 6 0.2750408 -0.36822404
## angleX.gravityMean angleY.gravityMean angleZ.gravityMean activity
## 1 -0.8412468 0.1799406 -0.05862692 STANDING
## 2 -0.8447876 0.1802889 -0.05431672 STANDING
## 3 -0.8489335 0.1806373 -0.04911782 STANDING
## 4 -0.8486494 0.1819348 -0.04766318 STANDING
## 5 -0.8478653 0.1851512 -0.04389225 STANDING
## 6 -0.8496316 0.1848225 -0.04212638 STANDING
## subject
## 1 1
## 2 1
## 3 1
## 4 1
## 5 1
## 6 1
# Average every measurement column for each activity/subject pair.
# Only the measurement columns are passed to aggregate(): taking the mean of
# the activity/subject factors would yield NA columns and warnings that then
# have to be dropped by position.
measure_cols <- setdiff(names(final_dataset), c("activity", "subject"))
tidy <- aggregate(
  final_dataset[measure_cols],
  by = list(
    activity = final_dataset$activity,
    subject = final_dataset$subject
  ),
  FUN = mean
)
# Write the tidy summary as a tab-separated file without row names.
write.table(tidy, "tidy.txt", sep = "\t", row.names = FALSE)