Introduction

The purpose of this exercise is to demonstrate your ability to collect, work with, and clean a data set. The goal is to prepare tidy data that can be used for later analysis.

# Fetch the UCI HAR archive once and unpack it into ./dataset.
# Both the download and the extraction are skipped when their target already
# exists, so re-running the script neither refetches nor re-extracts anything.
fileName <- "dataset.zip"
if (!file.exists(fileName)) {
  fileURL <- "https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip"
  # mode = "wb" keeps the zip intact on Windows, where a text-mode download
  # would corrupt a binary file. The default download method is used so the
  # script does not depend on an external curl executable.
  download.file(fileURL, fileName, mode = "wb")
}
folderName <- "dataset"
if (!file.exists(folderName)) {
  unzip(fileName)
  # The archive unpacks into a folder named "UCI HAR Dataset"; the exact case
  # matters on case-sensitive file systems. Rename it only right after
  # extraction, and fail loudly if the rename does not succeed.
  extractedName <- "UCI HAR Dataset"
  if (!file.rename(extractedName, folderName)) {
    stop("Could not rename '", extractedName, "' to '", folderName, "'",
         call. = FALSE)
  }
}
## [1] FALSE

(1st Requirement): Merges the training and the test sets to create one data set.

# Read the training split: feature vectors, activity ids and subject ids.
# The files are whitespace-delimited with no header row, which is what
# read.table() parses by default. The archive stores the label files with a
# lowercase "y" (y_train.txt / y_test.txt); matching that case exactly keeps
# the script working on case-sensitive file systems.
x_train <- read.table("dataset/train/X_train.txt", header = FALSE)
y_train <- read.table("dataset/train/y_train.txt", header = FALSE)
subject_train <- read.table("dataset/train/subject_train.txt", header = FALSE)

# Check the file
head(subject_train)
##   V1
## 1  1
## 2  1
## 3  1
## 4  1
## 5  1
## 6  1
# Create training set: features first, then activity id, then subject id
training_Set <- cbind(x_train, y_train, subject_train)

# Read the test split in the same column layout as the training split
x_test <- read.table("dataset/test/X_test.txt", header = FALSE)
y_test <- read.table("dataset/test/y_test.txt", header = FALSE)
subject_test <- read.table("dataset/test/subject_test.txt", header = FALSE)

# Create testing set
test_Set <- cbind(x_test, y_test, subject_test)

# Stack training rows on top of test rows to form the merged data set
dataset <- rbind(training_Set, test_Set)

(2nd Requirement): Extracts only the measurements on the mean and standard deviation for each measurement.

# Load the feature list (whitespace-delimited, no header row) and make sure
# the name column V2 is plain character rather than a factor.
features <- read.csv(file = "dataset/features.txt", header = FALSE, sep = "")
features[["V2"]] <- as.character(features[["V2"]])
head(features)
##   V1                V2
## 1  1 tBodyAcc-mean()-X
## 2  2 tBodyAcc-mean()-Y
## 3  3 tBodyAcc-mean()-Z
## 4  4  tBodyAcc-std()-X
## 5  5  tBodyAcc-std()-Y
## 6  6  tBodyAcc-std()-Z

Next, append labels for the “activity” and “subject” columns to the feature list and rename the columns of the data set as follows: * Use the names in the “features” data. * Replace “-mean” with “Mean”. * Replace “-std” with “Std”.

# Append names for the two id columns that follow the feature columns in
# `dataset`. They are added as a typed data frame rather than a mixed
# c(number, "text") vector, because rbind() with such a vector silently turns
# features$V1 into character. The indices continue from the current row count
# instead of being hard-coded.
extra_labels <- data.frame(
  V1 = nrow(features) + 1:2,
  V2 = c("activity", "subject"),
  stringsAsFactors = FALSE
)
features <- rbind(features, extra_labels)

# One name per column is required; stop early with a clear failure otherwise
# (for example when this chunk is run twice).
stopifnot(nrow(features) == ncol(dataset))
names(dataset) <- features$V2

# Literal (non-regex) replacements: "-mean" -> "Mean", "-std" -> "Std"
names(dataset) <- gsub("-mean", "Mean", names(dataset), fixed = TRUE)
names(dataset) <- gsub("-std", "Std", names(dataset), fixed = TRUE)

Finally, keep only the columns whose names contain “Mean” or “Std” (this also matches the meanFreq and angle(...Mean) variables), together with the subject and activity columns.

# Keep every column whose name contains Mean, Std, activity or subject.
final_dataset <- dataset[
  ,
  grepl(pattern = "Mean|Std|activity|subject", x = colnames(dataset))
]

(3rd Requirement): Uses descriptive activity names to name the activities in the data set

# Load the id -> label table for activities (V1 = id, V2 = label).
activity_labels <- read.csv("dataset/activity_labels.txt", sep = "", header = FALSE)
activity_labels$V2 <- as.character(activity_labels$V2)

# Translate activity ids to labels by matching on the id column, not by row
# position, so the result does not depend on the row order of the label file.
# An id with no label stops the script instead of silently yielding NA; this
# also catches re-running the chunk on an already-labelled column.
label_rows <- match(final_dataset$activity, activity_labels$V1)
stopifnot(!anyNA(label_rows))
final_dataset$activity <- as.factor(activity_labels$V2[label_rows])

# Subjects are identifiers, not quantities, so store them as a factor too.
final_dataset$subject <- as.factor(final_dataset$subject)

(4th Requirement): Appropriately labels the data set with descriptive variable names.

# Remove dashes and parentheses from the column names, then pass the result
# through make.names() so every name is a syntactically valid R identifier.
names(final_dataset) <- make.names(gsub("[-()]", "", names(final_dataset)))

head(final_dataset)
##   tBodyAccMeanX tBodyAccMeanY tBodyAccMeanZ tBodyAccStdX tBodyAccStdY
## 1     0.2885845   -0.02029417    -0.1329051   -0.9952786   -0.9831106
## 2     0.2784188   -0.01641057    -0.1235202   -0.9982453   -0.9753002
## 3     0.2796531   -0.01946716    -0.1134617   -0.9953796   -0.9671870
## 4     0.2791739   -0.02620065    -0.1232826   -0.9960915   -0.9834027
## 5     0.2766288   -0.01656965    -0.1153619   -0.9981386   -0.9808173
## 6     0.2771988   -0.01009785    -0.1051373   -0.9973350   -0.9904868
##   tBodyAccStdZ tGravityAccMeanX tGravityAccMeanY tGravityAccMeanZ
## 1   -0.9135264        0.9633961       -0.1408397       0.11537494
## 2   -0.9603220        0.9665611       -0.1415513       0.10937881
## 3   -0.9789440        0.9668781       -0.1420098       0.10188392
## 4   -0.9906751        0.9676152       -0.1439765       0.09985014
## 5   -0.9904816        0.9682244       -0.1487502       0.09448590
## 6   -0.9954200        0.9679482       -0.1482100       0.09190972
##   tGravityAccStdX tGravityAccStdY tGravityAccStdZ tBodyAccJerkMeanX
## 1      -0.9852497      -0.9817084      -0.8776250        0.07799634
## 2      -0.9974113      -0.9894474      -0.9316387        0.07400671
## 3      -0.9995740      -0.9928658      -0.9929172        0.07363596
## 4      -0.9966456      -0.9813928      -0.9784764        0.07732061
## 5      -0.9984293      -0.9880982      -0.9787449        0.07344436
## 6      -0.9989793      -0.9867539      -0.9973064        0.07793244
##   tBodyAccJerkMeanY tBodyAccJerkMeanZ tBodyAccJerkStdX tBodyAccJerkStdY
## 1       0.005000803      -0.067830808       -0.9935191       -0.9883600
## 2       0.005771104       0.029376633       -0.9955481       -0.9810636
## 3       0.003104037      -0.009045631       -0.9907428       -0.9809556
## 4       0.020057642      -0.009864772       -0.9926974       -0.9875527
## 5       0.019121574       0.016779979       -0.9964202       -0.9883587
## 6       0.018684046       0.009344434       -0.9948136       -0.9887145
##   tBodyAccJerkStdZ tBodyGyroMeanX tBodyGyroMeanY tBodyGyroMeanZ
## 1       -0.9935750   -0.006100849    -0.03136479     0.10772540
## 2       -0.9918457   -0.016111620    -0.08389378     0.10058429
## 3       -0.9896866   -0.031698294    -0.10233542     0.09612688
## 4       -0.9934976   -0.043409983    -0.09138618     0.08553770
## 5       -0.9924549   -0.033960416    -0.07470803     0.07739203
## 6       -0.9922663   -0.028775508    -0.07039311     0.07901214
##   tBodyGyroStdX tBodyGyroStdY tBodyGyroStdZ tBodyGyroJerkMeanX
## 1    -0.9853103    -0.9766234    -0.9922053        -0.09916740
## 2    -0.9831200    -0.9890458    -0.9891212        -0.11050283
## 3    -0.9762921    -0.9935518    -0.9863787        -0.10848567
## 4    -0.9913848    -0.9924073    -0.9875542        -0.09116989
## 5    -0.9851836    -0.9923781    -0.9874019        -0.09077010
## 6    -0.9851808    -0.9921175    -0.9830768        -0.09424758
##   tBodyGyroJerkMeanY tBodyGyroJerkMeanZ tBodyGyroJerkStdX
## 1        -0.05551737        -0.06198580        -0.9921107
## 2        -0.04481873        -0.05924282        -0.9898726
## 3        -0.04241031        -0.05582883        -0.9884618
## 4        -0.03633262        -0.06046466        -0.9911194
## 5        -0.03763253        -0.05828932        -0.9913545
## 6        -0.04335526        -0.04193600        -0.9916216
##   tBodyGyroJerkStdY tBodyGyroJerkStdZ tBodyAccMagMean tBodyAccMagStd
## 1        -0.9925193        -0.9920553      -0.9594339     -0.9505515
## 2        -0.9972926        -0.9938510      -0.9792892     -0.9760571
## 3        -0.9956321        -0.9915318      -0.9837031     -0.9880196
## 4        -0.9966410        -0.9933289      -0.9865418     -0.9864213
## 5        -0.9964730        -0.9945110      -0.9928271     -0.9912754
## 6        -0.9960147        -0.9930906      -0.9942950     -0.9952490
##   tGravityAccMagMean tGravityAccMagStd tBodyAccJerkMagMean
## 1         -0.9594339        -0.9505515          -0.9933059
## 2         -0.9792892        -0.9760571          -0.9912535
## 3         -0.9837031        -0.9880196          -0.9885313
## 4         -0.9865418        -0.9864213          -0.9930780
## 5         -0.9928271        -0.9912754          -0.9934800
## 6         -0.9942950        -0.9952490          -0.9930177
##   tBodyAccJerkMagStd tBodyGyroMagMean tBodyGyroMagStd tBodyGyroJerkMagMean
## 1         -0.9943364       -0.9689591      -0.9643352           -0.9942478
## 2         -0.9916944       -0.9806831      -0.9837542           -0.9951232
## 3         -0.9903969       -0.9763171      -0.9860515           -0.9934032
## 4         -0.9933808       -0.9820599      -0.9873511           -0.9955022
## 5         -0.9958537       -0.9852037      -0.9890626           -0.9958076
## 6         -0.9954243       -0.9858944      -0.9864403           -0.9952748
##   tBodyGyroJerkMagStd fBodyAccMeanX fBodyAccMeanY fBodyAccMeanZ
## 1          -0.9913676    -0.9947832    -0.9829841    -0.9392687
## 2          -0.9961016    -0.9974507    -0.9768517    -0.9735227
## 3          -0.9950910    -0.9935941    -0.9725115    -0.9833040
## 4          -0.9952666    -0.9954906    -0.9835697    -0.9910798
## 5          -0.9952580    -0.9972859    -0.9823010    -0.9883694
## 6          -0.9952050    -0.9966567    -0.9869395    -0.9927386
##   fBodyAccStdX fBodyAccStdY fBodyAccStdZ fBodyAccMeanFreqX
## 1   -0.9954217   -0.9831330   -0.9061650        0.25248290
## 2   -0.9986803   -0.9749298   -0.9554381        0.27130855
## 3   -0.9963128   -0.9655059   -0.9770493        0.12453124
## 4   -0.9963121   -0.9832444   -0.9902291        0.02904438
## 5   -0.9986065   -0.9801295   -0.9919150        0.18108977
## 6   -0.9976438   -0.9922637   -0.9970459        0.15738377
##   fBodyAccMeanFreqY fBodyAccMeanFreqZ fBodyAccJerkMeanX fBodyAccJerkMeanY
## 1        0.13183575       -0.05205025        -0.9923325        -0.9871699
## 2        0.04286364       -0.01430976        -0.9950322        -0.9813115
## 3       -0.06461056        0.08267692        -0.9909937        -0.9816423
## 4        0.08030227        0.18569468        -0.9944466        -0.9887272
## 5        0.05798789        0.55978632        -0.9962920        -0.9887900
## 6        0.31883523        0.60559943        -0.9948507        -0.9882443
##   fBodyAccJerkMeanZ fBodyAccJerkStdX fBodyAccJerkStdY fBodyAccJerkStdZ
## 1        -0.9896961       -0.9958207       -0.9909363       -0.9970517
## 2        -0.9897398       -0.9966523       -0.9820839       -0.9926268
## 3        -0.9875663       -0.9912488       -0.9814148       -0.9904159
## 4        -0.9913542       -0.9913783       -0.9869269       -0.9943908
## 5        -0.9906244       -0.9969025       -0.9886067       -0.9929065
## 6        -0.9901575       -0.9952180       -0.9901788       -0.9930667
##   fBodyAccJerkMeanFreqX fBodyAccJerkMeanFreqY fBodyAccJerkMeanFreqZ
## 1            0.87038451            0.21069700            0.26370789
## 2            0.60851352           -0.05367561            0.06314827
## 3            0.11543400           -0.19343634            0.03825433
## 4            0.03579805           -0.09303585            0.16809523
## 5            0.27335020            0.07913538            0.29238418
## 6            0.32883589            0.05477140            0.32094497
##   fBodyGyroMeanX fBodyGyroMeanY fBodyGyroMeanZ fBodyGyroStdX fBodyGyroStdY
## 1     -0.9865744     -0.9817615     -0.9895148    -0.9850326    -0.9738861
## 2     -0.9773867     -0.9925300     -0.9896058    -0.9849043    -0.9871681
## 3     -0.9754332     -0.9937147     -0.9867557    -0.9766422    -0.9933990
## 4     -0.9871096     -0.9936015     -0.9871913    -0.9928104    -0.9916460
## 5     -0.9824465     -0.9929838     -0.9886664    -0.9859818    -0.9919558
## 6     -0.9848902     -0.9927862     -0.9807784    -0.9852871    -0.9916595
##   fBodyGyroStdZ fBodyGyroMeanFreqX fBodyGyroMeanFreqY fBodyGyroMeanFreqZ
## 1    -0.9940349        -0.25754888         0.09794711         0.54715105
## 2    -0.9897847        -0.04816744        -0.40160791        -0.06817833
## 3    -0.9873282        -0.21668507        -0.01726417        -0.11072029
## 4    -0.9886776         0.21686246        -0.13524536        -0.04972798
## 5    -0.9879443        -0.15334258        -0.08840273        -0.16223039
## 6    -0.9853661        -0.36303968        -0.13323831         0.19483324
##   fBodyAccMagMean fBodyAccMagStd fBodyAccMagMeanFreq
## 1      -0.9521547     -0.9561340         -0.08843612
## 2      -0.9808566     -0.9758658         -0.04414989
## 3      -0.9877948     -0.9890155          0.25789914
## 4      -0.9875187     -0.9867420          0.07358150
## 5      -0.9935909     -0.9900635          0.39431033
## 6      -0.9948360     -0.9952833          0.43796212
##   fBodyBodyAccJerkMagMean fBodyBodyAccJerkMagStd
## 1              -0.9937257             -0.9937550
## 2              -0.9903355             -0.9919603
## 3              -0.9892801             -0.9908667
## 4              -0.9927689             -0.9916998
## 5              -0.9955228             -0.9943890
## 6              -0.9947329             -0.9951562
##   fBodyBodyAccJerkMagMeanFreq fBodyBodyGyroMagMean fBodyBodyGyroMagStd
## 1                   0.3469885           -0.9801349          -0.9613094
## 2                   0.5320605           -0.9882956          -0.9833219
## 3                   0.6607950           -0.9892548          -0.9860277
## 4                   0.6789213           -0.9894128          -0.9878358
## 5                   0.5590577           -0.9914330          -0.9890594
## 6                   0.2469096           -0.9905000          -0.9858609
##   fBodyBodyGyroMagMeanFreq fBodyBodyGyroJerkMagMean
## 1               -0.1289889               -0.9919904
## 2               -0.2719585               -0.9958539
## 3               -0.2127279               -0.9950305
## 4               -0.0356842               -0.9952207
## 5               -0.2735820               -0.9950928
## 6               -0.2973291               -0.9951433
##   fBodyBodyGyroJerkMagStd fBodyBodyGyroJerkMagMeanFreq
## 1              -0.9906975                  -0.07432303
## 2              -0.9963995                   0.15807454
## 3              -0.9951274                   0.41450281
## 4              -0.9952369                   0.40457253
## 5              -0.9954648                   0.08775301
## 6              -0.9952387                   0.01995331
##   angletBodyAccMean.gravity angletBodyAccJerkMean.gravityMean
## 1               -0.11275434                       0.030400372
## 2                0.05347695                      -0.007434566
## 3               -0.11855926                       0.177899480
## 4               -0.03678797                      -0.012892494
## 5                0.12332005                       0.122541960
## 6                0.08263215                      -0.143439010
##   angletBodyGyroMean.gravityMean angletBodyGyroJerkMean.gravityMean
## 1                     -0.4647614                        -0.01844588
## 2                     -0.7326262                         0.70351059
## 3                      0.1006992                         0.80852908
## 4                      0.6400110                        -0.48536645
## 5                      0.6935783                        -0.61597061
## 6                      0.2750408                        -0.36822404
##   angleX.gravityMean angleY.gravityMean angleZ.gravityMean activity
## 1         -0.8412468          0.1799406        -0.05862692 STANDING
## 2         -0.8447876          0.1802889        -0.05431672 STANDING
## 3         -0.8489335          0.1806373        -0.04911782 STANDING
## 4         -0.8486494          0.1819348        -0.04766318 STANDING
## 5         -0.8478653          0.1851512        -0.04389225 STANDING
## 6         -0.8496316          0.1848225        -0.04212638 STANDING
##   subject
## 1       1
## 2       1
## 3       1
## 4       1
## 5       1
## 6       1

(5th Requirement): From the data set in step 4, creates a second, independent tidy data set with the average of each variable for each activity and each subject.

# Average every measurement column for each activity/subject pair.
# Only the measurement columns are handed to aggregate(): passing the whole
# data frame would also take the mean of the activity and subject factors,
# which raises warnings, produces all-NA columns, and then requires dropping
# those columns by hard-coded position. The result has the grouping columns
# (activity, subject) first, followed by the averaged measurements.
id_cols <- c("activity", "subject")
measure_cols <- setdiff(names(final_dataset), id_cols)
tidy <- aggregate(
  final_dataset[measure_cols],
  by = list(
    activity = final_dataset$activity,
    subject = final_dataset$subject
  ),
  FUN = mean
)

Export the data set

# Write the tidy summary as a tab-separated text file without row names.
# The argument is spelled out in full (row.names) rather than relying on
# partial argument matching.
write.table(tidy, "tidy.txt", sep = "\t", row.names = FALSE)