Getting-and-Cleanning-data
Repo for the project from the Getting and Cleaning Data course.
Overview
This project intends to demonstrate the importance of, and the ability to, collect, work with and clean a data set. The goal is to prepare a tidy data set that can be used for later analysis. The data can be downloaded from the following address: https://d396qusza40orc.cloudfront.net/getdata%2Fprojectfiles%2FUCI%20HAR%20Dataset.zip Project Summary: Develop an R script called run_analysis.R that does the following:
Merges the training and the test sets to create one data set.
Extracts only the measurements on the mean and standard deviation for each measurement.
Uses descriptive activity names to name the activities in the data set
Appropriately labels the data set with descriptive variable names.
Finally, creates a second, independent tidy data set with the average of each variable for each activity and each subject.
The repo should also contain the following documentation:
The R script run_analysis.R containing all the code for the analysis
A link to a Github repository with your script for performing the analysis
A code book that describes the variables, the data, and any transformations or work that was performed to clean up the data called CodeBook.md.
A README.md in the repo with your scripts. This repo explains how all of the scripts work and how they are connected.
library(dplyr)
# Work from inside the extracted "UCI HAR Dataset" folder so that the relative
# paths below resolve (the output file is later written here as well).
# NOTE(review): this is a machine-specific absolute path.
setwd("C:/Users/Sergio Simioni/Desktop/Data_Science/getting_data/UCI HAR Dataset")

# Lookup tables: feature names (used as measurement column names) and the
# activity id -> activity label mapping.
features <- read.table(file = "features.txt", quote = "\"")
activity_labels <- read.table(file = "activity_labels.txt", quote = "\"")

# Training split: measurements, activity ids and subject ids.
X_train <- read.table(file = "train/X_train.txt", quote = "\"")
y_train <- read.table(file = "train/y_train.txt", quote = "\"")
subject_train <- read.table(file = "train/subject_train.txt", quote = "\"")

# Test split: measurements, activity ids and subject ids.
X_test <- read.table(file = "test/X_test.txt", quote = "\"")
y_test <- read.table(file = "test/y_test.txt", quote = "\"")
subject_test <- read.table(file = "test/subject_test.txt", quote = "\"")
# Training data (the 70% of the volunteers selected to generate the training set)
colnames(activity_labels) <- c("V1", "Activity")
# Name the X_train columns after the features
colnames(X_train) <- features[, 2]
# Positions of the mean and std measurements. The match ignores case, as
# dplyr's contains() does by default, so the same columns are kept in the
# same order as before: every "mean" column first, then every "std" column.
feature_names <- as.character(features[, 2])
train_cols <- unique(c(
  grep("mean", feature_names, ignore.case = TRUE),
  grep("std", feature_names, ignore.case = TRUE)
))
# Attach the subject id and the activity label to each measurement row.
# match() looks every activity id up in activity_labels without reordering
# anything. merge() sorts its result by the key, so binding a merged label
# table to X_train pairs the labels with the wrong measurement rows.
train4 <- cbind(
  subject = subject_train$V1,
  Activity = activity_labels$Activity[match(y_train$V1, activity_labels$V1)],
  X_train[, train_cols, drop = FALSE]
)
# Test data (the 30% of the volunteers selected to generate the test set)
colnames(activity_labels) <- c("V1", "Activity")
# Name the X_test columns after the features
colnames(X_test) <- features[, 2]
# Positions of the mean and std measurements. The match ignores case, as
# dplyr's contains() does by default, so the same columns are kept in the
# same order as before: every "mean" column first, then every "std" column.
feature_names <- as.character(features[, 2])
test_cols <- unique(c(
  grep("mean", feature_names, ignore.case = TRUE),
  grep("std", feature_names, ignore.case = TRUE)
))
# Attach the subject id and the activity label to each measurement row.
# match() looks every activity id up in activity_labels without reordering
# anything. merge() sorts its result by the key, so binding a merged label
# table to X_test pairs the labels with the wrong measurement rows.
test4 <- cbind(
  subject = subject_test$V1,
  Activity = activity_labels$Activity[match(y_test$V1, activity_labels$V1)],
  X_test[, test_cols, drop = FALSE]
)
# Combine the training data with the test data into a single data set
run_analysis1 <- rbind(train4, test4)
# Average every measurement for each subject/activity pair.
# summarise(across(...)) replaces the deprecated summarise_each(funs(mean))
# and needs dplyr >= 1.0.0. The grouping columns are not part of
# everything() inside across(), so only the measurements are averaged.
# .groups = "drop" returns an ungrouped table.
run_analysis <- run_analysis1 %>%
  group_by(subject, Activity) %>%
  summarise(across(everything(), mean), .groups = "drop")
print(run_analysis)
## Source: local data frame [180 x 88]
## Groups: subject
##
## subject Activity tBodyAcc-mean()-X tBodyAcc-mean()-Y
## 1 1 LAYING 0.2875423 -0.01642767
## 2 1 SITTING 0.2755084 -0.01574349
## 3 1 STANDING 0.2796792 -0.01236445
## 4 1 WALKING 0.2734165 -0.01666060
## 5 1 WALKING_DOWNSTAIRS 0.2794456 -0.01739220
## 6 1 WALKING_UPSTAIRS 0.2863678 -0.01711514
## 7 2 LAYING 0.2824690 -0.02254480
## 8 2 SITTING 0.2849584 -0.01987180
## 9 2 STANDING 0.2785656 -0.01480572
## 10 2 WALKING 0.2691054 -0.01840628
## .. ... ... ... ...
## Variables not shown: tBodyAcc-mean()-Z (dbl), tGravityAcc-mean()-X (dbl),
## tGravityAcc-mean()-Y (dbl), tGravityAcc-mean()-Z (dbl),
## tBodyAccJerk-mean()-X (dbl), tBodyAccJerk-mean()-Y (dbl),
## tBodyAccJerk-mean()-Z (dbl), tBodyGyro-mean()-X (dbl),
## tBodyGyro-mean()-Y (dbl), tBodyGyro-mean()-Z (dbl),
## tBodyGyroJerk-mean()-X (dbl), tBodyGyroJerk-mean()-Y (dbl),
## tBodyGyroJerk-mean()-Z (dbl), tBodyAccMag-mean() (dbl),
## tGravityAccMag-mean() (dbl), tBodyAccJerkMag-mean() (dbl),
## tBodyGyroMag-mean() (dbl), tBodyGyroJerkMag-mean() (dbl),
## fBodyAcc-mean()-X (dbl), fBodyAcc-mean()-Y (dbl), fBodyAcc-mean()-Z
## (dbl), fBodyAcc-meanFreq()-X (dbl), fBodyAcc-meanFreq()-Y (dbl),
## fBodyAcc-meanFreq()-Z (dbl), fBodyAccJerk-mean()-X (dbl),
## fBodyAccJerk-mean()-Y (dbl), fBodyAccJerk-mean()-Z (dbl),
## fBodyAccJerk-meanFreq()-X (dbl), fBodyAccJerk-meanFreq()-Y (dbl),
## fBodyAccJerk-meanFreq()-Z (dbl), fBodyGyro-mean()-X (dbl),
## fBodyGyro-mean()-Y (dbl), fBodyGyro-mean()-Z (dbl),
## fBodyGyro-meanFreq()-X (dbl), fBodyGyro-meanFreq()-Y (dbl),
## fBodyGyro-meanFreq()-Z (dbl), fBodyAccMag-mean() (dbl),
## fBodyAccMag-meanFreq() (dbl), fBodyBodyAccJerkMag-mean() (dbl),
## fBodyBodyAccJerkMag-meanFreq() (dbl), fBodyBodyGyroMag-mean() (dbl),
## fBodyBodyGyroMag-meanFreq() (dbl), fBodyBodyGyroJerkMag-mean() (dbl),
## fBodyBodyGyroJerkMag-meanFreq() (dbl), angle(tBodyAccMean,gravity)
## (dbl), angle(tBodyAccJerkMean),gravityMean) (dbl),
## angle(tBodyGyroMean,gravityMean) (dbl),
## angle(tBodyGyroJerkMean,gravityMean) (dbl), angle(X,gravityMean) (dbl),
## angle(Y,gravityMean) (dbl), angle(Z,gravityMean) (dbl), tBodyAcc-std()-X
## (dbl), tBodyAcc-std()-Y (dbl), tBodyAcc-std()-Z (dbl),
## tGravityAcc-std()-X (dbl), tGravityAcc-std()-Y (dbl),
## tGravityAcc-std()-Z (dbl), tBodyAccJerk-std()-X (dbl),
## tBodyAccJerk-std()-Y (dbl), tBodyAccJerk-std()-Z (dbl),
## tBodyGyro-std()-X (dbl), tBodyGyro-std()-Y (dbl), tBodyGyro-std()-Z
## (dbl), tBodyGyroJerk-std()-X (dbl), tBodyGyroJerk-std()-Y (dbl),
## tBodyGyroJerk-std()-Z (dbl), tBodyAccMag-std() (dbl),
## tGravityAccMag-std() (dbl), tBodyAccJerkMag-std() (dbl),
## tBodyGyroMag-std() (dbl), tBodyGyroJerkMag-std() (dbl), fBodyAcc-std()-X
## (dbl), fBodyAcc-std()-Y (dbl), fBodyAcc-std()-Z (dbl),
## fBodyAccJerk-std()-X (dbl), fBodyAccJerk-std()-Y (dbl),
## fBodyAccJerk-std()-Z (dbl), fBodyGyro-std()-X (dbl), fBodyGyro-std()-Y
## (dbl), fBodyGyro-std()-Z (dbl), fBodyAccMag-std() (dbl),
## fBodyBodyAccJerkMag-std() (dbl), fBodyBodyGyroMag-std() (dbl),
## fBodyBodyGyroJerkMag-std() (dbl)
# Save the tidy summary as a space-separated text file without row names.
# The argument is spelled out as row.names rather than relying on partial
# argument matching of "row.name".
write.table(run_analysis, "./run_analysis.txt", sep = " ", row.names = FALSE)