Human Activity Recognition (HAR) is emerging as a key research area for developing context-aware systems, and it has many potential applications. This report focuses on one of them: assessing how well a weight lifting exercise is performed, using the dataset described below.

Weight Lifting Exercises Dataset:

The Weight Lifting Exercises dataset was collected to investigate “how (well)” an activity was performed by the wearer. Six young, healthy participants were asked to perform one set of 10 repetitions of the Unilateral Dumbbell Biceps Curl in five different fashions: exactly according to the specification (Class A), throwing the elbows to the front (Class B), lifting the dumbbell only halfway (Class C), lowering the dumbbell only halfway (Class D) and throwing the hips to the front (Class E).

Class A corresponds to the specified execution of the exercise, while the other 4 classes correspond to common mistakes.

Read more: http://groupware.les.inf.puc-rio.br/har#ixzz3v3mADg7n

library(caret)
library(dplyr)

Getting and Cleaning Data

# Read the training data relative to the current working directory. The
# previous setwd() call pointed at one user's desktop folder and made the
# script fail everywhere else; run the analysis from the folder that holds
# the CSV files instead.
#
# stringsAsFactors = TRUE makes `classe` a factor regardless of R version
# (since R 4.0 read.csv() leaves strings as character by default), which is
# what the classification and confusionMatrix() steps that follow expect.
hardata <- read.csv("pml-training.csv", stringsAsFactors = TRUE)

# Cleaning data ----
# Drop the row index, participant name, timestamp and window bookkeeping
# columns.
hardata <- hardata %>%
  select(-X, -user_name,
         -raw_timestamp_part_1, -raw_timestamp_part_2, -cvtd_timestamp,
         -new_window, -num_window)
# Extracting belt, arm, dumbbell, forearm related variables only: keep the
# columns whose names mention gyros / accel / magnet, plus the `classe` outcome.
hardata <- hardata[, grep("gyros|accel|magnet|classe", names(hardata))]
# Remove whatever columns are left whose names start with "var".
hardata <- hardata %>% select(-starts_with("var"))

dim(hardata)
## [1] 19622    41

Creating Training and Test Sets

# Fix the RNG state so the partition below is reproducible.
set.seed(1456)
# Row indices for a 60% training sample, drawn with respect to `classe`;
# list = FALSE returns them as a matrix usable for row subsetting.
intrain <- createDataPartition(hardata$classe, p = 0.6, list = FALSE)
traincomplete <- hardata[intrain, ]
testing <- hardata[-intrain, ]  # the remaining 40% is held out
dim(traincomplete)
## [1] 11776    41
dim(testing)
## [1] 7846   41

Feature Plots for Exploratory Analysis

# Pairwise ellipse plot of the four total-acceleration features, grouped by
# exercise class, with the legend laid out in four columns.
featurePlot(
  x = traincomplete[c("total_accel_belt", "total_accel_arm",
                      "total_accel_dumbbell", "total_accel_forearm")],
  y = traincomplete[["classe"]],
  plot = "ellipse",
  auto.key = list(columns = 4)
)

Fitting Random Forest Classification Model

# Resampling scheme: 10-fold cross-validation, repeated 3 times.
fitCOntrol <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# Random forest on every remaining predictor, tuned with the scheme above.
rffit <- train(
  classe ~ .,
  data = traincomplete,
  method = "rf",
  trControl = fitCOntrol
)

# Show the resampled accuracy and kappa for each candidate mtry value.
print(rffit, digits = 3)
## Random Forest 
## 
## 11776 samples
##    40 predictor
##     5 classes: 'A', 'B', 'C', 'D', 'E' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times) 
## Summary of sample sizes: 10600, 10598, 10600, 10598, 10598, 10599, ... 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy  Kappa  Accuracy SD  Kappa SD
##    2    0.982     0.977  0.00332      0.00421 
##   21    0.979     0.973  0.00392      0.00497 
##   40    0.973     0.966  0.00346      0.00438 
## 
## Accuracy was used to select the optimal model using  the largest value.
## The final value used for the model was mtry = 2.
# Show the selected forest: OOB error estimate and per-class confusion matrix.
print(rffit$finalModel)
## 
## Call:
##  randomForest(x = x, y = y, mtry = param$mtry) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 1.68%
## Confusion matrix:
##      A    B    C    D    E class.error
## A 3333    6    4    4    1 0.004480287
## B   40 2215   22    1    1 0.028082492
## C    0   29 2024    0    1 0.014605648
## D    1    0   72 1851    6 0.040932642
## E    1    1    2    6 2155 0.004618938
# Predict the held-out 40% with the tuned random forest.
pred1 <- predict(rffit, testing)

# confusionMatrix() takes the predicted classes first (`data`) and the true
# classes second (`reference`). The call previously passed them the other way
# round, which labelled the truth as "Prediction" and swapped sensitivity with
# positive predictive value (and specificity with negative predictive value).
# NOTE(review): the output below was re-derived by transposing the previously
# rendered table rather than by re-running the model; re-knit to confirm.
confusionMatrix(pred1, testing$classe)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    A    B    C    D    E
##          A 2217   26    0    2    0
##          B    5 1471   25    0    1
##          C    2   21 1343   53    0
##          D    8    0    0 1227    1
##          E    0    0    0    4 1440
## 
## Overall Statistics
##                                          
##                Accuracy : 0.9811         
##                  95% CI : (0.9779, 0.984)
##     No Information Rate : 0.2845         
##     P-Value [Acc > NIR] : < 2.2e-16      
##                                          
##                   Kappa : 0.9761         
##  Mcnemar's Test P-Value : NA             
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E
## Sensitivity            0.9933   0.9690   0.9817   0.9541   0.9986
## Specificity            0.9950   0.9951   0.9883   0.9986   0.9994
## Pos Pred Value         0.9875   0.9794   0.9464   0.9927   0.9972
## Neg Pred Value         0.9973   0.9926   0.9961   0.9911   0.9997
## Prevalence             0.2845   0.1935   0.1744   0.1639   0.1838
## Detection Rate         0.2826   0.1875   0.1712   0.1564   0.1835
## Detection Prevalence   0.2861   0.1914   0.1809   0.1575   0.1840
## Balanced Accuracy      0.9941   0.9821   0.9850   0.9764   0.9990
# Plot the tuning profile of the fitted caret model with custom labels.
plot(
  rffit,
  log = "y", lwd = 2,
  main = "Random forest accuracy",
  xlab = "Predictors", ylab = "Accuracy"
)

Out of Sample Error

# Fraction of held-out rows whose predicted class equals the recorded class.
oosaccuracy <- mean(pred1 == testing$classe)
# Out-of-sample error as a percentage, rounded to three decimals.
oose <- round(100 * (1 - oosaccuracy), 3)

Prediction for 20 Test Cases

# Load the 20 test cases and put them through the same column selection as
# the training data. `problem_id` is renamed to `classe`, which lets it pass
# the name filter below.
test <- read.csv("pml-testing.csv") %>%
  rename(classe = problem_id) %>%
  select(-X, -user_name,
         -raw_timestamp_part_1, -raw_timestamp_part_2, -cvtd_timestamp,
         -new_window, -num_window)
# Extracting belt, arm, dumbbell, forearm related variables only
test <- test[, grep("gyros|accel|magnet|classe", names(test))]
test <- test %>% select(-starts_with("var"))
# Predict the exercise class of each case with the fitted random forest.
pred2 <- predict(rffit, test)
pred2
##  [1] B A C A A E D B A A B C B A E E A B B B
## Levels: A B C D E