Human Activity Recognition (HAR) is emerging as a key research area for developing context-aware systems. HAR has many potential applications; some of them are the following:
Weight Lifting Exercises Dataset:
The Weight Lifting Exercises dataset is used to investigate “how (well)” an activity was performed by the wearer. Six young, healthy participants were asked to perform one set of 10 repetitions of the Unilateral Dumbbell Biceps Curl in five different fashions: exactly according to the specification (Class A), throwing the elbows to the front (Class B), lifting the dumbbell only halfway (Class C), lowering the dumbbell only halfway (Class D) and throwing the hips to the front (Class E).
Class A corresponds to the specified execution of the exercise, while the other 4 classes correspond to common mistakes.
Read more: http://groupware.les.inf.puc-rio.br/har#ixzz3v3mADg7n
library(caret)
library(dplyr)
# Load the raw training data and reduce it to the sensor predictors plus the
# outcome column `classe`.
#
# NOTE(review): the working directory is machine-specific. It is kept because
# the later relative read of "pml-testing.csv" also depends on it; consider
# replacing both with a project-relative path.
setwd("C:\\Users\\Avinash\\Desktop\\R Files\\Coursera\\Machine Learning")
hardata <- read.csv("pml-training.csv")

# Cleaning data: drop the row id, user name, timestamp and window columns.
hardata <- select(
  hardata,
  -X,
  -user_name,
  -raw_timestamp_part_1,
  -raw_timestamp_part_2,
  -cvtd_timestamp,
  -new_window,
  -num_window
)

# Extracting belt, arm, dumbbell, forearm related variables only: keep columns
# whose name contains gyros/accel/magnet (plus the outcome), then drop the
# "var*" columns that the "accel" pattern also matches.
hardata <- hardata[, grepl("gyros|accel|magnet|classe", names(hardata))]
hardata <- select(hardata, -starts_with("var"))

# Since R 4.0 read.csv() no longer converts strings to factors. The outcome
# must be a factor for the class-wise plot and for confusionMatrix() further
# down; this is a no-op when `classe` is already a factor.
hardata$classe <- as.factor(hardata$classe)

dim(hardata)
## [1] 19622 41
# Fix the RNG seed, then split the cleaned data 60/40 into a training set and
# a hold-out set using caret's createDataPartition() on the outcome.
set.seed(1456)
intrain <- createDataPartition(hardata$classe, p = 0.6, list = FALSE)

# Rows selected by the partition form the training set; the rest are held out.
traincomplete <- hardata[intrain, ]
testing <- hardata[-intrain, ]

dim(traincomplete)
## [1] 11776 41
dim(testing)
## [1] 7846 41
# Exploratory plot of the four total-acceleration columns against the outcome
# class, drawn with caret's featurePlot() in "ellipse" mode.
total_accel_cols <- c(
  "total_accel_belt",
  "total_accel_arm",
  "total_accel_dumbbell",
  "total_accel_forearm"
)
featurePlot(
  x = traincomplete[, total_accel_cols],
  y = traincomplete$classe,
  plot = "ellipse",
  auto.key = list(columns = 4)
)
# Resampling scheme: repeated k-fold cross-validation (10 folds, 3 repeats).
fitCOntrol <- trainControl(
  method = "repeatedcv",
  number = 10,
  repeats = 3
)

# Fit a random forest ("rf") of `classe` on all remaining columns of the
# training set, resampled with the scheme defined above.
rffit <- train(
  classe ~ .,
  data = traincomplete,
  method = "rf",
  trControl = fitCOntrol
)

# Show the resampling results with 3 significant digits.
print(rffit, digits = 3)
## Random Forest
##
## 11776 samples
## 40 predictor
## 5 classes: 'A', 'B', 'C', 'D', 'E'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold, repeated 3 times)
## Summary of sample sizes: 10600, 10598, 10600, 10598, 10598, 10599, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa Accuracy SD Kappa SD
## 2 0.982 0.977 0.00332 0.00421
## 21 0.979 0.973 0.00392 0.00497
## 40 0.973 0.966 0.00346 0.00438
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
# Print the final model stored inside the caret fit object.
print(rffit[["finalModel"]])
##
## Call:
## randomForest(x = x, y = y, mtry = param$mtry)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 2
##
## OOB estimate of error rate: 1.68%
## Confusion matrix:
## A B C D E class.error
## A 3333 6 4 4 1 0.004480287
## B 40 2215 22 1 1 0.028082492
## C 0 29 2024 0 1 0.014605648
## D 1 0 72 1851 6 0.040932642
## E 1 1 2 6 2155 0.004618938
# Predict the hold-out set and compare the predictions with the true classes.
pred1 <- predict(rffit, testing)

# confusionMatrix() expects the predictions as `data` and the ground truth as
# `reference`. The original call passed them the other way round, which
# transposes the table and swaps Sensitivity with Pos Pred Value (and
# Specificity with Neg Pred Value). Overall Accuracy and Kappa are unaffected.
# The truth is coerced to a factor with the prediction's levels because
# confusionMatrix() requires factors and read.csv() yields character columns
# by default since R 4.0.
# NOTE(review): the recorded output that follows was produced by the original
# swapped call, so its rows are the actual classes and its columns the
# predictions; re-knit to refresh it.
confusionMatrix(
  data = pred1,
  reference = factor(testing$classe, levels = levels(pred1))
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E
## A 2217 5 2 8 0
## B 26 1471 21 0 0
## C 0 25 1343 0 0
## D 2 0 53 1227 4
## E 0 1 0 1 1440
##
## Overall Statistics
##
## Accuracy : 0.9811
## 95% CI : (0.9779, 0.984)
## No Information Rate : 0.2861
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9761
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E
## Sensitivity 0.9875 0.9794 0.9464 0.9927 0.9972
## Specificity 0.9973 0.9926 0.9961 0.9911 0.9997
## Pos Pred Value 0.9933 0.9690 0.9817 0.9541 0.9986
## Neg Pred Value 0.9950 0.9951 0.9883 0.9986 0.9994
## Prevalence 0.2861 0.1914 0.1809 0.1575 0.1840
## Detection Rate 0.2826 0.1875 0.1712 0.1564 0.1835
## Detection Prevalence 0.2845 0.1935 0.1744 0.1639 0.1838
## Balanced Accuracy 0.9924 0.9860 0.9713 0.9919 0.9985
# Plot the fitted caret model; the title and axis labels describe it as
# accuracy against the number of predictors.
plot(
  rffit,
  log = "y",
  lwd = 2,
  main = "Random forest accuracy",
  xlab = "Predictors",
  ylab = "Accuracy"
)
# Out-of-sample accuracy: share of hold-out rows whose prediction matches the
# true class.
oosaccuracy <- sum(testing$classe == pred1) / length(pred1)

# Out-of-sample error as a percentage, rounded to 3 decimal places.
oose <- round(100 * (1 - oosaccuracy), digits = 3)
# Load the separate test file and apply the same cleaning steps as for the
# training data. `problem_id` is renamed to `classe` so that the column-name
# filter below retains it.
test <- read.csv("pml-testing.csv") %>%
  rename(classe = problem_id) %>%
  select(
    -X,
    -user_name,
    -raw_timestamp_part_1,
    -raw_timestamp_part_2,
    -cvtd_timestamp,
    -new_window,
    -num_window
  )

# Extracting belt, arm, dumbbell, forearm related variables only, then
# dropping the "var*" columns.
test <- test[, grepl("gyros|accel|magnet|classe", names(test))] %>%
  select(-starts_with("var"))

# Predict the class of each test case with the fitted random forest.
pred2 <- predict(rffit, test)
pred2
## [1] B A C A A E D B A A B C B A E E A B B B
## Levels: A B C D E