Human activity recognition (HAR) research has traditionally focused on predicting "which" activity was performed at a specific point in time (as with the Activities of Daily Living). The Weight Lifting Exercises dataset instead investigates "how (well)" an activity was performed by the wearer.
Six young, healthy participants were asked to perform one set of 10 repetitions of the Unilateral Dumbbell Biceps Curl in five different fashions:
exactly according to the specification (Class A, the correct execution)
throwing the elbows to the front (Class B)
lifting the dumbbell only halfway (Class C)
lowering the dumbbell only halfway (Class D)
throwing the hips to the front (Class E)
Participants were supervised by an experienced weight lifter to make sure that all of them could easily simulate the mistakes in a safe and controlled manner, using a relatively light dumbbell (1.25 kg).
source: http://web.archive.org/web/20161224072740/http:/groupware.les.inf.puc-rio.br/har
library(dplyr)          # general data manipulation (not strictly needed below)
library(ggplot2)        # plotting (not strictly needed below)
library(caret)          # data partitioning and confusion matrices
library(rpart)          # decision tree model
library(randomForest)   # random forest model
library(rattle)         # fancyRpartPlot() for tree visualisation
training_data <- read.csv("https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv",
                          na.strings = c("NA", "#DIV/0!", ""), header = TRUE, sep = ",")
# Download the training data from the given URL, treating blank cells and "#DIV/0!" as NA
trfun <- function(x) sum(is.na(x))                              # count NAs per column
trainSet <- training_data[, sapply(training_data, trfun) == 0]  # keep only columns with no missing values
trainSet <- trainSet[, -c(1:7)]   # drop the first 7 columns (row id, user name, timestamps, window markers); not useful for prediction
trainSet$classe <- factor(trainSet$classe)   # make the outcome a factor (needed on R >= 4.0, where strings are no longer auto-factored)
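A quick sanity check on the cleaned data (a minimal sketch; the expected dimensions assume the standard pml-training.csv file):
sum(is.na(trainSet))      # should be 0 after the cleaning above
dim(trainSet)             # expected: 19622 rows, 53 columns (52 predictors plus classe)
table(trainSet$classe)    # distribution of the five classes A-E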
set.seed(1977)
Intrain <- createDataPartition(trainSet$classe, p = 0.69, list = FALSE)  # 69% training / 31% validation split
trainingOne <- trainSet[Intrain, ]
testingOne  <- trainSet[-Intrain, ]
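The split can be verified before modelling (a small sketch; both partitions should show roughly the same class proportions):
dim(trainingOne); dim(testingOne)
round(prop.table(table(trainingOne$classe)), 3)
round(prop.table(table(testingOne$classe)), 3)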
(i). method = Decision Tree
set.seed(1977)
library(rpart.plot)   # required by rattle::fancyRpartPlot()
modelTree <- rpart(classe ~ ., data = trainingOne, method = "class")  # classification tree on all 52 predictors
fancyRpartPlot(modelTree,cex=0.4)
PredTree <- predict(modelTree, testingOne, type = "class")  # predict classes on the held-out set
confusionMatrix(PredTree, testingOne$classe)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E
## A 1421 207 20 51 12
## B 85 763 71 59 46
## C 67 75 848 206 95
## D 97 72 47 602 69
## E 59 60 74 78 896
##
## Overall Statistics
##
## Accuracy : 0.7451
## 95% CI : (0.7339, 0.756)
## No Information Rate : 0.2844
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.6778
##
## Mcnemar's Test P-Value : < 2.2e-16
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E
## Sensitivity 0.8219 0.6483 0.8000 0.60442 0.8014
## Specificity 0.9333 0.9468 0.9118 0.94394 0.9454
## Pos Pred Value 0.8305 0.7451 0.6569 0.67869 0.7678
## Neg Pred Value 0.9295 0.9181 0.9557 0.92413 0.9548
## Prevalence 0.2844 0.1936 0.1743 0.16382 0.1839
## Detection Rate 0.2337 0.1255 0.1395 0.09901 0.1474
## Detection Prevalence 0.2814 0.1684 0.2123 0.14589 0.1919
## Balanced Accuracy 0.8776 0.7975 0.8559 0.77418 0.8734
Accuracy of the model = 74.51% (estimated out-of-sample error of about 25.49%).
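The out-of-sample error can also be read straight off the confusion matrix object (a small sketch; confTree is an illustrative name not used elsewhere):
confTree <- confusionMatrix(PredTree, testingOne$classe)
1 - as.numeric(confTree$overall["Accuracy"])   # expected out-of-sample error for the tree, about 0.25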
(ii). method = Random Forest
set.seed(1980)
modelRF <- randomForest(classe ~ ., data = trainingOne)  # random forest with the default 500 trees
modelRF
##
## Call:
## randomForest(formula = classe ~ ., data = trainingOne)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 7
##
## OOB estimate of error rate: 0.56%
## Confusion matrix:
## A B C D E class.error
## A 3849 2 0 0 0 0.0005193456
## B 15 2601 4 0 0 0.0072519084
## C 0 14 2344 4 0 0.0076206605
## D 0 0 25 2190 5 0.0135135135
## E 0 0 1 6 2482 0.0028123744
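The OOB estimate printed above is itself an out-of-sample error estimate, since each observation is scored only by trees that did not see it; it can also be extracted from the fitted object (a small sketch using the err.rate matrix that randomForest stores):
tail(modelRF$err.rate[, "OOB"], 1)   # final OOB error rate, about 0.0056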
PredRF <- predict(modelRF, testingOne)                  # predict on the held-out set
confRF <- confusionMatrix(PredRF, testingOne$classe)
plot(confRF$table, color = confRF$byClass, cex = .5,
     main = paste("Accuracy% =", 100 * round(confRF$overall['Accuracy'], 4)))  # mosaic plot of the confusion matrix
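To see which sensor features drive the forest, a variable-importance plot can be added (optional sketch; showing 15 variables is an arbitrary choice):
varImpPlot(modelRF, n.var = 15, main = "Top 15 predictors by importance")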
(iii). method = Support Vector Machine
set.seed(1990)
library(kernlab)
modelSVM <- ksvm(classe ~ ., data = trainingOne, type = "nu-svc", nu = 0.1)  # nu-SVC with kernlab's default RBF kernel
PredSVM <- predict(modelSVM,testingOne,type="response")
confusionMatrix(PredSVM,testingOne$classe)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E
## A 1723 43 0 2 0
## B 5 1122 6 0 1
## C 0 6 1045 48 3
## D 0 0 6 946 13
## E 1 6 3 0 1101
##
## Overall Statistics
##
## Accuracy : 0.9765
## 95% CI : (0.9724, 0.9801)
## No Information Rate : 0.2844
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9702
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E
## Sensitivity 0.9965 0.9533 0.9858 0.9498 0.9848
## Specificity 0.9897 0.9976 0.9886 0.9963 0.9980
## Pos Pred Value 0.9745 0.9894 0.9483 0.9803 0.9910
## Neg Pred Value 0.9986 0.9889 0.9970 0.9902 0.9966
## Prevalence 0.2844 0.1936 0.1743 0.1638 0.1839
## Detection Rate 0.2834 0.1845 0.1719 0.1556 0.1811
## Detection Prevalence 0.2908 0.1865 0.1812 0.1587 0.1827
## Balanced Accuracy 0.9931 0.9754 0.9872 0.9730 0.9914
Accuracy of the model = 97.65% (estimated out-of-sample error of about 2.35%).
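A quick side-by-side comparison of held-out accuracy (a sketch; accTree and accSVM are illustrative names, confRF was computed above). The random forest performs best here and is therefore used for the quiz predictions below:
accTree <- confusionMatrix(PredTree, testingOne$classe)$overall["Accuracy"]
accRF   <- confRF$overall["Accuracy"]
accSVM  <- confusionMatrix(PredSVM, testingOne$classe)$overall["Accuracy"]
data.frame(model = c("rpart tree", "random forest", "nu-SVC"),
           accuracy = round(c(accTree, accRF, accSVM), 4))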
test_cases <- read.csv("https://d396qusza40orc.cloudfront.net/predmachlearn/pml-testing.csv",
                       na.strings = c("NA", "#DIV/0!", ""), header = TRUE, sep = ",")
# Download the 20 quiz test cases from the given URL, treating blank cells and "#DIV/0!" as NA
Answers_Quiz <- predict(modelRF, newdata = test_cases)  # predict the 20 quiz cases with the random forest model
print(Answers_Quiz)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
## B A B A A E D B A A B C B A E E A B B B
## Levels: A B C D E
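Optionally, each prediction can be written to its own text file (an illustrative helper only; the file-naming scheme is an assumption, not part of the original analysis):
for (i in seq_along(Answers_Quiz)) {
  write.table(as.character(Answers_Quiz[i]), file = paste0("problem_id_", i, ".txt"),
              quote = FALSE, row.names = FALSE, col.names = FALSE)
}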