In this project, we develop a model with which a smartphone can accurately detect its owner's activity. The dataset was collected from 30 volunteers, each of whom performed 6 different activities while wearing a Samsung Galaxy S II on the waist. Using the smartphone's embedded sensors (the accelerometer and the gyroscope), the user's 3-axial linear acceleration and 3-axial angular velocity were recorded. The sensor data is used to predict the user's activity, which is one of the following six: Walking, Walking_Upstairs, Walking_Downstairs, Sitting, Standing and Laying.

# Point the session at the project folder; every file below is read relative to it.
# NOTE(review): absolute, machine-specific path - adjust before running elsewhere.
setwd("C:\\Users\\user\\Desktop\\Blogs\\Human Activity Recognition")

# Raw lines of features.txt; make.names() turns them into valid column names.
col_names <- readLines("features.txt")

# Predictor tables, with columns named after the feature list.
train_X <- setNames(read.table("X_train.txt"), make.names(col_names))
test_X <- setNames(read.table("X_test.txt"), make.names(col_names))

# Label tables, each with its single column named "Activity".
train_Y <- setNames(read.table("y_train.txt"), "Activity")
test_Y <- setNames(read.table("y_test.txt"), "Activity")
# Tabulate the raw labels. Anything other than the codes 1-6 is a malformed
# entry (here a label with stray leading bytes, shown as "ÿþ5").
table(train_Y$Activity)
## 
##    1    2    3    4    5    6  ÿþ5 
## 1226 1073  986 1286 1373 1407    1
table(test_Y$Activity)
## 
##   1   2   3   4   5   6 ÿþ5 
## 496 471 420 491 531 537   1
# Locate the malformed rows by value instead of via levels(...)[7]: the latter
# only works when read.table() returns a factor (R < 4.0) and the bad value
# happens to be the 7th level. %in% works for factor, character and integer
# columns alike.
which(!(train_Y$Activity %in% as.character(1:6)))
## [1] 1
which(!(test_Y$Activity %in% as.character(1:6)))
## [1] 1
# Keep only the rows whose label is one of the six valid activity codes.
# A logical mask replaces the hard-coded "drop row 1" / 2:nrow() indexing, so
# the predictors and labels stay aligned wherever the malformed rows are, and
# nothing is dropped when there are none.
train_keep <- train_Y$Activity %in% as.character(1:6)
test_keep <- test_Y$Activity %in% as.character(1:6)

train_X <- train_X[train_keep, ]
test_X <- test_X[test_keep, ]

# Rebuild the one-column label frames from the retained labels.
train_Y <- data.frame(Activity = train_Y$Activity[train_keep])
test_Y <- data.frame(Activity = test_Y$Activity[test_keep])
# Put the label in front of its predictors, then stack the two sets into one table.
train_final <- cbind(train_Y, train_X)
test_final <- cbind(test_Y, test_X)

final_data <- rbind(train_final, test_final)

# Map each numeric code to its activity name explicitly. Assigning levels()
# positionally would mislabel classes (or fail) if the set of codes present
# were not exactly 1..6 in that order; here an unexpected code becomes NA and
# is reported by the missing-value check below.
final_data$Activity <- factor(
  final_data$Activity,
  levels = as.character(1:6),
  labels = c(
    "WALKING", "WALKING_UPSTAIRS", "WALKING_DOWNSTAIRS",
    "SITTING", "STANDING", "LAYING"
  )
)
table(final_data$Activity)
## 
##            WALKING   WALKING_UPSTAIRS WALKING_DOWNSTAIRS 
##               1722               1544               1406 
##            SITTING           STANDING             LAYING 
##               1777               1904               1944
# Columns containing at least one NA (an empty result means none).
which(colSums(is.na(final_data)) != 0)
## named integer(0)

There are no missing values in any column.

library(randomForest)
## Warning: package 'randomForest' was built under R version 3.4.3
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
library(caret)
## Warning: package 'caret' was built under R version 3.4.4
## Loading required package: lattice
## Loading required package: ggplot2
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
## 
##     margin
library(Boruta)
## Warning: package 'Boruta' was built under R version 3.4.4
## Loading required package: ranger
## Warning: package 'ranger' was built under R version 3.4.4
## 
## Attaching package: 'ranger'
## The following object is masked from 'package:randomForest':
## 
##     importance
# Baseline random forest on all predictors (200 trees, importance measures
# recorded), followed by a plot of the fitted model to see how the error
# changes as trees are added.
RFModel1 <- randomForest(
  Activity ~ .,
  data = final_data,
  ntree = 200,
  importance = TRUE
)
plot(RFModel1)

The error does not appear to decrease any further beyond about 100 trees. Hence, if we build our random forest model with no more than 100 trees, we should not lose any achievable accuracy, and we will gain in computational speed.

# Variable-importance plot for the baseline forest. The call is namespace-
# qualified because ranger (attached with Boruta) masks randomForest's
# importance().
randomForest::varImpPlot(x = RFModel1)

The plot gives a glimpse of the relative importance of the variables. However, with 561 predictors the plot looks too busy, and not all details are displayed in it.

Using the Boruta package, the attributes that make a significant contribution to the prediction of "Activity" were extracted. The Boruta model takes a long time to build (with 561 features), so after building it once I saved the selected attributes to a file for later use, without having to run Boruta again. The code is provided below for reference, but I am not running the Boruta (BT) step here, to save time.

# Reference only - slow with 561 predictors. Runs Boruta feature selection and
# saves the selected attribute names so later steps can read them from disk.
# The file name now uses plain double quotes; the typographic quotes in the
# original are a syntax error in R.
BT <- Boruta(Activity ~ ., ntree = 50, data = final_data)
SelectedAttributes <- getSelectedAttributes(BT)
write.csv(SelectedAttributes, file = "SelectedAttributes.csv")

# Read back the saved attribute names. The first CSV column is dropped -
# presumably the row-name column that write.csv() adds by default.
a <- read.csv("SelectedAttributes.csv")
a <- a[, -1]
a <- as.character(a)

# Keep only the selected predictors plus the response.
final_data <- final_data[, c(a, "Activity")]

# Seed BEFORE the random split so the Train/Test partition (and everything
# tuned on it) is reproducible; seeding after createDataPartition() left the
# split different on every run.
set.seed(234)
Index <- createDataPartition(final_data$Activity, p = 0.75, list = FALSE, times = 1)
Train <- final_data[Index, ]
Test <- final_data[-Index, ]

# Address the response by name rather than by the magic column index 159,
# which is only right when exactly 158 attributes were selected.
predictor_cols <- setdiff(names(Train), "Activity")

# Search for a good mtry starting from 10. With doBest = TRUE, tuneRF() is
# documented to return the forest fitted with the best mtry found, so `mtry`
# holds a model object rather than a number.
mtry <- tuneRF(
  Train[, predictor_cols, drop = FALSE],
  Train[["Activity"]],
  mtryStart = 10,
  ntreeTry = 50,
  stepFactor = 1.5,
  improve = 0.05,
  trace = TRUE,
  plot = TRUE,
  doBest = TRUE
)
## mtry = 10  OOB error = 2.55% 
## Searching left ...
## mtry = 7     OOB error = 2.58% 
## -0.01015228 0.05 
## Searching right ...
## mtry = 15    OOB error = 2.59% 
## -0.01522843 0.05

# Final forest on the training split: mtry = 10 is the value with the lowest
# OOB error in the tuneRF trace, and 100 trees follows the error plot of the
# baseline model.
RFModel.Final <- randomForest(Activity ~ ., data = Train, mtry = 10, ntree = 100)

# Predicted class for every held-out row.
test.forest <- predict(RFModel.Final, type = "class", newdata = Test)

# caret expects the predictions as `data` and the true labels as `reference`.
# The original call passed them the other way round, which transposes the
# table and swaps sensitivity with positive predictive value (and specificity
# with negative predictive value) for every class.
# NOTE: any printed output following this call in the document was produced by
# the original, swapped call - its rows labelled "Prediction" are the actual
# classes. Accuracy and kappa are unaffected; re-run to refresh the per-class
# statistics.
confusionMatrix(data = test.forest, reference = Test$Activity)
## Confusion Matrix and Statistics
## 
##                     Reference
## Prediction           WALKING WALKING_UPSTAIRS WALKING_DOWNSTAIRS SITTING
##   WALKING                418               10                  2       0
##   WALKING_UPSTAIRS         2              382                  2       0
##   WALKING_DOWNSTAIRS       1               11                339       0
##   SITTING                  0                0                  0     424
##   STANDING                 0                0                  0      31
##   LAYING                   0                0                  0       0
##                     Reference
## Prediction           STANDING LAYING
##   WALKING                   0      0
##   WALKING_UPSTAIRS          0      0
##   WALKING_DOWNSTAIRS        0      0
##   SITTING                  20      0
##   STANDING                445      0
##   LAYING                    0    486
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9693          
##                  95% CI : (0.9619, 0.9756)
##     No Information Rate : 0.1889          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9631          
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: WALKING Class: WALKING_UPSTAIRS
## Sensitivity                  0.9929                  0.9479
## Specificity                  0.9944                  0.9982
## Pos Pred Value               0.9721                  0.9896
## Neg Pred Value               0.9986                  0.9904
## Prevalence                   0.1636                  0.1566
## Detection Rate               0.1625                  0.1485
## Detection Prevalence         0.1671                  0.1500
## Balanced Accuracy            0.9936                  0.9730
##                      Class: WALKING_DOWNSTAIRS Class: SITTING
## Sensitivity                             0.9883         0.9319
## Specificity                             0.9946         0.9906
## Pos Pred Value                          0.9658         0.9550
## Neg Pred Value                          0.9982         0.9854
## Prevalence                              0.1333         0.1768
## Detection Rate                          0.1318         0.1648
## Detection Prevalence                    0.1364         0.1726
## Balanced Accuracy                       0.9915         0.9612
##                      Class: STANDING Class: LAYING
## Sensitivity                   0.9570        1.0000
## Specificity                   0.9853        1.0000
## Pos Pred Value                0.9349        1.0000
## Neg Pred Value                0.9905        1.0000
## Prevalence                    0.1807        0.1889
## Detection Rate                0.1729        0.1889
## Detection Prevalence          0.1850        0.1889
## Balanced Accuracy             0.9711        1.0000
# Cross-tabulate the true activities (rows) against the predictions (columns).
table(Test$Activity, test.forest, dnn = c("Actual", "Pred"))
##                     Pred
## Actual               WALKING WALKING_UPSTAIRS WALKING_DOWNSTAIRS SITTING
##   WALKING                418               10                  2       0
##   WALKING_UPSTAIRS         2              382                  2       0
##   WALKING_DOWNSTAIRS       1               11                339       0
##   SITTING                  0                0                  0     424
##   STANDING                 0                0                  0      31
##   LAYING                   0                0                  0       0
##                     Pred
## Actual               STANDING LAYING
##   WALKING                   0      0
##   WALKING_UPSTAIRS          0      0
##   WALKING_DOWNSTAIRS        0      0
##   SITTING                  20      0
##   STANDING                445      0
##   LAYING                    0    486