Course Project - Practical Machine Learning

Introduction

The goal of this project is to predict the manner in which a group of people performed an exercise. This is the “classe” variable in the training set. The other variables in the data set are used as predictors. Below I outline how to make the prediction with a random forest and describe the merits of the method.

Include Packages

library(knitr)
library(caret)

## Loading required package: lattice

## Loading required package: ggplot2

library(RColorBrewer)
library(randomForest)

## randomForest 4.6-12

## Type rfNews() to see new features/changes/bug fixes.

## 
## Attaching package: 'randomForest'

## The following object is masked from 'package:ggplot2':
## 
##     margin

library(rpart)
library(rpart.plot)

Get data

# Fix the RNG seed so that the random steps further down (e.g. the data
# partition) are reproducible.
set.seed(34423)


# Read the training and test files.
#
# read.csv() is used instead of read.csv2(): the values in these files use "."
# as the decimal mark, while read.csv2() defaults to dec = ",". With the wrong
# decimal mark every column holding decimals is imported as text/factor instead
# of as a number.
#
# na.strings matches whole fields exactly, so "#DIV/0" alone cannot match a
# field spelled "#DIV/0!" (the usual spreadsheet spelling of a division error;
# presumably what the files contain -- TODO confirm). Both spellings are listed.
#
# stringsAsFactors = TRUE keeps text columns such as classe as factors on
# R >= 4.0 as well, where the default became FALSE.
naStrings <- c("NA", "", "#DIV/0", "#DIV/0!")

trainRaw <- read.csv(
  url("http://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv"),
  na.strings = naStrings, stringsAsFactors = TRUE
)
testRaw <- read.csv(
  url("http://d396qusza40orc.cloudfront.net/predmachlearn/pml-testing.csv"),
  na.strings = naStrings, stringsAsFactors = TRUE
)

# Number of observations (rows) and variables (columns) in the training data.
numObs <- nrow(trainRaw)
numVar <- ncol(trainRaw)

dim(trainRaw)

## [1] 19622   160

Clean data

Many columns in the data set consist mostly of missing (NA) values. To remove them we define a threshold: any column in which more than 5% of the values are NA is dropped from the predictor set. Here’s the code for it.

# Fraction of NA values in each column of trainRaw.
# is.na() on a data frame returns a logical matrix, so colMeans() yields the
# per-column NA share directly; no index loop or preallocation is needed.
percNa <- unname(colMeans(is.na(trainRaw)))

# is.null() tests whether an object is NULL, not whether its cells are
# missing. For an existing column it is always FALSE, so the "null share" is 0
# for every column. The variable is kept only so that it still exists.
percNull <- rep(0, numVar)

# Grab indices of columns which have < 5% NA values
inds <- which(percNa < 0.05)

# Make new table with only good data.
# The same column positions are applied to testRaw so that both tables keep the
# same set of columns.
trainClean <- trainRaw[, inds]
testClean <- testRaw[, inds]

Split data set

The following code splits trainClean into two parts: a training set (60% of the rows) and a hold-out testing set (the remaining 40%). The model is fitted on trainingSet, and a confusion matrix is computed on testingSet to estimate out-of-sample performance.

# Split on the outcome column: createDataPartition() returns the row indices
# that go into the training part (60%); every other row becomes hold-out data.
inTrain <- createDataPartition(
  y = trainClean[["classe"]],
  p = 0.6,
  list = FALSE
)
trainingSet <- trainClean[inTrain, ]
testingSet <- trainClean[-inTrain, ]

Few more quirks to this data

Some of the measurement columns may have been imported as factors rather than as numbers, as can be checked with str(). The code below coerces the measurement columns to numeric; columns that are already numeric are unaffected.

## Inspect the structure of trainingSet: str() prints each column's name, type
## and first few values, which shows which columns are stored as factors.
str(trainingSet)

## 'data.frame':    11776 obs. of  60 variables:
##  $ X                   : int  1 2 3 6 9 10 12 13 17 18 ...
##  $ user_name           : Factor w/ 6 levels "adelmo","carlitos",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ raw_timestamp_part_1: int  1323084231 1323084231 1323084231 1323084232 1323084232 1323084232 1323084232 1323084232 1323084232 1323084232 ...
##  $ raw_timestamp_part_2: int  788290 808298 820366 304277 484323 484434 528316 560359 692324 732306 ...
##  $ cvtd_timestamp      : Factor w/ 20 levels "02/12/2011 13:32",..: 9 9 9 9 9 9 9 9 9 9 ...
##  $ new_window          : Factor w/ 2 levels "no","yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ num_window          : int  11 11 11 12 12 12 12 12 12 12 ...
##  $ roll_belt           : Factor w/ 1330 levels "-0.01","-0.02",..: 771 771 772 775 773 775 773 772 781 785 ...
##  $ pitch_belt          : Factor w/ 1840 levels "-0.01","-0.02",..: 1681 1681 1681 1680 1690 1691 1692 1694 1686 1682 ...
##  $ yaw_belt            : Factor w/ 1957 levels "-0.02","-0.03",..: 1122 1122 1122 1122 1122 1122 1122 1122 1122 1122 ...
##  $ total_accel_belt    : int  3 3 3 3 3 3 3 3 3 3 ...
##  $ gyros_belt_x        : Factor w/ 140 levels "-0.02","-0.03",..: 64 65 64 65 65 66 65 65 64 64 ...
##  $ gyros_belt_y        : Factor w/ 69 levels "-0.02","-0.03",..: 33 33 33 33 33 33 33 33 33 34 ...
##  $ gyros_belt_z        : Factor w/ 169 levels "-0.02","-0.03",..: 1 1 1 1 1 80 1 80 1 80 ...
##  $ accel_belt_x        : int  -21 -22 -20 -21 -20 -21 -22 -22 -21 -21 ...
##  $ accel_belt_y        : int  4 4 5 4 2 4 2 4 4 5 ...
##  $ accel_belt_z        : int  22 22 23 21 24 22 23 21 22 21 ...
##  $ magnet_belt_x       : int  -3 -7 -2 0 1 -3 -2 -3 -6 1 ...
##  $ magnet_belt_y       : int  599 608 600 603 602 609 602 606 598 600 ...
##  $ magnet_belt_z       : int  -313 -311 -305 -312 -312 -308 -319 -309 -317 -316 ...
##  $ roll_arm            : Factor w/ 2654 levels "-0.02","-0.04",..: 120 120 120 120 120 120 120 120 121 121 ...
##  $ pitch_arm           : Factor w/ 3087 levels "-0.01","-0.02",..: 1882 1882 1882 1877 1874 1873 1872 1871 1870 1869 ...
##  $ yaw_arm             : Factor w/ 2876 levels "-0.02","-0.05",..: 212 212 212 212 212 212 212 212 212 212 ...
##  $ total_accel_arm     : int  34 34 34 34 34 34 34 34 34 34 ...
##  $ gyros_arm_x         : Factor w/ 643 levels "-0.02","-0.03",..: 356 357 357 357 357 357 357 357 357 357 ...
##  $ gyros_arm_y         : Factor w/ 376 levels "-0.02","-0.03",..: 202 1 1 2 2 2 2 1 202 1 ...
##  $ gyros_arm_z         : Factor w/ 248 levels "-0.02","-0.03",..: 1 1 1 113 1 1 113 1 1 2 ...
##  $ accel_arm_x         : int  -288 -290 -289 -289 -288 -288 -288 -287 -289 -288 ...
##  $ accel_arm_y         : int  109 110 110 111 109 110 111 111 110 108 ...
##  $ accel_arm_z         : int  -123 -125 -126 -122 -122 -124 -123 -124 -122 -124 ...
##  $ magnet_arm_x        : int  -368 -369 -368 -369 -369 -376 -363 -372 -371 -373 ...
##  $ magnet_arm_y        : int  337 337 344 342 341 334 343 338 337 336 ...
##  $ magnet_arm_z        : int  516 513 513 513 518 516 520 509 512 510 ...
##  $ roll_dumbbell       : Factor w/ 16523 levels "-0.970112066",..: 6515 6531 6197 6568 6537 6562 6528 6568 6512 6545 ...
##  $ pitch_dumbbell      : Factor w/ 16040 levels "-0.45631348",..: 9693 9701 9671 9717 9688 9720 9690 9717 9661 9683 ...
##  $ yaw_dumbbell        : Factor w/ 16381 levels "-0.585821306",..: 6625 6577 6688 6520 6636 6515 6630 6520 6716 6643 ...
##  $ total_accel_dumbbell: int  37 37 37 37 37 37 37 37 37 36 ...
##  $ gyros_dumbbell_x    : Factor w/ 241 levels "-0.02","-0.03",..: 115 115 115 115 115 115 115 115 115 116 ...
##  $ gyros_dumbbell_y    : Factor w/ 278 levels "-0.02","-0.03",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ gyros_dumbbell_z    : Factor w/ 206 levels "-0.02","-0.03",..: 114 114 114 114 114 114 114 1 114 1 ...
##  $ accel_dumbbell_x    : int  -234 -233 -232 -234 -232 -235 -233 -234 -233 -231 ...
##  $ accel_dumbbell_y    : int  47 47 46 48 47 48 47 48 47 47 ...
##  $ accel_dumbbell_z    : int  -271 -269 -270 -269 -269 -270 -270 -269 -272 -268 ...
##  $ magnet_dumbbell_x   : int  -559 -555 -561 -558 -549 -558 -554 -552 -551 -557 ...
##  $ magnet_dumbbell_y   : int  293 296 298 294 292 291 291 302 296 292 ...
##  $ magnet_dumbbell_z   : Factor w/ 676 levels "-1","-10","-100",..: 208 207 206 209 208 212 208 212 198 205 ...
##  $ roll_forearm        : Factor w/ 2176 levels "-0.04","-0.05",..: 1391 1390 1390 1386 1384 1384 1382 1380 1379 1378 ...
##  $ pitch_forearm       : Factor w/ 2915 levels "-0.01","-0.02",..: 1073 1073 1073 1073 1072 1072 1072 1073 1074 1074 ...
##  $ yaw_forearm         : Factor w/ 1991 levels "-0.04","-0.05",..: 126 126 125 125 125 125 125 124 124 124 ...
##  $ total_accel_forearm : int  36 36 36 36 36 36 36 36 36 36 ...
##  $ gyros_forearm_x     : Factor w/ 298 levels "-0.02","-0.03",..: 153 152 153 152 153 152 152 151 152 152 ...
##  $ gyros_forearm_y     : Factor w/ 741 levels "-0.02","-0.03",..: 381 381 1 1 381 381 382 381 1 381 ...
##  $ gyros_forearm_z     : Factor w/ 307 levels "-0.02","-0.03",..: 1 1 144 2 1 1 2 2 144 1 ...
##  $ accel_forearm_x     : int  192 192 196 193 193 190 191 193 192 192 ...
##  $ accel_forearm_y     : int  203 203 204 203 204 205 203 205 204 206 ...
##  $ accel_forearm_z     : int  -215 -216 -213 -215 -214 -215 -215 -215 -213 -216 ...
##  $ magnet_forearm_x    : int  -17 -18 -18 -9 -16 -22 -11 -15 -13 -16 ...
##  $ magnet_forearm_y    : Factor w/ 1872 levels "-0.123","-1",..: 1521 1529 1525 1528 1520 1523 1524 1522 1520 1520 ...
##  $ magnet_forearm_z    : Factor w/ 1683 levels "-0.0917","-1",..: 1128 1125 1120 1130 1128 1125 1130 1124 1134 1124 ...
##  $ classe              : Factor w/ 5 levels "A","B","C","D",..: 1 1 1 1 1 1 1 1 1 1 ...

## Measurement columns that must be numeric for modelling.
## The conversion goes through as.character() on purpose: as.numeric() applied
## directly to a factor returns its internal level codes, not the values.
numericCols <- c(
  "roll_belt", "pitch_belt", "yaw_belt",
  "gyros_belt_x", "gyros_belt_y", "gyros_belt_z",
  "accel_belt_x", "accel_belt_y", "accel_belt_z",
  "roll_arm", "pitch_arm", "yaw_arm",
  "gyros_arm_x", "gyros_arm_y", "gyros_arm_z",
  "roll_dumbbell", "pitch_dumbbell", "yaw_dumbbell",
  "gyros_dumbbell_x", "gyros_dumbbell_y", "gyros_dumbbell_z",
  "magnet_dumbbell_z",
  "roll_forearm", "pitch_forearm", "yaw_forearm",
  "gyros_forearm_x", "gyros_forearm_y", "gyros_forearm_z",
  "magnet_forearm_y", "magnet_forearm_z"
)

## Coerce the columns named in `cols` to numeric and return the data frame;
## all other columns are left untouched.
toNumericCols <- function(df, cols) {
  df[cols] <- lapply(df[cols], function(col) as.numeric(as.character(col)))
  df
}

## trainingSet class conversion operation
trainingSet <- toNumericCols(trainingSet, numericCols)

## We modify factor variables into numeric variables for testingSet
testingSet <- toNumericCols(testingSet, numericCols)

Identification of weak influencers

In the following code we drop predictors that carry almost no information. The nearZeroVar() function flags columns whose values are (nearly) constant; impVars holds the indices of the columns that are not flagged, and only those columns are kept.

# Positions of the columns that nearZeroVar() does NOT flag as near-zero
# variance; the metrics are computed on the training split only.
impVars <- which(!nearZeroVar(trainingSet, saveMetrics = TRUE)[["nzv"]])

# Position of the outcome column and number of retained columns.
colInd <- which(names(trainingSet) %in% "classe")
l <- length(impVars)

# Apply the same column selection to both splits.
testingSetFinal <- testingSet[, impVars]
trainingSetFinal <- trainingSet[, impVars]

dim(trainingSetFinal)

## [1] 11776    59

# List the names of the columns that were retained.
names(trainingSetFinal)

##  [1] "X"                    "user_name"            "raw_timestamp_part_1"
##  [4] "raw_timestamp_part_2" "cvtd_timestamp"       "num_window"          
##  [7] "roll_belt"            "pitch_belt"           "yaw_belt"            
## [10] "total_accel_belt"     "gyros_belt_x"         "gyros_belt_y"        
## [13] "gyros_belt_z"         "accel_belt_x"         "accel_belt_y"        
## [16] "accel_belt_z"         "magnet_belt_x"        "magnet_belt_y"       
## [19] "magnet_belt_z"        "roll_arm"             "pitch_arm"           
## [22] "yaw_arm"              "total_accel_arm"      "gyros_arm_x"         
## [25] "gyros_arm_y"          "gyros_arm_z"          "accel_arm_x"         
## [28] "accel_arm_y"          "accel_arm_z"          "magnet_arm_x"        
## [31] "magnet_arm_y"         "magnet_arm_z"         "roll_dumbbell"       
## [34] "pitch_dumbbell"       "yaw_dumbbell"         "total_accel_dumbbell"
## [37] "gyros_dumbbell_x"     "gyros_dumbbell_y"     "gyros_dumbbell_z"    
## [40] "accel_dumbbell_x"     "accel_dumbbell_y"     "accel_dumbbell_z"    
## [43] "magnet_dumbbell_x"    "magnet_dumbbell_y"    "magnet_dumbbell_z"   
## [46] "roll_forearm"         "pitch_forearm"        "yaw_forearm"         
## [49] "total_accel_forearm"  "gyros_forearm_x"      "gyros_forearm_y"     
## [52] "gyros_forearm_z"      "accel_forearm_x"      "accel_forearm_y"     
## [55] "accel_forearm_z"      "magnet_forearm_x"     "magnet_forearm_y"    
## [58] "magnet_forearm_z"     "classe"

Take out some seemingly unimportant variables

This code removes the row index and timestamp variables, which simply increase over time rather than describe the movement, and could therefore distort the prediction.

# Columns to discard from both splits: X and the three timestamp columns.
dropCols <- c(
  "X",
  "raw_timestamp_part_1",
  "raw_timestamp_part_2",
  "cvtd_timestamp"
)

# Keep every column whose name is not in dropCols (names that are absent are
# simply ignored, as with `df$col <- NULL`).
trainingSetFinal <- trainingSetFinal[!(names(trainingSetFinal) %in% dropCols)]
testingSetFinal <- testingSetFinal[!(names(testingSetFinal) %in% dropCols)]

Running a random forest prediction

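Note that randomForest() has no built-in preprocessing step: preProcess is an argument of caret’s train() function, and randomForest() silently ignores it. No centring or scaling is needed here in any case, because tree-based models are insensitive to monotone rescaling of the predictors.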

# Seed the RNG so that the fitted forest is reproducible.
set.seed(23224)

# Fit a random forest predicting classe from all remaining columns.
# The former `preProcess = c("center", "scale")` argument was dropped: it is an
# option of caret::train(), not of randomForest(), which accepted it through
# `...` and silently ignored it. The fitted model is therefore unchanged.
modFit_RF <- randomForest(classe ~ ., data = trainingSetFinal)

# Predicted class label for every row of the hold-out split.
predict_RF <- predict(modFit_RF, testingSetFinal, type = "class")

Evaluation of prediction

# Cross-tabulate the hold-out predictions against the true classe labels and
# report accuracy and per-class statistics.
confusionMatrix(predict_RF, testingSetFinal$classe)

## Confusion Matrix and Statistics
## 
##           Reference
## Prediction    A    B    C    D    E
##          A 2231    1    0    0    0
##          B    0 1516    3    0    0
##          C    0    1 1365   13    0
##          D    0    0    0 1273    0
##          E    1    0    0    0 1442
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9976          
##                  95% CI : (0.9962, 0.9985)
##     No Information Rate : 0.2845          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9969          
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: A Class: B Class: C Class: D Class: E
## Sensitivity            0.9996   0.9987   0.9978   0.9899   1.0000
## Specificity            0.9998   0.9995   0.9978   1.0000   0.9998
## Pos Pred Value         0.9996   0.9980   0.9898   1.0000   0.9993
## Neg Pred Value         0.9998   0.9997   0.9995   0.9980   1.0000
## Prevalence             0.2845   0.1935   0.1744   0.1639   0.1838
## Detection Rate         0.2843   0.1932   0.1740   0.1622   0.1838
## Detection Prevalence   0.2845   0.1936   0.1758   0.1622   0.1839
## Balanced Accuracy      0.9997   0.9991   0.9978   0.9949   0.9999

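The model predicts the hold-out data almost perfectly: accuracy is 99.76% (95% CI 99.62%–99.85%), i.e. an estimated out-of-sample error of about 0.24%.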

Applying predictors to test data

In the code below, we give the test data the same structure as the training data (the same columns, with the same types) and then predict the “classe” outcome for each of the 20 test cases.

# Measurement columns to coerce to numeric: the same columns that are
# converted for the training data.
numericCols <- c(
  "roll_belt", "pitch_belt", "yaw_belt",
  "gyros_belt_x", "gyros_belt_y", "gyros_belt_z",
  "accel_belt_x", "accel_belt_y", "accel_belt_z",
  "roll_arm", "pitch_arm", "yaw_arm",
  "gyros_arm_x", "gyros_arm_y", "gyros_arm_z",
  "roll_dumbbell", "pitch_dumbbell", "yaw_dumbbell",
  "gyros_dumbbell_x", "gyros_dumbbell_y", "gyros_dumbbell_z",
  "magnet_dumbbell_z",
  "roll_forearm", "pitch_forearm", "yaw_forearm",
  "gyros_forearm_x", "gyros_forearm_y", "gyros_forearm_z",
  "magnet_forearm_y", "magnet_forearm_z"
)

# Go through as.character() first: as.numeric() applied directly to a factor
# would return its internal level codes instead of the values.
testClean[numericCols] <- lapply(
  testClean[numericCols],
  function(col) as.numeric(as.character(col))
)

# Keep the same column positions that were retained for the training data.
testClean2 <- testClean[, impVars]

dim(testClean2)

## [1] 20 59

# List the column names of the prepared test data.
names(testClean2)

##  [1] "X"                    "user_name"            "raw_timestamp_part_1"
##  [4] "raw_timestamp_part_2" "cvtd_timestamp"       "num_window"          
##  [7] "roll_belt"            "pitch_belt"           "yaw_belt"            
## [10] "total_accel_belt"     "gyros_belt_x"         "gyros_belt_y"        
## [13] "gyros_belt_z"         "accel_belt_x"         "accel_belt_y"        
## [16] "accel_belt_z"         "magnet_belt_x"        "magnet_belt_y"       
## [19] "magnet_belt_z"        "roll_arm"             "pitch_arm"           
## [22] "yaw_arm"              "total_accel_arm"      "gyros_arm_x"         
## [25] "gyros_arm_y"          "gyros_arm_z"          "accel_arm_x"         
## [28] "accel_arm_y"          "accel_arm_z"          "magnet_arm_x"        
## [31] "magnet_arm_y"         "magnet_arm_z"         "roll_dumbbell"       
## [34] "pitch_dumbbell"       "yaw_dumbbell"         "total_accel_dumbbell"
## [37] "gyros_dumbbell_x"     "gyros_dumbbell_y"     "gyros_dumbbell_z"    
## [40] "accel_dumbbell_x"     "accel_dumbbell_y"     "accel_dumbbell_z"    
## [43] "magnet_dumbbell_x"    "magnet_dumbbell_y"    "magnet_dumbbell_z"   
## [46] "roll_forearm"         "pitch_forearm"        "yaw_forearm"         
## [49] "total_accel_forearm"  "gyros_forearm_x"      "gyros_forearm_y"     
## [52] "gyros_forearm_z"      "accel_forearm_x"      "accel_forearm_y"     
## [55] "accel_forearm_z"      "magnet_forearm_x"     "magnet_forearm_y"    
## [58] "magnet_forearm_z"     "problem_id"

## Take out some unwanted variables from testClean2 not used for prediction

testClean2$X <- NULL
testClean2$raw_timestamp_part_1 <- NULL
testClean2$raw_timestamp_part_2 <- NULL
testClean2$cvtd_timestamp <- NULL

## Give every factor predictor exactly the level set seen during training.
## The previous `levels(testClean2) <- levels(trainingSetFinal)` had no effect:
## levels() of a data frame is NULL, so no column was changed.
## Values that do not occur among the training levels become NA.
for (nm in intersect(names(trainingSetFinal), names(testClean2))) {
  if (is.factor(trainingSetFinal[[nm]])) {
    testClean2[[nm]] <- factor(
      testClean2[[nm]],
      levels = levels(trainingSetFinal[[nm]])
    )
  }
}

## Predict the class of each test case with the fitted random forest.
predValues <- predict(modFit_RF, testClean2, type = "class")
predValues

##  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 
##  B  A  B  A  A  E  D  B  A  A  B  C  B  A  E  E  A  B  B  B 
## Levels: A B C D E