# Fetch `url` into `destfile` unless a local copy already exists, so that
# re-running the script does not download the data again. A failed download
# removes any partial file and stops, instead of leaving a truncated CSV
# behind for read.csv() to trip over.
download_if_missing <- function(url, destfile) {
  if (file.exists(destfile)) {
    return(invisible(destfile))
  }
  status <- download.file(url, destfile, method = 'curl')
  if (status != 0) {
    unlink(destfile)
    stop("Download failed for ", url, call. = FALSE)
  }
  invisible(destfile)
}

download_if_missing(
  'https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv',
  'training.csv'
)
download_if_missing(
  'https://d396qusza40orc.cloudfront.net/predmachlearn/pml-testing.csv',
  'testing.csv'
)

# stringsAsFactors is spelled out because its default changed to FALSE in
# R 4.0.0; the outcome column `classe` has to be a factor for the
# confusionMatrix() calls later in the script.
training <- read.csv('training.csv', stringsAsFactors = TRUE)
testing <- read.csv('testing.csv', stringsAsFactors = TRUE)
library(caret)
## Loading required package: lattice
## Loading required package: ggplot2
## Warning in as.POSIXlt.POSIXct(Sys.time()): unknown timezone 'default/
## America/Sao_Paulo'
library(ggplot2)
# Fix the RNG seed so the split below is reproducible.
set.seed(1)

# Row indices for the modelling subset: 70% of the labelled rows, drawn with
# respect to the outcome column `classe`.
inTrain <- createDataPartition(
  y = training[['classe']],
  p = 0.7,
  list = FALSE
)

# The remaining 30% are held out for validation.
validate <- training[-inTrain, ]
train <- training[inTrain, ]

dim(training)
## [1] 19622 160
# Preview the first six rows of the raw training data.
head(training, n = 6L)
## X user_name raw_timestamp_part_1 raw_timestamp_part_2 cvtd_timestamp
## 1 1 carlitos 1323084231 788290 05/12/2011 11:23
## 2 2 carlitos 1323084231 808298 05/12/2011 11:23
## 3 3 carlitos 1323084231 820366 05/12/2011 11:23
## 4 4 carlitos 1323084232 120339 05/12/2011 11:23
## 5 5 carlitos 1323084232 196328 05/12/2011 11:23
## 6 6 carlitos 1323084232 304277 05/12/2011 11:23
## new_window num_window roll_belt pitch_belt yaw_belt total_accel_belt
## 1 no 11 1.41 8.07 -94.4 3
## 2 no 11 1.41 8.07 -94.4 3
## 3 no 11 1.42 8.07 -94.4 3
## 4 no 12 1.48 8.05 -94.4 3
## 5 no 12 1.48 8.07 -94.4 3
## 6 no 12 1.45 8.06 -94.4 3
## kurtosis_roll_belt kurtosis_picth_belt kurtosis_yaw_belt
## 1
## 2
## 3
## 4
## 5
## 6
## skewness_roll_belt skewness_roll_belt.1 skewness_yaw_belt max_roll_belt
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## max_picth_belt max_yaw_belt min_roll_belt min_pitch_belt min_yaw_belt
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
## 6 NA NA NA
## amplitude_roll_belt amplitude_pitch_belt amplitude_yaw_belt
## 1 NA NA
## 2 NA NA
## 3 NA NA
## 4 NA NA
## 5 NA NA
## 6 NA NA
## var_total_accel_belt avg_roll_belt stddev_roll_belt var_roll_belt
## 1 NA NA NA NA
## 2 NA NA NA NA
## 3 NA NA NA NA
## 4 NA NA NA NA
## 5 NA NA NA NA
## 6 NA NA NA NA
## avg_pitch_belt stddev_pitch_belt var_pitch_belt avg_yaw_belt
## 1 NA NA NA NA
## 2 NA NA NA NA
## 3 NA NA NA NA
## 4 NA NA NA NA
## 5 NA NA NA NA
## 6 NA NA NA NA
## stddev_yaw_belt var_yaw_belt gyros_belt_x gyros_belt_y gyros_belt_z
## 1 NA NA 0.00 0.00 -0.02
## 2 NA NA 0.02 0.00 -0.02
## 3 NA NA 0.00 0.00 -0.02
## 4 NA NA 0.02 0.00 -0.03
## 5 NA NA 0.02 0.02 -0.02
## 6 NA NA 0.02 0.00 -0.02
## accel_belt_x accel_belt_y accel_belt_z magnet_belt_x magnet_belt_y
## 1 -21 4 22 -3 599
## 2 -22 4 22 -7 608
## 3 -20 5 23 -2 600
## 4 -22 3 21 -6 604
## 5 -21 2 24 -6 600
## 6 -21 4 21 0 603
## magnet_belt_z roll_arm pitch_arm yaw_arm total_accel_arm var_accel_arm
## 1 -313 -128 22.5 -161 34 NA
## 2 -311 -128 22.5 -161 34 NA
## 3 -305 -128 22.5 -161 34 NA
## 4 -310 -128 22.1 -161 34 NA
## 5 -302 -128 22.1 -161 34 NA
## 6 -312 -128 22.0 -161 34 NA
## avg_roll_arm stddev_roll_arm var_roll_arm avg_pitch_arm stddev_pitch_arm
## 1 NA NA NA NA NA
## 2 NA NA NA NA NA
## 3 NA NA NA NA NA
## 4 NA NA NA NA NA
## 5 NA NA NA NA NA
## 6 NA NA NA NA NA
## var_pitch_arm avg_yaw_arm stddev_yaw_arm var_yaw_arm gyros_arm_x
## 1 NA NA NA NA 0.00
## 2 NA NA NA NA 0.02
## 3 NA NA NA NA 0.02
## 4 NA NA NA NA 0.02
## 5 NA NA NA NA 0.00
## 6 NA NA NA NA 0.02
## gyros_arm_y gyros_arm_z accel_arm_x accel_arm_y accel_arm_z magnet_arm_x
## 1 0.00 -0.02 -288 109 -123 -368
## 2 -0.02 -0.02 -290 110 -125 -369
## 3 -0.02 -0.02 -289 110 -126 -368
## 4 -0.03 0.02 -289 111 -123 -372
## 5 -0.03 0.00 -289 111 -123 -374
## 6 -0.03 0.00 -289 111 -122 -369
## magnet_arm_y magnet_arm_z kurtosis_roll_arm kurtosis_picth_arm
## 1 337 516
## 2 337 513
## 3 344 513
## 4 344 512
## 5 337 506
## 6 342 513
## kurtosis_yaw_arm skewness_roll_arm skewness_pitch_arm skewness_yaw_arm
## 1
## 2
## 3
## 4
## 5
## 6
## max_roll_arm max_picth_arm max_yaw_arm min_roll_arm min_pitch_arm
## 1 NA NA NA NA NA
## 2 NA NA NA NA NA
## 3 NA NA NA NA NA
## 4 NA NA NA NA NA
## 5 NA NA NA NA NA
## 6 NA NA NA NA NA
## min_yaw_arm amplitude_roll_arm amplitude_pitch_arm amplitude_yaw_arm
## 1 NA NA NA NA
## 2 NA NA NA NA
## 3 NA NA NA NA
## 4 NA NA NA NA
## 5 NA NA NA NA
## 6 NA NA NA NA
## roll_dumbbell pitch_dumbbell yaw_dumbbell kurtosis_roll_dumbbell
## 1 13.05217 -70.49400 -84.87394
## 2 13.13074 -70.63751 -84.71065
## 3 12.85075 -70.27812 -85.14078
## 4 13.43120 -70.39379 -84.87363
## 5 13.37872 -70.42856 -84.85306
## 6 13.38246 -70.81759 -84.46500
## kurtosis_picth_dumbbell kurtosis_yaw_dumbbell skewness_roll_dumbbell
## 1
## 2
## 3
## 4
## 5
## 6
## skewness_pitch_dumbbell skewness_yaw_dumbbell max_roll_dumbbell
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## max_picth_dumbbell max_yaw_dumbbell min_roll_dumbbell min_pitch_dumbbell
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
## 6 NA NA NA
## min_yaw_dumbbell amplitude_roll_dumbbell amplitude_pitch_dumbbell
## 1 NA NA
## 2 NA NA
## 3 NA NA
## 4 NA NA
## 5 NA NA
## 6 NA NA
## amplitude_yaw_dumbbell total_accel_dumbbell var_accel_dumbbell
## 1 37 NA
## 2 37 NA
## 3 37 NA
## 4 37 NA
## 5 37 NA
## 6 37 NA
## avg_roll_dumbbell stddev_roll_dumbbell var_roll_dumbbell
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
## 6 NA NA NA
## avg_pitch_dumbbell stddev_pitch_dumbbell var_pitch_dumbbell
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
## 6 NA NA NA
## avg_yaw_dumbbell stddev_yaw_dumbbell var_yaw_dumbbell gyros_dumbbell_x
## 1 NA NA NA 0
## 2 NA NA NA 0
## 3 NA NA NA 0
## 4 NA NA NA 0
## 5 NA NA NA 0
## 6 NA NA NA 0
## gyros_dumbbell_y gyros_dumbbell_z accel_dumbbell_x accel_dumbbell_y
## 1 -0.02 0.00 -234 47
## 2 -0.02 0.00 -233 47
## 3 -0.02 0.00 -232 46
## 4 -0.02 -0.02 -232 48
## 5 -0.02 0.00 -233 48
## 6 -0.02 0.00 -234 48
## accel_dumbbell_z magnet_dumbbell_x magnet_dumbbell_y magnet_dumbbell_z
## 1 -271 -559 293 -65
## 2 -269 -555 296 -64
## 3 -270 -561 298 -63
## 4 -269 -552 303 -60
## 5 -270 -554 292 -68
## 6 -269 -558 294 -66
## roll_forearm pitch_forearm yaw_forearm kurtosis_roll_forearm
## 1 28.4 -63.9 -153
## 2 28.3 -63.9 -153
## 3 28.3 -63.9 -152
## 4 28.1 -63.9 -152
## 5 28.0 -63.9 -152
## 6 27.9 -63.9 -152
## kurtosis_picth_forearm kurtosis_yaw_forearm skewness_roll_forearm
## 1
## 2
## 3
## 4
## 5
## 6
## skewness_pitch_forearm skewness_yaw_forearm max_roll_forearm
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## max_picth_forearm max_yaw_forearm min_roll_forearm min_pitch_forearm
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
## 6 NA NA NA
## min_yaw_forearm amplitude_roll_forearm amplitude_pitch_forearm
## 1 NA NA
## 2 NA NA
## 3 NA NA
## 4 NA NA
## 5 NA NA
## 6 NA NA
## amplitude_yaw_forearm total_accel_forearm var_accel_forearm
## 1 36 NA
## 2 36 NA
## 3 36 NA
## 4 36 NA
## 5 36 NA
## 6 36 NA
## avg_roll_forearm stddev_roll_forearm var_roll_forearm avg_pitch_forearm
## 1 NA NA NA NA
## 2 NA NA NA NA
## 3 NA NA NA NA
## 4 NA NA NA NA
## 5 NA NA NA NA
## 6 NA NA NA NA
## stddev_pitch_forearm var_pitch_forearm avg_yaw_forearm
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
## 6 NA NA NA
## stddev_yaw_forearm var_yaw_forearm gyros_forearm_x gyros_forearm_y
## 1 NA NA 0.03 0.00
## 2 NA NA 0.02 0.00
## 3 NA NA 0.03 -0.02
## 4 NA NA 0.02 -0.02
## 5 NA NA 0.02 0.00
## 6 NA NA 0.02 -0.02
## gyros_forearm_z accel_forearm_x accel_forearm_y accel_forearm_z
## 1 -0.02 192 203 -215
## 2 -0.02 192 203 -216
## 3 0.00 196 204 -213
## 4 0.00 189 206 -214
## 5 -0.02 189 206 -214
## 6 -0.03 193 203 -215
## magnet_forearm_x magnet_forearm_y magnet_forearm_z classe
## 1 -17 654 476 A
## 2 -18 661 473 A
## 3 -18 658 469 A
## 4 -16 658 469 A
## 5 -17 655 473 A
## 6 -9 660 478 A
# Keep only the columns that contain no missing values at all.
train <- train[!vapply(train, anyNA, logical(1))]
dim(train)
## [1] 13737 93
# The first seven columns (row id, user name, timestamps, window markers) are
# bookkeeping rather than sensor readings, so they are dropped by position.
train <- train[, -seq_len(7)]

# Flag near-zero-variance columns and keep only the ones that are not flagged.
zerovar <- nearZeroVar(train, saveMetrics = TRUE)
train <- train[, !zerovar$nzv]
# Resampling settings reused by every train() call in this script:
# 5-fold cross-validation.
fitControl <- trainControl(method = "cv", number = 5, allowParallel = TRUE)
# Separate the outcome (the last column) from the predictors.
trainy <- train[[ncol(train)]]
trainx <- train[, -ncol(train)]

# Fit a single classification tree (rpart) with the shared CV settings.
model2 <- train(
  x = trainx,
  y = trainy,
  method = 'rpart',
  trControl = fitControl
)
print(model2, digits = 4)
## CART
##
## 13737 samples
## 52 predictor
## 5 classes: 'A', 'B', 'C', 'D', 'E'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 10990, 10990, 10989, 10990, 10989
## Resampling results across tuning parameters:
##
## cp Accuracy Kappa
## 0.03662 0.5040 0.35298
## 0.05947 0.4426 0.25367
## 0.11789 0.3331 0.07482
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.03662.
# Score the held-out validation rows with the CART model.
predict_dt <- predict(model2, validate)

# confusionMatrix() takes the predicted classes first (`data`) and the true
# classes second (`reference`). With the arguments the other way round the
# table is transposed, and the per-class statistics and the no-information
# rate are computed against the predictions instead of the truth; overall
# accuracy is the same either way.
# NOTE: the captured output that follows was produced with the arguments
# swapped, so its rows are the true classes and its columns the predictions.
confusionMatrix(data = predict_dt, reference = validate$classe)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E
## A 1510 27 132 0 5
## B 463 389 287 0 0
## C 462 36 528 0 0
## D 427 163 374 0 0
## E 179 141 299 0 463
##
## Overall Statistics
##
## Accuracy : 0.4911
## 95% CI : (0.4782, 0.5039)
## No Information Rate : 0.5167
## P-Value [Acc > NIR] : 1
##
## Kappa : 0.3352
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E
## Sensitivity 0.4965 0.5146 0.32593 NA 0.98932
## Specificity 0.9423 0.8538 0.88324 0.8362 0.88573
## Pos Pred Value 0.9020 0.3415 0.51462 NA 0.42791
## Neg Pred Value 0.6364 0.9227 0.77526 NA 0.99896
## Prevalence 0.5167 0.1285 0.27528 0.0000 0.07952
## Detection Rate 0.2566 0.0661 0.08972 0.0000 0.07867
## Detection Prevalence 0.2845 0.1935 0.17434 0.1638 0.18386
## Balanced Accuracy 0.7194 0.6842 0.60458 NA 0.93752
# Classify the unlabelled test cases with the CART model.
predict(model2, newdata = testing)
## [1] C A C A A C C A A A C C C A C A A A A C
## Levels: A B C D E
# Fit a random forest on the same predictors with the shared CV settings.
model3 <- train(
  x = trainx,
  y = trainy,
  method = 'rf',
  trControl = fitControl
)
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
# Show the cross-validated tuning results for the random forest.
print(x = model3, digits = 4)
## Random Forest
##
## 13737 samples
## 52 predictor
## 5 classes: 'A', 'B', 'C', 'D', 'E'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 10989, 10990, 10990, 10991, 10988
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.9911 0.9888
## 27 0.9905 0.9880
## 52 0.9827 0.9781
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
# Score the held-out validation rows with the random forest.
predict_rf <- predict(model3, validate)

# confusionMatrix() takes the predicted classes first (`data`) and the true
# classes second (`reference`). With the arguments the other way round the
# table is transposed, and the per-class statistics and the no-information
# rate are computed against the predictions instead of the truth; overall
# accuracy is the same either way.
# NOTE: the captured output that follows was produced with the arguments
# swapped, so its rows are the true classes and its columns the predictions.
confusionMatrix(data = predict_rf, reference = validate$classe)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E
## A 1670 1 2 0 1
## B 4 1135 0 0 0
## C 0 9 1016 1 0
## D 0 0 8 954 2
## E 0 0 0 2 1080
##
## Overall Statistics
##
## Accuracy : 0.9949
## 95% CI : (0.9927, 0.9966)
## No Information Rate : 0.2845
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9936
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E
## Sensitivity 0.9976 0.9913 0.9903 0.9969 0.9972
## Specificity 0.9991 0.9992 0.9979 0.9980 0.9996
## Pos Pred Value 0.9976 0.9965 0.9903 0.9896 0.9982
## Neg Pred Value 0.9991 0.9979 0.9979 0.9994 0.9994
## Prevalence 0.2845 0.1946 0.1743 0.1626 0.1840
## Detection Rate 0.2838 0.1929 0.1726 0.1621 0.1835
## Detection Prevalence 0.2845 0.1935 0.1743 0.1638 0.1839
## Balanced Accuracy 0.9983 0.9952 0.9941 0.9974 0.9984
# Classify the unlabelled test cases with the random forest.
predict(model3, newdata = testing)
## [1] B A B A A E D B A A B C B A E E A B B B
## Levels: A B C D E
# Fit a gradient boosting model (gbm) on the same predictors with the shared
# CV settings.
model4 <- train(
  x = trainx,
  y = trainy,
  method = 'gbm',
  trControl = fitControl
)
## Loading required package: survival
##
## Attaching package: 'survival'
## The following object is masked from 'package:caret':
##
## cluster
## Loading required package: splines
## Loading required package: parallel
## Loaded gbm 2.1.3
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 1.6094 nan 0.1000 0.1317
## 2 1.5237 nan 0.1000 0.0891
## 3 1.4651 nan 0.1000 0.0659
## 4 1.4212 nan 0.1000 0.0562
## 5 1.3840 nan 0.1000 0.0444
## 6 1.3541 nan 0.1000 0.0465
## 7 1.3253 nan 0.1000 0.0403
## 8 1.2996 nan 0.1000 0.0334
## 9 1.2781 nan 0.1000 0.0311
## 10 1.2586 nan 0.1000 0.0296
## 20 1.0991 nan 0.1000 0.0147
## 40 0.9298 nan 0.1000 0.0095
## 60 0.8248 nan 0.1000 0.0057
## 80 0.7423 nan 0.1000 0.0038
## 100 0.6807 nan 0.1000 0.0038
## 120 0.6305 nan 0.1000 0.0020
## 140 0.5869 nan 0.1000 0.0031
## 150 0.5668 nan 0.1000 0.0025
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 1.6094 nan 0.1000 0.1915
## 2 1.4882 nan 0.1000 0.1279
## 3 1.4045 nan 0.1000 0.1071
## 4 1.3365 nan 0.1000 0.0820
## 5 1.2830 nan 0.1000 0.0678
## 6 1.2376 nan 0.1000 0.0636
## 7 1.1962 nan 0.1000 0.0611
## 8 1.1582 nan 0.1000 0.0470
## 9 1.1276 nan 0.1000 0.0534
## 10 1.0942 nan 0.1000 0.0391
## 20 0.8945 nan 0.1000 0.0241
## 40 0.6849 nan 0.1000 0.0104
## 60 0.5568 nan 0.1000 0.0063
## 80 0.4672 nan 0.1000 0.0069
## 100 0.4007 nan 0.1000 0.0034
## 120 0.3490 nan 0.1000 0.0021
## 140 0.3077 nan 0.1000 0.0025
## 150 0.2916 nan 0.1000 0.0012
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 1.6094 nan 0.1000 0.2397
## 2 1.4578 nan 0.1000 0.1559
## 3 1.3583 nan 0.1000 0.1249
## 4 1.2778 nan 0.1000 0.1092
## 5 1.2098 nan 0.1000 0.0909
## 6 1.1512 nan 0.1000 0.0757
## 7 1.1026 nan 0.1000 0.0659
## 8 1.0613 nan 0.1000 0.0543
## 9 1.0255 nan 0.1000 0.0615
## 10 0.9873 nan 0.1000 0.0503
## 20 0.7523 nan 0.1000 0.0311
## 40 0.5320 nan 0.1000 0.0099
## 60 0.4011 nan 0.1000 0.0063
## 80 0.3230 nan 0.1000 0.0046
## 100 0.2663 nan 0.1000 0.0044
## 120 0.2204 nan 0.1000 0.0023
## 140 0.1878 nan 0.1000 0.0013
## 150 0.1747 nan 0.1000 0.0012
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 1.6094 nan 0.1000 0.1267
## 2 1.5226 nan 0.1000 0.0907
## 3 1.4641 nan 0.1000 0.0681
## 4 1.4194 nan 0.1000 0.0538
## 5 1.3837 nan 0.1000 0.0512
## 6 1.3499 nan 0.1000 0.0387
## 7 1.3248 nan 0.1000 0.0387
## 8 1.2993 nan 0.1000 0.0365
## 9 1.2746 nan 0.1000 0.0309
## 10 1.2549 nan 0.1000 0.0309
## 20 1.0975 nan 0.1000 0.0193
## 40 0.9267 nan 0.1000 0.0068
## 60 0.8165 nan 0.1000 0.0057
## 80 0.7377 nan 0.1000 0.0060
## 100 0.6751 nan 0.1000 0.0050
## 120 0.6228 nan 0.1000 0.0031
## 140 0.5818 nan 0.1000 0.0031
## 150 0.5629 nan 0.1000 0.0023
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 1.6094 nan 0.1000 0.1830
## 2 1.4888 nan 0.1000 0.1302
## 3 1.4037 nan 0.1000 0.1093
## 4 1.3333 nan 0.1000 0.0843
## 5 1.2784 nan 0.1000 0.0729
## 6 1.2317 nan 0.1000 0.0719
## 7 1.1875 nan 0.1000 0.0526
## 8 1.1533 nan 0.1000 0.0563
## 9 1.1185 nan 0.1000 0.0453
## 10 1.0897 nan 0.1000 0.0390
## 20 0.8910 nan 0.1000 0.0187
## 40 0.6791 nan 0.1000 0.0144
## 60 0.5517 nan 0.1000 0.0063
## 80 0.4661 nan 0.1000 0.0028
## 100 0.3985 nan 0.1000 0.0044
## 120 0.3487 nan 0.1000 0.0025
## 140 0.3060 nan 0.1000 0.0014
## 150 0.2888 nan 0.1000 0.0016
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 1.6094 nan 0.1000 0.2290
## 2 1.4627 nan 0.1000 0.1682
## 3 1.3562 nan 0.1000 0.1290
## 4 1.2741 nan 0.1000 0.0989
## 5 1.2108 nan 0.1000 0.0868
## 6 1.1558 nan 0.1000 0.0703
## 7 1.1109 nan 0.1000 0.0717
## 8 1.0652 nan 0.1000 0.0627
## 9 1.0246 nan 0.1000 0.0580
## 10 0.9883 nan 0.1000 0.0492
## 20 0.7506 nan 0.1000 0.0243
## 40 0.5258 nan 0.1000 0.0131
## 60 0.4037 nan 0.1000 0.0065
## 80 0.3182 nan 0.1000 0.0033
## 100 0.2601 nan 0.1000 0.0022
## 120 0.2206 nan 0.1000 0.0028
## 140 0.1872 nan 0.1000 0.0021
## 150 0.1734 nan 0.1000 0.0005
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 1.6094 nan 0.1000 0.1277
## 2 1.5219 nan 0.1000 0.0892
## 3 1.4637 nan 0.1000 0.0648
## 4 1.4197 nan 0.1000 0.0571
## 5 1.3820 nan 0.1000 0.0456
## 6 1.3513 nan 0.1000 0.0443
## 7 1.3228 nan 0.1000 0.0408
## 8 1.2967 nan 0.1000 0.0299
## 9 1.2768 nan 0.1000 0.0331
## 10 1.2551 nan 0.1000 0.0317
## 20 1.0992 nan 0.1000 0.0166
## 40 0.9264 nan 0.1000 0.0065
## 60 0.8169 nan 0.1000 0.0059
## 80 0.7381 nan 0.1000 0.0029
## 100 0.6771 nan 0.1000 0.0037
## 120 0.6244 nan 0.1000 0.0031
## 140 0.5831 nan 0.1000 0.0025
## 150 0.5646 nan 0.1000 0.0015
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 1.6094 nan 0.1000 0.1911
## 2 1.4879 nan 0.1000 0.1332
## 3 1.4014 nan 0.1000 0.1019
## 4 1.3362 nan 0.1000 0.0793
## 5 1.2835 nan 0.1000 0.0778
## 6 1.2341 nan 0.1000 0.0633
## 7 1.1920 nan 0.1000 0.0582
## 8 1.1555 nan 0.1000 0.0553
## 9 1.1212 nan 0.1000 0.0463
## 10 1.0917 nan 0.1000 0.0434
## 20 0.8905 nan 0.1000 0.0218
## 40 0.6822 nan 0.1000 0.0095
## 60 0.5498 nan 0.1000 0.0076
## 80 0.4613 nan 0.1000 0.0036
## 100 0.3964 nan 0.1000 0.0033
## 120 0.3452 nan 0.1000 0.0023
## 140 0.3051 nan 0.1000 0.0025
## 150 0.2883 nan 0.1000 0.0016
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 1.6094 nan 0.1000 0.2374
## 2 1.4571 nan 0.1000 0.1628
## 3 1.3540 nan 0.1000 0.1242
## 4 1.2764 nan 0.1000 0.1166
## 5 1.2042 nan 0.1000 0.0825
## 6 1.1521 nan 0.1000 0.0778
## 7 1.1035 nan 0.1000 0.0712
## 8 1.0573 nan 0.1000 0.0539
## 9 1.0231 nan 0.1000 0.0675
## 10 0.9818 nan 0.1000 0.0421
## 20 0.7490 nan 0.1000 0.0240
## 40 0.5302 nan 0.1000 0.0106
## 60 0.4044 nan 0.1000 0.0061
## 80 0.3253 nan 0.1000 0.0061
## 100 0.2644 nan 0.1000 0.0021
## 120 0.2208 nan 0.1000 0.0014
## 140 0.1869 nan 0.1000 0.0030
## 150 0.1730 nan 0.1000 0.0016
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 1.6094 nan 0.1000 0.1275
## 2 1.5211 nan 0.1000 0.0893
## 3 1.4612 nan 0.1000 0.0664
## 4 1.4165 nan 0.1000 0.0547
## 5 1.3794 nan 0.1000 0.0433
## 6 1.3506 nan 0.1000 0.0458
## 7 1.3214 nan 0.1000 0.0348
## 8 1.2985 nan 0.1000 0.0326
## 9 1.2777 nan 0.1000 0.0289
## 10 1.2574 nan 0.1000 0.0320
## 20 1.1032 nan 0.1000 0.0178
## 40 0.9337 nan 0.1000 0.0109
## 60 0.8239 nan 0.1000 0.0065
## 80 0.7436 nan 0.1000 0.0052
## 100 0.6818 nan 0.1000 0.0037
## 120 0.6310 nan 0.1000 0.0024
## 140 0.5860 nan 0.1000 0.0035
## 150 0.5667 nan 0.1000 0.0019
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 1.6094 nan 0.1000 0.1853
## 2 1.4881 nan 0.1000 0.1278
## 3 1.4031 nan 0.1000 0.1033
## 4 1.3366 nan 0.1000 0.0815
## 5 1.2831 nan 0.1000 0.0738
## 6 1.2356 nan 0.1000 0.0640
## 7 1.1952 nan 0.1000 0.0609
## 8 1.1559 nan 0.1000 0.0514
## 9 1.1232 nan 0.1000 0.0440
## 10 1.0946 nan 0.1000 0.0457
## 20 0.8993 nan 0.1000 0.0237
## 40 0.6854 nan 0.1000 0.0113
## 60 0.5567 nan 0.1000 0.0074
## 80 0.4726 nan 0.1000 0.0063
## 100 0.4075 nan 0.1000 0.0051
## 120 0.3533 nan 0.1000 0.0028
## 140 0.3112 nan 0.1000 0.0028
## 150 0.2923 nan 0.1000 0.0017
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 1.6094 nan 0.1000 0.2363
## 2 1.4597 nan 0.1000 0.1572
## 3 1.3586 nan 0.1000 0.1248
## 4 1.2781 nan 0.1000 0.1057
## 5 1.2108 nan 0.1000 0.0876
## 6 1.1553 nan 0.1000 0.0762
## 7 1.1073 nan 0.1000 0.0627
## 8 1.0678 nan 0.1000 0.0594
## 9 1.0296 nan 0.1000 0.0562
## 10 0.9940 nan 0.1000 0.0486
## 20 0.7585 nan 0.1000 0.0257
## 40 0.5386 nan 0.1000 0.0131
## 60 0.4113 nan 0.1000 0.0060
## 80 0.3288 nan 0.1000 0.0045
## 100 0.2680 nan 0.1000 0.0037
## 120 0.2255 nan 0.1000 0.0018
## 140 0.1905 nan 0.1000 0.0009
## 150 0.1766 nan 0.1000 0.0008
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 1.6094 nan 0.1000 0.1305
## 2 1.5209 nan 0.1000 0.0876
## 3 1.4606 nan 0.1000 0.0620
## 4 1.4178 nan 0.1000 0.0577
## 5 1.3799 nan 0.1000 0.0457
## 6 1.3498 nan 0.1000 0.0462
## 7 1.3203 nan 0.1000 0.0382
## 8 1.2946 nan 0.1000 0.0313
## 9 1.2736 nan 0.1000 0.0270
## 10 1.2555 nan 0.1000 0.0313
## 20 1.0943 nan 0.1000 0.0176
## 40 0.9244 nan 0.1000 0.0082
## 60 0.8181 nan 0.1000 0.0056
## 80 0.7385 nan 0.1000 0.0049
## 100 0.6750 nan 0.1000 0.0038
## 120 0.6255 nan 0.1000 0.0023
## 140 0.5830 nan 0.1000 0.0031
## 150 0.5629 nan 0.1000 0.0022
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 1.6094 nan 0.1000 0.1875
## 2 1.4863 nan 0.1000 0.1280
## 3 1.4009 nan 0.1000 0.0982
## 4 1.3364 nan 0.1000 0.0877
## 5 1.2802 nan 0.1000 0.0723
## 6 1.2339 nan 0.1000 0.0655
## 7 1.1911 nan 0.1000 0.0567
## 8 1.1544 nan 0.1000 0.0513
## 9 1.1218 nan 0.1000 0.0488
## 10 1.0893 nan 0.1000 0.0415
## 20 0.8881 nan 0.1000 0.0230
## 40 0.6783 nan 0.1000 0.0144
## 60 0.5502 nan 0.1000 0.0078
## 80 0.4672 nan 0.1000 0.0064
## 100 0.3999 nan 0.1000 0.0032
## 120 0.3481 nan 0.1000 0.0022
## 140 0.3069 nan 0.1000 0.0028
## 150 0.2882 nan 0.1000 0.0028
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 1.6094 nan 0.1000 0.2349
## 2 1.4595 nan 0.1000 0.1662
## 3 1.3527 nan 0.1000 0.1313
## 4 1.2684 nan 0.1000 0.1036
## 5 1.2030 nan 0.1000 0.0838
## 6 1.1491 nan 0.1000 0.0762
## 7 1.1004 nan 0.1000 0.0648
## 8 1.0575 nan 0.1000 0.0505
## 9 1.0250 nan 0.1000 0.0539
## 10 0.9908 nan 0.1000 0.0545
## 20 0.7531 nan 0.1000 0.0290
## 40 0.5288 nan 0.1000 0.0125
## 60 0.4024 nan 0.1000 0.0057
## 80 0.3230 nan 0.1000 0.0054
## 100 0.2620 nan 0.1000 0.0018
## 120 0.2220 nan 0.1000 0.0014
## 140 0.1895 nan 0.1000 0.0017
## 150 0.1753 nan 0.1000 0.0009
##
## Iter TrainDeviance ValidDeviance StepSize Improve
## 1 1.6094 nan 0.1000 0.2295
## 2 1.4609 nan 0.1000 0.1650
## 3 1.3576 nan 0.1000 0.1234
## 4 1.2799 nan 0.1000 0.1063
## 5 1.2117 nan 0.1000 0.0897
## 6 1.1553 nan 0.1000 0.0753
## 7 1.1078 nan 0.1000 0.0643
## 8 1.0670 nan 0.1000 0.0690
## 9 1.0248 nan 0.1000 0.0559
## 10 0.9886 nan 0.1000 0.0511
## 20 0.7607 nan 0.1000 0.0249
## 40 0.5301 nan 0.1000 0.0109
## 60 0.4091 nan 0.1000 0.0072
## 80 0.3251 nan 0.1000 0.0041
## 100 0.2698 nan 0.1000 0.0035
## 120 0.2239 nan 0.1000 0.0035
## 140 0.1896 nan 0.1000 0.0016
## 150 0.1765 nan 0.1000 0.0020
# Show the cross-validated tuning results for the boosted model.
print(x = model4, digits = 4)
## Stochastic Gradient Boosting
##
## 13737 samples
## 52 predictor
## 5 classes: 'A', 'B', 'C', 'D', 'E'
##
## No pre-processing
## Resampling: Cross-Validated (5 fold)
## Summary of sample sizes: 10989, 10991, 10990, 10988, 10990
## Resampling results across tuning parameters:
##
## interaction.depth n.trees Accuracy Kappa
## 1 50 0.7466 0.6787
## 1 100 0.8209 0.7733
## 1 150 0.8516 0.8121
## 2 50 0.8528 0.8134
## 2 100 0.9048 0.8795
## 2 150 0.9319 0.9138
## 3 50 0.8952 0.8673
## 3 100 0.9403 0.9245
## 3 150 0.9605 0.9500
##
## Tuning parameter 'shrinkage' was held constant at a value of 0.1
##
## Tuning parameter 'n.minobsinnode' was held constant at a value of 10
## Accuracy was used to select the optimal model using the largest value.
## The final values used for the model were n.trees = 150,
## interaction.depth = 3, shrinkage = 0.1 and n.minobsinnode = 10.
# Score the held-out validation rows with the boosted model.
predict_gbm <- predict(model4, validate)

# confusionMatrix() takes the predicted classes first (`data`) and the true
# classes second (`reference`). With the arguments the other way round the
# table is transposed, and the per-class statistics and the no-information
# rate are computed against the predictions instead of the truth; overall
# accuracy is the same either way.
# NOTE: the captured output that follows was produced with the arguments
# swapped, so its rows are the true classes and its columns the predictions.
confusionMatrix(data = predict_gbm, reference = validate$classe)
## Confusion Matrix and Statistics
##
## Reference
## Prediction A B C D E
## A 1640 18 10 4 2
## B 31 1079 28 1 0
## C 0 37 977 10 2
## D 1 3 22 931 7
## E 2 9 9 7 1055
##
## Overall Statistics
##
## Accuracy : 0.9655
## 95% CI : (0.9605, 0.97)
## No Information Rate : 0.2845
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9564
## Mcnemar's Test P-Value : 0.0001046
##
## Statistics by Class:
##
## Class: A Class: B Class: C Class: D Class: E
## Sensitivity 0.9797 0.9415 0.9340 0.9769 0.9897
## Specificity 0.9919 0.9873 0.9899 0.9933 0.9944
## Pos Pred Value 0.9797 0.9473 0.9522 0.9658 0.9750
## Neg Pred Value 0.9919 0.9859 0.9858 0.9955 0.9977
## Prevalence 0.2845 0.1947 0.1777 0.1619 0.1811
## Detection Rate 0.2787 0.1833 0.1660 0.1582 0.1793
## Detection Prevalence 0.2845 0.1935 0.1743 0.1638 0.1839
## Balanced Accuracy 0.9858 0.9644 0.9620 0.9851 0.9920
# Classify the unlabelled test cases with the boosted model.
predict(model4, newdata = testing)
## [1] B A B A A E D B A A B C B A E E A B B B
## Levels: A B C D E