About the data

You can find more information about the study here

body sensors
body sensors

Load the data

training <- read.csv("https://d396qusza40orc.cloudfront.net/predmachlearn/pml-training.csv", na.strings = c("", NA))
testing <- read.csv("https://d396qusza40orc.cloudfront.net/predmachlearn/pml-testing.csv", na.strings = c("", NA))

Load necessary libraries

library(DataExplorer)
library(dplyr)
library(randomForest)
library(caret)
library(ggplot2)
library(gridExtra)
library(reshape2)
library(rpart)
library(rpart.plot)
library(skimr)
library(corrplot)
library(caretEnsemble)
library(pryr)
library(gbm3)
library(glmnet)

Initial looks

introduce(training)
##    rows columns discrete_columns continuous_columns all_missing_columns
## 1 19622     160               37                123                   0
##   total_missing_values complete_rows total_observations memory_usage
## 1              1921600           406            3139520     22810544
dim(testing)
## [1]  20 160
colnames(training)
##   [1] "X"                        "user_name"               
##   [3] "raw_timestamp_part_1"     "raw_timestamp_part_2"    
##   [5] "cvtd_timestamp"           "new_window"              
##   [7] "num_window"               "roll_belt"               
##   [9] "pitch_belt"               "yaw_belt"                
##  [11] "total_accel_belt"         "kurtosis_roll_belt"      
##  [13] "kurtosis_picth_belt"      "kurtosis_yaw_belt"       
##  [15] "skewness_roll_belt"       "skewness_roll_belt.1"    
##  [17] "skewness_yaw_belt"        "max_roll_belt"           
##  [19] "max_picth_belt"           "max_yaw_belt"            
##  [21] "min_roll_belt"            "min_pitch_belt"          
##  [23] "min_yaw_belt"             "amplitude_roll_belt"     
##  [25] "amplitude_pitch_belt"     "amplitude_yaw_belt"      
##  [27] "var_total_accel_belt"     "avg_roll_belt"           
##  [29] "stddev_roll_belt"         "var_roll_belt"           
##  [31] "avg_pitch_belt"           "stddev_pitch_belt"       
##  [33] "var_pitch_belt"           "avg_yaw_belt"            
##  [35] "stddev_yaw_belt"          "var_yaw_belt"            
##  [37] "gyros_belt_x"             "gyros_belt_y"            
##  [39] "gyros_belt_z"             "accel_belt_x"            
##  [41] "accel_belt_y"             "accel_belt_z"            
##  [43] "magnet_belt_x"            "magnet_belt_y"           
##  [45] "magnet_belt_z"            "roll_arm"                
##  [47] "pitch_arm"                "yaw_arm"                 
##  [49] "total_accel_arm"          "var_accel_arm"           
##  [51] "avg_roll_arm"             "stddev_roll_arm"         
##  [53] "var_roll_arm"             "avg_pitch_arm"           
##  [55] "stddev_pitch_arm"         "var_pitch_arm"           
##  [57] "avg_yaw_arm"              "stddev_yaw_arm"          
##  [59] "var_yaw_arm"              "gyros_arm_x"             
##  [61] "gyros_arm_y"              "gyros_arm_z"             
##  [63] "accel_arm_x"              "accel_arm_y"             
##  [65] "accel_arm_z"              "magnet_arm_x"            
##  [67] "magnet_arm_y"             "magnet_arm_z"            
##  [69] "kurtosis_roll_arm"        "kurtosis_picth_arm"      
##  [71] "kurtosis_yaw_arm"         "skewness_roll_arm"       
##  [73] "skewness_pitch_arm"       "skewness_yaw_arm"        
##  [75] "max_roll_arm"             "max_picth_arm"           
##  [77] "max_yaw_arm"              "min_roll_arm"            
##  [79] "min_pitch_arm"            "min_yaw_arm"             
##  [81] "amplitude_roll_arm"       "amplitude_pitch_arm"     
##  [83] "amplitude_yaw_arm"        "roll_dumbbell"           
##  [85] "pitch_dumbbell"           "yaw_dumbbell"            
##  [87] "kurtosis_roll_dumbbell"   "kurtosis_picth_dumbbell" 
##  [89] "kurtosis_yaw_dumbbell"    "skewness_roll_dumbbell"  
##  [91] "skewness_pitch_dumbbell"  "skewness_yaw_dumbbell"   
##  [93] "max_roll_dumbbell"        "max_picth_dumbbell"      
##  [95] "max_yaw_dumbbell"         "min_roll_dumbbell"       
##  [97] "min_pitch_dumbbell"       "min_yaw_dumbbell"        
##  [99] "amplitude_roll_dumbbell"  "amplitude_pitch_dumbbell"
## [101] "amplitude_yaw_dumbbell"   "total_accel_dumbbell"    
## [103] "var_accel_dumbbell"       "avg_roll_dumbbell"       
## [105] "stddev_roll_dumbbell"     "var_roll_dumbbell"       
## [107] "avg_pitch_dumbbell"       "stddev_pitch_dumbbell"   
## [109] "var_pitch_dumbbell"       "avg_yaw_dumbbell"        
## [111] "stddev_yaw_dumbbell"      "var_yaw_dumbbell"        
## [113] "gyros_dumbbell_x"         "gyros_dumbbell_y"        
## [115] "gyros_dumbbell_z"         "accel_dumbbell_x"        
## [117] "accel_dumbbell_y"         "accel_dumbbell_z"        
## [119] "magnet_dumbbell_x"        "magnet_dumbbell_y"       
## [121] "magnet_dumbbell_z"        "roll_forearm"            
## [123] "pitch_forearm"            "yaw_forearm"             
## [125] "kurtosis_roll_forearm"    "kurtosis_picth_forearm"  
## [127] "kurtosis_yaw_forearm"     "skewness_roll_forearm"   
## [129] "skewness_pitch_forearm"   "skewness_yaw_forearm"    
## [131] "max_roll_forearm"         "max_picth_forearm"       
## [133] "max_yaw_forearm"          "min_roll_forearm"        
## [135] "min_pitch_forearm"        "min_yaw_forearm"         
## [137] "amplitude_roll_forearm"   "amplitude_pitch_forearm" 
## [139] "amplitude_yaw_forearm"    "total_accel_forearm"     
## [141] "var_accel_forearm"        "avg_roll_forearm"        
## [143] "stddev_roll_forearm"      "var_roll_forearm"        
## [145] "avg_pitch_forearm"        "stddev_pitch_forearm"    
## [147] "var_pitch_forearm"        "avg_yaw_forearm"         
## [149] "stddev_yaw_forearm"       "var_yaw_forearm"         
## [151] "gyros_forearm_x"          "gyros_forearm_y"         
## [153] "gyros_forearm_z"          "accel_forearm_x"         
## [155] "accel_forearm_y"          "accel_forearm_z"         
## [157] "magnet_forearm_x"         "magnet_forearm_y"        
## [159] "magnet_forearm_z"         "classe"
#omit metadata
training2 <- training %>% select(-c(X,user_name,raw_timestamp_part_1,raw_timestamp_part_2,cvtd_timestamp,new_window,num_window))
testing2 <- testing %>% select(-c(X,user_name,raw_timestamp_part_1,raw_timestamp_part_2,cvtd_timestamp,new_window,num_window))

#convert outcome into factor
training2$classe <- as.factor(training2$classe)

# Handle missing values: remove columns with more than 70% missing data
na_props <- training2 %>% summarise(across(everything(), ~ sum(is.na(.)) / length(.)))
high_na_cols <- na_props %>% select(where(~ . > 0.7))
training3 <- training2[, !(colnames(training2) %in% colnames(high_na_cols))]
testing3 <- testing2[, !(colnames(testing2) %in% colnames(high_na_cols))]

Splitting the training data into training and validation sets

set.seed(123)
inTrain <- createDataPartition(y = training3$classe, p = 0.7, list = FALSE)
training4 <- training3[inTrain, ]
validation <- training3[-inTrain, ]
testing4 <- testing3

Insights on cleaned training data

# a second look using skim function
skim(training4)
Data summary
Name training4
Number of rows 13737
Number of columns 53
_______________________
Column type frequency:
factor 1
numeric 52
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
classe 0 1 FALSE 5 A: 3906, B: 2658, E: 2525, C: 2396

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
roll_belt 0 1 64.25 62.72 -28.80 1.10 113.00 123.00 162.00 ▇▁▁▅▅
pitch_belt 0 1 0.45 22.24 -55.80 1.92 5.32 15.00 60.30 ▃▁▇▅▁
yaw_belt 0 1 -11.67 94.86 -180.00 -88.30 -13.60 11.90 179.00 ▁▇▅▁▃
total_accel_belt 0 1 11.29 7.75 0.00 3.00 17.00 18.00 29.00 ▇▁▂▆▁
gyros_belt_x 0 1 -0.01 0.21 -1.04 -0.03 0.03 0.11 2.20 ▁▇▁▁▁
gyros_belt_y 0 1 0.04 0.08 -0.64 0.00 0.02 0.11 0.61 ▁▁▇▃▁
gyros_belt_z 0 1 -0.13 0.24 -1.46 -0.20 -0.10 -0.02 1.61 ▁▂▇▁▁
accel_belt_x 0 1 -5.79 29.47 -120.00 -21.00 -15.00 -5.00 85.00 ▁▁▇▁▂
accel_belt_y 0 1 30.10 28.63 -69.00 3.00 33.00 61.00 164.00 ▁▇▇▁▁
accel_belt_z 0 1 -72.38 100.40 -275.00 -162.00 -152.00 27.00 105.00 ▁▇▁▅▃
magnet_belt_x 0 1 55.00 63.93 -52.00 8.00 34.00 58.00 485.00 ▇▁▂▁▁
magnet_belt_y 0 1 593.76 35.35 354.00 581.00 601.00 610.00 673.00 ▁▁▁▇▃
magnet_belt_z 0 1 -345.41 64.17 -621.00 -375.00 -319.00 -306.00 287.00 ▁▇▁▁▁
roll_arm 0 1 17.78 73.20 -178.00 -32.20 0.00 77.50 180.00 ▁▃▇▆▂
pitch_arm 0 1 -4.46 30.72 -88.20 -26.00 0.00 11.50 88.50 ▁▅▇▂▁
yaw_arm 0 1 -0.52 71.61 -180.00 -43.10 0.00 46.70 180.00 ▁▃▇▃▂
total_accel_arm 0 1 25.53 10.45 1.00 17.00 27.00 33.00 65.00 ▃▆▇▁▁
gyros_arm_x 0 1 0.02 2.00 -6.37 -1.36 0.06 1.53 4.87 ▁▃▇▆▂
gyros_arm_y 0 1 -0.25 0.86 -3.44 -0.79 -0.24 0.16 2.81 ▁▂▇▂▁
gyros_arm_z 0 1 0.27 0.55 -2.33 -0.08 0.23 0.72 2.66 ▁▂▇▃▁
accel_arm_x 0 1 -60.05 182.68 -404.00 -241.00 -46.00 84.00 435.00 ▇▆▇▅▁
accel_arm_y 0 1 32.61 109.66 -318.00 -54.00 14.00 139.00 308.00 ▁▃▇▇▂
accel_arm_z 0 1 -70.84 133.98 -630.00 -142.00 -48.00 23.00 292.00 ▁▁▅▇▁
magnet_arm_x 0 1 192.20 444.52 -584.00 -301.00 290.00 640.00 782.00 ▆▃▂▃▇
magnet_arm_y 0 1 155.96 201.65 -392.00 -8.00 200.00 322.00 583.00 ▁▃▅▇▂
magnet_arm_z 0 1 307.10 325.84 -597.00 129.00 445.00 546.00 694.00 ▁▂▂▃▇
roll_dumbbell 0 1 23.56 70.25 -153.71 -19.94 48.07 67.78 153.55 ▂▂▃▇▂
pitch_dumbbell 0 1 -10.84 37.03 -149.59 -40.96 -20.85 17.35 149.40 ▁▆▇▂▁
yaw_dumbbell 0 1 2.07 82.36 -150.87 -77.57 0.00 79.97 154.95 ▃▇▅▅▆
total_accel_dumbbell 0 1 13.65 10.24 0.00 4.00 10.00 19.00 58.00 ▇▅▃▁▁
gyros_dumbbell_x 0 1 0.15 1.78 -204.00 -0.03 0.13 0.35 2.20 ▁▁▁▁▇
gyros_dumbbell_y 0 1 0.05 0.66 -2.10 -0.14 0.03 0.21 52.00 ▇▁▁▁▁
gyros_dumbbell_z 0 1 -0.12 2.72 -2.38 -0.31 -0.13 0.03 317.00 ▇▁▁▁▁
accel_dumbbell_x 0 1 -28.43 67.35 -419.00 -50.00 -8.00 10.00 235.00 ▁▁▆▇▁
accel_dumbbell_y 0 1 51.96 80.82 -189.00 -9.00 40.00 110.00 315.00 ▁▇▇▅▁
accel_dumbbell_z 0 1 -37.52 109.28 -273.00 -141.00 0.00 38.00 318.00 ▃▃▇▂▁
magnet_dumbbell_x 0 1 -328.71 340.26 -643.00 -535.00 -480.00 -307.00 584.00 ▇▂▁▁▂
magnet_dumbbell_y 0 1 219.91 328.00 -3600.00 231.00 310.00 390.00 633.00 ▁▁▁▁▇
magnet_dumbbell_z 0 1 45.00 139.39 -262.00 -46.00 12.00 95.00 452.00 ▁▇▆▂▂
roll_forearm 0 1 33.19 108.26 -180.00 -2.44 20.60 140.00 180.00 ▃▂▇▂▇
pitch_forearm 0 1 10.77 28.42 -72.50 0.00 9.60 28.90 89.80 ▁▁▇▃▁
yaw_forearm 0 1 19.03 103.90 -180.00 -70.50 0.00 111.00 180.00 ▅▅▇▆▇
total_accel_forearm 0 1 34.72 10.08 0.00 29.00 36.00 41.00 108.00 ▁▇▂▁▁
gyros_forearm_x 0 1 0.16 0.66 -22.00 -0.22 0.05 0.56 3.52 ▁▁▁▁▇
gyros_forearm_y 0 1 0.09 3.43 -7.02 -1.46 0.03 1.64 311.00 ▇▁▁▁▁
gyros_forearm_z 0 1 0.16 2.06 -5.55 -0.18 0.08 0.49 231.00 ▇▁▁▁▁
accel_forearm_x 0 1 -62.46 180.51 -496.00 -179.00 -57.00 75.00 477.00 ▂▆▇▅▁
accel_forearm_y 0 1 163.12 200.28 -595.00 55.00 201.00 312.00 923.00 ▁▃▇▃▁
accel_forearm_z 0 1 -56.51 138.45 -446.00 -182.00 -41.00 26.00 287.00 ▁▇▅▅▃
magnet_forearm_x 0 1 -314.03 346.44 -1280.00 -617.00 -381.00 -77.00 672.00 ▁▇▆▅▁
magnet_forearm_y 0 1 377.48 507.68 -892.00 4.00 586.00 735.00 1480.00 ▂▂▂▇▁
magnet_forearm_z 0 1 391.60 368.89 -966.00 186.00 509.00 651.00 1070.00 ▁▁▃▇▃
#check correlations
numeric_features <- training4 %>% select_if(is.numeric)
cor_matrix <- cor(numeric_features)
corrplot(cor_matrix, 
         tl.cex = .7,     # Reduce text label size
         tl.srt = 70,      # Rotate the labels by 45 degrees
         method = "circle" # Use circles for a clean look
)

#check class imbalance
table(training4$classe)
## 
##    A    B    C    D    E 
## 3906 2658 2396 2252 2525

Preprocess the data

#center and scale training4
preProc_train <- preProcess(training4, method = c("center", "scale"))

#transform the data
training5 <- predict(preProc_train, newdata = training4)
validation2 <- predict(preProc_train, newdata = validation)
testing5 <- predict(preProc_train, newdata = testing4)

#apply pca
pca <- prcomp(training5[, -which(names(training5) %in% c("classe"))])
pca_var <- pca$sdev^2 / sum(pca$sdev^2)
cum_var <- cumsum(pca_var)

#cumulative explained variance vs. number of principal components
plot(cum_var, xlab = "Number of Principal Components", 
     ylab = "Cumulative Proportion of Variance Explained", 
     type = "b", col = "blue", pch = 19,
     main = "Cumulative Variance Explained by Principal Components")
abline(h = 0.9, col = "red", lty = 2)

pca_transformed_data <- pca$x[, 1:18]

#transform the data using principal components by 90% cutoff on explained variance
training6 <- data.frame(pca_transformed_data, classe = training5$classe)
validation3 <- data.frame(predict(pca, newdata = validation2)[, 1:18],classe = validation2$classe)
testing6 <- predict(pca, newdata = testing5)[, 1:18]

Model Selection

Multinomial Logistic Regression

control_base <- trainControl(method = "cv", number = 5, savePredictions = "final", classProbs = TRUE)
Rprof("memory_profile.out", memory.profiling = TRUE)
model_glm <- train(classe ~ ., 
                   data = training6, 
                   method = "multinom", 
                   trControl = control_base,
                   verbose = FALSE)
Rprof(NULL)
time_glm1 <- summaryRprof("memory_profile.out", memory = "both")$by.total
memory_glm1 <- summaryRprof("memory_profile.out", memory = "both")$memory
tm_glm <- sapply(list(time_glm1$total.time,time_glm1$mem.total),max)
model_glm_preds <- predict(model_glm,validation3)
conf_matrix <- confusionMatrix(model_glm_preds, validation3$classe)
model_glm$results
##   decay  Accuracy     Kappa  AccuracySD    KappaSD
## 1 0e+00 0.5145256 0.3815310 0.008621836 0.01095427
## 2 1e-04 0.5145256 0.3815310 0.008621836 0.01095427
## 3 1e-01 0.5145256 0.3815327 0.008621836 0.01095683
conf_matrix$overall["Accuracy"]
##  Accuracy 
## 0.4968564

Tuning model_glm

tune_grid <- expand.grid(decay = seq(0, 0.1, by = 0.01))

Rprof("memory_profile.out", memory.profiling = TRUE)
model_glm_tuned <- train(
    classe ~ ., 
    data = training6, 
    method = "multinom", 
    trControl = control_base, 
    tuneGrid = tune_grid, 
    maxit = 200,
    verbose = FALSE
)
Rprof(NULL)
time_glm_tuned <- summaryRprof("memory_profile.out", memory = "both")$by.total
tm_glm_tuned <- sapply(list(time_glm_tuned$total.time,time_glm_tuned$mem.total),max)
head(model_glm_tuned$results)
##   decay  Accuracy     Kappa AccuracySD    KappaSD
## 1  0.00 0.5146675 0.3817817 0.01229979 0.01566126
## 2  0.01 0.5147403 0.3818760 0.01229335 0.01565946
## 3  0.02 0.5147403 0.3818760 0.01229335 0.01565946
## 4  0.03 0.5146675 0.3817817 0.01229979 0.01566126
## 5  0.04 0.5147403 0.3818760 0.01229335 0.01565946
## 6  0.05 0.5146675 0.3817817 0.01229979 0.01566126
model_glm_tuned_preds <- predict(model_glm_tuned,validation3)
conf_matrix <- confusionMatrix(model_glm_tuned_preds, validation3$classe)
conf_matrix$overall["Accuracy"]
##  Accuracy 
## 0.4968564

KNN

  • We train k-nearest-neighbors model using control_base again and 10 different numbers of \(k\).
Rprof("memory_profile.out", memory.profiling = TRUE)
model_knn <- train(classe ~ ., data = training6, method = "knn", trControl = control_base, tuneLength = 10)
Rprof(NULL)
time_knn <- summaryRprof("memory_profile.out", memory = "both")$by.total
tm_knn <- sapply(list(time_knn$total.time,time_knn$mem.total),max)
head(model_knn$results)
##    k  Accuracy     Kappa  AccuracySD     KappaSD
## 1  5 0.9406720 0.9249422 0.005791014 0.007312515
## 2  7 0.9240012 0.9038475 0.005780462 0.007307526
## 3  9 0.9109709 0.8873732 0.007378862 0.009360324
## 4 11 0.8986685 0.8717871 0.005768835 0.007342700
## 5 13 0.8881855 0.8585090 0.004984859 0.006333097
## 6 15 0.8764658 0.8436722 0.004374847 0.005573689
model_knn_preds <- predict(model_knn,validation3)
conf_matrix <- confusionMatrix(model_knn_preds, validation3$classe)
conf_matrix$overall["Accuracy"]
##  Accuracy 
## 0.9502124
  • Knn performed better than glm but we will try decision trees for a much better accuracy.

Decision Tree

tree_tune_grid <- expand.grid(cp = seq(0.001, 0.05, by = 0.001))
Rprof("memory_profile.out", memory.profiling = TRUE)
model_tree <- train(classe ~ ., data = training6, 
                    method = "rpart", 
                    trControl = control_base, 
                    tuneGrid = tree_tune_grid)
Rprof(NULL)
time_tree <- summaryRprof("memory_profile.out", memory = "both")$by.total
tm_tree <- sapply(list(time_tree$total.time,time_tree$mem.total),max)
model_tree_preds <- predict(model_tree,validation3)
conf_matrix <- confusionMatrix(model_tree_preds, validation3$classe)
conf_matrix$overall["Accuracy"]
##  Accuracy 
## 0.7231946

Random Forest

Rprof("memory_profile.out", memory.profiling = TRUE)
model_rf <- train(classe ~ ., data = training6, method = "rf",trControl = control_base, importance = TRUE)
Rprof(NULL)
time_rf <- summaryRprof("memory_profile.out", memory = "both")$by.total
tm_rf <- sapply(list(time_rf$total.time,time_rf$mem.total),max)
model_rf_preds <- predict(model_rf$finalModel,validation3)
conf_matrix <- confusionMatrix(model_rf_preds, validation3$classe)
conf_matrix$overall["Accuracy"]
##  Accuracy 
## 0.9712829
  • Random Forest performs well on training data. We will try Boosting to see if it can increase accuracy.

Boosting

Rprof("memory_profile.out", memory.profiling = TRUE)
model_boost <- train(
  classe ~ ., 
  method = "gbm", 
  data = training6, 
  trControl = control_base,
  verbose = FALSE,  # Prevents printing during training
  tuneLength = 10   # This will try 10 different hyperparameter combinations
)
Rprof(NULL)
time_boost <- summaryRprof("memory_profile.out", memory = "both")$by.total
tm_boost <- sapply(list(time_boost$total.time,time_boost$mem.total),max)
model_boost_preds <- predict(model_boost,validation3)
conf_matrix <- confusionMatrix(model_boost_preds, validation3$classe)
conf_matrix$overall["Accuracy"]
##  Accuracy 
## 0.9604078
  • Finally, we stack all models with glmnet as a base learner.

Model Stacking

# Model Stacking with Profiling
base_learners <- list(
    rpart = model_tree,    # Using the caret model directly
    rf = model_rf,         # Full caret model
    knn = model_knn,       # Full caret model
    glm = model_glm_tuned, # Logistic regression tuned model
    gbm = model_boost      # Boosted model
)

# Control for stacking models
stack_control <- trainControl(method = "cv", number = 5, savePredictions = "final", classProbs = TRUE)

# Profiling the stacking process
Rprof("memory_profile_stack.out", memory.profiling = TRUE)

# Stacking the models
model_stack <- caretStack(
    base_learners,
    method = "glmnet",    # Use glmnet for model stacking
    metric = "Accuracy",  # Metric to optimize
    trControl = stack_control
)

Rprof(NULL)
# Profiling results
time_stack <- summaryRprof("memory_profile_stack.out", memory = "both")$by.total
tm_stack <- sapply(list(time_stack$total.time, time_stack$mem.total), max)

summary(model_stack)
## The following models were ensembled: rpart, rf, knn, glm, gbm  
## 
## Model Importance:
## rpart_B rpart_C rpart_D rpart_E    rf_B    rf_C    rf_D    rf_E   knn_B   knn_C 
##  0.0055  0.0031  0.0033  0.0008  0.2975  0.1697  0.1926  0.1654  0.0063  0.0176 
##   knn_D   knn_E   glm_B   glm_C   glm_D   glm_E   gbm_B   gbm_C   gbm_D   gbm_E 
##  0.0253  0.0298  0.0065  0.0017  0.0019  0.0029  0.0136  0.0340  0.0122  0.0104 
## 
## Model accuracy:
##    model_name   metric     value          sd
##        <char>   <char>     <num>       <num>
## 1:   ensemble Accuracy 0.9708828 0.003681060
## 2:      rpart Accuracy 0.7328383 0.003144346
## 3:         rf Accuracy 0.9657134 0.004275948
## 4:        knn Accuracy 0.9406720 0.005791014
## 5:   multinom Accuracy 0.5147403 0.012293350
## 6:        gbm Accuracy 0.9501347 0.002287768
model_stack_preds <- predict(model_stack, newdata = validation3)
model_stack_preds <- apply(model_stack_preds, 1, function(x) colnames(model_stack_preds)[which.max(x)])
model_stack_preds <- as.factor(model_stack_preds)
validation_actual <- as.factor(validation3$classe)
conf_matrix <- confusionMatrix(model_stack_preds, validation_actual)
conf_matrix$overall["Accuracy"]
##  Accuracy 
## 0.9785896

Comparing the Models

models <- list(
    glm = model_glm,
    glm_tuned = model_glm_tuned,
    knn = model_knn,
    tree = model_tree,
    rf = model_rf,
    boost = model_boost,
    stack = model_stack
)

# Initialize a list to store predictions
predictions <- list()

# Generate predictions for each model
for (model_name in names(models)) {
    if (model_name == "stack") {
        # For the stacked model, predictions may be probabilities
        pred_probs <- predict(models[[model_name]], newdata = validation3)
        # Convert probabilities to class labels
        preds <- apply(pred_probs, 1, function(x) colnames(pred_probs)[which.max(x)])
        predictions[[model_name]] <- as.factor(preds)
    } else {
        # For other models, directly predict class labels
        predictions[[model_name]] <- predict(models[[model_name]], newdata = validation3)
    }
}



# Initialize a data frame to store performance metrics
performance_metrics <- data.frame(
    Model = character(),
    Class = character(),
    Sensitivity = numeric(),
    Specificity = numeric(),
    Pos_Pred_Value = numeric(),
    Neg_Pred_Value = numeric(),
    Prevalence = numeric(),
    Detection_Rate = numeric(),
    Detection_Prevalence = numeric(),
    Balanced_Accuracy = numeric(),
    stringsAsFactors = FALSE
)

# Loop over each model to extract per-class metrics
for (model_name in names(predictions)) {
    cm <- confusionMatrix(predictions[[model_name]], validation3$classe)
    
    # Extract the per-class metrics
    byClass <- as.data.frame(cm$byClass)
    
    # Since in multiclass, the row names might be like "Class: A", extract the class labels
    byClass$Class <- rownames(byClass)
    byClass$Class <- sub("Class: ", "", byClass$Class)
    
    # Add the model name
    byClass$Model <- model_name
    
    # Reorder columns
    byClass <- byClass[, c("Model", "Class", names(byClass)[1:(ncol(byClass)-2)])]
    
    # Append to the performance_metrics data frame
    performance_metrics <- rbind(performance_metrics, byClass)
}

# Rename columns for clarity
colnames(performance_metrics) <- c(
    "Model", "Class", "Sensitivity", "Specificity",
    "Pos_Pred_Value", "Neg_Pred_Value", "Precision",
    "Recall", "F1", "Prevalence", "Detection_Rate",
    "Detection_Prevalence", "Balanced_Accuracy"
)

performance_metrics <- performance_metrics %>% select(-Recall) %>% arrange(desc(Balanced_Accuracy))

profiling_data <- data.frame(
  Model = c("glm", "glm_tuned", "knn", "tree", "rf", "boost", "stack"),
  Training_Time = c(tm_glm[1], tm_glm_tuned[1], tm_knn[1],
                    tm_tree[1], tm_rf[1], tm_boost[1], tm_stack[1]),
  Memory_Usage = c(tm_glm[2], tm_glm_tuned[2], tm_knn[2],
                    tm_tree[2], tm_rf[2], tm_boost[2], tm_stack[2])
)
profiling_data <- profiling_data %>% arrange(desc(Memory_Usage),desc(Training_Time)) %>% mutate(Training_Time = round(Training_Time/60,3))
rownames(performance_metrics) <- 1:35
head(performance_metrics,10)
##    Model Class Sensitivity Specificity Pos_Pred_Value Neg_Pred_Value Precision
## 1  stack     E   0.9889094   0.9977098      0.9898242      0.9975021 0.9898242
## 2  stack     A   0.9916368   0.9938257      0.9845789      0.9966659 0.9845789
## 3     rf     A   0.9898447   0.9919259      0.9798936      0.9959466 0.9798936
## 4  boost     A   0.9880526   0.9905011      0.9763872      0.9952279 0.9763872
## 5     rf     E   0.9759704   0.9983344      0.9924812      0.9946069 0.9924812
## 6    knn     A   0.9826762   0.9871764      0.9682166      0.9930721 0.9682166
## 7  stack     B   0.9727831   0.9949431      0.9787986      0.9934778 0.9787986
## 8  stack     C   0.9707602   0.9892982      0.9503817      0.9937978 0.9503817
## 9  boost     E   0.9611830   0.9966687      0.9848485      0.9913025 0.9848485
## 10 stack     D   0.9595436   0.9973583      0.9861407      0.9921164 0.9861407
##           F1 Prevalence Detection_Rate Detection_Prevalence Balanced_Accuracy
## 1  0.9893666  0.1838573      0.1818182            0.1836873         0.9933096
## 2  0.9880952  0.2844520      0.2820731            0.2864911         0.9927312
## 3  0.9848440  0.2844520      0.2815633            0.2873407         0.9908853
## 4  0.9821853  0.2844520      0.2810535            0.2878505         0.9892768
## 5  0.9841566  0.1838573      0.1794393            0.1807986         0.9871524
## 6  0.9753928  0.2844520      0.2795242            0.2887001         0.9849263
## 7  0.9757816  0.1935429      0.1882753            0.1923534         0.9838631
## 8  0.9604629  0.1743415      0.1692438            0.1780799         0.9800292
## 9  0.9728718  0.1838573      0.1767205            0.1794393         0.9789259
## 10 0.9726604  0.1638063      0.1571793            0.1593883         0.9784509
  • As seen in the comparing table, stacking model outperforms other models and we choose it for predictions and evaluation on unseen data.
profiling_data
##       Model Training_Time Memory_Usage
## 1        rf         6.001      17746.9
## 2     boost        38.109      16900.4
## 3      tree         0.137       4141.1
## 4     stack         0.721       3347.8
## 5       knn         1.052       2349.2
## 6 glm_tuned         1.935       2207.5
## 7       glm         0.631        758.3