Data 622 Homework 2

Part A

I chose SVM and Decision Tree classifiers.

library(caret)
library(pROC)
library(tidyverse)
library (kableExtra)

Load Heart Data

file<-"/Users/aaronzalki/Documents/heart.csv"
heart<-read.csv(file,head=T,sep=',',stringsAsFactors=F)
head(heart)

##   age sex cp trestbps chol fbs restecg thalach exang oldpeak slope ca thal
## 1  63   1  3      145  233   1       0     150     0     2.3     0  0    1
## 2  37   1  2      130  250   0       1     187     0     3.5     0  0    2
## 3  41   0  1      130  204   0       0     172     0     1.4     2  0    2
## 4  56   1  1      120  236   0       1     178     0     0.8     2  0    2
## 5  57   0  0      120  354   0       1     163     1     0.6     2  0    2
## 6  57   1  0      140  192   0       1     148     0     0.4     1  0    1
##   target
## 1      1
## 2      1
## 3      1
## 4      1
## 5      1
## 6      1

The function factor is used to encode a vector as a factor.

heart$target <- as.factor(heart$target)
heart$sex <- as.factor(heart$sex)
heart$cp <- as.factor(heart$cp)
heart$fbs <- as.factor(heart$fbs)
heart$restecg <- as.factor(heart$restecg)
heart$exang <- as.factor(heart$exang)
heart$slope <- as.factor(heart$slope)
heart$ca <- as.factor(heart$ca)
heart$thal <- as.factor(heart$thal)

80/20 Split

set.seed(43)
x <- floor(0.8 * nrow(heart))
train_ind <- sample(seq_len(nrow(heart)), size = x)
train_heart <- heart[ train_ind,]
test_heart  <- heart[-train_ind,]

The following function utilizes confusionMatrix function from the caret library in order to return metrics such as accuracy, area under curve etc. For a better understanding of some of these metrics, please see Data 621. The same function contains various if, else if, else clauses so that we can also run a bootstrap of n resamples as well as randomForest for part B.

Create Function

cm_calc <- function(train_type, tr, label){

  timer <- proc.time()
  
  set.seed(43)
  
  # base
  if (grepl("Base", label, fixed=TRUE)) {
    train_model = train(
    form = target ~ .,
    data = train_heart,
    trControl = tr,
    method = train_type
    )
    print(train_model)
    
    # confusion Matrix from caret
    model_cm <- confusionMatrix(predict(train_model, subset(test_heart, select = -c(target))), test_heart$target)
    
    print(paste(label, 'Results'))
    print(model_cm)
    
    
    duration <- (proc.time() - timer)[[3]]
  
    #metrics
    accuracy <- model_cm$overall[[1]]
    auc <- as.numeric(auc(roc(test_heart$target, factor(predict(train_model, test_heart), ordered = TRUE))))
    sensitivity <- model_cm$byClass[[1]]
    specificity <- model_cm$byClass[[2]]
    precision <- model_cm$byClass[[5]]
    recall <- model_cm$byClass[[6]]
    f1_score <- model_cm$byClass[[7]]
    
  # bootstrap
  } else if (grepl("Boot", label, fixed=TRUE)){
    
    
    train_model = train(
    form = target ~ .,
    data = heart,
    trControl = tr,
    method = train_type
    )
    
    duration <- (proc.time() - timer)[[3]]
    
    accuracy <- c()
    auc <- c()
    sensitivity <- c()
    specificity <- c()
    precision <- c()
    recall <- c()
    f1_score <- c()
    i <- 1
    
    pred_df <- train_model$pred
    for (resample in unique(pred_df$Resample)){
      temp <- filter(pred_df, Resample == resample)
      model_cm <- confusionMatrix(temp$pred, temp$obs)
      accuracy[i] <- model_cm$overall[[1]]
      auc[[i]] <- auc(roc(as.numeric(temp$pred, ordered = TRUE), as.numeric(temp$obs, ordered = TRUE)))
      sensitivity[[i]] <- model_cm$byClass[[1]]
      specificity[[i]] <- model_cm$byClass[[2]]
      precision[[i]] <- model_cm$byClass[[5]]
      recall[[i]] <- model_cm$byClass[[6]]
      f1_score[[i]] <- model_cm$byClass[[7]]
      i <- i + 1
    }
  
    accuracy <- mean(accuracy)
    auc <- mean(auc)
    sensitivity <- mean(sensitivity)
    specificity <- mean(specificity)
    precision <- mean(precision)
    recall <- mean(recall)
    f1_score <- mean(f1_score)
    
    
  # random forest
  } else if (grepl("RF", label, fixed=TRUE)){
  # train model
    train_model = train(
    form = target ~ .,
    data = train_heart,
    trControl = tr,
    ntree = as.numeric(str_sub(label, start= -2)),
    method = train_type
    )
    print(train_model)
    
 # confusion Matrix from caret

    model_cm <- confusionMatrix(predict(train_model, subset(test_heart, select = -c(target))), test_heart$target)
    
    print(paste(label, 'Results'))
    print(model_cm)
    
    duration <- (proc.time() - timer)[[3]]
  
    # metrics
    accuracy <- model_cm$overall[[1]]
    auc <- as.numeric(auc(roc(test_heart$target, factor(predict(train_model, test_heart), ordered = TRUE))))
    sensitivity <- model_cm$byClass[[1]]
    specificity <- model_cm$byClass[[2]]
    precision <- model_cm$byClass[[5]]
    recall <- model_cm$byClass[[6]]
    f1_score <- model_cm$byClass[[7]]
    
    
  # cross validation for 5 fold and 10 fold
  } else {
    
  train_model = train(
  form = target ~ .,
  data = heart,
  trControl = tr,
  method = train_type
  )

  print(train_model)
  
  #confusion matrix
  model_cm <- confusionMatrix(train_model$pred[order(train_model$pred$rowIndex),]$pred, heart$target)
  
  print(paste(label, 'Results'))
  print(model_cm)
  
 
  duration <- (proc.time() - timer)[[3]]

  # metrics
  accuracy <- model_cm$overall[[1]]
  auc <- as.numeric(auc(roc(test_heart$target, factor(predict(train_model, test_heart), ordered = TRUE))))
  sensitivity <- model_cm$byClass[[1]]
  specificity <- model_cm$byClass[[2]]
  precision <- model_cm$byClass[[5]]
  recall <- model_cm$byClass[[6]]
  f1_score <- model_cm$byClass[[7]]
  
  
  }
  
  #combine metric rows
  result <- rbind(accuracy,
                        auc,
                        sensitivity,
                        specificity,
                        precision,
                        recall,
                        f1_score,
                        duration)
  colnames(result) <- c(label)
  return(result)
  
}

Base Decision Tree

#input for function 'rpart' used for decision tree base. 
#label = "Decision Tree Base"
base_decision_tree <- cm_calc("rpart", trainControl(method="none"), "Decision Tree Base")

## CART 
## 
## 242 samples
##  13 predictor
##   2 classes: '0', '1' 
## 
## No pre-processing
## Resampling: None 
## [1] "Decision Tree Base Results"
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0  0  0
##          1 27 34
##                                           
##                Accuracy : 0.5574          
##                  95% CI : (0.4245, 0.6845)
##     No Information Rate : 0.5574          
##     P-Value [Acc > NIR] : 0.5531          
##                                           
##                   Kappa : 0               
##                                           
##  Mcnemar's Test P-Value : 5.624e-07       
##                                           
##             Sensitivity : 0.0000          
##             Specificity : 1.0000          
##          Pos Pred Value :    NaN          
##          Neg Pred Value : 0.5574          
##              Prevalence : 0.4426          
##          Detection Rate : 0.0000          
##    Detection Prevalence : 0.0000          
##       Balanced Accuracy : 0.5000          
##                                           
##        'Positive' Class : 0               
##

Base Support Vector Machine

#input for function 'svmLinear' used for svm base. 
#label = "SVM Base"
svm_base <- cm_calc("svmLinear", trainControl(method="none"), "SVM Base")

## Support Vector Machines with Linear Kernel 
## 
## 242 samples
##  13 predictor
##   2 classes: '0', '1' 
## 
## No pre-processing
## Resampling: None 
## [1] "SVM Base Results"
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 20  2
##          1  7 32
##                                           
##                Accuracy : 0.8525          
##                  95% CI : (0.7383, 0.9302)
##     No Information Rate : 0.5574          
##     P-Value [Acc > NIR] : 8.993e-07       
##                                           
##                   Kappa : 0.6952          
##                                           
##  Mcnemar's Test P-Value : 0.1824          
##                                           
##             Sensitivity : 0.7407          
##             Specificity : 0.9412          
##          Pos Pred Value : 0.9091          
##          Neg Pred Value : 0.8205          
##              Prevalence : 0.4426          
##          Detection Rate : 0.3279          
##    Detection Prevalence : 0.3607          
##       Balanced Accuracy : 0.8410          
##                                           
##        'Positive' Class : 0               
##

Cross Validation of 5

Decision Tree

#input for function 'rpart' used for decision tree 
# method type 'cv' denotes cross validation of 5
#label = "Decision Tree 5 Fold"
decision_tree_5fold <- cm_calc("rpart", tr = trainControl(method = "cv", number = 5, savePredictions = 'final'), "Decision Tree 5 Fold")

## CART 
## 
## 303 samples
##  13 predictor
##   2 classes: '0', '1' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 243, 242, 242, 243, 242 
## Resampling results across tuning parameters:
## 
##   cp          Accuracy   Kappa    
##   0.03623188  0.7621311  0.5186339
##   0.03985507  0.7391803  0.4718201
##   0.48550725  0.6400000  0.2408988
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.03623188.
## [1] "Decision Tree 5 Fold Results"
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0  98  32
##          1  40 133
##                                           
##                Accuracy : 0.7624          
##                  95% CI : (0.7104, 0.8092)
##     No Information Rate : 0.5446          
##     P-Value [Acc > NIR] : 3.29e-15        
##                                           
##                   Kappa : 0.5187          
##                                           
##  Mcnemar's Test P-Value : 0.4094          
##                                           
##             Sensitivity : 0.7101          
##             Specificity : 0.8061          
##          Pos Pred Value : 0.7538          
##          Neg Pred Value : 0.7688          
##              Prevalence : 0.4554          
##          Detection Rate : 0.3234          
##    Detection Prevalence : 0.4290          
##       Balanced Accuracy : 0.7581          
##                                           
##        'Positive' Class : 0               
##

SVM

#input for function 'svmLinear' used for svm
# method type 'cv' denotes cross validation of 5
#label = "SVM 5 Fold"
svm_5fold <- cm_calc("svmLinear", tr = trainControl(method = "cv", number = 5, savePredictions = 'final'), "SVM 5 Fold")

## Support Vector Machines with Linear Kernel 
## 
## 303 samples
##  13 predictor
##   2 classes: '0', '1' 
## 
## No pre-processing
## Resampling: Cross-Validated (5 fold) 
## Summary of sample sizes: 243, 242, 242, 243, 242 
## Resampling results:
## 
##   Accuracy  Kappa    
##   0.835082  0.6664592
## 
## Tuning parameter 'C' was held constant at a value of 1
## [1] "SVM 5 Fold Results"
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 110  22
##          1  28 143
##                                          
##                Accuracy : 0.835          
##                  95% CI : (0.7883, 0.875)
##     No Information Rate : 0.5446         
##     P-Value [Acc > NIR] : <2e-16         
##                                          
##                   Kappa : 0.6661         
##                                          
##  Mcnemar's Test P-Value : 0.4795         
##                                          
##             Sensitivity : 0.7971         
##             Specificity : 0.8667         
##          Pos Pred Value : 0.8333         
##          Neg Pred Value : 0.8363         
##              Prevalence : 0.4554         
##          Detection Rate : 0.3630         
##    Detection Prevalence : 0.4356         
##       Balanced Accuracy : 0.8319         
##                                          
##        'Positive' Class : 0              
##

Cross Validation of 10

Decision Tree

#input for function 'rpart' used for decision tree 
# method type 'cv' denotes cross validation of 10
#label = "Decision Tree 10 Fold"
decision_tree_10fold <- cm_calc("rpart", trainControl(method = "cv", number = 10, savePredictions = 'final'), "Decision Tree 10 Fold")

## CART 
## 
## 303 samples
##  13 predictor
##   2 classes: '0', '1' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 273, 272, 273, 273, 273, 273, ... 
## Resampling results across tuning parameters:
## 
##   cp          Accuracy   Kappa    
##   0.03623188  0.7493548  0.4924633
##   0.03985507  0.7493548  0.4917245
##   0.48550725  0.6560215  0.2761508
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was cp = 0.03985507.
## [1] "Decision Tree 10 Fold Results"
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0  96  34
##          1  42 131
##                                          
##                Accuracy : 0.7492         
##                  95% CI : (0.6964, 0.797)
##     No Information Rate : 0.5446         
##     P-Value [Acc > NIR] : 1.515e-13      
##                                          
##                   Kappa : 0.4919         
##                                          
##  Mcnemar's Test P-Value : 0.422          
##                                          
##             Sensitivity : 0.6957         
##             Specificity : 0.7939         
##          Pos Pred Value : 0.7385         
##          Neg Pred Value : 0.7572         
##              Prevalence : 0.4554         
##          Detection Rate : 0.3168         
##    Detection Prevalence : 0.4290         
##       Balanced Accuracy : 0.7448         
##                                          
##        'Positive' Class : 0              
##

SVM

#input for function 'svmLinear' used for svm
# method type 'cv' denotes cross validation of 10
#label = "SVM 10 Fold"
svm_10fold <- cm_calc("svmLinear", trainControl(method = "cv", number = 10, savePredictions = 'final'), "SVM 10 Fold")

## Support Vector Machines with Linear Kernel 
## 
## 303 samples
##  13 predictor
##   2 classes: '0', '1' 
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 273, 272, 273, 273, 273, 273, ... 
## Resampling results:
## 
##   Accuracy   Kappa   
##   0.8383871  0.671804
## 
## Tuning parameter 'C' was held constant at a value of 1
## [1] "SVM 10 Fold Results"
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction   0   1
##          0 107  18
##          1  31 147
##                                           
##                Accuracy : 0.8383          
##                  95% CI : (0.7919, 0.8779)
##     No Information Rate : 0.5446          
##     P-Value [Acc > NIR] : < 2e-16         
##                                           
##                   Kappa : 0.6714          
##                                           
##  Mcnemar's Test P-Value : 0.08648         
##                                           
##             Sensitivity : 0.7754          
##             Specificity : 0.8909          
##          Pos Pred Value : 0.8560          
##          Neg Pred Value : 0.8258          
##              Prevalence : 0.4554          
##          Detection Rate : 0.3531          
##    Detection Prevalence : 0.4125          
##       Balanced Accuracy : 0.8331          
##                                           
##        'Positive' Class : 0               
##

Bootstrap of 200 Resamples

Decision Tree

#input for function 'rpart' used for decision tree 
# method type 'boot' denotes bootstrap of 200 resample
#label = "Decision Tree Bootstrap"
decision_tree_boot <- cm_calc("rpart", trainControl(method="boot", number=200, savePredictions = 'final', returnResamp = 'final'), "Decision Tree Bootstrap")
print(decision_tree_boot)

##             Decision Tree Bootstrap
## accuracy                  0.7420239
## auc                              NA
## sensitivity                      NA
## specificity                      NA
## precision                        NA
## recall                           NA
## f1_score                         NA
## duration                  4.2100000

SVM

#input for function 'svmLinear' used for svm
# method type 'boot' denotes bootstrap of 200 resample
#label = "SVM Bootstrap"
svm_boot <- cm_calc("svmLinear", trainControl(method="boot", number=200, savePredictions = 'final', returnResamp = 'final'), "SVM Bootstrap")
print(svm_boot)

##             SVM Bootstrap
## accuracy         0.823332
## auc                    NA
## sensitivity            NA
## specificity            NA
## precision              NA
## recall                 NA
## f1_score               NA
## duration        12.130000

Final Results

df <- data.frame(cbind(base_decision_tree, decision_tree_5fold, decision_tree_10fold, decision_tree_boot, svm_base, svm_5fold, svm_10fold, svm_boot))

kable(df) %>%
  kable_styling(bootstrap_options = "bordered") %>%
  row_spec(0, bold = T, color = "black", background = "#7fcdbb")

	Decision.Tree.Base	Decision.Tree.5.Fold	Decision.Tree.10.Fold	Decision.Tree.Bootstrap	SVM.Base	SVM.5.Fold	SVM.10.Fold	SVM.Bootstrap
accuracy	0.557377	0.7623762	0.7491749	0.7420239	0.8524590	0.8349835	0.8382838	0.823332
auc	0.500000	0.7854031	0.7412854	NA	0.8409586	0.8300654	0.8300654	NA
sensitivity	0.000000	0.7101449	0.6956522	NA	0.7407407	0.7971014	0.7753623	NA
specificity	1.000000	0.8060606	0.7939394	NA	0.9411765	0.8666667	0.8909091	NA
precision	NA	0.7538462	0.7384615	NA	0.9090909	0.8333333	0.8560000	NA
recall	0.000000	0.7101449	0.6956522	NA	0.7407407	0.7971014	0.7753623	NA
f1_score	NA	0.7313433	0.7164179	NA	0.8163265	0.8148148	0.8136882	NA
duration	0.524000	0.6940000	0.8250000	4.2100000	0.9780000	1.4590000	0.6610000	12.130000

Part B - Random Forest

80/20 Split

set.seed(43)
x <- floor(0.8 * nrow(heart))
train_ind <- sample(seq_len(nrow(heart)), size = x)
train_heart <- heart[ train_ind,]
test_heart  <- heart[-train_ind,]

16 Trees

#input for function 'rf' used for randomForest

random_forest_16 <- cm_calc("rf", trainControl(), "RF 16")

## Random Forest 
## 
## 242 samples
##  13 predictor
##   2 classes: '0', '1' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 242, 242, 242, 242, 242, 242, ... 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##    2    0.7849624  0.5649484
##   12    0.7666805  0.5264895
##   22    0.7581384  0.5095112
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
## [1] "RF 16 Results"
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 18  2
##          1  9 32
##                                           
##                Accuracy : 0.8197          
##                  95% CI : (0.7002, 0.9064)
##     No Information Rate : 0.5574          
##     P-Value [Acc > NIR] : 1.469e-05       
##                                           
##                   Kappa : 0.6245          
##                                           
##  Mcnemar's Test P-Value : 0.07044         
##                                           
##             Sensitivity : 0.6667          
##             Specificity : 0.9412          
##          Pos Pred Value : 0.9000          
##          Neg Pred Value : 0.7805          
##              Prevalence : 0.4426          
##          Detection Rate : 0.2951          
##    Detection Prevalence : 0.3279          
##       Balanced Accuracy : 0.8039          
##                                           
##        'Positive' Class : 0               
##

64 Trees

#input for function 'rf' used for randomForest

random_forest_64 <- cm_calc("rf", trainControl(), "RF 64")

## Random Forest 
## 
## 242 samples
##  13 predictor
##   2 classes: '0', '1' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 242, 242, 242, 242, 242, 242, ... 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##    2    0.7992027  0.5938642
##   12    0.7671618  0.5274464
##   22    0.7542574  0.5012315
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
## [1] "RF 64 Results"
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 19  3
##          1  8 31
##                                           
##                Accuracy : 0.8197          
##                  95% CI : (0.7002, 0.9064)
##     No Information Rate : 0.5574          
##     P-Value [Acc > NIR] : 1.469e-05       
##                                           
##                   Kappa : 0.6274          
##                                           
##  Mcnemar's Test P-Value : 0.2278          
##                                           
##             Sensitivity : 0.7037          
##             Specificity : 0.9118          
##          Pos Pred Value : 0.8636          
##          Neg Pred Value : 0.7949          
##              Prevalence : 0.4426          
##          Detection Rate : 0.3115          
##    Detection Prevalence : 0.3607          
##       Balanced Accuracy : 0.8077          
##                                           
##        'Positive' Class : 0               
##

128 Trees

#input for function 'rf' used for randomForest

random_forest_128 <- cm_calc("rf", trainControl(), "RF 128")

## Random Forest 
## 
## 242 samples
##  13 predictor
##   2 classes: '0', '1' 
## 
## No pre-processing
## Resampling: Bootstrapped (25 reps) 
## Summary of sample sizes: 242, 242, 242, 242, 242, 242, ... 
## Resampling results across tuning parameters:
## 
##   mtry  Accuracy   Kappa    
##    2    0.7917296  0.5780993
##   12    0.7601981  0.5139962
##   22    0.7581195  0.5088020
## 
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
## [1] "RF 128 Results"
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction  0  1
##          0 18  3
##          1  9 31
##                                          
##                Accuracy : 0.8033         
##                  95% CI : (0.6816, 0.894)
##     No Information Rate : 0.5574         
##     P-Value [Acc > NIR] : 5.056e-05      
##                                          
##                   Kappa : 0.592          
##                                          
##  Mcnemar's Test P-Value : 0.1489         
##                                          
##             Sensitivity : 0.6667         
##             Specificity : 0.9118         
##          Pos Pred Value : 0.8571         
##          Neg Pred Value : 0.7750         
##              Prevalence : 0.4426         
##          Detection Rate : 0.2951         
##    Detection Prevalence : 0.3443         
##       Balanced Accuracy : 0.7892         
##                                          
##        'Positive' Class : 0              
##

Final Results

In comparison to Decision Tree from Part A, the randomForest results are overall better. However, randomForest does require more computing cost(elapsed time) for each run. SVM performed the best among all three experiments.

df2 <- data.frame(cbind(random_forest_16, random_forest_64, random_forest_128))
kable(df2) %>%
  kable_styling(bootstrap_options = "bordered") %>%
  row_spec(0, bold = T, color = "black", background = "#7fcdbb")

	RF.16	RF.64	RF.128
accuracy	0.8196721	0.8196721	0.8032787
auc	0.8039216	0.8077342	0.8224401
sensitivity	0.6666667	0.7037037	0.6666667
specificity	0.9411765	0.9117647	0.9117647
precision	0.9000000	0.8636364	0.8571429
recall	0.6666667	0.7037037	0.6666667
f1_score	0.7659574	0.7755102	0.7500000
duration	1.1550000	2.1220000	1.3610000

Part C - Conclusion

When comparing bootstrap for Decision Tree vs. SVM, SVM yielded stronger model performance but did require more computation time. When comparing cross validation for Decision Tree 5-Fold vs. Decision Tree 10-Fold vs. SVM 5-Fold vs. SVM-10 Fold, the 5-fold cross validation should be used for both classifiers. Cross validation provides stability to the model results. Razor also suggests that the 5-fold should be used as it is simpler in comparison to the 10-fold. Source

final_df <- data.frame(cbind(base_decision_tree, decision_tree_5fold, decision_tree_10fold, decision_tree_boot, svm_base, svm_5fold, svm_10fold, svm_boot,random_forest_16, random_forest_64, random_forest_128))
kable(final_df) %>%
  kable_styling(bootstrap_options = "bordered") %>%
  row_spec(0, bold = T, color = "black", background = "#7fcdbb")

	Decision.Tree.Base	Decision.Tree.5.Fold	Decision.Tree.10.Fold	Decision.Tree.Bootstrap	SVM.Base	SVM.5.Fold	SVM.10.Fold	SVM.Bootstrap	RF.16	RF.64	RF.128
accuracy	0.557377	0.7623762	0.7491749	0.7420239	0.8524590	0.8349835	0.8382838	0.823332	0.8196721	0.8196721	0.8032787
auc	0.500000	0.7854031	0.7412854	NA	0.8409586	0.8300654	0.8300654	NA	0.8039216	0.8077342	0.8224401
sensitivity	0.000000	0.7101449	0.6956522	NA	0.7407407	0.7971014	0.7753623	NA	0.6666667	0.7037037	0.6666667
specificity	1.000000	0.8060606	0.7939394	NA	0.9411765	0.8666667	0.8909091	NA	0.9411765	0.9117647	0.9117647
precision	NA	0.7538462	0.7384615	NA	0.9090909	0.8333333	0.8560000	NA	0.9000000	0.8636364	0.8571429
recall	0.000000	0.7101449	0.6956522	NA	0.7407407	0.7971014	0.7753623	NA	0.6666667	0.7037037	0.6666667
f1_score	NA	0.7313433	0.7164179	NA	0.8163265	0.8148148	0.8136882	NA	0.7659574	0.7755102	0.7500000
duration	0.524000	0.6940000	0.8250000	4.2100000	0.9780000	1.4590000	0.6610000	12.130000	1.1550000	2.1220000	1.3610000

Data 622 Homework 2

Aaron Zalki

12/5/2020

Part A

Load Heart Data

Create Function

Base Decision Tree

Base Support Vector Machine

Cross Validation of 5

Cross Validation of 10

Bootstrap of 200 Resamples

Final Results

Part B - Random Forest

Final Results

Part C - Conclusion