CW041525

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(mlbench)

## Warning: package 'mlbench' was built under R version 4.4.3

library(caret)

## Warning: package 'caret' was built under R version 4.4.3

## Loading required package: ggplot2

## Warning: package 'ggplot2' was built under R version 4.4.3

## Loading required package: lattice

## Warning: package 'lattice' was built under R version 4.4.3

library(ggplot2)
library(lattice)
library(randomForest)

## Warning: package 'randomForest' was built under R version 4.4.3

## randomForest 4.7-1.2

## Type rfNews() to see new features/changes/bug fixes.

## 
## Attaching package: 'randomForest'

## The following object is masked from 'package:ggplot2':
## 
##     margin

# Compare five classifiers on the Pima Indians Diabetes data using
# 10-fold cross-validation repeated 3 times (30 resamples per model).
data(PimaIndiansDiabetes)

# Stored as `train_control` so the variable does not shadow caret's
# trainControl() function.
train_control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# The seed is reset to the same value before every fit so that each model
# is evaluated on the same resampling partitions, which keeps the
# comparison made by resamples() paired.
set.seed(7)
fit.cart <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "rpart",
  trControl = train_control
)

set.seed(7)
fit.lda <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "lda",
  trControl = train_control
)

set.seed(7)
fit.svm <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "svmRadial",
  trControl = train_control
)

set.seed(7)
fit.knn <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "knn",
  trControl = train_control
)

set.seed(7)
fit.rf <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "rf",
  trControl = train_control
)

# Collect the per-resample Accuracy and Kappa of all five models.
results_pima <- resamples(
  list(
    CART = fit.cart,
    LDA = fit.lda,
    SVM = fit.svm,
    KNN = fit.knn,
    RF = fit.rf
  )
)

summary(results_pima)

## 
## Call:
## summary.resamples(object = results_pima)
## 
## Models: CART, LDA, SVM, KNN, RF 
## Number of resamples: 30 
## 
## Accuracy 
##           Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## CART 0.6753247 0.7272727 0.7532468 0.7469697 0.7662338 0.7922078    0
## LDA  0.7142857 0.7508117 0.7662338 0.7791069 0.8000256 0.9078947    0
## SVM  0.7236842 0.7508117 0.7631579 0.7712919 0.7915243 0.8947368    0
## KNN  0.6753247 0.7036056 0.7272727 0.7369503 0.7662338 0.8311688    0
## RF   0.6842105 0.7305195 0.7597403 0.7638528 0.8019481 0.8421053    0
## 
## Kappa 
##           Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## CART 0.2762566 0.3620724 0.4241878 0.4151867 0.4861107 0.5250000    0
## LDA  0.3011551 0.4192537 0.4662541 0.4862025 0.5308596 0.7812500    0
## SVM  0.3391908 0.3997116 0.4460612 0.4621585 0.5234605 0.7475083    0
## KNN  0.2553191 0.3406000 0.3841761 0.3984995 0.4539789 0.6195363    0
## RF   0.2951613 0.3778304 0.4640696 0.4630809 0.5447483 0.6426332    0

# Visual comparison of the resampling results held in `results_pima`.
# Each call is left at top level so the lattice object it returns is
# auto-printed.

# Box-and-whisker plot of the resampled metrics, one row per model.
bwplot(results_pima)

# Density of the resampled metrics; pch = "|" marks individual resamples.
densityplot(results_pima, pch = "|")

# Dot plot of the resampled metrics per model.
dotplot(results_pima)

# Scatter-plot matrix comparing the models' resampled metrics pairwise.
splom(results_pima)

# Compare five classifiers on iris using a single round of 10-fold
# cross-validation, selecting models by accuracy.

# Stored as `train_control` so the variable does not shadow caret's
# trainControl() function.
train_control <- trainControl(method = "cv", number = 10)
metric <- "Accuracy"

# The seed is reset to the same value before every fit so that each model
# is evaluated on the same folds, which keeps the comparison made by
# resamples() paired.
set.seed(7)
fit.lda <- train(
  Species ~ .,
  data = iris,
  method = "lda",
  metric = metric,
  trControl = train_control
)

set.seed(7)
fit.cart <- train(
  Species ~ .,
  data = iris,
  method = "rpart",
  metric = metric,
  trControl = train_control
)

set.seed(7)
fit.knn <- train(
  Species ~ .,
  data = iris,
  method = "knn",
  metric = metric,
  trControl = train_control
)

set.seed(7)
fit.svm <- train(
  Species ~ .,
  data = iris,
  method = "svmRadial",
  metric = metric,
  trControl = train_control
)

set.seed(7)
fit.rf <- train(
  Species ~ .,
  data = iris,
  method = "rf",
  metric = metric,
  trControl = train_control
)

# Collect the per-fold Accuracy and Kappa of all five models.
results_iris <- resamples(
  list(
    lda = fit.lda,
    cart = fit.cart,
    knn = fit.knn,
    svm = fit.svm,
    rf = fit.rf
  )
)

summary(results_iris)

## 
## Call:
## summary.resamples(object = results_iris)
## 
## Models: lda, cart, knn, svm, rf 
## Number of resamples: 10 
## 
## Accuracy 
##           Min.   1st Qu.    Median      Mean   3rd Qu. Max. NA's
## lda  0.9333333 0.9500000 1.0000000 0.9800000 1.0000000    1    0
## cart 0.8666667 0.9333333 0.9333333 0.9400000 0.9833333    1    0
## knn  0.8666667 0.9333333 1.0000000 0.9666667 1.0000000    1    0
## svm  0.8000000 0.9333333 0.9666667 0.9466667 1.0000000    1    0
## rf   0.8666667 0.9333333 0.9666667 0.9600000 1.0000000    1    0
## 
## Kappa 
##      Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## lda   0.9   0.925   1.00 0.97   1.000    1    0
## cart  0.8   0.900   0.90 0.91   0.975    1    0
## knn   0.8   0.900   1.00 0.95   1.000    1    0
## svm   0.7   0.900   0.95 0.92   1.000    1    0
## rf    0.8   0.900   0.95 0.94   1.000    1    0

# Visual comparison of the resampling results held in `results_iris`.
# Each call is left at top level so the lattice object it returns is
# auto-printed.

# Dot plot of the resampled metrics per model.
dotplot(results_iris)

# Box-and-whisker plot of the resampled metrics, one row per model.
bwplot(results_iris)

# Density of the resampled metrics; pch = "|" marks individual resamples.
densityplot(results_iris, pch = "|")

# Scatter-plot matrix comparing the models' resampled metrics pairwise.
splom(results_iris)

# Random 80/20 train/test split of iris (simple random sampling, not
# stratified by Species).
set.seed(123)

# seq_len() stays correct even for a zero-row data frame (1:0 would yield
# c(1, 0)), and floor() makes the integer sample size explicit instead of
# relying on sample() truncating a fractional size.
sample_index <- sample(seq_len(nrow(iris)), size = floor(0.8 * nrow(iris)))

train_iris <- iris[sample_index, ]
# Negative indexing keeps every row that was not drawn for training.
test_iris <- iris[-sample_index, ]

# Fit linear discriminant analysis on the training rows only.
model_iris <- train(
  Species ~ .,
  data = train_iris,
  method = "lda"
)

# Predict the held-out rows and tabulate predictions against the true
# species.
preds_iris <- predict(model_iris, newdata = test_iris)
confusionMatrix(data = preds_iris, reference = test_iris[["Species"]])

## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         14         0
##   virginica       0          1         5
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : 2.887e-08       
##                                           
##                   Kappa : 0.9464          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9333           1.0000
## Specificity                 1.0000            1.0000           0.9600
## Pos Pred Value              1.0000            1.0000           0.8333
## Neg Pred Value              1.0000            0.9375           1.0000
## Prevalence                  0.3333            0.5000           0.1667
## Detection Rate              0.3333            0.4667           0.1667
## Detection Prevalence        0.3333            0.4667           0.2000
## Balanced Accuracy           1.0000            0.9667           0.9800

Note that adding the echo = FALSE parameter to a code chunk prevents printing of the R code that generated its output.

CW041525

Kamal Khalil

2025-04-15

R Markdown