R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

library(mlbench)
## Warning: package 'mlbench' was built under R version 4.4.3
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Loading required package: lattice
library(lattice)

# Load the Pima Indians diabetes data set (provided by mlbench).
data(PimaIndiansDiabetes)

# Every candidate model is scored with 10-fold cross-validation repeated
# 3 times, i.e. 30 resamples per model.
train_control <- trainControl(method = "repeatedcv", number = 10, repeats = 3)

# The RNG is re-seeded with the same value immediately before EVERY train()
# call. train() draws its cross-validation folds from the RNG, so seeding only
# once would hand each model a different set of folds. resamples() and the
# paired tests in diff() assume that resample i refers to the same held-out
# rows for every model, which only holds when the folds are identical.
# NOTE: re-knit the document after this change; any summary/diff output
# rendered with the single-seed version was computed on unmatched folds.
resample_seed <- 7

# Classification tree.
set.seed(resample_seed)
fit.cart <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "rpart",
  trControl = train_control
)

# Linear discriminant analysis.
set.seed(resample_seed)
fit.lda <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "lda",
  trControl = train_control
)

# Support vector machine with a radial basis kernel.
set.seed(resample_seed)
fit.svm <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "svmRadial",
  trControl = train_control
)

# k-nearest neighbours.
set.seed(resample_seed)
fit.knn <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "knn",
  trControl = train_control
)

# Random forest.
set.seed(resample_seed)
fit.rf <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "rf",
  trControl = train_control
)

# Collect the per-resample Accuracy/Kappa of all five models for comparison.
results <- resamples(
  list(CART = fit.cart, LDA = fit.lda, SVM = fit.svm, KNN = fit.knn, RF = fit.rf)
)

summary(results)
## 
## Call:
## summary.resamples(object = results)
## 
## Models: CART, LDA, SVM, KNN, RF 
## Number of resamples: 30 
## 
## Accuracy 
##           Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## CART 0.6753247 0.7272727 0.7532468 0.7469697 0.7662338 0.7922078    0
## LDA  0.7105263 0.7532468 0.7662338 0.7755867 0.8051948 0.8441558    0
## SVM  0.6623377 0.7245813 0.7582023 0.7604295 0.8000256 0.8571429    0
## KNN  0.6710526 0.7166353 0.7272727 0.7382319 0.7500000 0.8311688    0
## RF   0.6883117 0.7402597 0.7662338 0.7691274 0.8051948 0.8421053    0
## 
## Kappa 
##           Min.   1st Qu.    Median      Mean   3rd Qu.      Max. NA's
## CART 0.2762566 0.3620724 0.4241878 0.4151867 0.4861107 0.5250000    0
## LDA  0.2996968 0.4131568 0.4614831 0.4791233 0.5429981 0.6357827    0
## SVM  0.1740924 0.3557056 0.4112320 0.4338005 0.5261585 0.6602487    0
## KNN  0.2484177 0.3406000 0.3866611 0.4017113 0.4353306 0.6260740    0
## RF   0.3036925 0.4070045 0.4562480 0.4770543 0.5531915 0.6286645    0
# Box-and-whisker plot of the resampled metrics; each panel gets its own
# x and y axis range.
bwplot(
  results,
  scales = list(
    x = list(relation = "free"),
    y = list(relation = "free")
  )
)

# Density of the resampled metrics, with individual resamples marked by "|".
densityplot(results, pch = "|")

# Dot plot of the resampled metrics per model.
dotplot(results)

# Scatter-plot matrix of the models' resampled metrics against each other.
splom(results)

# Pairwise differences between models and their significance tests.
diffs <- diff(results)
summary(diffs)
## 
## Call:
## summary.diff.resamples(object = diffs)
## 
## p-value adjustment: bonferroni 
## Upper diagonal: estimates of the difference
## Lower diagonal: p-value for H0: difference = 0
## 
## Accuracy 
##      CART     LDA       SVM       KNN       RF       
## CART          -0.028617 -0.013460  0.008738 -0.022158
## LDA  0.028794            0.015157  0.037355  0.006459
## SVM  1.000000 1.000000             0.022198 -0.008698
## KNN  1.000000 0.001726  0.624760            -0.030895
## RF   0.362334 1.000000  1.000000  0.052505           
## 
## Kappa 
##      CART    LDA       SVM       KNN       RF       
## CART         -0.063937 -0.018614  0.013475 -0.061868
## LDA  0.03352            0.045323  0.077412  0.002069
## SVM  1.00000 1.00000              0.032089 -0.043254
## KNN  1.00000 0.01666   1.00000             -0.075343
## RF   0.14265 1.00000   1.00000   0.02578
# Hold out 20% of the rows as a test set; createDataPartition() samples on the
# outcome column so both parts keep a similar class mix.
set.seed(101)
index <- createDataPartition(PimaIndiansDiabetes$diabetes, p = 0.8, list = FALSE)
train_set <- PimaIndiansDiabetes[index, ]
test_set <- PimaIndiansDiabetes[-index, ]

# Refit LDA on the training part only.
final_model <- train(diabetes ~ ., data = train_set, method = "lda")

# Predict the class of the held-out rows.
predictions <- predict(final_model, newdata = test_set)

# With two classes confusionMatrix() treats the FIRST factor level ("neg") as
# the positive class unless told otherwise, which would make Sensitivity,
# Pos Pred Value, etc. describe the non-diabetic class. The event of interest
# is a diabetes diagnosis, so "pos" is requested explicitly. The table,
# Accuracy, Kappa and McNemar's test are unaffected by this choice.
confusionMatrix(
  data = predictions,
  reference = test_set$diabetes,
  positive = "pos"
)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction neg pos
##        neg  83  24
##        pos  17  29
##                                           
##                Accuracy : 0.732           
##                  95% CI : (0.6545, 0.8003)
##     No Information Rate : 0.6536          
##     P-Value [Acc > NIR] : 0.02367         
##                                           
##                   Kappa : 0.3893          
##                                           
##  Mcnemar's Test P-Value : 0.34874         
##                                           
##             Sensitivity : 0.5472          
##             Specificity : 0.8300          
##          Pos Pred Value : 0.6304          
##          Neg Pred Value : 0.7757          
##              Prevalence : 0.3464          
##          Detection Rate : 0.1895          
##    Detection Prevalence : 0.3007          
##       Balanced Accuracy : 0.6886          
##                                           
##        'Positive' Class : pos             
## 
# Load the built-in iris data set and preview its first six rows.
data("iris")
head(iris, n = 6L)
##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa
# All iris models are scored with plain 10-fold cross-validation and selected
# on Accuracy.
control_iris <- trainControl(method = "cv", number = 10)
metric <- "Accuracy"

# The RNG is re-seeded with the same value immediately before EVERY train()
# call so that all five models are evaluated on identical folds; resamples()
# compares models resample-by-resample and assumes the folds match.
# NOTE: re-knit the document after this change; summary output rendered with
# the single-seed version was computed on unmatched folds.
iris_seed <- 42

# Linear discriminant analysis.
set.seed(iris_seed)
fit.lda.iris <- train(
  Species ~ .,
  data = iris,
  method = "lda",
  metric = metric,
  trControl = control_iris
)

# Classification tree.
set.seed(iris_seed)
fit.cart.iris <- train(
  Species ~ .,
  data = iris,
  method = "rpart",
  metric = metric,
  trControl = control_iris
)

# k-nearest neighbours.
set.seed(iris_seed)
fit.knn.iris <- train(
  Species ~ .,
  data = iris,
  method = "knn",
  metric = metric,
  trControl = control_iris
)

# Support vector machine with a radial basis kernel.
set.seed(iris_seed)
fit.svm.iris <- train(
  Species ~ .,
  data = iris,
  method = "svmRadial",
  metric = metric,
  trControl = control_iris
)

# Random forest.
set.seed(iris_seed)
fit.rf.iris <- train(
  Species ~ .,
  data = iris,
  method = "rf",
  metric = metric,
  trControl = control_iris
)

# Collect the per-fold Accuracy/Kappa of all five models for comparison.
results_iris <- resamples(
  list(
    LDA = fit.lda.iris,
    CART = fit.cart.iris,
    KNN = fit.knn.iris,
    SVM = fit.svm.iris,
    RF = fit.rf.iris
  )
)
summary(results_iris)
## 
## Call:
## summary.resamples(object = results_iris)
## 
## Models: LDA, CART, KNN, SVM, RF 
## Number of resamples: 10 
## 
## Accuracy 
##           Min.   1st Qu.    Median      Mean 3rd Qu. Max. NA's
## LDA  0.9333333 0.9500000 1.0000000 0.9800000       1    1    0
## CART 0.8000000 0.9333333 0.9333333 0.9400000       1    1    0
## KNN  0.8666667 0.9500000 1.0000000 0.9733333       1    1    0
## SVM  0.8666667 0.9333333 1.0000000 0.9666667       1    1    0
## RF   0.8666667 0.9333333 0.9666667 0.9600000       1    1    0
## 
## Kappa 
##      Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## LDA   0.9   0.925   1.00 0.97       1    1    0
## CART  0.7   0.900   0.90 0.91       1    1    0
## KNN   0.8   0.925   1.00 0.96       1    1    0
## SVM   0.8   0.900   1.00 0.95       1    1    0
## RF    0.8   0.900   0.95 0.94       1    1    0
# Visual comparison of the iris models' cross-validated metrics.

# Dot plot of the resampled metrics per model.
dotplot(x = results_iris)

# Box-and-whisker plot of the per-fold metrics.
bwplot(x = results_iris)

# Density of the per-fold metrics, with individual folds marked by "|".
densityplot(x = results_iris, pch = "|")

# Scatter-plot matrix of the models' per-fold metrics against each other.
splom(x = results_iris)

# Draw a simple random 80% of the iris rows for training; the remaining rows
# form the test set.
set.seed(123)
sample_index <- sample(x = seq_len(nrow(iris)), size = 0.8 * nrow(iris))
train_iris <- iris[sample_index, ]
test_iris <- iris[-sample_index, ]

# Fit LDA on the training rows and score it on the held-out rows.
model_iris <- train(
  Species ~ .,
  data = train_iris,
  method = "lda"
)
preds_iris <- predict(model_iris, newdata = test_iris)
confusionMatrix(data = preds_iris, reference = test_iris$Species)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         14         0
##   virginica       0          1         5
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.5             
##     P-Value [Acc > NIR] : 2.887e-08       
##                                           
##                   Kappa : 0.9464          
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            0.9333           1.0000
## Specificity                 1.0000            1.0000           0.9600
## Pos Pred Value              1.0000            1.0000           0.8333
## Neg Pred Value              1.0000            0.9375           1.0000
## Prevalence                  0.3333            0.5000           0.1667
## Detection Rate              0.3333            0.4667           0.1667
## Detection Prevalence        0.3333            0.4667           0.2000
## Balanced Accuracy           1.0000            0.9667           0.9800

Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.