## Load the packages
library(caret)
## Warning: package 'caret' was built under R version 4.2.1
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.2.1
## Loading required package: lattice
library(klaR)
## Warning: package 'klaR' was built under R version 4.2.2
## Loading required package: MASS
library(ggplot2)
library(lattice)
## Load Iris dataset
data(iris)
## Seed the RNG so the random 80/20 partition (and therefore the confusion
## matrix) is identical on every run. Output recorded from earlier, unseeded
## runs may differ from what this seed produces.
set.seed(7)
trainIndex <- createDataPartition(iris$Species, p = 0.80, list = FALSE)
## Rows selected by the partition train the model; the rest are held out
dataTrain <- iris[trainIndex, ]
dataTest <- iris[-trainIndex, ]
## Fit a naive Bayes classifier (klaR) with Species as the response
fit <- NaiveBayes(Species ~ ., data = dataTrain)
## Predict on the held-out rows; predictors are chosen by name rather than by
## position so a reordered data frame cannot leak the response column
Predictions <- predict(fit, dataTest[, setdiff(names(dataTest), "Species")])
## Cross-tabulate predicted against true classes
confusionMatrix(Predictions$class, dataTest$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 10 0 0
## versicolor 0 10 0
## virginica 0 0 10
##
## Overall Statistics
##
## Accuracy : 1
## 95% CI : (0.8843, 1)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : 4.857e-15
##
## Kappa : 1
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 1.0000
## Specificity 1.0000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 1.0000
## Neg Pred Value 1.0000 1.0000 1.0000
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3333 0.3333
## Detection Prevalence 0.3333 0.3333 0.3333
## Balanced Accuracy 1.0000 1.0000 1.0000
library(caret)
data(iris)
## define the training control: 10-fold cross-validation
## NOTE: this object has the same name as caret's trainControl() function.
## Calls still resolve to the function, and later sections refer to the
## object by this name, so the name is kept.
trainControl <- trainControl(method = "cv", number = 10)
## Seed the RNG before resampling, as the other train() calls in this script
## do, so fold assignment and the random forest are reproducible
set.seed(7)
## Evaluate the model
fit2 <- train(Species ~ ., data = iris, trControl = trainControl, method = "rf")
## display the results
print(fit2)
## Random Forest
##
## 150 samples
## 4 predictor
## 3 classes: 'setosa', 'versicolor', 'virginica'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 135, 135, 135, 135, 135, 135, ...
## Resampling results across tuning parameters:
##
## mtry Accuracy Kappa
## 2 0.96 0.94
## 3 0.96 0.94
## 4 0.96 0.94
##
## Accuracy was used to select the optimal model using the largest value.
## The final value used for the model was mtry = 2.
## Plot the resampling results of the tuned model stored in fit2
plot(fit2)
## Calculating Accuracy and Kappa metrics - using the Pima Indians dataset
## (the heading must stay on one comment line: a wrapped word would be
## evaluated as an R symbol and stop the script)
library(caret)
## mlbench provides the PimaIndiansDiabetes data frame
library(mlbench)
## Warning: package 'mlbench' was built under R version 4.2.1
library(ggplot2)
## Load the data and show its structure
data(PimaIndiansDiabetes)
str(PimaIndiansDiabetes)
## 'data.frame': 768 obs. of 9 variables:
## $ pregnant: num 6 1 8 1 0 5 3 10 2 8 ...
## $ glucose : num 148 85 183 89 137 116 78 115 197 125 ...
## $ pressure: num 72 66 64 66 40 74 50 0 70 96 ...
## $ triceps : num 35 29 0 23 35 0 32 0 45 0 ...
## $ insulin : num 0 0 0 94 168 0 88 0 543 0 ...
## $ mass : num 33.6 26.6 23.3 28.1 43.1 25.6 31 35.3 30.5 0 ...
## $ pedigree: num 0.627 0.351 0.672 0.167 2.288 ...
## $ age : num 50 31 32 21 33 30 26 29 53 54 ...
## $ diabetes: Factor w/ 2 levels "neg","pos": 2 1 2 1 2 1 2 1 2 2 ...
## Prepare resampling method: 5-fold cross-validation
trainControl2 <- trainControl(method = "cv", number = 5)
## Seed the RNG so the fold assignment is reproducible
set.seed(7)
## Logistic regression (glm) of diabetes on all other columns, scored by
## Accuracy. Pass trainControl2, the 5-fold control defined above; the call
## previously passed `trainControl`, the 10-fold object from the random-forest
## section, so output recorded from earlier runs reports 10 folds.
fit3 <- train(
  diabetes ~ .,
  data = PimaIndiansDiabetes,
  method = "glm",
  metric = "Accuracy",
  trControl = trainControl2
)
print(fit3)
## Generalized Linear Model
##
## 768 samples
## 8 predictor
## 2 classes: 'neg', 'pos'
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 691, 691, 691, 691, 691, 691, ...
## Resampling results:
##
## Accuracy Kappa
## 0.7839884 0.5003949
## Attach caret, which supplies train() and trainControl()
library("caret")
## Bring the longley data frame into the workspace
data("longley")
## Show each column's type and leading values
str(object = longley)
## 'data.frame': 16 obs. of 7 variables:
## $ GNP.deflator: num 83 88.5 88.2 89.5 96.2 ...
## $ GNP : num 234 259 258 285 329 ...
## $ Unemployed : num 236 232 368 335 210 ...
## $ Armed.Forces: num 159 146 162 165 310 ...
## $ Population : num 108 109 110 111 112 ...
## $ Year : int 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 ...
## $ Employed : num 60.3 61.1 60.2 61.2 63.2 ...
## Prepare resampling method: 5-fold cross-validation
trainControl3 <- trainControl(method = "cv", number = 5)
## Seed the RNG so the fold assignment is reproducible
set.seed(7)
## Linear regression of Employed on all other columns, scored by RMSE.
## Pass trainControl3, the 5-fold control defined above; the call previously
## passed `trainControl`, the 10-fold object from the random-forest section.
## With only 16 rows that left hold-out folds of one or two observations and
## caret warned "There were missing values in resampled performance
## measures." Output recorded from earlier runs reflects that 10-fold run.
fit4 <- train(
  Employed ~ .,
  data = longley,
  method = "lm",
  metric = "RMSE",
  trControl = trainControl3
)
print(fit4)
## Linear Regression
##
## 16 samples
## 6 predictor
##
## No pre-processing
## Resampling: Cross-Validated (10 fold)
## Summary of sample sizes: 14, 14, 14, 14, 14, 15, ...
## Resampling results:
##
## RMSE Rsquared MAE
## 0.3357221 1 0.3140253
##
## Tuning parameter 'intercept' was held constant at a value of TRUE