# Fit a 3-nearest-neighbour classifier on a random two-thirds of iris and
# check how well it reproduces the labels of the rows it was fitted on.
data(iris)
## split in train/test
# Fix the RNG so the random split (and every statistic derived from it)
# is the same on every run.
set.seed(71)
# Hold out one third of the rows; sizes are derived from the data instead of
# being hard-coded (150 rows -> 50 held out, as before).
idx <- sample(nrow(iris), size = nrow(iris) %/% 3)
trainset <- iris[-idx, ]
testset <- iris[idx, ]
knn_model <- caret::knn3(Species ~ ., data = trainset, k = 3)
# predictions on trainset (same rows the model was fitted on, so this is an
# optimistic figure, not a held-out estimate)
predictions <- predict(knn_model, trainset, type = "class")
caret::confusionMatrix(predictions, trainset$Species)
Confusion Matrix and Statistics
Reference
Prediction setosa versicolor virginica
setosa 33 0 0
versicolor 0 33 1
virginica 0 1 32
Overall Statistics
Accuracy : 0.98
95% CI : (0.9296, 0.9976)
No Information Rate : 0.34
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.97
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: setosa Class: versicolor Class: virginica
Sensitivity 1.00 0.9706 0.9697
Specificity 1.00 0.9848 0.9851
Pos Pred Value 1.00 0.9706 0.9697
Neg Pred Value 1.00 0.9848 0.9851
Prevalence 0.33 0.3400 0.3300
Detection Rate 0.33 0.3300 0.3200
Detection Prevalence 0.33 0.3400 0.3300
Balanced Accuracy 1.00 0.9777 0.9774
# Score the held-out rows with the fitted k-NN model
predictions <- predict(knn_model, newdata = testset, type = "class")
# Cross-tabulate predicted against true species and print the summary statistics
caret::confusionMatrix(data = predictions, reference = testset$Species)
Confusion Matrix and Statistics
Reference
Prediction setosa versicolor virginica
setosa 17 0 0
versicolor 0 15 1
virginica 0 1 16
Overall Statistics
Accuracy : 0.96
95% CI : (0.8629, 0.9951)
No Information Rate : 0.34
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.94
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: setosa Class: versicolor Class: virginica
Sensitivity 1.00 0.9375 0.9412
Specificity 1.00 0.9706 0.9697
Pos Pred Value 1.00 0.9375 0.9412
Neg Pred Value 1.00 0.9706 0.9697
Prevalence 0.34 0.3200 0.3400
Detection Rate 0.34 0.3000 0.3200
Detection Prevalence 0.34 0.3200 0.3400
Balanced Accuracy 1.00 0.9540 0.9554
library(randomForest)
library(dplyr)
# Fit a random forest on a random two-thirds of iris and report how much each
# predictor contributes to the fitted forest.
data(iris)
# Fix the RNG so both the split and the forest are reproducible.
set.seed(71)
# Hold out one third of the rows; sizes are derived from the data instead of
# being hard-coded (150 rows -> 50 held out, as before).
idx <- sample(nrow(iris), size = nrow(iris) %/% 3)
trainset <- iris[-idx, ]
testset <- iris[idx, ]
# Alternative: single-predictor forest. Kept as a comment because fitting it
# here only produced a model that was overwritten on the very next line.
# iris.rf <- randomForest(Species ~ Petal.Width, data = trainset)
iris.rf <- randomForest(Species ~ ., data = trainset)
# Calculate importance
importance(iris.rf)
MeanDecreaseGini
Sepal.Length 7.102603
Sepal.Width 1.416911
Petal.Length 27.407175
Petal.Width 29.662431
# Predict the species of the held-out rows with the fitted forest
predictions <- predict(iris.rf, newdata = testset)
# Compare predicted and true species on the held-out rows
caret::confusionMatrix(
  data = as.factor(predictions),
  reference = testset$Species
)
Confusion Matrix and Statistics
Reference
Prediction setosa versicolor virginica
setosa 12 0 0
versicolor 0 20 1
virginica 0 1 16
Overall Statistics
Accuracy : 0.96
95% CI : (0.8629, 0.9951)
No Information Rate : 0.42
P-Value [Acc > NIR] : 3.498e-16
Kappa : 0.9385
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: setosa Class: versicolor Class: virginica
Sensitivity 1.00 0.9524 0.9412
Specificity 1.00 0.9655 0.9697
Pos Pred Value 1.00 0.9524 0.9412
Neg Pred Value 1.00 0.9655 0.9697
Prevalence 0.24 0.4200 0.3400
Detection Rate 0.24 0.4000 0.3200
Detection Prevalence 0.24 0.4200 0.3400
Balanced Accuracy 1.00 0.9589 0.9554
El paquete rpart implementa un tipo de árbol de decisión (CART).
library(rpart)
library(rpart.plot)
# Fit a classification tree with rpart on a random two-thirds of iris, plot
# it, and evaluate it on the held-out third.
# Fix the RNG so the random split (and the reported statistics) can be
# reproduced from run to run.
set.seed(71)
# Hold out one third of the rows; sizes are derived from the data instead of
# being hard-coded (150 rows -> 50 held out, as before).
idx <- sample(nrow(iris), size = nrow(iris) %/% 3)
trainset <- iris[-idx, ]
testset <- iris[idx, ]
iris.rpart <- rpart(Species ~ ., data = trainset)
# Draw the fitted tree
rpart.plot(iris.rpart)
# type = "class" asks for predicted labels rather than class probabilities
predictions <- predict(iris.rpart, testset, type = "class")
caret::confusionMatrix(predictions, testset$Species)
Confusion Matrix and Statistics
Reference
Prediction setosa versicolor virginica
setosa 20 0 0
versicolor 0 15 0
virginica 0 2 13
Overall Statistics
Accuracy : 0.96
95% CI : (0.8629, 0.9951)
No Information Rate : 0.4
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.9394
Mcnemar's Test P-Value : NA
Statistics by Class:
Class: setosa Class: versicolor Class: virginica
Sensitivity 1.0 0.8824 1.0000
Specificity 1.0 1.0000 0.9459
Pos Pred Value 1.0 1.0000 0.8667
Neg Pred Value 1.0 0.9429 1.0000
Prevalence 0.4 0.3400 0.2600
Detection Rate 0.4 0.3000 0.2600
Detection Prevalence 0.4 0.3000 0.3000
Balanced Accuracy 1.0 0.9412 0.9730
data(iris)
library(naivebayes)
naivebayes 0.9.7 loaded
# Fit a naive Bayes classifier for Species using every other iris column.
m <- naivebayes::naive_bayes(Species ~ ., data = iris)
# Per-row class probabilities. Only the predictor columns are passed as
# newdata: handing over the full data frame (including the Species response)
# made predict() warn about features that have no probability table.
preds <- predict(
  m,
  newdata = iris[, names(iris) != "Species", drop = FALSE],
  type = "prob"
)
Warning: predict.naive_bayes(): more features in the newdata are provided as there are probability tables in the object. Calculation is performed based on features to be found in the tables.
# For every row of the probability matrix, return the name of the column
# (class) holding the largest probability.
apply(
  preds,
  MARGIN = 1,
  FUN = function(class_probs) {
    winner <- which.max(class_probs)
    names(class_probs)[winner]
  }
)
[1] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
[9] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
[17] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
[25] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
[33] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
[41] "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa" "setosa"
[49] "setosa" "setosa" "versicolor" "versicolor" "virginica" "versicolor" "versicolor" "versicolor"
[57] "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "versicolor"
[65] "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "virginica" "versicolor"
[73] "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "virginica" "versicolor" "versicolor"
[81] "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "versicolor"
[89] "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "versicolor" "versicolor"
[97] "versicolor" "versicolor" "versicolor" "versicolor" "virginica" "virginica" "virginica" "virginica"
[105] "virginica" "virginica" "versicolor" "virginica" "virginica" "virginica" "virginica" "virginica"
[113] "virginica" "virginica" "virginica" "virginica" "virginica" "virginica" "virginica" "versicolor"
[121] "virginica" "virginica" "virginica" "virginica" "virginica" "virginica" "virginica" "virginica"
[129] "virginica" "virginica" "virginica" "virginica" "virginica" "versicolor" "virginica" "virginica"
[137] "virginica" "virginica" "virginica" "virginica" "virginica" "virginica" "virginica" "virginica"
[145] "virginica" "virginica" "virginica" "virginica" "virginica" "virginica"
Lo mismo, pero utilizando el paquete caret, que nos permite cambiar fácilmente el algoritmo a utilizar.
library(caret)
Loading required package: ggplot2
Attaching package: ‘ggplot2’
The following object is masked from ‘package:randomForest’:
margin
Loading required package: lattice
# Train the classifier through caret with 5-fold cross-validation. Swapping
# the `method` string below is all that is needed to try another algorithm.
# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
# reassigned, so relying on them is fragile.
ctrl_fast <- trainControl(
  method = "cv",
  number = 5,
  verboseIter = TRUE,
  classProbs = FALSE,
  allowParallel = TRUE
)
train_formula <- formula(Species ~ .)
# Fix the RNG before train() so the random fold assignment can be reproduced.
set.seed(71)
model_caret <- train(
  train_formula,
  data = iris,
  method = "naive_bayes",
  # method = "rf",
  # method = "knn",
  trControl = ctrl_fast
)
# Print the resampling summary, then the model refitted on the full data
model_caret
model_caret$finalModel