Comenzamos fijando la semilla del generador de números aleatorios para que los resultados sean reproducibles:
# Fix the random-number generator seed so every run draws the same numbers.
set.seed(seed = 13579)
Instalamos el paquete mlbench y lo cargamos:
# Install mlbench only when it is missing: an unconditional install.packages()
# would download and reinstall the package on every run of the script.
if (!requireNamespace("mlbench", quietly = TRUE)) {
  install.packages("mlbench")
}
# Attach the package so its data sets can be loaded with data().
library("mlbench")
Cargamos el data.frame BreastCancer y eliminamos las filas con valores perdidos:
# Load the BreastCancer data set into the workspace.
data("BreastCancer")
# Drop every row that contains at least one missing value and rebuild the
# result as a plain data frame.
BreastCancer <- data.frame(na.omit(BreastCancer))
Eliminamos la primera columna del data.frame, que corresponde al identificador (Id):
# Keep every column except the one named "Id".
BreastCancer <- BreastCancer[, colnames(BreastCancer) != "Id"]
# Print the dimensions (rows, columns) of the resulting data frame.
dim(BreastCancer)
## [1] 683 10
# Number of observations left in the data frame.
n <- nrow(BreastCancer)
# Print a per-column summary of the data.
summary(BreastCancer)
## Cl.thickness Cell.size Cell.shape Marg.adhesion Epith.c.size
## 1 :139 1 :373 1 :346 1 :393 2 :376
## 5 :128 10 : 67 2 : 58 2 : 58 3 : 71
## 3 :104 3 : 52 10 : 58 3 : 58 4 : 48
## 4 : 79 2 : 45 3 : 53 10 : 55 1 : 44
## 10 : 69 4 : 38 4 : 43 4 : 33 6 : 40
## 2 : 50 5 : 30 5 : 32 8 : 25 5 : 39
## (Other):114 (Other): 78 (Other): 93 (Other): 61 (Other): 65
## Bare.nuclei Bl.cromatin Normal.nucleoli Mitoses Class
## 1 :402 3 :161 1 :432 1 :563 benign :444
## 10 :132 2 :160 10 : 60 2 : 35 malignant:239
## 2 : 30 1 :150 3 : 42 3 : 33
## 5 : 30 7 : 71 2 : 36 10 : 14
## 3 : 28 4 : 39 8 : 23 4 : 12
## 8 : 21 5 : 34 6 : 22 7 : 9
## (Other): 40 (Other): 68 (Other): 68 (Other): 17
# Row indices 1..n. seq_len() yields an empty vector when n is 0, whereas
# 1:n would yield c(1, 0).
indices <- seq_len(n)
Realizamos la partición 70% entrenamiento - 30% test
# Draw, without replacement, the row numbers of the test set; its size is
# the integer part of 30% of n.
inditest <- sample(seq_len(n), size = (3 * n) %/% 10)
# The drawn rows form the test set; all remaining rows form the training set.
BreastCancer.test <- BreastCancer[inditest, ]
BreastCancer.entr <- BreastCancer[-inditest, ]
# Install e1071 only when it is missing: an unconditional install.packages()
# would download and reinstall the package on every run of the script.
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071")
}
# Attach the package that provides naiveBayes().
library(e1071)
# Fit a naive Bayes classifier for Class using every other column of the
# training set as predictor.
modeloNB <- naiveBayes(Class ~ ., data = BreastCancer.entr)
# Show the a-priori and conditional probability tables of the fitted model.
print(modeloNB)
##
## Naive Bayes Classifier for Discrete Predictors
##
## Call:
## naiveBayes.default(x = X, y = Y, laplace = laplace)
##
## A-priori probabilities:
## Y
## benign malignant
## 0.6430063 0.3569937
##
## Conditional probabilities:
## Cl.thickness
## Y 1 2 3 4 5
## benign 0.314935065 0.090909091 0.214285714 0.146103896 0.198051948
## malignant 0.017543860 0.017543860 0.052631579 0.052631579 0.192982456
## Cl.thickness
## Y 6 7 8 9 10
## benign 0.032467532 0.000000000 0.003246753 0.000000000 0.000000000
## malignant 0.070175439 0.076023392 0.169590643 0.046783626 0.304093567
##
## Cell.size
## Y 1 2 3 4 5
## benign 0.818181818 0.081168831 0.074675325 0.016233766 0.000000000
## malignant 0.011695906 0.029239766 0.116959064 0.099415205 0.140350877
## Cell.size
## Y 6 7 8 9 10
## benign 0.000000000 0.003246753 0.003246753 0.003246753 0.000000000
## malignant 0.087719298 0.081871345 0.116959064 0.017543860 0.298245614
##
## Cell.shape
## Y 1 2 3 4 5
## benign 0.762987013 0.120129870 0.071428571 0.029220779 0.003246753
## malignant 0.005847953 0.017543860 0.105263158 0.128654971 0.128654971
## Cell.shape
## Y 6 7 8 9 10
## benign 0.003246753 0.006493506 0.003246753 0.000000000 0.000000000
## malignant 0.093567251 0.140350877 0.099415205 0.035087719 0.245614035
##
## Marg.adhesion
## Y 1 2 3 4 5
## benign 0.811688312 0.100649351 0.058441558 0.012987013 0.006493506
## malignant 0.134502924 0.070175439 0.122807018 0.122807018 0.081871345
## Marg.adhesion
## Y 6 7 8 9 10
## benign 0.006493506 0.000000000 0.000000000 0.000000000 0.003246753
## malignant 0.087719298 0.046783626 0.087719298 0.017543860 0.228070175
##
## Epith.c.size
## Y 1 2 3 4 5
## benign 0.103896104 0.795454545 0.064935065 0.016233766 0.006493506
## malignant 0.005847953 0.099415205 0.175438596 0.169590643 0.146198830
## Epith.c.size
## Y 6 7 8 9 10
## benign 0.003246753 0.006493506 0.003246753 0.000000000 0.000000000
## malignant 0.163742690 0.035087719 0.070175439 0.005847953 0.128654971
##
## Bare.nuclei
## Y 1 2 3 4 5
## benign 0.863636364 0.048701299 0.035714286 0.012987013 0.025974026
## malignant 0.058479532 0.029239766 0.064327485 0.052631579 0.076023392
## Bare.nuclei
## Y 6 7 8 9 10
## benign 0.000000000 0.000000000 0.006493506 0.000000000 0.006493506
## malignant 0.017543860 0.035087719 0.076023392 0.029239766 0.561403509
##
## Bl.cromatin
## Y 1 2 3 4 5
## benign 0.340909091 0.331168831 0.279220779 0.016233766 0.012987013
## malignant 0.011695906 0.029239766 0.175438596 0.116959064 0.140350877
## Bl.cromatin
## Y 6 7 8 9 10
## benign 0.003246753 0.016233766 0.000000000 0.000000000 0.000000000
## malignant 0.023391813 0.263157895 0.116959064 0.035087719 0.087719298
##
## Normal.nucleoli
## Y 1 2 3 4 5
## benign 0.883116883 0.061688312 0.025974026 0.003246753 0.003246753
## malignant 0.169590643 0.035087719 0.134502924 0.058479532 0.064327485
## Normal.nucleoli
## Y 6 7 8 9 10
## benign 0.009740260 0.006493506 0.006493506 0.000000000 0.000000000
## malignant 0.076023392 0.064327485 0.081871345 0.058479532 0.257309942
##
## Mitoses
## Y 1 2 3 4 5
## benign 0.967532468 0.022727273 0.003246753 0.000000000 0.000000000
## malignant 0.526315789 0.122807018 0.140350877 0.052631579 0.029239766
## Mitoses
## Y 6 7 8 10
## benign 0.000000000 0.003246753 0.003246753 0.000000000
## malignant 0.005847953 0.029239766 0.035087719 0.058479532
# Predict the class of each test observation from its predictor columns
# (every column except Class).
preditest <- predict(
  modeloNB,
  BreastCancer.test[, colnames(BreastCancer.test) != "Class"]
)
# Confusion matrix: rows are the actual classes, columns the predicted ones.
confutest <- table(BreastCancer.test$Class, preditest)
print(confutest)
## preditest
## benign malignant
## benign 132 4
## malignant 1 67
# Report, as percentages, the overall hit rate, the share of actual
# "malignant" cases predicted as such and the share of actual "benign"
# cases predicted as such.
cat(
  " Tasa de acierto test= \t",
  100 * sum(diag(confutest)) / sum(confutest), "\n",
  "Sensitividad test= \t",
  100 * confutest["malignant", "malignant"] /
    sum(confutest["malignant", ]), "\n",
  "Especificidad test= \t",
  100 * confutest["benign", "benign"] /
    sum(confutest["benign", ]), "\n"
)
## Tasa de acierto test= 97.54902
## Sensitividad test= 98.52941
## Especificidad test= 97.05882
# Append to the confusion matrix a column with the percentage of correctly
# classified cases in each row (actual class), rounded to two decimals.
cbind(
  confutest,
  Acierto = round(100 * (diag(confutest) / rowSums(confutest)), 2)
)
## benign malignant Acierto
## benign 132 4 97.06
## malignant 1 67 98.53
# A naiveBayes object has no `prob` component, so `modeloNB$prob[, 2]`
# silently evaluated to NULL. Take the posterior probability of the second
# class level from predict(type = "raw") instead.
probabitest_g <- predict(
  modeloNB, BreastCancer.test[, 1:9],
  type = "raw"
)[, 2]
# Install ROCR and gplots only when they are missing: unconditional
# install.packages() calls would reinstall them on every run of the script.
if (!requireNamespace("ROCR", quietly = TRUE)) {
  install.packages("ROCR")
}
if (!requireNamespace("gplots", quietly = TRUE)) {
  install.packages("gplots")
}
# Attach ROCR, which provides prediction() and performance().
library(ROCR)
## Loading required package: gplots
##
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
##
## lowess
# Posterior probability of the "malignant" class for each test observation.
probabi <- predict(
  modeloNB, BreastCancer.test[, 1:9],
  type = "raw"
)[, "malignant"]
# ROCR object pairing those scores with the actual classes.
prediobj <- prediction(probabi, BreastCancer.test$Class)
# ROC curve: true-positive rate against false-positive rate.
plot(
  performance(prediobj, "tpr", "fpr"),
  main = "COR TEST. Naive Bayes, BreastCancer",
  xlab = "Tasa de falsos positivos",
  ylab = "Tasa de verdaderos positivos"
)
# Dashed blue diagonal used as the reference line.
abline(a = 0, b = 1, col = "blue", lty = 2)
# Area under the ROC curve, shown in the legend and printed to the console.
auc <- as.numeric(performance(prediobj, "auc")@y.values)
legend("bottomright", legend = paste("AUC=", round(auc, 3)))
cat("Área bajo la curva COR Test= ", auc, "\n")
## Área bajo la curva COR Test= 0.9940528
# Install kknn only when it is missing: an unconditional install.packages()
# would download and reinstall the package on every run of the script.
if (!requireNamespace("kknn", quietly = TRUE)) {
  install.packages("kknn")
}
# Attach the package that provides train.kknn() and kknn().
library(kknn)
# Search for the best number of neighbours (up to 15) and the best of four
# kernels for predicting Class on the training set, with distance
# parameter 2.
fit.train1 <- train.kknn(
  formula = Class ~ .,
  data = BreastCancer.entr,
  kmax = 15,
  kernel = c("triangular", "rectangular", "epanechnikov", "optimal"),
  distance = 2
)
# Show the result of the search.
print(fit.train1)
##
## Call:
## train.kknn(formula = Class ~ ., data = BreastCancer.entr, kmax = 15, distance = 2, kernel = c("triangular", "rectangular", "epanechnikov", "optimal"))
##
## Type of response variable: nominal
## Minimal misclassification: 0.03549061
## Best kernel: triangular
## Best k: 3
# Best number of neighbours found by train.kknn(); stored and printed.
k <- fit.train1$best.parameters$k
k
## [1] 3
# Best kernel found by train.kknn(); stored and printed.
nucleo <- fit.train1$best.parameters$kernel
nucleo
## [1] "triangular"
# Fit the weighted k-nearest-neighbour model with the selected parameters
# and predict the test set.
modeloKKNN <- kknn(
  formula = Class ~ .,
  train = BreastCancer.entr,
  test = BreastCancer.test,
  k = k,
  kernel = nucleo,
  distance = 2
)
# Confusion matrix: rows are the actual classes, columns the predicted ones.
confutest <- table(BreastCancer.test$Class, modeloKKNN$fitted.values)
confutest
##
## benign malignant
## benign 133 3
## malignant 2 66
# Report, as percentages, the overall hit rate, the share of actual
# "malignant" cases predicted as such and the share of actual "benign"
# cases predicted as such.
cat(
  " Tasa de acierto test= \t",
  100 * sum(diag(confutest)) / sum(confutest), "\n",
  "Sensitividad test= \t",
  100 * confutest["malignant", "malignant"] /
    sum(confutest["malignant", ]), "\n",
  "Especificidad test= \t",
  100 * confutest["benign", "benign"] /
    sum(confutest["benign", ]), "\n"
)
## Tasa de acierto test= 97.54902
## Sensitividad test= 97.05882
## Especificidad test= 97.79412
# Append to the confusion matrix a column with the percentage of correctly
# classified cases in each row (actual class), rounded to two decimals.
cbind(
  confutest,
  Acierto = round(100 * (diag(confutest) / rowSums(confutest)), 2)
)
## benign malignant Acierto
## benign 133 3 97.79
## malignant 2 66 97.06
# Second column of the class-probability matrix returned by kknn().
probabitest_g <- modeloKKNN$prob[, 2]
# Install gplots and ROCR only when they are missing: unconditional
# install.packages() calls would reinstall them on every run of the script.
if (!requireNamespace("gplots", quietly = TRUE)) {
  install.packages("gplots")
}
if (!requireNamespace("ROCR", quietly = TRUE)) {
  install.packages("ROCR")
}
# Attach ROCR, which provides prediction() and performance().
library(ROCR)
# ROCR object pairing the kknn scores with the actual classes.
pred <- prediction(probabitest_g, BreastCancer.test$Class)
# ROC curve: true-positive rate against false-positive rate.
perf <- performance(pred, measure = "tpr", x.measure = "fpr")
plot(perf, main = "COR Test. KKNN, BreastCancer")
# Dashed blue diagonal used as the reference line, plus a background grid.
abline(a = 0, b = 1, col = "blue", lty = 2)
grid()
# Area under the ROC curve, shown in the legend and printed to the console.
auc <- as.numeric(performance(pred, measure = "auc")@y.values)
legend("bottomright", legend = paste("AUC=", round(auc, 3)))
cat("Área bajo la curva COR Test= ", auc, "\n")
## Área bajo la curva COR Test= 0.9799957
# ---- Weighted k-nearest neighbours (kknn) on the Glass data ----
# Rebuilt as runnable R. The original text could not be parsed: it had
# typographic quotes, `\(` / `\)` where `$` belonged, missing `*` operators,
# lost "\n" / "\t" strings and several statements fused on one line.
library("mlbench")
data(Glass)
Glass <- data.frame(Glass)
dim(Glass)
n <- nrow(Glass)
summary(Glass)
indices <- seq_len(n)

# Random partition: the integer part of 30% of the rows goes to the test
# set, the remaining rows to the training set.
inditest <- sample(seq_len(n), size = trunc(3 * n / 10))
Glass.entr <- Glass[-inditest, ]
Glass.test <- Glass[inditest, ]

library(kknn)
# Search for the best number of neighbours (up to 15) and the best kernel.
(fit.train1 <- train.kknn(
  Type ~ ., Glass.entr,
  kmax = 15,
  kernel = c("triangular", "rectangular", "epanechnikov", "optimal"),
  distance = 2
))
(k <- fit.train1$best.parameters$k)
(nucleo <- fit.train1$best.parameters$kernel)
fit.train1$distance

# Fit the model with the selected parameters and predict the test set.
modeloKKNN <- kknn(
  Type ~ ., Glass.entr,
  k = k, test = Glass.test,
  kernel = nucleo, distance = 2
)
# Confusion matrix: rows are the actual classes, columns the predicted ones.
confutest <- table(Glass.test$Type, modeloKKNN$fitted.values)
confutest

# Type has more than two classes, so the overall hit rate must add up the
# whole diagonal, not only cells [1, 1] and [2, 2].
cat(
  " Tasa de acierto test= \t",
  100 * sum(diag(confutest)) / sum(confutest), "\n",
  # NOTE(review): with a multi-class response the next two figures are only
  # the hit rates of the 2nd and 1st classes; confirm the labels are the
  # ones intended. The cbind() below lists the hit rate of every class.
  "Sensitividad test= \t",
  100 * confutest[2, 2] / sum(confutest[2, ]), "\n",
  "Especificidad test= \t",
  100 * confutest[1, 1] / sum(confutest[1, ]), "\n"
)
cbind(confutest, Acierto = round(100 * diag(prop.table(confutest, 1)), 2))

# Estimated probability of the second class level for each test observation.
probabitest_g <- modeloKKNN$prob[, 2]

library(ROCR)
# ROCR only evaluates binary problems, so the ROC curve is drawn
# one-vs-rest for the class whose probability was extracted above.
clase_roc <- levels(Glass.test$Type)[2]
pred <- prediction(probabitest_g, Glass.test$Type == clase_roc)
perf <- performance(pred, "tpr", "fpr")
plot(perf, main = "KKNN, Glass")
abline(a = 0, b = 1, col = "blue", lty = 2)
grid()
auc <- as.numeric(performance(pred, "auc")@y.values)
legend("bottomright", legend = paste("AUC=", round(auc, 3)))
cat("Área bajo la curva COR Test= ", auc, "\n")