Carga de librerias
library(rpart)
## Warning: package 'rpart' was built under R version 3.5.3
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 3.5.3
library(C50)
## Warning: package 'C50' was built under R version 3.5.3
library(nomclust)
## Warning: package 'nomclust' was built under R version 3.5.3
library(readxl)
## Warning: package 'readxl' was built under R version 3.5.3
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 3.5.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.3
## Loading required package: magrittr
## Warning: package 'magrittr' was built under R version 3.5.2
library(ggplot2)
library(knitr)
## Warning: package 'knitr' was built under R version 3.5.3
library(e1071)
## Warning: package 'e1071' was built under R version 3.5.3
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.5.3
library(caret)
## Warning: package 'caret' was built under R version 3.5.3
## Loading required package: lattice
1) Lectura de datos
data<- read_excel("hidro con falla.xlsx")
colnames(data)
## [1] "TMRCLA" "TMA1LA" "TMA2LA" "TACCLA" "VEACLA" "VERCLA" "TMRCLOA"
## [8] "TACCLOA" "VER" "TTA" "PAA" "TAaT" "TAaC"
hist(data$PAA,col = c("red","blue"))

data<- data[data$PAA!=0, ]
hist(data$PAA,col = c("red","blue"))

knitr::kable(head(data), caption = "Datos Cargados")
Datos Cargados
| 81 |
86 |
79 |
56 |
2.4 |
2.4 |
79 |
59 |
2.4 |
54 |
128 |
48 |
41 |
| 79 |
86 |
81 |
56 |
2.4 |
2.4 |
79 |
59 |
2.4 |
54 |
128 |
48 |
41 |
| 81 |
86 |
81 |
56 |
2.6 |
2.4 |
79 |
59 |
2.4 |
54 |
128 |
48 |
41 |
| 81 |
86 |
79 |
56 |
2.4 |
2.4 |
79 |
59 |
2.4 |
54 |
128 |
48 |
41 |
| 79 |
86 |
81 |
56 |
2.4 |
2.4 |
79 |
59 |
2.4 |
54 |
128 |
48 |
41 |
| 81 |
86 |
81 |
56 |
2.6 |
2.4 |
79 |
59 |
2.4 |
54 |
128 |
48 |
41 |
a=1
2) Parametros de variables
p1<- c(80,85)
p2<- c(80,85)
p3<- c(80,85)
p4<- c(55,60)
p5<- c(2.5,3.5)
p6<- c(2.5,3.5)
p7<- c(80,85)
p8<- c(60,65)
p9<- c(2.5,3.5)
p10<- c(55,60)
p11<- c(127,130)
p12<- c(50,54)
p13<- c(42,48)
parametro=data.frame( rbind(p1,p2,p3,p4,p5,p6,p7,p8,p9,p10,p11,p12,p13))
row.names(parametro)=c("TMRCLA","TMA1LA","TMA2LA","TACCLA","VEACLA","VERCLA","TMRCLOA","TACCLOA","VER","TTA","PAA","TAaT","TAaC")
colnames(parametro)=c("Alarm","Trip")
knitr::kable(parametro, caption = "Parametros de configuracion")
Parametros de configuracion
| TMRCLA |
80.0 |
85.0 |
| TMA1LA |
80.0 |
85.0 |
| TMA2LA |
80.0 |
85.0 |
| TACCLA |
55.0 |
60.0 |
| VEACLA |
2.5 |
3.5 |
| VERCLA |
2.5 |
3.5 |
| TMRCLOA |
80.0 |
85.0 |
| TACCLOA |
60.0 |
65.0 |
| VER |
2.5 |
3.5 |
| TTA |
55.0 |
60.0 |
| PAA |
127.0 |
130.0 |
| TAaT |
50.0 |
54.0 |
| TAaC |
42.0 |
48.0 |
a=2
3) Transformacion de datos continuos a discretos
data$TMRCLA=ifelse(data$TMRCLA>=parametro["TMRCLA","Alarm"],ifelse(data$TMRCLA>=parametro["TMRCLA","Trip"],"Trip","Alarm"),"Ok")
data$TMA1LA=ifelse(data$TMA1LA>=80,ifelse(data$TMA1LA>=85,"Trip","Alarm"),"Ok")
data$TMA2LA=ifelse(data$TMA2LA>=80,ifelse(data$TMA2LA>=85,"Trip","Alarm"),"Ok")
data$TACCLA=ifelse(data$TACCLA>=55,ifelse(data$TACCLA>=60,"Trip","Alarm"),"Ok")
data$VEACLA=ifelse(data$VEACLA>=2.5,ifelse(data$VEACLA>=3.5,"Trip","Alarm"),"Ok")
data$VERCLA=ifelse(data$VERCLA>=2.5,ifelse(data$VERCLA>=3.5,"Trip","Alarm"),"Ok")
data$TMRCLOA=ifelse(data$TMRCLOA>=80,ifelse(data$TMRCLOA>=85,"Trip","Alarm"),"Ok")
data$TACCLOA=ifelse(data$TACCLOA>=60,ifelse(data$TACCLOA>=65,"Trip","Alarm"),"Ok")
data$VER=ifelse(data$VER>=2.5,ifelse(data$VER>=3.5,"Trip","Alarm"),"Ok")
data$TTA=ifelse(data$TTA>=55,ifelse(data$TTA>=60,"Trip","Alarm"),"Ok")
data$PAA=ifelse(data$PAA<=130,ifelse(data$PAA<=127,"Low Trip","Low Alarm"),ifelse(data$PAA>=150,ifelse(data$PAA>=157,"High Trip","Alarm High"),"Ok"))
data$TAaT=ifelse(data$TAaT>=50,ifelse(data$TAaT>=54,"Trip","Alarm"),"Ok")
data$TAaC=ifelse(data$TAaC>=42,ifelse(data$TAaC>=48,"Trip","Alarm"),"Ok")
knitr::kable(head(data), caption = "Datos discretizados")
Datos discretizados
| Alarm |
Trip |
Ok |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
| Ok |
Trip |
Alarm |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
| Alarm |
Trip |
Alarm |
Alarm |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
| Alarm |
Trip |
Ok |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
| Ok |
Trip |
Alarm |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
| Alarm |
Trip |
Alarm |
Alarm |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
a=3
4) evaluacion de eventos registrados con fallas y sin fallas
data$evento=ifelse(data$TMRCLA=="Trip"|data$TMA1LA=="Trip"| data$TMA2LA=="Trip"| data$TACCLA=="Trip"| data$VEACLA=="Trip"| data$VERCLA=="Trip"| data$TMRCLOA=="Trip" | data$TACCLOA=="Trip"| data$VER=="Trip"| data$TTA=="Trip"| data$PAA=="Low Trip"|data$PAA=="High Trip"| data$TAaT=="Trip"| data$TAaC=="Trip", "EN FALLA","SIN FALLA")
Trips=data[data$evento=="EN FALLA",]
Trips$evento<=NULL
## logical(0)
knitr::kable(head(data), caption = "Datos discretizados")
Datos discretizados
| Alarm |
Trip |
Ok |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
EN FALLA |
| Ok |
Trip |
Alarm |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
EN FALLA |
| Alarm |
Trip |
Alarm |
Alarm |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
EN FALLA |
| Alarm |
Trip |
Ok |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
EN FALLA |
| Ok |
Trip |
Alarm |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
EN FALLA |
| Alarm |
Trip |
Alarm |
Alarm |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
EN FALLA |
knitr::kable(head(Trips[,1:14]), caption = "Trips del sistema")
Trips del sistema
| Alarm |
Trip |
Ok |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
EN FALLA |
| Ok |
Trip |
Alarm |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
EN FALLA |
| Alarm |
Trip |
Alarm |
Alarm |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
EN FALLA |
| Alarm |
Trip |
Ok |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
EN FALLA |
| Ok |
Trip |
Alarm |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
EN FALLA |
| Alarm |
Trip |
Alarm |
Alarm |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
EN FALLA |
barplot(prop.table(table(Trips$PAA)), col = c("blue","green","orange","red"))

write.csv(Trips,"Trips.cvs")
write.csv(data,"Datos discretos.cvs")
a<-1
5) Normalizacion de los Estados
Trips<- Trips
Tabla<- Trips
Tabla$TMRCLAok<- ifelse(Trips$TMRCLA == "Ok", 1, 0)
Tabla$TMRCLAAlarm<- ifelse(Trips$TMRCLA == "Alarm", 1, 0)
Tabla$TMRCLATrip<- ifelse(Trips$TMRCLA == "Trip", 1, 0)
Tabla$TMA1LAok<- ifelse(Trips$TMA1LA == "Ok", 1, 0)
Tabla$TMA1LAAlarm<- ifelse(Trips$TMA1LA == "Alarm", 1, 0)
Tabla$TMA1LATrip<- ifelse(Trips$TMA1LA == "Trip", 1, 0)
Tabla$TMA2LAok<- ifelse(Trips$TMA2LA == "Ok", 1, 0)
Tabla$TMA2LAAlarm<- ifelse(Trips$TMA2LA == "Alarm", 1, 0)
Tabla$TMA2LATrip<- ifelse(Trips$TMA2LA == "Trip", 1, 0)
Tabla$TACCLAok<- ifelse(Trips$TACCLA == "Ok", 1, 0)
Tabla$TACCLAAlarm<- ifelse(Trips$TACCLA == "Alarm", 1, 0)
Tabla$TACCLATrip<- ifelse(Trips$TACCLA == "Trip", 1, 0)
Tabla$VEACLAok <- ifelse(Trips$VEACLA == "Ok", 1, 0)
Tabla$VEACLAAlarm<- ifelse(Trips$VEACLA == "Alarm", 1, 0)
Tabla$VEACLATrip<- ifelse(Trips$VEACLA == "Trip", 1, 0)
Tabla$VERCLAok <- ifelse(Trips$VERCLA == "Ok", 1, 0)
Tabla$VERCLAAlarm<- ifelse(Trips$VERCLA == "Alarm", 1, 0)
Tabla$VERCLATrip<- ifelse(Trips$VERCLA == "Trip", 1, 0)
Tabla$TMRCLOAok <- ifelse(Trips$TMRCLOA == "Ok", 1, 0)
Tabla$TMRCLOAAlarm<- ifelse(Trips$TMRCLOA == "Alarm", 1, 0)
Tabla$TMRCLOATrip<- ifelse(Trips$TMRCLOA == "Trip", 1, 0)
Tabla$TACCLOAok <- ifelse(Trips$TACCLOA == "Ok", 1, 0)
Tabla$TACCLOAAlarm<- ifelse(Trips$TACCLOA == "Alarm", 1, 0)
Tabla$TACCLOATrip<- ifelse(Trips$TACCLOA == "Trip", 1, 0)
Tabla$VERok <- ifelse(Trips$VER == "Ok", 1, 0)
Tabla$VERAlarm<- ifelse(Trips$VER == "Alarm", 1, 0)
Tabla$VERTrip<- ifelse(Trips$VER == "Trip", 1, 0)
Tabla$TTAok <- ifelse(Trips$TTA == "Ok", 1, 0)
Tabla$TTAAlarm<- ifelse(Trips$TTA == "Alarm", 1, 0)
Tabla$TTATrip<- ifelse(Trips$TTA == "Trip", 1, 0)
Tabla$PAAok <- ifelse(Trips$PAA == "Ok", 1, 0)
Tabla$PAAAlarmB<- ifelse(Trips$PAA == "Low Alarm", 1, 0)
Tabla$PAAAlarmA<- ifelse(Trips$PAA == "High Alarm", 1, 0)
Tabla$PAATripB<- ifelse(Trips$PAA == "Low Trip", 1, 0)
Tabla$PAATripA<- ifelse(Trips$PAA == "High Trip", 1, 0)
Tabla$TAaTok <- ifelse(Trips$TAaT == "Ok", 1, 0)
Tabla$TAaTAlarm<- ifelse(Trips$TAaT == "Alarm", 1, 0)
Tabla$TAaTTrip<- ifelse(Trips$TAaT == "Trip", 1, 0)
Tabla$TAaCok <- ifelse(Trips$TAaC == "Ok", 1, 0)
Tabla$TAaCAlarm<- ifelse(Trips$TAaC == "Alarm", 1, 0)
Tabla$TAaCTrip<- ifelse(Trips$TAaC == "Trip", 1, 0)
Datos_para_cluster<- Tabla[, -c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14)]
kable( head(Datos_para_cluster[,c(1,2,3,4,5,6,7,8)]))
| 0 |
1 |
0 |
0 |
0 |
1 |
1 |
0 |
| 1 |
0 |
0 |
0 |
0 |
1 |
0 |
1 |
| 0 |
1 |
0 |
0 |
0 |
1 |
0 |
1 |
| 0 |
1 |
0 |
0 |
0 |
1 |
1 |
0 |
| 1 |
0 |
0 |
0 |
0 |
1 |
0 |
1 |
| 0 |
1 |
0 |
0 |
0 |
1 |
0 |
1 |
a<-1
5) Determinar Numero optimo de Cluster
library(ggplot2)
sumbt<- kmeans(Datos_para_cluster, centers = 9, iter.max = 50)$betweenss
sumbt2<- kmeans(Datos_para_cluster, centers = 9, iter.max = 50)$tot.withinss
for(i in 1:15) sumbt[i]<- kmeans(Datos_para_cluster, centers = i, nstart = 50)$betweenss
for(i in 1:15) sumbt2[i]<- kmeans(Datos_para_cluster, centers = i, nstart = 50)$tot.withinss
plot(1:15, sumbt, type="o", col="blue", lwd=1, main="Optimal Number of Cluster", xlab="Number of cluster", ylab="Distance",las=1, col.axis="black")
lines(1:15, sumbt2 ,type="o", col="green", lwd=1)
legend("bottomleft",col=c("blue","green"),legend =c("Betweenss cluster","Withinss cluster"), lwd=2, bty = "n", inset = 0.6)
points(1:15, sumbt, pch = 21, bg = "white")
points(1:15, sumbt2, pch = 21, bg = "white")
abline(v = 9, col="red", lwd=2, lty=2)

a<-1
6) Cluster Jerarquico
library(ggdendro)
## Warning: package 'ggdendro' was built under R version 3.5.3
library(scatterplot3d)
## Warning: package 'scatterplot3d' was built under R version 3.5.2
library(dendextend)
## Warning: package 'dendextend' was built under R version 3.5.3
##
## ---------------------
## Welcome to dendextend version 1.12.0
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
## Attaching package: 'dendextend'
## The following object is masked from 'package:ggdendro':
##
## theme_dendro
## The following object is masked from 'package:ggpubr':
##
## rotate
## The following object is masked from 'package:rpart':
##
## prune
## The following object is masked from 'package:stats':
##
## cutree
library(factoextra)
## Warning: package 'factoextra' was built under R version 3.5.3
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
library(grDevices)
Trips_1<-Trips[,c(1:13)]
scalada<- scale(Datos_para_cluster)
distancia_scalada<- dist(scalada, method = 'euclidean')
ch_scalada<- hclust(distancia_scalada, method = 'ward.D')
nk<-9
h12<- hcut(distancia_scalada, k=nk, stand = TRUE, hc_func ="diana", hc_method = "ward.D")
h13<- hcut(distancia_scalada, k=nk, stand = TRUE, hc_func ="diana", hc_method = "single")
h14<- hcut(distancia_scalada, k=nk, stand = TRUE, hc_func ="diana", hc_method = "complete")
h15<- hcut(distancia_scalada, k=nk, stand = TRUE, hc_func ="diana", hc_method = "average")
h21<- hcut(distancia_scalada, k=nk, stand = TRUE, hc_func ="agnes", hc_method = "ward.D2")
h22<- hcut(distancia_scalada, k=nk, stand = TRUE, hc_func ="agnes", hc_method = "ward.D")
h23<- hcut(distancia_scalada, k=nk, stand = TRUE, hc_func ="agnes", hc_method = "single")
h24<- hcut(distancia_scalada, k=nk, stand = TRUE, hc_func ="agnes", hc_method = "complete")
h25<- hcut(distancia_scalada, k=nk, stand = TRUE, hc_func ="agnes", hc_method = "average")
h31<- hcut(distancia_scalada, k=nk, stand = TRUE, hc_func ="hclust", hc_method = "ward.D2")
h32<- hcut(distancia_scalada, k=nk, stand = TRUE, hc_func ="hclust", hc_method = "ward.D")
h33<- hcut(distancia_scalada, k=nk, stand = TRUE, hc_func ="hclust", hc_method = "single")
h34<- hcut(distancia_scalada, k=nk, stand = TRUE, hc_func ="hclust", hc_method = "complete")
h35<- hcut(distancia_scalada, k=nk, stand = TRUE, hc_func ="hclust", hc_method = "average")
a<-1
h<-h24
#Cluster en 2 dimensiones
plot(fviz_cluster(h, geom = 'point', show.clust.cent = FALSE, main = "distance function euclidean, grouping method Complete",pointsize = 1, xlab = "Dimension 1", ylab = "Dimension 2"))

#Cluster Jerarquico
plot(fviz_dend(h, rect = TRUE, k = 9, show_labels = FALSE, xlab ="Clusters", lwd = 0.1, main = "distance function euclidean, grouping method Complete"))

a<-1
7) Crear Cluster
set.seed(80)
distancia_scalada<- dist(scalada, method = 'euclidean')
cluster<- hcut(distancia_scalada, k=9, stand = TRUE, hc_func ="agnes", hc_method = "complete")
Trips$cluster <- cluster$cluster
a<-1
8) Organizar Datos despues del cluster y asignar etiquetas
cluster1<- Trips[ cluster$cluster==1,]
cluster2<- Trips[ cluster$cluster==2,]
cluster3<- Trips[ cluster$cluster==3,]
cluster4<- Trips[ cluster$cluster==4,]
cluster5<- Trips[ cluster$cluster==5,]
cluster6<- Trips[ cluster$cluster==6,]
cluster7<- Trips[ cluster$cluster==7,]
cluster8<- Trips[ cluster$cluster==8,]
cluster9<- Trips[ cluster$cluster==9,]
cluster10<- Trips[ cluster$cluster==10,]
datos_listos<- Trips[-c(14)]
datos_listos$Caso<- ifelse(cluster$cluster==1, "Caso 1",
ifelse(cluster$cluster==2, "Caso 2",
ifelse(cluster$cluster==3, "Caso 3",
ifelse(cluster$cluster==4, "Caso 4",
ifelse(cluster$cluster==5, "Caso 5",
ifelse(cluster$cluster==6, "Caso 6",
ifelse(cluster$cluster==7, "Caso 7",
ifelse(cluster$cluster==8, "Caso 8",
ifelse(cluster$cluster==9, "Caso 9",
ifelse(cluster$cluster==10, "Caso 10",NaN))))))))))
kable( datos_listos[c(1:5),])
| Alarm |
Trip |
Ok |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
1 |
Caso 1 |
| Ok |
Trip |
Alarm |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
1 |
Caso 1 |
| Alarm |
Trip |
Alarm |
Alarm |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
1 |
Caso 1 |
| Alarm |
Trip |
Ok |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
1 |
Caso 1 |
| Ok |
Trip |
Alarm |
Alarm |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
1 |
Caso 1 |
a<-1
##Quitar datos duplicados
cluster1<-unique(cluster1)
cluster2<-unique(cluster2)
cluster3<-unique(cluster3)
cluster4<-unique(cluster4)
cluster5<-unique(cluster5)
cluster6<-unique(cluster6)
cluster7<-unique(cluster7)
cluster8<-unique(cluster8)
cluster9<-unique(cluster9)
9) Preparar Set de entrenamiento y test para la red Bayesiana
# Se crea una particion aleatoria del 30% para test y 70% para entrenamiento a partir de los datos del cluster 1
n=nrow(cluster1)
seleccion<-sample(n, floor(0.3*n))
Test_C1<- cluster1[seleccion,-14]
Test_C1$Caso<- "Caso 1"
Train_C1<- cluster1[-c(seleccion),-14]
Train_C1$Caso<- "Caso 1"
# Se crea una particion aleatoria del 30% para test y 70% para entrenamiento a partir de los datos del cluster 2
n=nrow(cluster2)
seleccion<-sample(n, floor(0.3*n))
Test_C2<- cluster2[seleccion,-14]
Test_C2$Caso<- "Caso 2"
Train_C2<- cluster2[-c(seleccion),-14]
Train_C2$Caso<- "Caso 2"
# Se crea una particion aleatoria del 30% para test y 70% para entrenamiento a partir de los datos del cluster 3
n=nrow(cluster3)
seleccion<-sample(n, floor(0.3*n))
Test_C3<- cluster3[seleccion,-14]
Test_C3$Caso<- "Caso 3"
Train_C3<- cluster3[-c(seleccion),-14]
Train_C3$Caso<- "Caso 3"
# Se crea una particion aleatoria del 30% para test y 70% para entrenamiento a partir de los datos del cluster 4
n=nrow(cluster4)
seleccion<-sample(n, floor(0.3*n))
Test_C4<- cluster4[seleccion,-14]
Test_C4$Caso<- "Caso 4"
Train_C4<- cluster4[-c(seleccion),-14]
Train_C4$Caso<- "Caso 4"
# Se crea una particion aleatoria del 30% para test y 70% para entrenamiento a partir de los datos del cluster 5
n=nrow(cluster5)
seleccion<-sample(n, floor(0.3*n))
Test_C5<- cluster5[seleccion,-14]
Test_C5$Caso<- "Caso 5"
Train_C5<- cluster5[-c(seleccion),-14]
Train_C5$Caso<- "Caso 5"
# Se crea una particion aleatoria del 30% para test y 70% para entrenamiento a partir de los datos del cluster 6
n=nrow(cluster6)
seleccion<-sample(n, floor(0.3*n))
Test_C6<- cluster6[seleccion,-14]
Test_C6$Caso<- "Caso 6"
Train_C6<- cluster6[-c(seleccion),-14]
Train_C6$Caso<- "Caso 6"
# Se crea una particion aleatoria del 30% para test y 70% para entrenamiento a partir de los datos del cluster 7
n=nrow(cluster7)
seleccion<-sample(n, floor(0.3*n))
Test_C7<- cluster7[seleccion,-14]
Test_C7$Caso<- "Caso 7"
Train_C7<- cluster7[-c(seleccion),-14]
Train_C7$Caso<- "Caso 7"
# Se crea una particion aleatoria del 30% para test y 70% para entrenamiento a partir de los datos del cluster 8
n=nrow(cluster8)
seleccion<-sample(n, floor(0.3*n))
Test_C8<- cluster8[seleccion,-14]
Test_C8$Caso<- "Caso 8"
Train_C8<- cluster8[-c(seleccion),-14]
Train_C8$Caso<- "Caso 8"
# Se crea una particion aleatoria del 30% para test y 70% para entrenamiento a partir de los datos del cluster 9
n=nrow(cluster9)
seleccion<-sample(n, floor(0.3*n))
Test_C9<- cluster9[seleccion,-14]
Test_C9$Caso<- "Caso 9"
Train_C9<- cluster9[-c(seleccion),-14]
Train_C9$Caso<- "Caso 9"
#se agregan las particiones creadas al dataframe Train y Test
Test<- rbind(Test_C1,Test_C2)
Test<- rbind(Test,Test_C3)
Test<- rbind(Test,Test_C4)
Test<- rbind(Test,Test_C5)
Test<- rbind(Test,Test_C6)
Test<- rbind(Test,Test_C7)
Test<- rbind(Test,Test_C8)
Test<- rbind(Test,Test_C9)
Test<- unique(Test)
Train<- rbind(Train_C1,Train_C2)
Train<- rbind(Train,Train_C3)
Train<- rbind(Train,Train_C4)
Train<- rbind(Train,Train_C5)
Train<- rbind(Train,Train_C6)
Train<- rbind(Train,Train_C7)
Train<- rbind(Train,Train_C8)
Train<- rbind(Train,Train_C9)
a<-1
10) Crear modelo de Naive Bayes
modelo <- naive_bayes(Caso ~ ., data = Train)
kable( modelo$prior)
| Caso 1 |
0.0625000 |
| Caso 2 |
0.1250000 |
| Caso 3 |
0.1339286 |
| Caso 4 |
0.1250000 |
| Caso 5 |
0.0892857 |
| Caso 6 |
0.1339286 |
| Caso 7 |
0.0446429 |
| Caso 8 |
0.1160714 |
| Caso 9 |
0.1696429 |
kable(modelo$tables$TMRCLA)
| Alarm |
0.8571429 |
0.8571429 |
0 |
1 |
0 |
0.8 |
0 |
0 |
0 |
| Ok |
0.1428571 |
0.1428571 |
1 |
0 |
1 |
0.2 |
1 |
1 |
1 |
kable(modelo$tables$TMA1LA)
| Alarm |
0 |
0.8571429 |
0 |
1 |
0 |
0.8 |
0 |
0.3076923 |
0.2631579 |
| Ok |
0 |
0.1428571 |
1 |
0 |
1 |
0.2 |
1 |
0.6923077 |
0.7368421 |
| Trip |
1 |
0.0000000 |
0 |
0 |
0 |
0.0 |
0 |
0.0000000 |
0.0000000 |
kable(modelo$tables$TMA2LA)
| Alarm |
0.7142857 |
0.9285714 |
0 |
0.8571429 |
0 |
0.8666667 |
0 |
0 |
0.3157895 |
| Ok |
0.2857143 |
0.0714286 |
1 |
0.1428571 |
1 |
0.1333333 |
1 |
1 |
0.6842105 |
kable(modelo$tables$TACCLA)
| Alarm |
0.5714286 |
0.9285714 |
0.2666667 |
1 |
0 |
0 |
0 |
0 |
0 |
| Ok |
0.4285714 |
0.0714286 |
0.7333333 |
0 |
1 |
1 |
1 |
1 |
1 |
kable(modelo$tables$VEACLA)
| Alarm |
0.1428571 |
0.2142857 |
0 |
1 |
0.4 |
0 |
0 |
1 |
0.2631579 |
| Ok |
0.8571429 |
0.7857143 |
1 |
0 |
0.6 |
1 |
1 |
0 |
0.7368421 |
kable(modelo$tables$VERCLA)
| Alarm |
0 |
0 |
0 |
1 |
0.4 |
0 |
0 |
0.4615385 |
0 |
| Ok |
1 |
1 |
1 |
0 |
0.6 |
1 |
1 |
0.5384615 |
1 |
kable(modelo$tables$TMRCLOA)
| Alarm |
0 |
0 |
0 |
0.3571429 |
0 |
1 |
0 |
0 |
0 |
| Ok |
1 |
1 |
1 |
0.6428571 |
1 |
0 |
1 |
1 |
0 |
| Trip |
0 |
0 |
0 |
0.0000000 |
0 |
0 |
0 |
0 |
1 |
kable(modelo$tables$TACCLOA)
| Alarm |
0 |
1 |
0 |
1 |
0 |
1 |
0 |
0 |
1 |
| Ok |
1 |
0 |
1 |
0 |
1 |
0 |
1 |
1 |
0 |
kable(modelo$tables$VER)
| Alarm |
0 |
0.0714286 |
0 |
1 |
0.8 |
0 |
0 |
0 |
1 |
| Ok |
1 |
0.9285714 |
1 |
0 |
0.2 |
1 |
1 |
0 |
0 |
| Trip |
0 |
0.0000000 |
0 |
0 |
0.0 |
0 |
0 |
1 |
0 |
kable(modelo$tables$TTA)
| Alarm |
0 |
0 |
0 |
0 |
1 |
0.0666667 |
0.2 |
0 |
0 |
| Ok |
1 |
1 |
1 |
1 |
0 |
0.9333333 |
0.8 |
1 |
1 |
kable(modelo$tables$PAA)
| Alarm High |
0 |
0 |
0 |
0 |
0 |
0 |
0.4 |
0 |
0 |
| High Trip |
0 |
0 |
0 |
1 |
0 |
0 |
0.0 |
0 |
0 |
| Low Alarm |
1 |
0 |
0 |
0 |
0 |
0 |
0.0 |
0 |
1 |
| Low Trip |
0 |
0 |
1 |
0 |
1 |
1 |
0.0 |
0 |
0 |
| Ok |
0 |
1 |
0 |
0 |
0 |
0 |
0.6 |
1 |
0 |
kable(modelo$tables$TAaT)
| Alarm |
0 |
1 |
0.2666667 |
0 |
1 |
1 |
0 |
0 |
0 |
| Ok |
1 |
0 |
0.7333333 |
1 |
0 |
0 |
0 |
1 |
1 |
| Trip |
0 |
0 |
0.0000000 |
0 |
0 |
0 |
1 |
0 |
0 |
kable(modelo$tables$TAaC)
| Alarm |
0 |
0 |
0.4666667 |
0 |
0 |
1 |
0.8 |
0 |
0 |
| Ok |
1 |
0 |
0.5333333 |
1 |
1 |
0 |
0.2 |
1 |
1 |
| Trip |
0 |
1 |
0.0000000 |
0 |
0 |
0 |
0.0 |
0 |
0 |
a<-4
11) Prueba completa del set de test
como el sistema de identificacion de fallas funciono correctamente con una observacion del set de Test se procede a evaluar la totalidad de dicho set
set.seed(80)
pred <- predict(modelo, Test, threshold = 100)
pred
## [1] Caso 1 Caso 1 Caso 1 Caso 2 Caso 2 Caso 2 Caso 2 Caso 3 Caso 3 Caso 3
## [11] Caso 3 Caso 4 Caso 4 Caso 4 Caso 1 Caso 5 Caso 5 Caso 6 Caso 6 Caso 6
## [21] Caso 7 Caso 7 Caso 8 Caso 8 Caso 8 Caso 9 Caso 9 Caso 1 Caso 9
## 9 Levels: Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 ... Caso 9
tab <- table(Test$Caso, pred, dnn = c("Actual", "Predicha"))
kable(tab)
| Caso 1 |
3 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
| Caso 2 |
0 |
4 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
| Caso 3 |
0 |
0 |
4 |
0 |
0 |
0 |
0 |
0 |
0 |
| Caso 4 |
0 |
0 |
0 |
3 |
0 |
0 |
0 |
0 |
0 |
| Caso 5 |
1 |
0 |
0 |
0 |
2 |
0 |
0 |
0 |
0 |
| Caso 6 |
0 |
0 |
0 |
0 |
0 |
3 |
0 |
0 |
0 |
| Caso 7 |
0 |
0 |
0 |
0 |
0 |
0 |
2 |
0 |
0 |
| Caso 8 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
3 |
0 |
| Caso 9 |
1 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
3 |
a<-confusionMatrix(tab)
a
## Confusion Matrix and Statistics
##
## Predicha
## Actual Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 Caso 9
## Caso 1 3 0 0 0 0 0 0 0 0
## Caso 2 0 4 0 0 0 0 0 0 0
## Caso 3 0 0 4 0 0 0 0 0 0
## Caso 4 0 0 0 3 0 0 0 0 0
## Caso 5 1 0 0 0 2 0 0 0 0
## Caso 6 0 0 0 0 0 3 0 0 0
## Caso 7 0 0 0 0 0 0 2 0 0
## Caso 8 0 0 0 0 0 0 0 3 0
## Caso 9 1 0 0 0 0 0 0 0 3
##
## Overall Statistics
##
## Accuracy : 0.931
## 95% CI : (0.7723, 0.9915)
## No Information Rate : 0.1724
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9221
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Caso 1 Class: Caso 2 Class: Caso 3
## Sensitivity 0.6000 1.0000 1.0000
## Specificity 1.0000 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000 1.0000
## Neg Pred Value 0.9231 1.0000 1.0000
## Prevalence 0.1724 0.1379 0.1379
## Detection Rate 0.1034 0.1379 0.1379
## Detection Prevalence 0.1034 0.1379 0.1379
## Balanced Accuracy 0.8000 1.0000 1.0000
## Class: Caso 4 Class: Caso 5 Class: Caso 6
## Sensitivity 1.0000 1.00000 1.0000
## Specificity 1.0000 0.96296 1.0000
## Pos Pred Value 1.0000 0.66667 1.0000
## Neg Pred Value 1.0000 1.00000 1.0000
## Prevalence 0.1034 0.06897 0.1034
## Detection Rate 0.1034 0.06897 0.1034
## Detection Prevalence 0.1034 0.10345 0.1034
## Balanced Accuracy 1.0000 0.98148 1.0000
## Class: Caso 7 Class: Caso 8 Class: Caso 9
## Sensitivity 1.00000 1.0000 1.0000
## Specificity 1.00000 1.0000 0.9615
## Pos Pred Value 1.00000 1.0000 0.7500
## Neg Pred Value 1.00000 1.0000 1.0000
## Prevalence 0.06897 0.1034 0.1034
## Detection Rate 0.06897 0.1034 0.1034
## Detection Prevalence 0.06897 0.1034 0.1379
## Balanced Accuracy 1.00000 1.0000 0.9808
#a$byClass