Carga de librerías

library(rpart)
## Warning: package 'rpart' was built under R version 3.5.3
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 3.5.3
library(C50)
## Warning: package 'C50' was built under R version 3.5.3
library(nomclust)
## Warning: package 'nomclust' was built under R version 3.5.3
library(readxl)
## Warning: package 'readxl' was built under R version 3.5.3
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 3.5.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.3
## Loading required package: magrittr
## Warning: package 'magrittr' was built under R version 3.5.2
library(ggplot2)
library(knitr)
## Warning: package 'knitr' was built under R version 3.5.3
library(e1071)
## Warning: package 'e1071' was built under R version 3.5.3
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.5.3
library(caret)
## Warning: package 'caret' was built under R version 3.5.3
## Loading required package: lattice

1) Lectura de datos

# Load the raw sensor readings from the Excel workbook, then list its columns.
data <- read_excel(path = "hidro con falla.xlsx")
colnames(data)
##  [1] "TMRCLA"  "TMA1LA"  "TMA2LA"  "TACCLA"  "VEACLA"  "VERCLA"  "TMRCLOA"
##  [8] "TACCLOA" "VER"     "TTA"     "PAA"     "TAaT"    "TAaC"
# Distribution of PAA before filtering.
hist(data$PAA, col = c("red", "blue"))

# Keep only the records whose PAA reading is non-zero, then plot again.
data <- data[data$PAA != 0, ]
hist(data$PAA, col = c("red", "blue"))

# Preview the first rows of the filtered data.
knitr::kable(x = head(data), caption = "Datos Cargados")
Datos Cargados
TMRCLA TMA1LA TMA2LA TACCLA VEACLA VERCLA TMRCLOA TACCLOA VER TTA PAA TAaT TAaC
81 86 79 56 2.4 2.4 79 59 2.4 54 128 48 41
79 86 81 56 2.4 2.4 79 59 2.4 54 128 48 41
81 86 81 56 2.6 2.4 79 59 2.4 54 128 48 41
81 86 79 56 2.4 2.4 79 59 2.4 54 128 48 41
79 86 81 56 2.4 2.4 79 59 2.4 54 128 48 41
81 86 81 56 2.6 2.4 79 59 2.4 54 128 48 41
a=1

2) Parametros de variables

# Alarm / trip set-points: one c(alarm, trip) pair per monitored variable.
p1 <- c(80, 85)
p2 <- c(80, 85)
p3 <- c(80, 85)
p4 <- c(55, 60)
p5 <- c(2.5, 3.5)
p6 <- c(2.5, 3.5)
p7 <- c(80, 85)
p8 <- c(60, 65)
p9 <- c(2.5, 3.5)
p10 <- c(55, 60)
p11 <- c(127, 130)
p12 <- c(50, 54)
p13 <- c(42, 48)

# Stack the pairs into a 13 x 2 matrix, label rows with the variable names and
# columns with the threshold kind, then convert it to a data frame.
umbrales <- rbind(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13)
dimnames(umbrales) <- list(
  c(
    "TMRCLA", "TMA1LA", "TMA2LA", "TACCLA", "VEACLA", "VERCLA", "TMRCLOA",
    "TACCLOA", "VER", "TTA", "PAA", "TAaT", "TAaC"
  ),
  c("Alarm", "Trip")
)
parametro <- data.frame(umbrales)
knitr::kable(parametro, caption = "Parametros de configuracion")
Parametros de configuracion
Alarm Trip
TMRCLA 80.0 85.0
TMA1LA 80.0 85.0
TMA2LA 80.0 85.0
TACCLA 55.0 60.0
VEACLA 2.5 3.5
VERCLA 2.5 3.5
TMRCLOA 80.0 85.0
TACCLOA 60.0 65.0
VER 2.5 3.5
TTA 55.0 60.0
PAA 127.0 130.0
TAaT 50.0 54.0
TAaC 42.0 48.0
a=2

3) Transformación de datos continuos a discretos

# Convert every continuous reading into a state label.
#
# All variables except PAA share one rule, driven by the set-points stored in
# `parametro` (row = variable name, columns "Alarm" and "Trip"):
#   reading >= Trip   -> "Trip"
#   reading >= Alarm  -> "Alarm"
#   otherwise         -> "Ok"
# Reading the limits from `parametro` for every variable keeps this step in
# agreement with the configuration table instead of repeating the numbers.
discretizar <- function(x, alarm, trip) {
  ifelse(x >= alarm, ifelse(x >= trip, "Trip", "Alarm"), "Ok")
}

for (variable in setdiff(row.names(parametro), "PAA")) {
  data[[variable]] <- discretizar(
    data[[variable]],
    alarm = parametro[variable, "Alarm"],
    trip = parametro[variable, "Trip"]
  )
}

# PAA has a low band (<= 130 alarm, <= 127 trip) and a high band (>= 150 alarm,
# >= 157 trip). The high-band limits are not part of `parametro`, so they stay
# literal here. The "Alarm High" label is kept exactly as emitted before.
data$PAA <- ifelse(
  data$PAA <= 130,
  ifelse(data$PAA <= 127, "Low Trip", "Low Alarm"),
  ifelse(
    data$PAA >= 150,
    ifelse(data$PAA >= 157, "High Trip", "Alarm High"),
    "Ok"
  )
)
knitr::kable(head(data), caption = "Datos discretizados")
Datos discretizados
TMRCLA TMA1LA TMA2LA TACCLA VEACLA VERCLA TMRCLOA TACCLOA VER TTA PAA TAaT TAaC
Alarm Trip Ok Alarm Ok Ok Ok Ok Ok Ok Low Alarm Ok Ok
Ok Trip Alarm Alarm Ok Ok Ok Ok Ok Ok Low Alarm Ok Ok
Alarm Trip Alarm Alarm Alarm Ok Ok Ok Ok Ok Low Alarm Ok Ok
Alarm Trip Ok Alarm Ok Ok Ok Ok Ok Ok Low Alarm Ok Ok
Ok Trip Alarm Alarm Ok Ok Ok Ok Ok Ok Low Alarm Ok Ok
Alarm Trip Alarm Alarm Alarm Ok Ok Ok Ok Ok Low Alarm Ok Ok
a=3

4) Evaluación de eventos registrados con fallas y sin fallas

# Flag each record as "EN FALLA" when at least one variable is in a trip state
# ("Trip" for the single-band variables, "Low Trip" / "High Trip" for PAA),
# otherwise "SIN FALLA".
sensores <- c(
  "TMRCLA", "TMA1LA", "TMA2LA", "TACCLA", "VEACLA", "VERCLA", "TMRCLOA",
  "TACCLOA", "VER", "TTA", "TAaT", "TAaC"
)
hay_trip <- Reduce(`|`, lapply(sensores, function(v) data[[v]] == "Trip"))
hay_trip <- hay_trip | data$PAA == "Low Trip" | data$PAA == "High Trip"
data$evento <- ifelse(hay_trip, "EN FALLA", "SIN FALLA")

# Records in fault. The `evento` column is deliberately left in `Trips`: the
# former `Trips$evento<=NULL` was a comparison (it only printed `logical(0)`),
# not an assignment, and the rest of the script was written against a `Trips`
# that still carries `evento` as its 14th column.
Trips <- data[data$evento == "EN FALLA", ]
## logical(0)
knitr::kable(head(data), caption = "Datos discretizados")
Datos discretizados
TMRCLA TMA1LA TMA2LA TACCLA VEACLA VERCLA TMRCLOA TACCLOA VER TTA PAA TAaT TAaC evento
Alarm Trip Ok Alarm Ok Ok Ok Ok Ok Ok Low Alarm Ok Ok EN FALLA
Ok Trip Alarm Alarm Ok Ok Ok Ok Ok Ok Low Alarm Ok Ok EN FALLA
Alarm Trip Alarm Alarm Alarm Ok Ok Ok Ok Ok Low Alarm Ok Ok EN FALLA
Alarm Trip Ok Alarm Ok Ok Ok Ok Ok Ok Low Alarm Ok Ok EN FALLA
Ok Trip Alarm Alarm Ok Ok Ok Ok Ok Ok Low Alarm Ok Ok EN FALLA
Alarm Trip Alarm Alarm Alarm Ok Ok Ok Ok Ok Low Alarm Ok Ok EN FALLA
knitr::kable(head(Trips[,1:14]), caption = "Trips del sistema")
Trips del sistema
TMRCLA TMA1LA TMA2LA TACCLA VEACLA VERCLA TMRCLOA TACCLOA VER TTA PAA TAaT TAaC evento
Alarm Trip Ok Alarm Ok Ok Ok Ok Ok Ok Low Alarm Ok Ok EN FALLA
Ok Trip Alarm Alarm Ok Ok Ok Ok Ok Ok Low Alarm Ok Ok EN FALLA
Alarm Trip Alarm Alarm Alarm Ok Ok Ok Ok Ok Low Alarm Ok Ok EN FALLA
Alarm Trip Ok Alarm Ok Ok Ok Ok Ok Ok Low Alarm Ok Ok EN FALLA
Ok Trip Alarm Alarm Ok Ok Ok Ok Ok Ok Low Alarm Ok Ok EN FALLA
Alarm Trip Alarm Alarm Alarm Ok Ok Ok Ok Ok Low Alarm Ok Ok EN FALLA
# Relative frequency of each PAA state among the fault records.
frecuencias_paa <- prop.table(table(Trips$PAA))
barplot(frecuencias_paa, col = c("blue", "green", "orange", "red"))

# Persist the fault records and the full discretised data set.
# NOTE(review): the ".cvs" extension looks like a typo for ".csv"; it is kept
# unchanged here because other tooling may read these exact file names.
write.csv(Trips, file = "Trips.cvs")
write.csv(data, file = "Datos discretos.cvs")
a<-1

5) Normalizacion de los Estados

  Trips<- Trips
  Tabla<- Trips
# One-hot encode the state of every variable of the fault records: for each
# variable one 0/1 column per state is appended to `Tabla`, named
# <variable><suffix> (e.g. TMRCLAok, TMRCLAAlarm, TMRCLATrip).

# Returns 1 where `x` equals any of `etiquetas`, 0 otherwise (NA stays NA).
indicador <- function(x, etiquetas) {
  coincide <- Reduce(`|`, lapply(etiquetas, function(e) x == e))
  ifelse(coincide, 1, 0)
}

# Column suffix -> state label(s) for the single-band variables.
estados_simples <- list(ok = "Ok", Alarm = "Alarm", Trip = "Trip")

# PAA has low (B) and high (A) bands. The discretisation step labels the high
# alarm "Alarm High", while this encoding used to test for "High Alarm", so
# PAAAlarmA was always 0. Both spellings are accepted so the column is correct
# whichever label the data carries.
estados_paa <- list(
  ok = "Ok",
  AlarmB = "Low Alarm",
  AlarmA = c("High Alarm", "Alarm High"),
  TripB = "Low Trip",
  TripA = "High Trip"
)

variables <- c(
  "TMRCLA", "TMA1LA", "TMA2LA", "TACCLA", "VEACLA", "VERCLA", "TMRCLOA",
  "TACCLOA", "VER", "TTA", "PAA", "TAaT", "TAaC"
)

columnas_indicador <- character(0)
for (variable in variables) {
  estados <- if (variable == "PAA") estados_paa else estados_simples
  for (sufijo in names(estados)) {
    columna <- paste0(variable, sufijo)
    Tabla[[columna]] <- indicador(Trips[[variable]], estados[[sufijo]])
    columnas_indicador <- c(columnas_indicador, columna)
  }
}

# Clustering input: only the indicator columns, selected by name so the result
# does not depend on how many original columns precede them in `Tabla`.
Datos_para_cluster <- Tabla[, columnas_indicador]

kable( head(Datos_para_cluster[,c(1,2,3,4,5,6,7,8)]))
TMRCLAok TMRCLAAlarm TMRCLATrip TMA1LAok TMA1LAAlarm TMA1LATrip TMA2LAok TMA2LAAlarm
0 1 0 0 0 1 1 0
1 0 0 0 0 1 0 1
0 1 0 0 0 1 0 1
0 1 0 0 0 1 1 0
1 0 0 0 0 1 0 1
0 1 0 0 0 1 0 1
a<-1

5) Determinar el número óptimo de clusters

library(ggplot2)
# Between-cluster and total within-cluster sums of squares for k = 1..15.
# Each k is fitted once and both quantities are taken from that same fit, so
# the two curves describe the same partition (fitting twice per k gave each
# curve its own random starts and doubled the run time).
k_max <- 15
sumbt <- numeric(k_max)
sumbt2 <- numeric(k_max)
for (i in seq_len(k_max)) {
  ajuste <- kmeans(Datos_para_cluster, centers = i, nstart = 50)
  sumbt[i] <- ajuste$betweenss
  sumbt2[i] <- ajuste$tot.withinss
}

plot(
  seq_len(k_max), sumbt,
  type = "o", col = "blue", lwd = 1,
  main = "Optimal Number of Cluster",
  xlab = "Number of cluster", ylab = "Distance",
  las = 1, col.axis = "black"
)
lines(seq_len(k_max), sumbt2, type = "o", col = "green", lwd = 1)
legend(
  "bottomleft",
  col = c("blue", "green"),
  legend = c("Betweenss cluster", "Withinss cluster"),
  lwd = 2, bty = "n", inset = 0.6
)
points(seq_len(k_max), sumbt, pch = 21, bg = "white")
points(seq_len(k_max), sumbt2, pch = 21, bg = "white")
# Vertical marker at k = 9.
abline(v = 9, col = "red", lwd = 2, lty = 2)

a<-1

6) Cluster Jerarquico

library(ggdendro)
## Warning: package 'ggdendro' was built under R version 3.5.3
library(scatterplot3d)
## Warning: package 'scatterplot3d' was built under R version 3.5.2
library(dendextend)
## Warning: package 'dendextend' was built under R version 3.5.3
## 
## ---------------------
## Welcome to dendextend version 1.12.0
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:ggdendro':
## 
##     theme_dendro
## The following object is masked from 'package:ggpubr':
## 
##     rotate
## The following object is masked from 'package:rpart':
## 
##     prune
## The following object is masked from 'package:stats':
## 
##     cutree
library(factoextra)
## Warning: package 'factoextra' was built under R version 3.5.3
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
library(grDevices)


# Inputs for the hierarchical clustering comparison.
Trips_1 <- Trips[, 1:13]
scalada <- scale(Datos_para_cluster)
distancia_scalada <- dist(scalada, method = "euclidean")
ch_scalada <- hclust(distancia_scalada, method = "ward.D")
nk <- 9

# Cut a hierarchical tree built on the scaled distances into `nk` groups with
# the given clustering function and linkage method.
cortar <- function(funcion, metodo) {
  hcut(
    distancia_scalada,
    k = nk, stand = TRUE, hc_func = funcion, hc_method = metodo
  )
}

# Divisive (diana) variants.
h12 <- cortar("diana", "ward.D")
h13 <- cortar("diana", "single")
h14 <- cortar("diana", "complete")
h15 <- cortar("diana", "average")
# Agglomerative (agnes) variants.
h21 <- cortar("agnes", "ward.D2")
h22 <- cortar("agnes", "ward.D")
h23 <- cortar("agnes", "single")
h24 <- cortar("agnes", "complete")
h25 <- cortar("agnes", "average")
# hclust variants.
h31 <- cortar("hclust", "ward.D2")
h32 <- cortar("hclust", "ward.D")
h33 <- cortar("hclust", "single")
h34 <- cortar("hclust", "complete")
h35 <- cortar("hclust", "average")
a <- 1

# Variant shown in the figures below: agnes with complete linkage.
h <- h24

# Clusters in two dimensions
grafico_2d <- fviz_cluster(
  h,
  geom = "point", show.clust.cent = FALSE,
  main = "distance function euclidean, grouping method Complete",
  pointsize = 1, xlab = "Dimension 1", ylab = "Dimension 2"
)
plot(grafico_2d)

# Hierarchical clustering dendrogram
dendrograma <- fviz_dend(
  h,
  rect = TRUE, k = 9, show_labels = FALSE, xlab = "Clusters", lwd = 0.1,
  main = "distance function euclidean, grouping method Complete"
)
plot(dendrograma)

a<-1

7) Crear Cluster

# Fix the random seed for the steps that follow.
set.seed(80)

# Final clustering: euclidean distances on the scaled indicators, cut into
# 9 groups using agnes with complete linkage.
distancia_scalada <- dist(x = scalada, method = "euclidean")
cluster <- hcut(
  distancia_scalada,
  k = 9, stand = TRUE, hc_func = "agnes", hc_method = "complete"
)

# Attach the group id of every record to the fault table.
Trips[["cluster"]] <- cluster[["cluster"]]
a<-1

8) Organizar Datos despues del cluster y asignar etiquetas

# Split the fault records by cluster id. `cluster10` is kept for backward
# compatibility; it selects id 10, which is empty while the tree is cut at k = 9.
cluster1 <- Trips[cluster$cluster == 1, ]
cluster2 <- Trips[cluster$cluster == 2, ]
cluster3 <- Trips[cluster$cluster == 3, ]
cluster4 <- Trips[cluster$cluster == 4, ]
cluster5 <- Trips[cluster$cluster == 5, ]
cluster6 <- Trips[cluster$cluster == 6, ]
cluster7 <- Trips[cluster$cluster == 7, ]
cluster8 <- Trips[cluster$cluster == 8, ]
cluster9 <- Trips[cluster$cluster == 9, ]
cluster10 <- Trips[cluster$cluster == 10, ]

# Labelled data set: the fault records without the constant `evento` column
# (removed by name rather than by position) plus a readable case label.
datos_listos <- Trips[, setdiff(names(Trips), "evento")]

# "Caso <id>" for ids 1..10. Any other id becomes a true NA; the nested
# ifelse() used before ended in NaN, which turned into the string "NaN".
datos_listos$Caso <- ifelse(
  cluster$cluster %in% 1:10,
  paste("Caso", cluster$cluster),
  NA_character_
)
kable(datos_listos[1:5, ])
TMRCLA TMA1LA TMA2LA TACCLA VEACLA VERCLA TMRCLOA TACCLOA VER TTA PAA TAaT TAaC cluster Caso
Alarm Trip Ok Alarm Ok Ok Ok Ok Ok Ok Low Alarm Ok Ok 1 Caso 1
Ok Trip Alarm Alarm Ok Ok Ok Ok Ok Ok Low Alarm Ok Ok 1 Caso 1
Alarm Trip Alarm Alarm Alarm Ok Ok Ok Ok Ok Low Alarm Ok Ok 1 Caso 1
Alarm Trip Ok Alarm Ok Ok Ok Ok Ok Ok Low Alarm Ok Ok 1 Caso 1
Ok Trip Alarm Alarm Ok Ok Ok Ok Ok Ok Low Alarm Ok Ok 1 Caso 1
a<-1

##Quitar datos duplicados

# Drop repeated records inside each cluster, keeping the first occurrence.
cluster1 <- cluster1[!duplicated(cluster1), ]
cluster2 <- cluster2[!duplicated(cluster2), ]
cluster3 <- cluster3[!duplicated(cluster3), ]
cluster4 <- cluster4[!duplicated(cluster4), ]
cluster5 <- cluster5[!duplicated(cluster5), ]
cluster6 <- cluster6[!duplicated(cluster6), ]
cluster7 <- cluster7[!duplicated(cluster7), ]
cluster8 <- cluster8[!duplicated(cluster8), ]
cluster9 <- cluster9[!duplicated(cluster9), ]

9) Preparar Set de entrenamiento y test para la red Bayesiana

# Build the training and test sets: within every cluster a random 30% of the
# records goes to test and the remaining 70% to training, and each record is
# labelled with its case ("Caso <k>").

# Model inputs, selected by name. This keeps both `evento` and the `cluster`
# id out of the predictors; dropping column 14 by position removed only one of
# them and left the cluster id, which encodes the target, in Train and Test.
predictores <- c(
  "TMRCLA", "TMA1LA", "TMA2LA", "TACCLA", "VEACLA", "VERCLA", "TMRCLOA",
  "TACCLOA", "VER", "TTA", "PAA", "TAaT", "TAaC"
)

# Randomly partition the records of one cluster.
#   datos       data frame holding the records of a single cluster
#   etiqueta    case label written to the `Caso` column
#   predictores names of the columns to keep
#   prop_test   share of the rows sent to the test set
# Returns list(test = <data frame>, train = <data frame>).
particionar <- function(datos, etiqueta, predictores, prop_test = 0.3) {
  n <- nrow(datos)
  seleccion <- sample(n, floor(prop_test * n))
  # setdiff() instead of a negative index: `x[-integer(0), ]` selects zero
  # rows, so small clusters with an empty test draw used to lose all their
  # training rows.
  filas_train <- setdiff(seq_len(n), seleccion)
  test <- datos[seleccion, predictores]
  train <- datos[filas_train, predictores]
  test$Caso <- rep(etiqueta, nrow(test))
  train$Caso <- rep(etiqueta, nrow(train))
  list(test = test, train = train)
}

# Clusters are processed in order 1..9, one sample() call each.
grupos <- list(
  cluster1, cluster2, cluster3, cluster4, cluster5,
  cluster6, cluster7, cluster8, cluster9
)
particiones <- unname(Map(
  particionar, grupos, paste("Caso", seq_along(grupos)),
  MoreArgs = list(predictores = predictores)
))

# Keep the per-cluster partitions available as Test_C1..Test_C9 and
# Train_C1..Train_C9.
for (k in seq_along(particiones)) {
  assign(paste0("Test_C", k), particiones[[k]]$test)
  assign(paste0("Train_C", k), particiones[[k]]$train)
}

# Stack the per-cluster partitions into the Test and Train data frames; as
# before, duplicated rows are removed from Test only.
Test <- unique(do.call(rbind, lapply(particiones, `[[`, "test")))
Train <- do.call(rbind, lapply(particiones, `[[`, "train"))
a<-1

10) Crear modelo de Naive Bayes

# Fit a naive Bayes classifier that predicts the case label from every other
# column of the training set, then show the estimated class priors.
modelo <- naive_bayes(Caso ~ ., data = Train)
knitr::kable(modelo$prior)
Var1 Freq
Caso 1 0.0625000
Caso 2 0.1250000
Caso 3 0.1339286
Caso 4 0.1250000
Caso 5 0.0892857
Caso 6 0.1339286
Caso 7 0.0446429
Caso 8 0.1160714
Caso 9 0.1696429
kable(modelo$tables$TMRCLA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 Caso 9
Alarm 0.8571429 0.8571429 0 1 0 0.8 0 0 0
Ok 0.1428571 0.1428571 1 0 1 0.2 1 1 1
kable(modelo$tables$TMA1LA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 Caso 9
Alarm 0 0.8571429 0 1 0 0.8 0 0.3076923 0.2631579
Ok 0 0.1428571 1 0 1 0.2 1 0.6923077 0.7368421
Trip 1 0.0000000 0 0 0 0.0 0 0.0000000 0.0000000
kable(modelo$tables$TMA2LA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 Caso 9
Alarm 0.7142857 0.9285714 0 0.8571429 0 0.8666667 0 0 0.3157895
Ok 0.2857143 0.0714286 1 0.1428571 1 0.1333333 1 1 0.6842105
kable(modelo$tables$TACCLA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 Caso 9
Alarm 0.5714286 0.9285714 0.2666667 1 0 0 0 0 0
Ok 0.4285714 0.0714286 0.7333333 0 1 1 1 1 1
kable(modelo$tables$VEACLA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 Caso 9
Alarm 0.1428571 0.2142857 0 1 0.4 0 0 1 0.2631579
Ok 0.8571429 0.7857143 1 0 0.6 1 1 0 0.7368421
kable(modelo$tables$VERCLA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 Caso 9
Alarm 0 0 0 1 0.4 0 0 0.4615385 0
Ok 1 1 1 0 0.6 1 1 0.5384615 1
kable(modelo$tables$TMRCLOA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 Caso 9
Alarm 0 0 0 0.3571429 0 1 0 0 0
Ok 1 1 1 0.6428571 1 0 1 1 0
Trip 0 0 0 0.0000000 0 0 0 0 1
kable(modelo$tables$TACCLOA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 Caso 9
Alarm 0 1 0 1 0 1 0 0 1
Ok 1 0 1 0 1 0 1 1 0
kable(modelo$tables$VER)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 Caso 9
Alarm 0 0.0714286 0 1 0.8 0 0 0 1
Ok 1 0.9285714 1 0 0.2 1 1 0 0
Trip 0 0.0000000 0 0 0.0 0 0 1 0
kable(modelo$tables$TTA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 Caso 9
Alarm 0 0 0 0 1 0.0666667 0.2 0 0
Ok 1 1 1 1 0 0.9333333 0.8 1 1
kable(modelo$tables$PAA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 Caso 9
Alarm High 0 0 0 0 0 0 0.4 0 0
High Trip 0 0 0 1 0 0 0.0 0 0
Low Alarm 1 0 0 0 0 0 0.0 0 1
Low Trip 0 0 1 0 1 1 0.0 0 0
Ok 0 1 0 0 0 0 0.6 1 0
kable(modelo$tables$TAaT)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 Caso 9
Alarm 0 1 0.2666667 0 1 1 0 0 0
Ok 1 0 0.7333333 1 0 0 0 1 1
Trip 0 0 0.0000000 0 0 0 1 0 0
kable(modelo$tables$TAaC)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 Caso 9
Alarm 0 0 0.4666667 0 0 1 0.8 0 0
Ok 1 0 0.5333333 1 1 0 0.2 1 1
Trip 0 1 0.0000000 0 0 0 0.0 0 0
a<-4

11) Prueba completa del set de test

Como el sistema de identificación de fallas funcionó correctamente con una observación del set de Test, se procede a evaluar la totalidad de dicho set.

# Classify every record of the test set with the fitted model.
# NOTE(review): `threshold` is the replacement value for zero probabilities; a
# value of 100 is larger than any probability - confirm this is intended.
set.seed(80)
pred <- predict(modelo, newdata = Test, threshold = 100)
print(pred)
##  [1] Caso 1 Caso 1 Caso 1 Caso 2 Caso 2 Caso 2 Caso 2 Caso 3 Caso 3 Caso 3
## [11] Caso 3 Caso 4 Caso 4 Caso 4 Caso 1 Caso 5 Caso 5 Caso 6 Caso 6 Caso 6
## [21] Caso 7 Caso 7 Caso 8 Caso 8 Caso 8 Caso 9 Caso 9 Caso 1 Caso 9
## 9 Levels: Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 ... Caso 9
# Cross-tabulate actual case labels (rows) against predicted ones (columns).
tab <- table(Actual = Test$Caso, Predicha = pred)
knitr::kable(tab)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 Caso 9
Caso 1 3 0 0 0 0 0 0 0 0
Caso 2 0 4 0 0 0 0 0 0 0
Caso 3 0 0 4 0 0 0 0 0 0
Caso 4 0 0 0 3 0 0 0 0 0
Caso 5 1 0 0 0 2 0 0 0 0
Caso 6 0 0 0 0 0 3 0 0 0
Caso 7 0 0 0 0 0 0 2 0 0
Caso 8 0 0 0 0 0 0 0 3 0
Caso 9 1 0 0 0 0 0 0 0 3
# Accuracy and per-class statistics for the test-set predictions.
# `tab` holds the actual classes in rows and the predictions in columns, while
# confusionMatrix() reads a table as predictions in rows and reference classes
# in columns. The table is therefore transposed first; otherwise sensitivity /
# specificity, the predictive values and the prevalence are computed with the
# two roles swapped. Overall accuracy and kappa are unaffected.
a <- confusionMatrix(as.table(t(tab)))
a
## Confusion Matrix and Statistics
## 
##         Predicha
## Actual   Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 Caso 9
##   Caso 1      3      0      0      0      0      0      0      0      0
##   Caso 2      0      4      0      0      0      0      0      0      0
##   Caso 3      0      0      4      0      0      0      0      0      0
##   Caso 4      0      0      0      3      0      0      0      0      0
##   Caso 5      1      0      0      0      2      0      0      0      0
##   Caso 6      0      0      0      0      0      3      0      0      0
##   Caso 7      0      0      0      0      0      0      2      0      0
##   Caso 8      0      0      0      0      0      0      0      3      0
##   Caso 9      1      0      0      0      0      0      0      0      3
## 
## Overall Statistics
##                                           
##                Accuracy : 0.931           
##                  95% CI : (0.7723, 0.9915)
##     No Information Rate : 0.1724          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9221          
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: Caso 1 Class: Caso 2 Class: Caso 3
## Sensitivity                 0.6000        1.0000        1.0000
## Specificity                 1.0000        1.0000        1.0000
## Pos Pred Value              1.0000        1.0000        1.0000
## Neg Pred Value              0.9231        1.0000        1.0000
## Prevalence                  0.1724        0.1379        0.1379
## Detection Rate              0.1034        0.1379        0.1379
## Detection Prevalence        0.1034        0.1379        0.1379
## Balanced Accuracy           0.8000        1.0000        1.0000
##                      Class: Caso 4 Class: Caso 5 Class: Caso 6
## Sensitivity                 1.0000       1.00000        1.0000
## Specificity                 1.0000       0.96296        1.0000
## Pos Pred Value              1.0000       0.66667        1.0000
## Neg Pred Value              1.0000       1.00000        1.0000
## Prevalence                  0.1034       0.06897        0.1034
## Detection Rate              0.1034       0.06897        0.1034
## Detection Prevalence        0.1034       0.10345        0.1034
## Balanced Accuracy           1.0000       0.98148        1.0000
##                      Class: Caso 7 Class: Caso 8 Class: Caso 9
## Sensitivity                1.00000        1.0000        1.0000
## Specificity                1.00000        1.0000        0.9615
## Pos Pred Value             1.00000        1.0000        0.7500
## Neg Pred Value             1.00000        1.0000        1.0000
## Prevalence                 0.06897        0.1034        0.1034
## Detection Rate             0.06897        0.1034        0.1034
## Detection Prevalence       0.06897        0.1034        0.1379
## Balanced Accuracy          1.00000        1.0000        0.9808
#a$byClass