Carga de librerias

library(rpart)
## Warning: package 'rpart' was built under R version 3.5.3
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 3.5.3
library(C50)
## Warning: package 'C50' was built under R version 3.5.3
library(nomclust)
## Warning: package 'nomclust' was built under R version 3.5.3
library(readxl)
## Warning: package 'readxl' was built under R version 3.5.3
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 3.5.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.3
## Loading required package: magrittr
## Warning: package 'magrittr' was built under R version 3.5.2
library(ggplot2)
library(knitr)
## Warning: package 'knitr' was built under R version 3.5.3
library(e1071)
## Warning: package 'e1071' was built under R version 3.5.3
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.5.3
library(caret)
## Warning: package 'caret' was built under R version 3.5.3
## Loading required package: lattice

1) Lectura de datos

# Load the raw sensor readings from the Excel workbook and list its columns.
data <- read_excel("hidro con falla.xlsx")
colnames(data)
##  [1] "TMRCLA"  "TMA1LA"  "TMA2LA"  "TACCLA"  "VEACLA"  "VERCLA"  "TMRCLOA"
##  [8] "TACCLOA" "VER"     "TTA"     "PAA"     "TAaT"    "TAaC"
# Distribution of PAA before filtering.
hist(data$PAA, col = c("red", "blue"))

# Keep only rows whose PAA reading is non-zero. which() is used so that rows
# with a missing PAA are dropped; indexing with a logical vector containing NA
# would instead insert rows made entirely of NA.
data <- data[which(data$PAA != 0), ]
# Distribution of PAA after filtering.
hist(data$PAA, col = c("red", "blue"))

head(data)
## # A tibble: 6 x 13
##   TMRCLA TMA1LA TMA2LA TACCLA VEACLA VERCLA TMRCLOA TACCLOA   VER   TTA
##    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>   <dbl>   <dbl> <dbl> <dbl>
## 1   42.0   60     36.0   38     1.52  1.22     59      27.0  1.60  38.0
## 2   42     64.0   36     38     1.49  1.18     59.7    26.0  2.13  38.2
## 3   43.0   57     37     40     1.11  1.14     59.0    27    2.4   37.2
## 4   46.0   61.0   41.0   44.0   1.44  0.468    61.0    40.0  1.44  37.8
## 5   44.0   59.0   36.0   42.0   1.27  0.445    59.0    39.0  1.45  36.4
## 6   43.0   65     37     38.2   1.23  1.17     60.0    25.0  2.14  37.2
## # ... with 3 more variables: PAA <dbl>, TAaT <dbl>, TAaC <dbl>
a=1

2) Parametros de variables

# Alarm / trip set-points, one pair per monitored variable, given in the same
# order as the row labels assigned below.
p1 <- c(80, 85)
p2 <- c(80, 85)
p3 <- c(80, 85)
p4 <- c(55, 60)
p5 <- c(2.5, 3.5)
p6 <- c(2.5, 3.5)
p7 <- c(80, 85)
p8 <- c(60, 65)
p9 <- c(2.5, 3.5)
p10 <- c(55, 60)
p11 <- c(127, 130)
p12 <- c(50, 54)
p13 <- c(42, 48)

# Stack the pairs into a 13 x 2 matrix, then expose them as a data frame with
# one row per variable and the columns "Alarm" and "Trip".
limites <- rbind(p1, p2, p3, p4, p5, p6, p7, p8, p9, p10, p11, p12, p13)
parametro <- data.frame(
  Alarm = unname(limites[, 1]),
  Trip = unname(limites[, 2]),
  row.names = c(
    "TMRCLA", "TMA1LA", "TMA2LA", "TACCLA", "VEACLA", "VERCLA", "TMRCLOA",
    "TACCLOA", "VER", "TTA", "PAA", "TAaT", "TAaC"
  )
)
parametro
##         Alarm  Trip
## TMRCLA   80.0  85.0
## TMA1LA   80.0  85.0
## TMA2LA   80.0  85.0
## TACCLA   55.0  60.0
## VEACLA    2.5   3.5
## VERCLA    2.5   3.5
## TMRCLOA  80.0  85.0
## TACCLOA  60.0  65.0
## VER       2.5   3.5
## TTA      55.0  60.0
## PAA     127.0 130.0
## TAaT     50.0  54.0
## TAaC     42.0  48.0
a=2

3) Transformacion de datos continuos a discretos

# Discretise every continuous reading into a state label.
#
# Variables with a single (high-side) limit take their thresholds from the
# `parametro` table instead of repeating the numbers here:
#   value >= Trip  -> "Trip"
#   value >= Alarm -> "Alarm"
#   otherwise      -> "Ok"
columnas_un_lado <- c(
  "TMRCLA", "TMA1LA", "TMA2LA", "TACCLA", "VEACLA", "VERCLA",
  "TMRCLOA", "TACCLOA", "VER", "TTA", "TAaT", "TAaC"
)
for (columna in columnas_un_lado) {
  lectura <- data[[columna]]
  data[[columna]] <- ifelse(
    lectura >= parametro[columna, "Alarm"],
    ifelse(lectura >= parametro[columna, "Trip"], "Trip", "Alarm"),
    "Ok"
  )
}

# PAA has limits on both sides, and the high-side limits (150 / 157) are not
# part of `parametro`, so its thresholds stay literal.
# The high-side alarm label is "Alarm High" (word order differs from
# "Low Alarm"); any later comparison must use this exact string.
data$PAA <- ifelse(
  data$PAA <= 130,
  ifelse(data$PAA <= 127, "Low Trip", "Low Alarm"),
  ifelse(
    data$PAA >= 150,
    ifelse(data$PAA >= 157, "High Trip", "Alarm High"),
    "Ok"
  )
)
head(data)
## # A tibble: 6 x 13
##   TMRCLA TMA1LA TMA2LA TACCLA VEACLA VERCLA TMRCLOA TACCLOA VER   TTA  
##   <chr>  <chr>  <chr>  <chr>  <chr>  <chr>  <chr>   <chr>   <chr> <chr>
## 1 Ok     Ok     Ok     Ok     Ok     Ok     Ok      Ok      Ok    Ok   
## 2 Ok     Ok     Ok     Ok     Ok     Ok     Ok      Ok      Ok    Ok   
## 3 Ok     Ok     Ok     Ok     Ok     Ok     Ok      Ok      Ok    Ok   
## 4 Ok     Ok     Ok     Ok     Ok     Ok     Ok      Ok      Ok    Ok   
## 5 Ok     Ok     Ok     Ok     Ok     Ok     Ok      Ok      Ok    Ok   
## 6 Ok     Ok     Ok     Ok     Ok     Ok     Ok      Ok      Ok    Ok   
## # ... with 3 more variables: PAA <chr>, TAaT <chr>, TAaC <chr>
a=3

4) Evaluación de eventos registrados con fallas y sin fallas

# Flag every record: "EN FALLA" when at least one variable is in a trip state
# (PAA can trip on either side), "SIN FALLA" otherwise.
columnas_trip <- c(
  "TMRCLA", "TMA1LA", "TMA2LA", "TACCLA", "VEACLA", "VERCLA",
  "TMRCLOA", "TACCLOA", "VER", "TTA", "TAaT", "TAaC"
)
hay_trip <- Reduce(
  `|`,
  lapply(columnas_trip, function(columna) data[[columna]] == "Trip")
)
hay_trip <- hay_trip | data$PAA == "Low Trip" | data$PAA == "High Trip"
data$evento <- ifelse(hay_trip, "EN FALLA", "SIN FALLA")

# Faulty records keep every column, including `evento`.
Trips <- data[data$evento == "EN FALLA", ]
# Fault-free records drop the `evento` column.
Ok <- data[data$evento == "SIN FALLA", names(data) != "evento"]
# The original script had `Trips$evento<=NULL` here: a comparison (typo for an
# assignment) that changed nothing and only printed logical(0). It is removed
# rather than turned into an assignment because the code that follows indexes
# `Trips` by position (columns 1:14, -14) and relies on `evento` staying in
# column 14.
## logical(0)
head(data)
## # A tibble: 6 x 14
##   TMRCLA TMA1LA TMA2LA TACCLA VEACLA VERCLA TMRCLOA TACCLOA VER   TTA  
##   <chr>  <chr>  <chr>  <chr>  <chr>  <chr>  <chr>   <chr>   <chr> <chr>
## 1 Ok     Ok     Ok     Ok     Ok     Ok     Ok      Ok      Ok    Ok   
## 2 Ok     Ok     Ok     Ok     Ok     Ok     Ok      Ok      Ok    Ok   
## 3 Ok     Ok     Ok     Ok     Ok     Ok     Ok      Ok      Ok    Ok   
## 4 Ok     Ok     Ok     Ok     Ok     Ok     Ok      Ok      Ok    Ok   
## 5 Ok     Ok     Ok     Ok     Ok     Ok     Ok      Ok      Ok    Ok   
## 6 Ok     Ok     Ok     Ok     Ok     Ok     Ok      Ok      Ok    Ok   
## # ... with 4 more variables: PAA <chr>, TAaT <chr>, TAaC <chr>,
## #   evento <chr>
head(Trips[,1:14])
## # A tibble: 6 x 14
##   TMRCLA TMA1LA TMA2LA TACCLA VEACLA VERCLA TMRCLOA TACCLOA VER   TTA  
##   <chr>  <chr>  <chr>  <chr>  <chr>  <chr>  <chr>   <chr>   <chr> <chr>
## 1 Ok     Ok     Alarm  Ok     Alarm  Ok     Ok      Ok      Trip  Alarm
## 2 Alarm  Trip   Trip   Ok     Ok     Ok     Ok      Ok      Ok    Ok   
## 3 Ok     Alarm  Alarm  Alarm  Ok     Ok     Ok      Alarm   Ok    Ok   
## 4 Ok     Ok     Ok     Ok     Ok     Ok     Ok      Ok      Ok    Alarm
## 5 Ok     Alarm  Alarm  Ok     Ok     Ok     Trip    Ok      Alarm Ok   
## 6 Alarm  Alarm  Alarm  Alarm  Ok     Ok     Ok      Alarm   Ok    Ok   
## # ... with 4 more variables: PAA <chr>, TAaT <chr>, TAaC <chr>,
## #   evento <chr>
# Relative frequency of each PAA state among the faulty records.
frecuencia_paa <- prop.table(table(Trips$PAA))
barplot(frecuencia_paa, col = c("blue", "green", "orange", "red"))

# Write the three data sets to disk.
# NOTE(review): the ".cvs" extension looks like a typo for ".csv"; the names
# are left untouched because other code may read these exact files - confirm.
write.csv(Trips, file = "Trips.cvs")
write.csv(Ok, file = "Ok.cvs")
write.csv(data, file = "Datos discretos.cvs")
a<-1

5) Normalizacion de los Estados

  Tabla<- Trips
# One-hot encode every state of every variable as a 0/1 indicator column
# appended to `Tabla`. Column names are <variable><suffix>, created in the
# order variable by variable, state by state.
estados_base <- c(ok = "Ok", Alarm = "Alarm", Trip = "Trip")
# PAA has five states. The high-side alarm is labelled "Alarm High" by the
# discretisation step; the previous comparison against "High Alarm" never
# matched, which left PAAAlarmA at 0 for every record.
estados_paa <- c(
  ok = "Ok",
  AlarmB = "Low Alarm",
  AlarmA = "Alarm High",
  TripB = "Low Trip",
  TripA = "High Trip"
)
columnas_estado <- c(
  "TMRCLA", "TMA1LA", "TMA2LA", "TACCLA", "VEACLA", "VERCLA", "TMRCLOA",
  "TACCLOA", "VER", "TTA", "PAA", "TAaT", "TAaC"
)

for (columna in columnas_estado) {
  estados <- if (columna == "PAA") estados_paa else estados_base
  for (sufijo in names(estados)) {
    Tabla[[paste0(columna, sufijo)]] <-
      as.numeric(Trips[[columna]] == estados[[sufijo]])
  }
}

# Keep only the indicator columns: the leading columns of `Tabla` are the
# original columns copied from `Trips`, so drop exactly that many.
Datos_para_cluster <- Tabla[, -seq_len(ncol(Trips))]

head(Datos_para_cluster[
  , c("PAAok", "PAAAlarmB", "PAAAlarmA", "PAATripB", "PAATripA")
])
## # A tibble: 6 x 5
##   PAAok PAAAlarmB PAAAlarmA PAATripB PAATripA
##   <dbl>     <dbl>     <dbl>    <dbl>    <dbl>
## 1     1         0         0        0        0
## 2     0         1         0        0        0
## 3     1         0         0        0        0
## 4     0         0         0        0        0
## 5     0         1         0        0        0
## 6     1         0         0        0        0
a<-1

5) Determinar Numero optimo de Cluster

library(ggplot2)
# Elbow analysis: between-cluster and total within-cluster sum of squares for
# k = 1..15. Each k is fitted once, so both curves describe the same
# partition (fitting twice gave values from two different random solutions).
# iter.max is raised from the default of 10 to avoid the
# "did not converge in 10 iterations" warning.
k_candidatos <- 1:15
ajustes <- lapply(k_candidatos, function(k) {
  kmeans(Datos_para_cluster, centers = k, nstart = 50, iter.max = 50)
})
sumbt <- vapply(ajustes, function(ajuste) ajuste$betweenss, numeric(1))
sumbt2 <- vapply(ajustes, function(ajuste) ajuste$tot.withinss, numeric(1))

plot(
  k_candidatos, sumbt,
  type = "o", col = "blue", lwd = 1,
  main = "Optimal Number of Cluster",
  xlab = "Number of cluster", ylab = "Distance",
  las = 1, col.axis = "black"
)
lines(k_candidatos, sumbt2, type = "o", col = "green", lwd = 1)
legend(
  "bottomleft",
  col = c("blue", "green"),
  legend = c("Betweenss cluster", "Withinss cluster"),
  lwd = 2, bty = "n", inset = 0.6
)
points(k_candidatos, sumbt, pch = 21, bg = "white")
points(k_candidatos, sumbt2, pch = 21, bg = "white")
# Vertical marker at the chosen number of clusters (k = 8).
abline(v = 8, col = "red", lwd = 2, lty = 2)

a<-1

6) Cluster Jerarquico

library(ggdendro)
## Warning: package 'ggdendro' was built under R version 3.5.3
library(scatterplot3d)
## Warning: package 'scatterplot3d' was built under R version 3.5.2
library(dendextend)
## Warning: package 'dendextend' was built under R version 3.5.3
## 
## ---------------------
## Welcome to dendextend version 1.12.0
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:ggdendro':
## 
##     theme_dendro
## The following object is masked from 'package:ggpubr':
## 
##     rotate
## The following object is masked from 'package:rpart':
## 
##     prune
## The following object is masked from 'package:stats':
## 
##     cutree
library(factoextra)
## Warning: package 'factoextra' was built under R version 3.5.3
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
library(grDevices)
library(cluster)
## Warning: package 'cluster' was built under R version 3.5.3
library(vegan)
## Warning: package 'vegan' was built under R version 3.5.3
## Loading required package: permute
## Warning: package 'permute' was built under R version 3.5.3
## 
## Attaching package: 'permute'
## The following object is masked from 'package:dendextend':
## 
##     shuffle
## This is vegan 2.5-6
## 
## Attaching package: 'vegan'
## The following object is masked from 'package:caret':
## 
##     tolerance
# Number of groups the hierarchy is cut into.
nk <- 8
# Pairwise Jaccard dissimilarities between the one-hot encoded fault records.
dist <- vegdist(Datos_para_cluster, method = "jaccard")

# Linkage methods that were evaluated:
#   ward.D, single, complete, average, ward.D2

h <- hcut(
  dist,
  k = nk,
  stand = TRUE,
  hc_func = "hclust",
  hc_method = "ward.D2"
)
a <- 1
titulo_graficos <- "Distance function Jaccard, grouping method Ward. D2"

# Clusters in two dimensions
mapa_clusters <- fviz_cluster(
  h,
  geom = "point",
  show.clust.cent = TRUE,
  main = titulo_graficos,
  pointsize = 1,
  xlab = "Dimension 1",
  ylab = "Dimension 2"
)
plot(mapa_clusters)

# Hierarchical clustering dendrogram
dendrograma <- fviz_dend(
  h,
  rect = TRUE,
  k = nk,
  show_labels = FALSE,
  ylab = "Distance",
  xlab = "Clusters",
  lwd = 0.1,
  main = titulo_graficos
)
plot(dendrograma)

a<-1

7) Asignar Cluster a registro

# Fix the random seed; the later train/test partition draws its rows with
# sample().
set.seed(80)

# Earlier alternative kept for reference (Euclidean distance + agnes/complete):
#distancia_scalada<- dist(scalada, method = 'euclidean')
#cluster<- hcut(distancia_scalada, k=9, stand = TRUE, hc_func ="agnes", hc_method = "complete")
# Attach the cluster id of each faulty record as a new column of `Trips`.
Trips$cluster <- h$cluster
a<-1

8) Organizar Datos despues del cluster y asignar etiquetas

# One data set per cluster of faulty records.
cluster1 <- Trips[h$cluster == 1, ]
cluster2 <- Trips[h$cluster == 2, ]
cluster3 <- Trips[h$cluster == 3, ]
cluster4 <- Trips[h$cluster == 4, ]
cluster5 <- Trips[h$cluster == 5, ]
cluster6 <- Trips[h$cluster == 6, ]
cluster7 <- Trips[h$cluster == 7, ]
cluster8 <- Trips[h$cluster == 8, ]

# Labelled data: the 13 state columns plus the case label. The helper columns
# `evento` and `cluster` are dropped by name rather than by position.
datos_listos <- Trips[, setdiff(names(Trips), c("evento", "cluster"))]
# "Caso <k>" is built straight from the cluster id, so it works for any number
# of clusters (the nested ifelse() produced "NaN" for ids above 8).
datos_listos$Caso <- paste("Caso", h$cluster)
kable(datos_listos[1:5, ])
TMRCLA TMA1LA TMA2LA TACCLA VEACLA VERCLA TMRCLOA TACCLOA VER TTA PAA TAaT TAaC Caso
Ok Ok Alarm Ok Alarm Ok Ok Ok Trip Alarm Ok Ok Ok Caso 1
Alarm Trip Trip Ok Ok Ok Ok Ok Ok Ok Low Alarm Ok Ok Caso 2
Ok Alarm Alarm Alarm Ok Ok Ok Alarm Ok Ok Ok Ok Trip Caso 3
Ok Ok Ok Ok Ok Ok Ok Ok Ok Alarm Alarm High Trip Alarm Caso 4
Ok Alarm Alarm Ok Ok Ok Trip Ok Alarm Ok Low Alarm Ok Ok Caso 5
a<-1

# Fault-free records: tagged with cluster id 9 and the label "OK".
SF <- Ok
# SF <- unique(Ok)
SF[["cluster"]] <- 9
SF[["Caso"]] <- "OK"

## Remove duplicated rows inside each fault cluster

cluster1 <- cluster1[!duplicated(cluster1), ]
cluster2 <- cluster2[!duplicated(cluster2), ]
cluster3 <- cluster3[!duplicated(cluster3), ]
cluster4 <- cluster4[!duplicated(cluster4), ]
cluster5 <- cluster5[!duplicated(cluster5), ]
cluster6 <- cluster6[!duplicated(cluster6), ]
cluster7 <- cluster7[!duplicated(cluster7), ]
cluster8 <- cluster8[!duplicated(cluster8), ]

9) Preparar Set de entrenamiento y test para la red Bayesiana

# Train / test partition. 40 % of every group (fault-free rows and each fault
# cluster) is held out for testing; the remaining 60 % is used for training.
# The groups are sampled in the order fault-free, cluster 1, ..., cluster 8.
fraccion_test <- 0.4

# Randomly split the rows of `df`: floor(frac * n) rows go to `test`, all
# remaining rows (in their original order) go to `train`.
partir_filas <- function(df, frac) {
  n_filas <- nrow(df)
  idx_test <- sample(n_filas, floor(frac * n_filas))
  # setdiff() instead of df[-idx_test, ]: when idx_test is empty, negative
  # indexing selects zero rows and would silently empty the training part.
  idx_train <- setdiff(seq_len(n_filas), idx_test)
  list(test = df[idx_test, ], train = df[idx_train, ])
}

# Fault-free records (already carry `cluster` and `Caso`).
particion_sf <- partir_filas(SF, fraccion_test)
Test_SF <- particion_sf$test
Train_SF <- particion_sf$train

# Fault clusters: drop the `evento` column, split, then label both parts with
# "Caso <k>".
clusters <- list(
  cluster1, cluster2, cluster3, cluster4,
  cluster5, cluster6, cluster7, cluster8
)
particiones <- lapply(seq_along(clusters), function(k) {
  sin_evento <- clusters[[k]][, names(clusters[[k]]) != "evento"]
  parte <- partir_filas(sin_evento, fraccion_test)
  parte$test$Caso <- paste("Caso", k)
  parte$train$Caso <- paste("Caso", k)
  parte
})

# Keep the per-cluster objects (Test_C1 ... Train_C8) available under their
# original names.
for (k in seq_along(particiones)) {
  assign(paste0("Test_C", k), particiones[[k]]$test)
  assign(paste0("Train_C", k), particiones[[k]]$train)
}

# Stack the partitions: clusters first, fault-free rows last. Only the test
# set is de-duplicated.
Test <- do.call(rbind, c(lapply(particiones, `[[`, "test"), list(Test_SF)))
Test <- unique(Test)
Train <- do.call(rbind, c(lapply(particiones, `[[`, "train"), list(Train_SF)))

# The numeric cluster id is not a predictor; keep only the states and `Caso`.
Test <- Test[, names(Test) != "cluster"]
Train <- Train[, names(Train) != "cluster"]
write.csv(Train, "Train.cvs")
a<-1

10) Crear modelo de Naive Bayes

# Fit a Naive Bayes classifier that predicts `Caso` from every other column
# of the training set, with Laplace smoothing enabled.
modelo <- naive_bayes(
  Caso ~ .,
  data = Train,
  laplace = TRUE
)
# Prior probability of each class.
kable(modelo[["prior"]])
Var1 Freq
Caso 1 0.0014529
Caso 2 0.0017171
Caso 3 0.0009246
Caso 4 0.0010567
Caso 5 0.0018492
Caso 6 0.0009246
Caso 7 0.0010567
Caso 8 0.0007925
OK 0.9902259
kable(modelo$tables$TMRCLA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
Alarm 0.1538462 0.4666667 0.8888889 0.1 0.0625 0.1111111 0.6 0.5 0.0012002
Ok 0.8461538 0.5333333 0.1111111 0.9 0.9375 0.8888889 0.4 0.5 0.9987998
kable(modelo$tables$TMA1LA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
Alarm 0.5714286 0.0625 0.8 0.0909091 0.5294118 0.1 0.7272727 0.7777778 0.0012000
Ok 0.3571429 0.0625 0.1 0.8181818 0.4117647 0.8 0.1818182 0.1111111 0.9986667
Trip 0.0714286 0.8750 0.1 0.0909091 0.0588235 0.1 0.0909091 0.1111111 0.0001333
kable(modelo$tables$TMA2LA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
Alarm 0.5000000 0.4375 0.7 0.2727273 0.4117647 0.1 0.5454545 0.5555556 0.0012000
Ok 0.4285714 0.1250 0.2 0.6363636 0.5294118 0.8 0.3636364 0.3333333 0.9986667
Trip 0.0714286 0.4375 0.1 0.0909091 0.0588235 0.1 0.0909091 0.1111111 0.0001333
kable(modelo$tables$TACCLA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
Alarm 0.1538462 0.6666667 0.6666667 0.1 0.0625 0.1111111 0.9 0.125 0.0009335
Ok 0.8461538 0.3333333 0.3333333 0.9 0.9375 0.8888889 0.1 0.875 0.9990665
kable(modelo$tables$VEACLA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
Alarm 0.9230769 0.5333333 0.2222222 0.1 0.75 0.5555556 0.7 0.125 0.0008001
Ok 0.0769231 0.4666667 0.7777778 0.9 0.25 0.4444444 0.3 0.875 0.9991999
kable(modelo$tables$VERCLA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
Alarm 0.4615385 0.0666667 0.1111111 0.1 0.0625 0.5555556 0.9 0.125 0.0140019
Ok 0.5384615 0.9333333 0.8888889 0.9 0.9375 0.4444444 0.1 0.875 0.9859981
kable(modelo$tables$TMRCLOA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
Alarm 0.0714286 0.0625 0.1 0.1818182 0.0588235 0.1 0.3636364 0.5555556 0.0001333
Ok 0.8571429 0.8750 0.8 0.7272727 0.0588235 0.8 0.5454545 0.3333333 0.9997333
Trip 0.0714286 0.0625 0.1 0.0909091 0.8823529 0.1 0.0909091 0.1111111 0.0001333
kable(modelo$tables$TACCLOA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
Alarm 0.0769231 0.0666667 0.8888889 0.1 0.6875 0.1111111 0.7 0.5 0.0009335
Ok 0.9230769 0.9333333 0.1111111 0.9 0.3125 0.8888889 0.3 0.5 0.9990665
kable(modelo$tables$VER)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
Alarm 0.0714286 0.0625 0.2 0.0909091 0.5294118 0.6 0.8181818 0.1111111 0.0020000
Ok 0.0714286 0.8750 0.7 0.8181818 0.4117647 0.3 0.0909091 0.7777778 0.9978667
Trip 0.8571429 0.0625 0.1 0.0909091 0.0588235 0.1 0.0909091 0.1111111 0.0001333
kable(modelo$tables$TTA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
Alarm 0.6153846 0.0666667 0.1111111 0.4 0.0625 0.6666667 0.1 0.125 0.0005334
Ok 0.3846154 0.9333333 0.8888889 0.6 0.9375 0.3333333 0.9 0.875 0.9994666
kable(modelo$tables$PAA)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
Alarm High 0.0625 0.0555556 0.0833333 0.3076923 0.0526316 0.0833333 0.0769231 0.0909091 0.0001333
High Trip 0.0625 0.0555556 0.0833333 0.0769231 0.0526316 0.0833333 0.6923077 0.0909091 0.0001333
Low Alarm 0.0625 0.5000000 0.0833333 0.0769231 0.7894737 0.0833333 0.0769231 0.0909091 0.0021328
Low Trip 0.0625 0.0555556 0.0833333 0.0769231 0.0526316 0.6666667 0.0769231 0.6363636 0.0001333
Ok 0.7500 0.3333333 0.6666667 0.4615385 0.0526316 0.0833333 0.0769231 0.0909091 0.9974673
kable(modelo$tables$TAaT)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
Alarm 0.0714286 0.0625 0.5 0.0909091 0.0588235 0.8 0.0909091 0.7777778 0.0162667
Ok 0.8571429 0.8750 0.4 0.0909091 0.8823529 0.1 0.8181818 0.1111111 0.9836000
Trip 0.0714286 0.0625 0.1 0.8181818 0.0588235 0.1 0.0909091 0.1111111 0.0001333
kable(modelo$tables$TAaC)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
Alarm 0.0714286 0.0625 0.1 0.4545455 0.0588235 0.1 0.0909091 0.1111111 0.0086667
Ok 0.8571429 0.8750 0.1 0.4545455 0.8823529 0.8 0.8181818 0.7777778 0.9912000
Trip 0.0714286 0.0625 0.8 0.0909091 0.0588235 0.1 0.0909091 0.1111111 0.0001333
a<-4

11) Prueba completa del set de test

Como el sistema de identificación de fallas funcionó correctamente con una observación del set de Test, se procede a evaluar la totalidad de dicho set.

# Fix the random seed before predicting.
set.seed(800)
# Predict the class of every row of the test set with the fitted model.
# NOTE(review): threshold = 100 is an unusual value - confirm what it is meant
# to do against the naivebayes predict() documentation.
pred <- predict(modelo, Test, threshold = 100)
# Print the predicted classes.
pred
##  [1] Caso 1 Caso 1 Caso 1 Caso 1 Caso 1 Caso 1 Caso 2 Caso 2 Caso 2 Caso 2
## [11] Caso 2 Caso 2 Caso 2 Caso 2 Caso 3 Caso 3 Caso 3 Caso 3 Caso 6 Caso 4
## [21] Caso 4 Caso 4 Caso 4 Caso 5 Caso 5 Caso 5 Caso 5 Caso 5 Caso 5 Caso 5
## [31] Caso 5 Caso 6 Caso 6 Caso 6 Caso 6 Caso 7 Caso 7 Caso 7 Caso 7 Caso 7
## [41] Caso 8 Caso 8 Caso 8 Caso 8 OK     OK     OK     Caso 6 OK     OK    
## [51] Caso 1 Caso 3 OK     OK     OK     OK     Caso 6 OK    
## Levels: Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
# Contingency table of the test set: rows are the actual labels ("Actual"),
# columns are the predicted labels ("Predicha").
tab <- table(Test$Caso, pred, dnn = c("Actual", "Predicha"))
kable(tab)
Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
Caso 1 6 0 0 0 0 0 0 0 0
Caso 2 0 8 0 0 0 0 0 0 0
Caso 3 0 0 4 0 0 0 0 0 0
Caso 4 0 0 0 4 0 1 0 0 0
Caso 5 0 0 0 0 8 0 0 0 0
Caso 6 0 0 0 0 0 4 0 0 0
Caso 7 0 0 0 0 0 0 5 0 0
Caso 8 0 0 0 0 0 0 0 4 0
OK 1 0 1 0 0 2 0 0 10
# caret::confusionMatrix() treats the rows of a table as predictions and the
# columns as the reference. `tab` is laid out the other way round (actual
# labels in rows), so passing it directly swaps per-class statistics such as
# sensitivity and positive predictive value. The predicted and actual factors
# are passed explicitly instead, with the actual labels using the same levels
# as the predictions.
a <- confusionMatrix(
  data = pred,
  reference = factor(Test$Caso, levels = levels(pred))
)
print(a)
## Confusion Matrix and Statistics
## 
##         Predicha
## Actual   Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
##   Caso 1      6      0      0      0      0      0      0      0  0
##   Caso 2      0      8      0      0      0      0      0      0  0
##   Caso 3      0      0      4      0      0      0      0      0  0
##   Caso 4      0      0      0      4      0      1      0      0  0
##   Caso 5      0      0      0      0      8      0      0      0  0
##   Caso 6      0      0      0      0      0      4      0      0  0
##   Caso 7      0      0      0      0      0      0      5      0  0
##   Caso 8      0      0      0      0      0      0      0      4  0
##   OK          1      0      1      0      0      2      0      0 10
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9138          
##                  95% CI : (0.8102, 0.9714)
##     No Information Rate : 0.1724          
##     P-Value [Acc > NIR] : < 2.2e-16       
##                                           
##                   Kappa : 0.9015          
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: Caso 1 Class: Caso 2 Class: Caso 3
## Sensitivity                 0.8571        1.0000       0.80000
## Specificity                 1.0000        1.0000       1.00000
## Pos Pred Value              1.0000        1.0000       1.00000
## Neg Pred Value              0.9808        1.0000       0.98148
## Prevalence                  0.1207        0.1379       0.08621
## Detection Rate              0.1034        0.1379       0.06897
## Detection Prevalence        0.1034        0.1379       0.06897
## Balanced Accuracy           0.9286        1.0000       0.90000
##                      Class: Caso 4 Class: Caso 5 Class: Caso 6
## Sensitivity                1.00000        1.0000       0.57143
## Specificity                0.98148        1.0000       1.00000
## Pos Pred Value             0.80000        1.0000       1.00000
## Neg Pred Value             1.00000        1.0000       0.94444
## Prevalence                 0.06897        0.1379       0.12069
## Detection Rate             0.06897        0.1379       0.06897
## Detection Prevalence       0.08621        0.1379       0.06897
## Balanced Accuracy          0.99074        1.0000       0.78571
##                      Class: Caso 7 Class: Caso 8 Class: OK
## Sensitivity                1.00000       1.00000    1.0000
## Specificity                1.00000       1.00000    0.9167
## Pos Pred Value             1.00000       1.00000    0.7143
## Neg Pred Value             1.00000       1.00000    1.0000
## Prevalence                 0.08621       0.06897    0.1724
## Detection Rate             0.08621       0.06897    0.1724
## Detection Prevalence       0.08621       0.06897    0.2414
## Balanced Accuracy          1.00000       1.00000    0.9583
# Class proportions in the test set, the training set and the full labelled
# fault data, drawn one after another with the same colour palette.
paleta <- c("blue", "green", "orange", "red")
for (casos in list(Test$Caso, Train$Caso, datos_listos$Caso)) {
  barplot(prop.table(table(casos)), col = paleta)
}


library(ROCR)
# One-vs-rest version of the problem: "Yes" for "Caso 1", "No" for everything
# else, in both the training and the test set.
Train__C1 <- Train
Train__C1$Caso <- ifelse(Train__C1$Caso != "Caso 1", "No", "Yes")
Test__C1 <- Test
Test__C1$Caso <- ifelse(Test__C1$Caso != "Caso 1", "No", "Yes")
# NOTE(review): `method = "class"` does not look like a naive_bayes()
# argument; it is kept as written - confirm it can be dropped.
mod_c1 <- naive_bayes(Caso ~ ., data = Train__C1, method = "class")

# Posterior probability of the positive class, selected by name rather than
# by column position.
pred_c1 <- predict(
  mod_c1, Test__C1,
  type = "prob", threshold = 0.1
)[, "Yes"]
predic_C1 <- prediction(pred_c1, Test__C1$Caso)
# performance() needs the ROCR prediction object; the original passed the
# numeric vector `pred_c1`, which is not a valid input.
per <- performance(predic_C1, "tpr", "fpr")


library(rpart)   # decision tree
library(rattle)  # data set and decision-tree plot
library(ROCR)    # ROC curve

# NOTE(review): `datos` and `sampleTrain` are built from rattle's `weather`
# data but are not referenced again in the visible code - likely leftovers
# from a template; confirm before removing.
datos <- weather
datos <- within(datos, rm("Date", "Location", "RISK_MM"))  # drop unused columns
set.seed(42)  # fix the random number sequence
n_datos <- nrow(datos)
sampleTrain <- sample(n_datos, (n_datos * .7))
# From here on Train / Test are the binary ("Caso 1" yes/no) data sets.
Train <- Train__C1
Test <- Test__C1


# MODEL
#------------------------------------------------------------------------------
modelo.rpart <- rpart(Caso ~ ., Train, method = "class")


# PREDICTION
#------------------------------------------------------------------------------
prob_clases <- predict(modelo.rpart, Test)
predict.rpart <- prob_clases[, 2]  # probability of the second class column
predict.rocr <- prediction(predict.rpart, Test$Caso)
# True-positive rate against false-positive rate.
perf.rocr <- performance(predict.rocr, "tpr", "fpr")


# ROC CURVE PLOT
#------------------------------------------------------------------------------
perf_auc <- performance(predict.rocr, "auc")
auc <- as.numeric(perf_auc@y.values)
titulo_roc <- paste("Area Bajo la Curva =", round(auc, 2))
plot(perf.rocr, type = "o", main = titulo_roc)
abline(a = 0, b = 1)


# DECISION TREE PLOT
#------------------------------------------------------------------------------
fancyRpartPlot(modelo.rpart)