Carga de librerias
library(rpart)
## Warning: package 'rpart' was built under R version 3.5.3
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 3.5.3
library(C50)
## Warning: package 'C50' was built under R version 3.5.3
library(nomclust)
## Warning: package 'nomclust' was built under R version 3.5.3
library(readxl)
## Warning: package 'readxl' was built under R version 3.5.3
library(ggpubr)
## Warning: package 'ggpubr' was built under R version 3.5.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.5.3
## Loading required package: magrittr
## Warning: package 'magrittr' was built under R version 3.5.2
library(ggplot2)
library(knitr)
## Warning: package 'knitr' was built under R version 3.5.3
library(e1071)
## Warning: package 'e1071' was built under R version 3.5.3
library(naivebayes)
## Warning: package 'naivebayes' was built under R version 3.5.3
library(caret)
## Warning: package 'caret' was built under R version 3.5.3
## Loading required package: lattice
1) Lectura de datos
# Load the raw sensor readings from the Excel workbook into `data` and list
# the column names that were read.
archivo_datos <- "hidro con falla.xlsx"
data <- read_excel(archivo_datos)
colnames(data)
## [1] "TMRCLA" "TMA1LA" "TMA2LA" "TACCLA" "VEACLA" "VERCLA" "TMRCLOA"
## [8] "TACCLOA" "VER" "TTA" "PAA" "TAaT" "TAaC"
# Histogram of PAA as read from the file (bar colours alternate red/blue).
hist(data$PAA, col = c("red", "blue"))

# Keep only the rows whose PAA reading is different from zero, then redraw the
# histogram on the filtered data.
paa_distinto_de_cero <- data$PAA != 0
data <- data[paa_distinto_de_cero, ]
hist(data$PAA, col = c("red", "blue"))

# Preview of the filtered table.
head(data)
## # A tibble: 6 x 13
## TMRCLA TMA1LA TMA2LA TACCLA VEACLA VERCLA TMRCLOA TACCLOA VER TTA
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 42.0 60 36.0 38 1.52 1.22 59 27.0 1.60 38.0
## 2 42 64.0 36 38 1.49 1.18 59.7 26.0 2.13 38.2
## 3 43.0 57 37 40 1.11 1.14 59.0 27 2.4 37.2
## 4 46.0 61.0 41.0 44.0 1.44 0.468 61.0 40.0 1.44 37.8
## 5 44.0 59.0 36.0 42.0 1.27 0.445 59.0 39.0 1.45 36.4
## 6 43.0 65 37 38.2 1.23 1.17 60.0 25.0 2.14 37.2
## # ... with 3 more variables: PAA <dbl>, TAaT <dbl>, TAaC <dbl>
a=1
2) Parametros de variables
# Alarm and trip thresholds, one row per monitored variable.
# Each row is c(Alarm, Trip); the row name is the column name used in `data`.
umbrales <- rbind(
  TMRCLA  = c(80, 85),
  TMA1LA  = c(80, 85),
  TMA2LA  = c(80, 85),
  TACCLA  = c(55, 60),
  VEACLA  = c(2.5, 3.5),
  VERCLA  = c(2.5, 3.5),
  TMRCLOA = c(80, 85),
  TACCLOA = c(60, 65),
  VER     = c(2.5, 3.5),
  TTA     = c(55, 60),
  PAA     = c(127, 130),
  TAaT    = c(50, 54),
  TAaC    = c(42, 48)
)
colnames(umbrales) <- c("Alarm", "Trip")

# Data-frame form, indexed as parametro["<variable>", "Alarm" | "Trip"].
parametro <- data.frame(umbrales)
parametro
## Alarm Trip
## TMRCLA 80.0 85.0
## TMA1LA 80.0 85.0
## TMA2LA 80.0 85.0
## TACCLA 55.0 60.0
## VEACLA 2.5 3.5
## VERCLA 2.5 3.5
## TMRCLOA 80.0 85.0
## TACCLOA 60.0 65.0
## VER 2.5 3.5
## TTA 55.0 60.0
## PAA 127.0 130.0
## TAaT 50.0 54.0
## TAaC 42.0 48.0
a=2
3) Transformacion de datos continuos a discretos
# Map a numeric reading to its discrete state using a high-side alarm/trip
# pair: "Trip" when valor >= disparo, "Alarm" when valor >= alarma, otherwise
# "Ok". Vectorised over `valor`; NA readings stay NA.
clasificar_estado <- function(valor, alarma, disparo) {
  ifelse(valor >= alarma, ifelse(valor >= disparo, "Trip", "Alarm"), "Ok")
}

# Every variable except PAA uses a single (high-side) alarm/trip pair. The
# thresholds are read from `parametro` for all of them, so the table is the
# single source of truth instead of being duplicated as literals per column.
variables_umbral_alto <- setdiff(row.names(parametro), "PAA")
for (variable in variables_umbral_alto) {
  data[[variable]] <- clasificar_estado(
    data[[variable]],
    parametro[variable, "Alarm"],
    parametro[variable, "Trip"]
  )
}

# PAA is two-sided: low alarm/trip at 130/127 and high alarm/trip at 150/157.
# The high-side limits are not in `parametro`, so the literals are kept here.
# NOTE(review): `parametro` lists PAA as Alarm = 127 / Trip = 130, while this
# rule trips at 127 and alarms at 130 -- confirm which naming is intended.
# The label "Alarm High" is kept exactly as-is because it ends up in the
# exported data and model tables.
data$PAA <- ifelse(
  data$PAA <= 130,
  ifelse(data$PAA <= 127, "Low Trip", "Low Alarm"),
  ifelse(
    data$PAA >= 150,
    ifelse(data$PAA >= 157, "High Trip", "Alarm High"),
    "Ok"
  )
)

head(data)
## # A tibble: 6 x 13
## TMRCLA TMA1LA TMA2LA TACCLA VEACLA VERCLA TMRCLOA TACCLOA VER TTA
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Ok Ok Ok Ok Ok Ok Ok Ok Ok Ok
## 2 Ok Ok Ok Ok Ok Ok Ok Ok Ok Ok
## 3 Ok Ok Ok Ok Ok Ok Ok Ok Ok Ok
## 4 Ok Ok Ok Ok Ok Ok Ok Ok Ok Ok
## 5 Ok Ok Ok Ok Ok Ok Ok Ok Ok Ok
## 6 Ok Ok Ok Ok Ok Ok Ok Ok Ok Ok
## # ... with 3 more variables: PAA <chr>, TAaT <chr>, TAaC <chr>
a=3
4) evaluacion de eventos registrados con fallas y sin fallas
# A record is a fault event ("EN FALLA") when at least one variable is in a
# trip state. PAA can trip on either side, hence its two labels.
en_falla <- data$TMRCLA == "Trip" |
  data$TMA1LA == "Trip" |
  data$TMA2LA == "Trip" |
  data$TACCLA == "Trip" |
  data$VEACLA == "Trip" |
  data$VERCLA == "Trip" |
  data$TMRCLOA == "Trip" |
  data$TACCLOA == "Trip" |
  data$VER == "Trip" |
  data$TTA == "Trip" |
  data$PAA == "Low Trip" |
  data$PAA == "High Trip" |
  data$TAaT == "Trip" |
  data$TAaC == "Trip"
data$evento <- ifelse(en_falla, "EN FALLA", "SIN FALLA")

# Fault records keep every column, including `evento`.
Trips <- data[data$evento == "EN FALLA", ]
# Non-fault records drop the `evento` flag (selected by name, not position).
Ok <- data[data$evento == "SIN FALLA", colnames(data) != "evento"]

# The former statement `Trips$evento<=NULL` was a mistyped assignment: it is a
# comparison that only printed logical(0) and never removed the column. It is
# deleted rather than turned into `<- NULL` because the rest of the script
# indexes Trips by position and relies on `evento` remaining as column 14.
head(data)
## # A tibble: 6 x 14
## TMRCLA TMA1LA TMA2LA TACCLA VEACLA VERCLA TMRCLOA TACCLOA VER TTA
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Ok Ok Ok Ok Ok Ok Ok Ok Ok Ok
## 2 Ok Ok Ok Ok Ok Ok Ok Ok Ok Ok
## 3 Ok Ok Ok Ok Ok Ok Ok Ok Ok Ok
## 4 Ok Ok Ok Ok Ok Ok Ok Ok Ok Ok
## 5 Ok Ok Ok Ok Ok Ok Ok Ok Ok Ok
## 6 Ok Ok Ok Ok Ok Ok Ok Ok Ok Ok
## # ... with 4 more variables: PAA <chr>, TAaT <chr>, TAaC <chr>,
## # evento <chr>
# Preview of the fault records: the 13 variables plus the `evento` flag.
head(Trips[, seq_len(14)])
## # A tibble: 6 x 14
## TMRCLA TMA1LA TMA2LA TACCLA VEACLA VERCLA TMRCLOA TACCLOA VER TTA
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Ok Ok Alarm Ok Alarm Ok Ok Ok Trip Alarm
## 2 Alarm Trip Trip Ok Ok Ok Ok Ok Ok Ok
## 3 Ok Alarm Alarm Alarm Ok Ok Ok Alarm Ok Ok
## 4 Ok Ok Ok Ok Ok Ok Ok Ok Ok Alarm
## 5 Ok Alarm Alarm Ok Ok Ok Trip Ok Alarm Ok
## 6 Alarm Alarm Alarm Alarm Ok Ok Ok Alarm Ok Ok
## # ... with 4 more variables: PAA <chr>, TAaT <chr>, TAaC <chr>,
## # evento <chr>
# Relative frequency of each PAA state among the fault records.
frecuencia_paa <- prop.table(table(Trips$PAA))
barplot(frecuencia_paa, col = c("blue", "green", "orange", "red"))

# Export the fault records, the non-fault records and the full discretised
# table. The extension was previously misspelled ".cvs"; the files are CSV.
write.csv(Trips, "Trips.csv")
write.csv(Ok, "Ok.csv")
write.csv(data, "Datos discretos.csv")
a<-1
5) Normalizacion de los Estados
# One-hot encode the discrete state of every variable in the fault records.
# For each variable one 0/1 column is added per state, named
# "<variable><suffix>", in the same order as before: ok, Alarm, Trip for the
# one-sided variables and ok, AlarmB, AlarmA, TripB, TripA for PAA.
Tabla <- Trips

# suffix -> state label for the one-sided variables.
estados_estandar <- c(ok = "Ok", Alarm = "Alarm", Trip = "Trip")
# suffix -> state label for PAA (B = low side, A = high side). The high alarm
# label is "Alarm High", exactly as produced by the discretisation step; the
# previous comparison against "High Alarm" never matched, which left the
# PAAAlarmA column permanently at 0.
estados_paa <- c(
  ok     = "Ok",
  AlarmB = "Low Alarm",
  AlarmA = "Alarm High",
  TripB  = "Low Trip",
  TripA  = "High Trip"
)

variables <- c(
  "TMRCLA", "TMA1LA", "TMA2LA", "TACCLA", "VEACLA", "VERCLA", "TMRCLOA",
  "TACCLOA", "VER", "TTA", "PAA", "TAaT", "TAaC"
)

for (variable in variables) {
  estados <- if (variable == "PAA") estados_paa else estados_estandar
  for (sufijo in names(estados)) {
    # 1 when the record is in this state, 0 otherwise (NA stays NA).
    Tabla[[paste0(variable, sufijo)]] <-
      as.numeric(Trips[[variable]] == estados[[sufijo]])
  }
}

# Keep only the indicator columns just created (selected by name so the result
# does not depend on how many columns Trips has).
Datos_para_cluster <- Tabla[, setdiff(names(Tabla), names(Trips))]

# Columns 31 to 35 are the five PAA indicators.
head(Datos_para_cluster[, 31:35])
## # A tibble: 6 x 5
## PAAok PAAAlarmB PAAAlarmA PAATripB PAATripA
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0 0 0 0
## 2 0 1 0 0 0
## 3 1 0 0 0 0
## 4 0 0 0 0 0
## 5 0 1 0 0 0
## 6 1 0 0 0 0
a<-1
5) Determinar Numero optimo de Cluster
library(ggplot2)
# Elbow analysis: for k = 1..15 fit k-means once and record both the
# between-cluster and the total within-cluster sum of squares of that same
# fit. Previously each k was fitted twice (one fit per statistic), so the two
# curves came from different random solutions, and two extra fits with
# centers = 9 were run only to create the result vectors.
k_max <- 15
sumbt <- numeric(k_max)   # between-cluster sum of squares per k
sumbt2 <- numeric(k_max)  # total within-cluster sum of squares per k
for (i in seq_len(k_max)) {
  # iter.max is raised from the default of 10, which produced the
  # "did not converge in 10 iterations" warning.
  ajuste <- kmeans(Datos_para_cluster, centers = i, nstart = 50, iter.max = 50)
  sumbt[i] <- ajuste$betweenss
  sumbt2[i] <- ajuste$tot.withinss
}

k_valores <- seq_len(k_max)
plot(
  k_valores, sumbt,
  type = "o", col = "blue", lwd = 1,
  main = "Optimal Number of Cluster",
  xlab = "Number of cluster", ylab = "Distance",
  las = 1, col.axis = "black"
)
lines(k_valores, sumbt2, type = "o", col = "green", lwd = 1)
legend(
  "bottomleft",
  col = c("blue", "green"),
  legend = c("Betweenss cluster", "Withinss cluster"),
  lwd = 2, bty = "n", inset = 0.6
)
points(k_valores, sumbt, pch = 21, bg = "white")
points(k_valores, sumbt2, pch = 21, bg = "white")
# Vertical marker at the number of clusters chosen from the plot.
abline(v = 8, col = "red", lwd = 2, lty = 2)

a<-1
6) Cluster Jerarquico
library(ggdendro)
## Warning: package 'ggdendro' was built under R version 3.5.3
library(scatterplot3d)
## Warning: package 'scatterplot3d' was built under R version 3.5.2
library(dendextend)
## Warning: package 'dendextend' was built under R version 3.5.3
##
## ---------------------
## Welcome to dendextend version 1.12.0
## Type citation('dendextend') for how to cite the package.
##
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
##
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## Or contact: <tal.galili@gmail.com>
##
## To suppress this message use: suppressPackageStartupMessages(library(dendextend))
## ---------------------
##
## Attaching package: 'dendextend'
## The following object is masked from 'package:ggdendro':
##
## theme_dendro
## The following object is masked from 'package:ggpubr':
##
## rotate
## The following object is masked from 'package:rpart':
##
## prune
## The following object is masked from 'package:stats':
##
## cutree
library(factoextra)
## Warning: package 'factoextra' was built under R version 3.5.3
## Welcome! Related Books: `Practical Guide To Cluster Analysis in R` at https://goo.gl/13EFCZ
library(grDevices)
library(cluster)
## Warning: package 'cluster' was built under R version 3.5.3
library(vegan)
## Warning: package 'vegan' was built under R version 3.5.3
## Loading required package: permute
## Warning: package 'permute' was built under R version 3.5.3
##
## Attaching package: 'permute'
## The following object is masked from 'package:dendextend':
##
## shuffle
## This is vegan 2.5-6
##
## Attaching package: 'vegan'
## The following object is masked from 'package:caret':
##
## tolerance
# Number of clusters the tree is cut into.
nk <- 8

# Jaccard dissimilarities between the rows of the indicator table.
dist <- vegdist(Datos_para_cluster, method = "jaccard")

# Linkage methods that were evaluated:
#   ward.D, single, complete, average, ward.D2
# The fit below uses hclust with ward.D2.
h <- hcut(
  dist,
  k = nk,
  stand = TRUE,
  hc_func = "hclust",
  hc_method = "ward.D2"
)
a<-1
# Clusters in two dimensions
grafico_cluster <- fviz_cluster(
  h,
  geom = "point",
  show.clust.cent = TRUE,
  main = "Distance function Jaccard, grouping method Ward. D2",
  pointsize = 1,
  xlab = "Dimension 1",
  ylab = "Dimension 2"
)
plot(grafico_cluster)

# Hierarchical clustering dendrogram, with one rectangle per cluster
grafico_dendrograma <- fviz_dend(
  h,
  rect = TRUE,
  k = nk,
  show_labels = FALSE,
  ylab = "Distance",
  xlab = "Clusters",
  lwd = 0.1,
  main = "Distance function Jaccard, grouping method Ward. D2"
)
plot(grafico_dendrograma)

a<-1
7) Asignar Cluster a registro
# Fix the random seed for the sampling steps that follow.
set.seed(80)
# Disabled alternative: agnes with complete linkage on Euclidean distances.
#distancia_scalada<- dist(scalada, method = 'euclidean')
#cluster<- hcut(distancia_scalada, k=9, stand = TRUE, hc_func ="agnes", hc_method = "complete")
# Attach the cluster id of each fault record as a new column of Trips.
Trips[["cluster"]] <- h[["cluster"]]
a<-1
8) Organizar Datos despues del cluster y asignar etiquetas
# One table per cluster, holding the fault records assigned to it.
cluster1 <- Trips[h$cluster == 1, ]
cluster2 <- Trips[h$cluster == 2, ]
cluster3 <- Trips[h$cluster == 3, ]
cluster4 <- Trips[h$cluster == 4, ]
cluster5 <- Trips[h$cluster == 5, ]
cluster6 <- Trips[h$cluster == 6, ]
cluster7 <- Trips[h$cluster == 7, ]
cluster8 <- Trips[h$cluster == 8, ]

# Labelled table: the 13 variables plus a "Caso <k>" label per record. The
# helper columns `evento` (column 14) and `cluster` (column 15) are left out.
columnas_auxiliares <- c("evento", "cluster")
datos_listos <- Trips[, !(names(Trips) %in% columnas_auxiliares)]

# Cluster ids 1..8 become "Caso 1".."Caso 8"; any other id falls back to NaN,
# as in the original chain of nested conditions.
id_valido <- h$cluster >= 1 & h$cluster <= 8
datos_listos$Caso <- ifelse(id_valido, paste("Caso", h$cluster), NaN)

# First five labelled records.
kable(datos_listos[1:5, ])
| Ok |
Ok |
Alarm |
Ok |
Alarm |
Ok |
Ok |
Ok |
Trip |
Alarm |
Ok |
Ok |
Ok |
Caso 1 |
| Alarm |
Trip |
Trip |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Low Alarm |
Ok |
Ok |
Caso 2 |
| Ok |
Alarm |
Alarm |
Alarm |
Ok |
Ok |
Ok |
Alarm |
Ok |
Ok |
Ok |
Ok |
Trip |
Caso 3 |
| Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Ok |
Alarm |
Alarm High |
Trip |
Alarm |
Caso 4 |
| Ok |
Alarm |
Alarm |
Ok |
Ok |
Ok |
Trip |
Ok |
Alarm |
Ok |
Low Alarm |
Ok |
Ok |
Caso 5 |
a<-1
# Non-fault records get their own class: cluster id 9, label "OK".
SF <- Ok
#SF<-unique(Ok)
SF[["cluster"]] <- 9
SF[["Caso"]] <- "OK"

# Remove duplicated rows inside each of the eight cluster tables.
for (nombre_cluster in paste0("cluster", 1:8)) {
  assign(nombre_cluster, unique(get(nombre_cluster)))
}
9) Preparar Set de entrenamiento y test para la red Bayesiana
# Random train/test split, done separately for the non-fault records (SF) and
# for each of the eight clusters, so every class appears in both sets.
# 40% of each group goes to test and the remaining 60% to training (the old
# comments said 30/70, but the code has always used 0.4).
prop_test <- 0.4

# Split one group into test and train rows.
#   datos: table with the records of the group.
#   caso:  label written into the `Caso` column; NULL leaves `Caso` untouched.
#   prop:  fraction of rows sampled for the test set.
# Returns list(test = ..., train = ...). The `evento` column is dropped when
# present so that every group ends up with the same columns.
particionar_grupo <- function(datos, caso = NULL, prop = prop_test) {
  n <- nrow(datos)
  idx_test <- sample.int(n, floor(prop * n))
  # Complement computed explicitly: negative indexing with an empty selection
  # (datos[-integer(0), ]) selects NO rows, which used to discard groups of
  # one or two records from both sets.
  idx_train <- setdiff(seq_len(n), idx_test)
  datos <- datos[, names(datos) != "evento", drop = FALSE]
  if (!is.null(caso)) {
    datos$Caso <- caso
  }
  list(
    test = datos[idx_test, , drop = FALSE],
    train = datos[idx_train, , drop = FALSE]
  )
}

# Sampling order (SF first, then clusters 1 to 8) is kept so that results for
# a given seed do not change for groups of three or more records.
particion_sf <- particionar_grupo(SF)
Test_SF <- particion_sf$test
Train_SF <- particion_sf$train

grupos <- list(
  cluster1, cluster2, cluster3, cluster4,
  cluster5, cluster6, cluster7, cluster8
)
particiones <- lapply(seq_along(grupos), function(k) {
  particionar_grupo(grupos[[k]], caso = paste("Caso", k))
})

# Stack the per-group partitions: clusters 1..8 first, non-fault records last.
Test <- do.call(rbind, c(lapply(particiones, `[[`, "test"), list(Test_SF)))
Test <- unique(Test)
Train <- do.call(rbind, c(lapply(particiones, `[[`, "train"), list(Train_SF)))

# The numeric cluster id is redundant with `Caso` and must not be a predictor.
Test <- Test[, names(Test) != "cluster", drop = FALSE]
Train <- Train[, names(Train) != "cluster", drop = FALSE]

# Extension was previously misspelled ".cvs"; the file is CSV.
write.csv(Train, "Train.csv")
a<-1
10) Crear modelo de Naive Bayes
# Fit the naive Bayes classifier: `Caso` is the class, every other column of
# Train is a predictor, and Laplace smoothing is enabled.
modelo <- naive_bayes(
  Caso ~ .,
  data = Train,
  laplace = TRUE
)
# Prior probability of each class.
kable(modelo[["prior"]])
| Caso 1 |
0.0014529 |
| Caso 2 |
0.0017171 |
| Caso 3 |
0.0009246 |
| Caso 4 |
0.0010567 |
| Caso 5 |
0.0018492 |
| Caso 6 |
0.0009246 |
| Caso 7 |
0.0010567 |
| Caso 8 |
0.0007925 |
| OK |
0.9902259 |
# Probability of each TMRCLA state within each class (one column per Caso).
kable(modelo[["tables"]][["TMRCLA"]])
| Alarm |
0.1538462 |
0.4666667 |
0.8888889 |
0.1 |
0.0625 |
0.1111111 |
0.6 |
0.5 |
0.0012002 |
| Ok |
0.8461538 |
0.5333333 |
0.1111111 |
0.9 |
0.9375 |
0.8888889 |
0.4 |
0.5 |
0.9987998 |
# Probability of each TMA1LA state within each class (one column per Caso).
kable(modelo[["tables"]][["TMA1LA"]])
| Alarm |
0.5714286 |
0.0625 |
0.8 |
0.0909091 |
0.5294118 |
0.1 |
0.7272727 |
0.7777778 |
0.0012000 |
| Ok |
0.3571429 |
0.0625 |
0.1 |
0.8181818 |
0.4117647 |
0.8 |
0.1818182 |
0.1111111 |
0.9986667 |
| Trip |
0.0714286 |
0.8750 |
0.1 |
0.0909091 |
0.0588235 |
0.1 |
0.0909091 |
0.1111111 |
0.0001333 |
# Probability of each TMA2LA state within each class (one column per Caso).
kable(modelo[["tables"]][["TMA2LA"]])
| Alarm |
0.5000000 |
0.4375 |
0.7 |
0.2727273 |
0.4117647 |
0.1 |
0.5454545 |
0.5555556 |
0.0012000 |
| Ok |
0.4285714 |
0.1250 |
0.2 |
0.6363636 |
0.5294118 |
0.8 |
0.3636364 |
0.3333333 |
0.9986667 |
| Trip |
0.0714286 |
0.4375 |
0.1 |
0.0909091 |
0.0588235 |
0.1 |
0.0909091 |
0.1111111 |
0.0001333 |
# Probability of each TACCLA state within each class (one column per Caso).
kable(modelo[["tables"]][["TACCLA"]])
| Alarm |
0.1538462 |
0.6666667 |
0.6666667 |
0.1 |
0.0625 |
0.1111111 |
0.9 |
0.125 |
0.0009335 |
| Ok |
0.8461538 |
0.3333333 |
0.3333333 |
0.9 |
0.9375 |
0.8888889 |
0.1 |
0.875 |
0.9990665 |
# Probability of each VEACLA state within each class (one column per Caso).
kable(modelo[["tables"]][["VEACLA"]])
| Alarm |
0.9230769 |
0.5333333 |
0.2222222 |
0.1 |
0.75 |
0.5555556 |
0.7 |
0.125 |
0.0008001 |
| Ok |
0.0769231 |
0.4666667 |
0.7777778 |
0.9 |
0.25 |
0.4444444 |
0.3 |
0.875 |
0.9991999 |
# Probability of each VERCLA state within each class (one column per Caso).
kable(modelo[["tables"]][["VERCLA"]])
| Alarm |
0.4615385 |
0.0666667 |
0.1111111 |
0.1 |
0.0625 |
0.5555556 |
0.9 |
0.125 |
0.0140019 |
| Ok |
0.5384615 |
0.9333333 |
0.8888889 |
0.9 |
0.9375 |
0.4444444 |
0.1 |
0.875 |
0.9859981 |
# Probability of each TMRCLOA state within each class (one column per Caso).
kable(modelo[["tables"]][["TMRCLOA"]])
| Alarm |
0.0714286 |
0.0625 |
0.1 |
0.1818182 |
0.0588235 |
0.1 |
0.3636364 |
0.5555556 |
0.0001333 |
| Ok |
0.8571429 |
0.8750 |
0.8 |
0.7272727 |
0.0588235 |
0.8 |
0.5454545 |
0.3333333 |
0.9997333 |
| Trip |
0.0714286 |
0.0625 |
0.1 |
0.0909091 |
0.8823529 |
0.1 |
0.0909091 |
0.1111111 |
0.0001333 |
# Probability of each TACCLOA state within each class (one column per Caso).
kable(modelo[["tables"]][["TACCLOA"]])
| Alarm |
0.0769231 |
0.0666667 |
0.8888889 |
0.1 |
0.6875 |
0.1111111 |
0.7 |
0.5 |
0.0009335 |
| Ok |
0.9230769 |
0.9333333 |
0.1111111 |
0.9 |
0.3125 |
0.8888889 |
0.3 |
0.5 |
0.9990665 |
# Probability of each VER state within each class (one column per Caso).
kable(modelo[["tables"]][["VER"]])
| Alarm |
0.0714286 |
0.0625 |
0.2 |
0.0909091 |
0.5294118 |
0.6 |
0.8181818 |
0.1111111 |
0.0020000 |
| Ok |
0.0714286 |
0.8750 |
0.7 |
0.8181818 |
0.4117647 |
0.3 |
0.0909091 |
0.7777778 |
0.9978667 |
| Trip |
0.8571429 |
0.0625 |
0.1 |
0.0909091 |
0.0588235 |
0.1 |
0.0909091 |
0.1111111 |
0.0001333 |
# Probability of each TTA state within each class (one column per Caso).
kable(modelo[["tables"]][["TTA"]])
| Alarm |
0.6153846 |
0.0666667 |
0.1111111 |
0.4 |
0.0625 |
0.6666667 |
0.1 |
0.125 |
0.0005334 |
| Ok |
0.3846154 |
0.9333333 |
0.8888889 |
0.6 |
0.9375 |
0.3333333 |
0.9 |
0.875 |
0.9994666 |
# Probability of each PAA state within each class (one column per Caso).
kable(modelo[["tables"]][["PAA"]])
| Alarm High |
0.0625 |
0.0555556 |
0.0833333 |
0.3076923 |
0.0526316 |
0.0833333 |
0.0769231 |
0.0909091 |
0.0001333 |
| High Trip |
0.0625 |
0.0555556 |
0.0833333 |
0.0769231 |
0.0526316 |
0.0833333 |
0.6923077 |
0.0909091 |
0.0001333 |
| Low Alarm |
0.0625 |
0.5000000 |
0.0833333 |
0.0769231 |
0.7894737 |
0.0833333 |
0.0769231 |
0.0909091 |
0.0021328 |
| Low Trip |
0.0625 |
0.0555556 |
0.0833333 |
0.0769231 |
0.0526316 |
0.6666667 |
0.0769231 |
0.6363636 |
0.0001333 |
| Ok |
0.7500 |
0.3333333 |
0.6666667 |
0.4615385 |
0.0526316 |
0.0833333 |
0.0769231 |
0.0909091 |
0.9974673 |
# Probability of each TAaT state within each class (one column per Caso).
kable(modelo[["tables"]][["TAaT"]])
| Alarm |
0.0714286 |
0.0625 |
0.5 |
0.0909091 |
0.0588235 |
0.8 |
0.0909091 |
0.7777778 |
0.0162667 |
| Ok |
0.8571429 |
0.8750 |
0.4 |
0.0909091 |
0.8823529 |
0.1 |
0.8181818 |
0.1111111 |
0.9836000 |
| Trip |
0.0714286 |
0.0625 |
0.1 |
0.8181818 |
0.0588235 |
0.1 |
0.0909091 |
0.1111111 |
0.0001333 |
# Probability of each TAaC state within each class (one column per Caso).
kable(modelo[["tables"]][["TAaC"]])
| Alarm |
0.0714286 |
0.0625 |
0.1 |
0.4545455 |
0.0588235 |
0.1 |
0.0909091 |
0.1111111 |
0.0086667 |
| Ok |
0.8571429 |
0.8750 |
0.1 |
0.4545455 |
0.8823529 |
0.8 |
0.8181818 |
0.7777778 |
0.9912000 |
| Trip |
0.0714286 |
0.0625 |
0.8 |
0.0909091 |
0.0588235 |
0.1 |
0.0909091 |
0.1111111 |
0.0001333 |
a<-4
11) Prueba completa del set de test
Como el sistema de identificación de fallas funcionó correctamente con una observación del set de Test, se procede a evaluar la totalidad de dicho set.
set.seed(800)
# Predicted class for every record of the test set.
# NOTE(review): threshold = 100 is far from a probability-like value; confirm
# against the naivebayes documentation that this is the intended setting.
pred <- predict(
  modelo,
  Test,
  threshold = 100
)
pred
## [1] Caso 1 Caso 1 Caso 1 Caso 1 Caso 1 Caso 1 Caso 2 Caso 2 Caso 2 Caso 2
## [11] Caso 2 Caso 2 Caso 2 Caso 2 Caso 3 Caso 3 Caso 3 Caso 3 Caso 6 Caso 4
## [21] Caso 4 Caso 4 Caso 4 Caso 5 Caso 5 Caso 5 Caso 5 Caso 5 Caso 5 Caso 5
## [31] Caso 5 Caso 6 Caso 6 Caso 6 Caso 6 Caso 7 Caso 7 Caso 7 Caso 7 Caso 7
## [41] Caso 8 Caso 8 Caso 8 Caso 8 OK OK OK Caso 6 OK OK
## [51] Caso 1 Caso 3 OK OK OK OK Caso 6 OK
## Levels: Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
# Cross-tabulation of the true class (rows, "Actual") against the predicted
# class (columns, "Predicha").
tab <- table(Actual = Test$Caso, Predicha = pred)
kable(tab)
| Caso 1 |
6 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
| Caso 2 |
0 |
8 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
| Caso 3 |
0 |
0 |
4 |
0 |
0 |
0 |
0 |
0 |
0 |
| Caso 4 |
0 |
0 |
0 |
4 |
0 |
1 |
0 |
0 |
0 |
| Caso 5 |
0 |
0 |
0 |
0 |
8 |
0 |
0 |
0 |
0 |
| Caso 6 |
0 |
0 |
0 |
0 |
0 |
4 |
0 |
0 |
0 |
| Caso 7 |
0 |
0 |
0 |
0 |
0 |
0 |
5 |
0 |
0 |
| Caso 8 |
0 |
0 |
0 |
0 |
0 |
0 |
0 |
4 |
0 |
| OK |
1 |
0 |
1 |
0 |
0 |
2 |
0 |
0 |
10 |
# Confusion-matrix statistics for the multi-class predictions.
# caret expects predictions in the rows and the reference in the columns.
# `tab` is laid out the other way round (Actual x Predicha), so passing it
# directly swapped sensitivity with positive predictive value (and specificity
# with negative predictive value). The factors are passed explicitly instead;
# the reference is aligned to the prediction levels.
actual <- factor(Test$Caso, levels = levels(pred))
a <- confusionMatrix(
  data = pred,
  reference = actual,
  dnn = c("Predicha", "Actual")
)
print(a)
## Confusion Matrix and Statistics
##
## Predicha
## Actual Caso 1 Caso 2 Caso 3 Caso 4 Caso 5 Caso 6 Caso 7 Caso 8 OK
## Caso 1 6 0 0 0 0 0 0 0 0
## Caso 2 0 8 0 0 0 0 0 0 0
## Caso 3 0 0 4 0 0 0 0 0 0
## Caso 4 0 0 0 4 0 1 0 0 0
## Caso 5 0 0 0 0 8 0 0 0 0
## Caso 6 0 0 0 0 0 4 0 0 0
## Caso 7 0 0 0 0 0 0 5 0 0
## Caso 8 0 0 0 0 0 0 0 4 0
## OK 1 0 1 0 0 2 0 0 10
##
## Overall Statistics
##
## Accuracy : 0.9138
## 95% CI : (0.8102, 0.9714)
## No Information Rate : 0.1724
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9015
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Caso 1 Class: Caso 2 Class: Caso 3
## Sensitivity 0.8571 1.0000 0.80000
## Specificity 1.0000 1.0000 1.00000
## Pos Pred Value 1.0000 1.0000 1.00000
## Neg Pred Value 0.9808 1.0000 0.98148
## Prevalence 0.1207 0.1379 0.08621
## Detection Rate 0.1034 0.1379 0.06897
## Detection Prevalence 0.1034 0.1379 0.06897
## Balanced Accuracy 0.9286 1.0000 0.90000
## Class: Caso 4 Class: Caso 5 Class: Caso 6
## Sensitivity 1.00000 1.0000 0.57143
## Specificity 0.98148 1.0000 1.00000
## Pos Pred Value 0.80000 1.0000 1.00000
## Neg Pred Value 1.00000 1.0000 0.94444
## Prevalence 0.06897 0.1379 0.12069
## Detection Rate 0.06897 0.1379 0.06897
## Detection Prevalence 0.08621 0.1379 0.06897
## Balanced Accuracy 0.99074 1.0000 0.78571
## Class: Caso 7 Class: Caso 8 Class: OK
## Sensitivity 1.00000 1.00000 1.0000
## Specificity 1.00000 1.00000 0.9167
## Pos Pred Value 1.00000 1.00000 0.7143
## Neg Pred Value 1.00000 1.00000 1.0000
## Prevalence 0.08621 0.06897 0.1724
## Detection Rate 0.08621 0.06897 0.1724
## Detection Prevalence 0.08621 0.06897 0.2414
## Balanced Accuracy 1.00000 1.00000 0.9583
# Share of each class in the test set, the training set and the full labelled
# fault table. The four colours are recycled across the bars.
colores_barras <- c("blue", "green", "orange", "red")

proporcion_test <- prop.table(table(Test$Caso))
barplot(proporcion_test, col = colores_barras)

proporcion_train <- prop.table(table(Train$Caso))
barplot(proporcion_train, col = colores_barras)

proporcion_total <- prop.table(table(datos_listos$Caso))
barplot(proporcion_total, col = colores_barras)

library(ROCR)
# One-vs-rest version of the problem: "Yes" for Caso 1, "No" for every other
# class, in both the training and the test set.
Train__C1 <- Train
Train__C1$Caso <- ifelse(Train__C1$Caso != "Caso 1", "No", "Yes")
Test__C1 <- Test
Test__C1$Caso <- ifelse(Test__C1$Caso != "Caso 1", "No", "Yes")

# Binary naive Bayes model for Caso 1.
# NOTE(review): `method = "class"` is kept from the original call; it does not
# look like a naive_bayes() argument -- confirm and drop it if unused.
mod_c1 <- naive_bayes(Caso ~ ., data = Train__C1, method = "class")

# Posterior probability of the "Yes" class for each test record (the second
# column of the probability matrix, selected by name).
pred_c1 <- predict(mod_c1, Test__C1, type = "prob", threshold = 0.1)[, "Yes"]

# ROCR objects: scores plus true labels, then the TPR/FPR curve.
predic_C1 <- prediction(pred_c1, Test__C1$Caso)
# performance() needs the prediction object; it was previously given the raw
# score vector `pred_c1`, which is not a valid input.
per <- performance(predic_C1, "tpr", "fpr")
library(rpart) # para arbol decision
library(rattle) # para data set, y arbol decision
library(ROCR) # para curva ROC
# `datos` and `sampleTrain` are built from the rattle `weather` data set but
# are not referenced again in the statements below.
datos <- weather
datos <- within(datos, rm("Date", "Location", "RISK_MM")) # drop dummy columns
set.seed(42) # fix the random number sequence
sampleTrain <- sample(nrow(datos), (nrow(datos) * .7))

# From here on Train/Test are the binary (Caso 1: Yes/No) tables.
Train <- Train__C1
Test <- Test__C1

# MODEL
#------------------------------------------------------------------------------
# Classification tree with `Caso` as the response.
modelo.rpart <- rpart(Caso ~ ., Train, method = "class")

# PREDICTION
#------------------------------------------------------------------------------
# Second column of the predicted class probabilities.
predict.rpart <- predict(modelo.rpart, Test)[, 2]
predict.rocr <- prediction(predict.rpart, Test$Caso)
# True positive rate against false positive rate.
perf.rocr <- performance(predict.rocr, "tpr", "fpr")

# ROC CURVE PLOT
#------------------------------------------------------------------------------
auc <- as.numeric(performance(predict.rocr, "auc")@y.values)
plot(
  perf.rocr,
  type = "o",
  main = paste("Area Bajo la Curva =", round(auc, 2))
)
# Diagonal reference line.
abline(a = 0, b = 1)

# DECISION TREE PLOT
#------------------------------------------------------------------------------
fancyRpartPlot(modelo.rpart)