library(readr)
# Load the classification results. No col_types is supplied, so readr guesses
# the column types and reports the specification it used.
dataset <- read_csv(file = "classification-results.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## dataset = col_character(),
## var = col_character(),
## config.train = col_character(),
## config.vars = col_character(),
## T = col_integer(),
## alg = col_character()
## )
## See spec(...) for full column specifications.
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
# Round every column from the 8th through the last to three decimals.
# NOTE(review): assumes columns 8..ncol(dataset) exist and are all numeric
# metric columns — confirm against the CSV layout.
dataset[8:ncol(dataset)] <- round(dataset[8:ncol(dataset)], digits = 3)
Creación de un dataset genérico para los resultados de clasificación
# Build the generic results table used by the rest of the analysis.
# A "-"-joined `label` identifying each model run is added; the source columns
# are kept because remove = FALSE. Only the identifier columns and the
# classification metrics are retained, in a fixed order.
# The columns to unite are listed once, as a character vector, so the bare name
# `dataset` is never ambiguous between the column and the data frame.
df <- dataset %>%
  unite(
    col = label,
    c("dataset", "var", "config.train", "config.vars", "T", "alg"),
    sep = "-",
    remove = FALSE
  ) %>%
  select(
    label, dataset, var, config.train, config.vars, T, alg,
    FAR, Recall, Specificity, Accuracy, Kappa, F1, Precision
  )
Valores posibles en la columna var: son las estaciones meteorológicas.
# List the distinct values of the `var` column, in order of first appearance.
print(unique(dataset[["var"]]))
## [1] "junin.temp_min" "tunuyan.temp_min" "las_paredes.temp_min"
## [4] "la_llave.temp_min" "agua_amarga.temp_min"
Caso: columna o dato que evalúe si el enfoque de agregar vecinos colabora o no.
El dataset sobre el que hacemos la resta es el siguiente:
# Preview, for the "junin.temp_min" series of the "dacc" dataset, the FAR
# difference between consecutive config.vars rows of each
# (dataset, var, config.train, T, alg) group.
# config.vars is the last sort key so the row order inside every group is
# fixed by the data rather than by the incidental order of the CSV: lag(FAR)
# is always the row whose config.vars sorts first, and
# local_vs_all = FAR(previous row) - FAR(current row). The first row of each
# group has no predecessor and therefore gets NA.
# NOTE(review): the column name suggests exactly two config.vars values
# ("all" and "local") per group — confirm against the data.
df %>%
  select(dataset, var, config.train, config.vars, T, alg, FAR) %>%
  # Further narrowing can be added to this filter, e.g. alg == "glm" & T == 1.
  filter(dataset == "dacc" & var == "junin.temp_min") %>%
  group_by(dataset, var, config.train, T, alg) %>%
  arrange(dataset, var, config.train, T, alg, config.vars) %>%
  mutate(local_vs_all = lag(FAR) - FAR)
## # A tibble: 40 x 8
## # Groups: dataset, var, config.train, T, alg [20]
## dataset var config.train config.vars T alg FAR local_vs_all
## <chr> <chr> <chr> <chr> <int> <chr> <dbl> <dbl>
## 1 dacc junin.… normal all 1 C5.0 0.23 NA
## 2 dacc junin.… normal local 1 C5.0 0.2 0.03
## 3 dacc junin.… normal all 1 glm 0.04 NA
## 4 dacc junin.… normal local 1 glm 0.15 -0.110
## 5 dacc junin.… normal all 1 rf 0.16 NA
## 6 dacc junin.… normal local 1 rf 0.15 0.01
## 7 dacc junin.… normal all 1 rpart 0 NA
## 8 dacc junin.… normal local 1 rpart 0.18 -0.18
## 9 dacc junin.… normal all 2 C5.0 0.28 NA
## 10 dacc junin.… normal local 2 C5.0 0.21 0.07
## # ... with 30 more rows
Aplico los filtros que quiera graficar/consultar y el orden.
# Table behind the local_vs_all bar chart: one row per
# (dataset, var, config.train, T, alg) group of the "junin.temp_min" series of
# the "dacc" dataset, holding the FAR difference between consecutive
# config.vars rows of that group.
# config.vars is the last sort key so lag(FAR) always refers to the row whose
# config.vars sorts first, independent of the row order of the input file.
# NOTE(review): assumes two config.vars values ("all", "local") per group, so
# exactly one non-NA difference survives per group — confirm against the data.
df1 <- df %>%
  select(dataset, var, config.train, config.vars, T, alg, FAR) %>%
  # Further narrowing can be added to this filter, e.g. alg == "glm" & T == 1.
  filter(dataset == "dacc" & var == "junin.temp_min") %>%
  group_by(dataset, var, config.train, T, alg) %>%
  arrange(dataset, var, config.train, T, alg, config.vars) %>%
  mutate(local_vs_all = lag(FAR) - FAR) %>%
  # Grouping is only needed for lag(); drop it so later steps see a plain table.
  ungroup() %>%
  # The first row of each group has no predecessor, hence NA; discard it.
  filter(!is.na(local_vs_all)) %>%
  # Label without config.vars, since each remaining row compares both settings.
  unite(
    col = label,
    c("dataset", "var", "config.train", "T", "alg"),
    sep = "-",
    remove = FALSE
  )
# Horizontal bar chart of local_vs_all per model label, with bars ordered by
# the difference and each bar annotated with its value.
ggplot(
  data = df1,
  mapping = aes(x = reorder(label, -local_vs_all), y = local_vs_all)
) +
  geom_col(fill = "green") +
  geom_text(
    mapping = aes(label = local_vs_all),
    vjust = 1.3,
    color = "black",
    size = 3
  ) +
  coord_flip() +
  theme_minimal() +
  labs(x = "Models", y = "local_vs_all_segun_FAR")
MOSTRAR RANKING POR recall, precision, F1, sensitivity, etc
# Ranking table: all models for the "junin.temp_min" series of the "dacc"
# dataset, sorted by ascending FAR.
# Further conditions (e.g. alg == "glm", T == 1) can be appended to the filter.
df2 <- df %>%
  filter(dataset == "dacc", var == "junin.temp_min") %>%
  arrange(FAR)
Donde dice FAR se puede cambiar por: Recall, Specificity, Accuracy, Kappa, F1 o Precision.
# Horizontal bar chart ranking the models by FAR. Bar outlines are coloured by
# config.vars, bars are filled white and annotated with the FAR value.
ggplot(
  data = df2,
  mapping = aes(x = reorder(label, -FAR), y = FAR, color = config.vars)
) +
  geom_col(fill = "white") +
  geom_text(
    mapping = aes(label = FAR),
    vjust = 1.3,
    color = "black",
    size = 3
  ) +
  coord_flip() +
  theme_minimal() +
  labs(x = "Models", y = "FAR", title = "FAR")
# Horizontal bar chart ranking the models by Recall. Bar outlines are coloured
# by config.vars, bars are filled white and annotated with the Recall value.
ggplot(
  data = df2,
  mapping = aes(x = reorder(label, Recall), y = Recall, color = config.vars)
) +
  geom_col(fill = "white") +
  geom_text(
    mapping = aes(label = Recall),
    vjust = 1.3,
    color = "black",
    size = 3
  ) +
  coord_flip() +
  theme_minimal() +
  labs(x = "Models", y = "Recall", title = "Recall")
Comparativas de sensitivity, precision, F1, etc.
# Metric comparison table: glm models for the "junin.temp_min" series of the
# "dacc" dataset, keeping the identifier columns and every metric except FAR.
# A further condition such as T == 1 can be appended to the filter.
df3 <- df %>%
  filter(dataset == "dacc", var == "junin.temp_min", alg == "glm") %>%
  select(
    label, dataset, var, config.train, config.vars, T, alg,
    Recall, Specificity, Accuracy, Kappa, F1, Precision
  )
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
# Reshape df3 to long format: the identifier columns are kept as-is and every
# remaining column is stacked into `variable` / `value` pairs.
# The argument is spelled out as id.vars rather than relying on partial
# matching of `id`.
df3 <- melt(
  df3,
  id.vars = c(
    "label", "dataset", "var", "config.train", "config.vars", "T", "alg"
  )
)
# Grouped horizontal bar chart: one dodged bar per metric (`variable`) for each
# model label, with the legend placed below the plot.
ggplot(
  data = df3,
  mapping = aes(x = reorder(label, value), y = value, fill = variable)
) +
  geom_col(position = position_dodge()) +
  coord_flip() +
  theme(legend.position = "bottom") +
  labs(
    x = "Logistic Regression Models",
    y = "metricas",
    title = "comparacion"
  )