Resultados clasificación

library(readr)
library(ggplot2)
library(gridExtra)
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble  1.4.2     ✔ dplyr   0.7.6
## ✔ tidyr   0.8.1     ✔ stringr 1.3.1
## ✔ purrr   0.2.5     ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::combine() masks gridExtra::combine()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
library(lazyeval)
## 
## Attaching package: 'lazyeval'
## The following objects are masked from 'package:purrr':
## 
##     is_atomic, is_formula
# Read the classification results table; the relative path is resolved
# against the current working directory.
dataset <- readr::read_csv(file = "classification-results.csv")
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   dataset = col_character(),
##   var = col_character(),
##   config.train = col_character(),
##   config.vars = col_character(),
##   T = col_integer(),
##   alg = col_character()
## )
## See spec(...) for full column specifications.
# Round every column from the 8th through the last to three decimals.
dataset[, 8:ncol(dataset)] <- dataset[, 8:ncol(dataset)] %>%
  round(digits = 3)

ESTACION: valores posibles en la columna var. Son las estaciones meteorológicas.

# Show the distinct values found in the `var` column.
print(unique(dataset[["var"]]))
## [1] "tunuyan.temp_min"     "la_llave.temp_min"    "junin.temp_min"      
## [4] "las_paredes.temp_min" "agua_amarga.temp_min"

DATASET: valores posibles “dacc”, faltan casos de “dacc-temp”,“dacc-spring”

Caso: columna o dato que evalúe si el enfoque de agregar vecinos colabora o no.

# Named constants for one station, one dataset and one algorithm.
ESTACION <- "junin.temp_min"
DATASET <- "dacc"
ALGORITMO <- "glm"

# Distinct stations and algorithms present in the results table.
stations <- unique(dataset[["var"]])
algoritmos <- unique(dataset[["alg"]])

# Names of the metric columns.
metrics <- c(
  "FAR", "Sensitivity", "Specificity", "Accuracy",
  "Kappa", "F1", "Precision"
)

Carga dataset

Creación de dataset genérico para resultados clasificación

# Build the generic results table used by the plots.
#
# A `label` column is created by joining the identifying columns with "-";
# the source columns are kept (remove = FALSE). Only the identifying columns
# and the metric columns are retained afterwards.
#
# The data frame is supplied by the pipe, so `unite()` receives only the new
# column name and the columns to join (no extra positional `dataset` argument,
# which would be interpreted as one more column selection).
df <- dataset %>%
  unite(
    col = label,
    c("dataset", "var", "config.train", "config.vars", "T", "alg"),
    sep = "-",
    remove = FALSE
  ) %>%
  select(
    label, dataset, var, config.train, config.vars, T, alg,
    FAR, Sensitivity, Specificity, Accuracy, Kappa, F1, Precision
  )

Gráficos barras comparativos por recall, precision, F1, sensitivity, etc, indicando local - all

# Draw a horizontal bar chart ranking the rows of `df` by one metric.
#
# df  : data frame with `label`, `config.vars` and a column named by `m`.
# m   : name of the metric column to plot (character).
# s   : first component of the plot title.
# alg : second component of the plot title.
#
# The plot is printed; the value of `print(p)` is the function's result.
plot_ranking_alg <- function(df, m, s, alg) {
  # Expose the requested metric under the fixed name "metric" so the
  # aesthetics below can refer to it directly.
  ranked <- df
  names(ranked)[names(ranked) == m] <- "metric"

  p <- ggplot(
    data = ranked,
    aes(x = reorder(label, metric), y = metric, color = config.vars)
  ) +
    geom_bar(stat = "identity", fill = "white") +
    geom_text(aes(label = metric), vjust = 1.3, color = "black", size = 3) +
    coord_flip() +
    theme_minimal() +
    labs(x = "Models", y = m, title = paste(s, alg, m, sep = "--"))

  print(p)
}

Comparativas de sensitivity, precision, F1,

## MOSTRAR RANKING POR recall, precision, F1, sensitivity, etc

# Draw a dodged horizontal bar chart comparing all metric columns per model.
#
# df  : data frame holding the identifying columns listed in `id_cols`
#       plus one column per metric.
# s   : last component of the plot title.
# alg : x-axis label and middle component of the plot title.
#
# The plot is printed; the value of `print(p)` is the function's result.
plot_comparativo <- function(df, s, alg) {
  id_cols <- c(
    "label", "dataset", "var", "config.train", "config.vars", "T", "alg"
  )
  # Long format: one row per (model, metric) with columns `variable`/`value`.
  long_df <- melt(df, id.vars = id_cols)

  p <- ggplot(
    data = long_df,
    aes(x = reorder(label, value), y = value, fill = variable)
  ) +
    geom_bar(stat = "identity", position = position_dodge()) +
    coord_flip() +
    theme(legend.position = "bottom") +
    labs(
      x = alg,
      y = "metricas",
      title = paste("comparacion", alg, s, sep = "-")
    )

  print(p)
}

library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Metrics drawn by the ranking plots ("FAR" is commented out of this list).
metrics <- c(
  "Sensitivity", "Specificity", "Accuracy", "Kappa", "F1", "Precision"
) # "FAR",

# The column subset does not depend on the loop variables, so compute it once.
df_metrics <- df %>%
  select(
    label, dataset, var, config.train, config.vars, T, alg,
    Sensitivity, Specificity, Accuracy, Kappa, F1, Precision
  )

# One comparison plot plus one ranking plot per metric for every
# station / algorithm pair.
for (s in stations) {
  for (a in algoritmos) {
    # Filter on the loop variables so the data matches the plot titles.
    # `!!` forces `s` and `a` to be taken from the loop, never from a column.
    # An extra condition `T == 1` was left disabled in the original filter.
    df3 <- df_metrics %>%
      filter(dataset == DATASET & var == !!s & alg == !!a)

    # Nothing to draw for combinations that have no rows.
    if (nrow(df3) == 0) {
      next
    }

    plot_comparativo(df3, s, a)

    for (m in metrics) {
      plot_ranking_alg(df3, m, s, a)
    }
  }
}