library(readr)
library(ggplot2)
library(gridExtra)
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 1.4.2 ✔ dplyr 0.7.6
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ purrr 0.2.5 ✔ forcats 0.3.0
## ── Conflicts ──────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::combine() masks gridExtra::combine()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lazyeval)
##
## Attaching package: 'lazyeval'
## The following objects are masked from 'package:purrr':
##
## is_atomic, is_formula
# Load the classification results; readr guesses the column types and reports
# the specification it used.
dataset <- read_csv("classification-results.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## dataset = col_character(),
## var = col_character(),
## config.train = col_character(),
## config.vars = col_character(),
## T = col_integer(),
## alg = col_character()
## )
## See spec(...) for full column specifications.
# Round every column from the 8th to the last one to 3 decimals, in place.
# NOTE(review): assumes columns 1-7 are identifiers/configuration and the rest
# are numeric metrics — confirm against the CSV layout.
dataset[,8:ncol(dataset)] <- round(dataset[,8:ncol(dataset)],digits = 3)
ESTACION: valores posibles en la columna var. Son las estaciones meteorológicas.
# Show the distinct values of the `var` column (the candidate stations).
print(unique(dataset[["var"]]))
## [1] "tunuyan.temp_min" "agua_amarga.temp_min" "junin.temp_min"
## [4] "la_llave.temp_min" "las_paredes.temp_min"
DATASET: valores posibles: “dacc”; faltan los casos “dacc-temp” y “dacc-spring”.
Caso: columna o dato que evalúe si el enfoque de agregar vecinos colabora o no.
# Selection used by the exploratory views: dataset, station and algorithm.
DATASET <- "dacc"
ESTACION <- "junin.temp_min"
ALGORITMO <- "glm"

# Metric columns that are compared across configurations.
metrics <- c(
  "FAR", "Sensitivity", "Specificity", "Accuracy",
  "Kappa", "F1", "Precision"
)

# Distinct stations (target variables) and algorithms present in the results.
stations <- unique(dataset[["var"]])
algoritmos <- unique(dataset[["alg"]])
Creación de un dataset genérico para los resultados de clasificación
# Build the generic results table: a `label` column that joins the identifying
# fields with "-" (the original columns are kept), followed by the metrics.
# The data frame already arrives through the pipe, so unite() only receives the
# new column name and the columns to paste together; the extra positional
# `dataset` argument that used to be passed here was redundant and only worked
# because a column happens to be called `dataset`.
df <- dataset %>%
  unite(
    col = label,
    c("dataset", "var", "config.train", "config.vars", "T", "alg"),
    sep = "-",
    remove = FALSE
  ) %>%
  select(
    label, dataset, var, config.train, config.vars, T, alg,
    FAR, Sensitivity, Specificity, Accuracy, Kappa, F1, Precision
  )
Vistazo del dataset del que hacemos la resta:
# Peek at the rows behind the subtraction for the selected dataset and station.
# Rows are ordered so that, inside each (dataset, var, config.train, T, alg)
# group, config.vars is descending; local_vs_all is then the FAR of the
# previous row minus the FAR of the current row (NA on the first row of a
# group).
# Optional extra conditions that were tried here: alg == "glm", T == 1.
df %>%
  select(dataset, var, config.train, config.vars, T, alg, FAR) %>%
  filter(dataset == DATASET, var == ESTACION) %>%
  group_by(dataset, var, config.train, T, alg) %>%
  arrange(dataset, var, config.train, T, alg, desc(config.vars)) %>%
  mutate(local_vs_all = lag(FAR) - FAR)
## # A tibble: 54 x 8
## # Groups: dataset, var, config.train, T, alg [28]
## dataset var config.train config.vars T alg FAR local_vs_all
## <chr> <chr> <chr> <chr> <int> <chr> <dbl> <dbl>
## 1 dacc junin.… normal local 1 C5.0 0.2 NA
## 2 dacc junin.… normal all 1 C5.0 0.28 -0.08
## 3 dacc junin.… normal local 1 glm 0.15 NA
## 4 dacc junin.… normal all 1 glm 0.16 -0.01
## 5 dacc junin.… normal local 1 rf 0.15 NA
## 6 dacc junin.… normal all 1 rf 0.17 -0.02
## 7 dacc junin.… normal local 1 rpart 0.18 NA
## 8 dacc junin.… normal all 1 rpart 0.26 -0.08
## 9 dacc junin.… normal local 2 C5.0 0.21 NA
## 10 dacc junin.… normal all 2 C5.0 0.25 -0.04
## # ... with 44 more rows
para crear columna local vs all
#' Add a lagged-difference column whose input and output names are strings.
#'
#' Computes `round(lag(col1) - col2, 2)`, i.e. the previous row's `col1` minus
#' the current row's `col2`, and stores it under `new_col_name`. On a grouped
#' data frame the lag restarts in every group, so the first row of each group
#' is `NA`.
#'
#' @param df A data frame or tibble (existing grouping is respected).
#' @param col1 String; name of the column that is lagged.
#' @param col2 String; name of the column subtracted from the lagged values.
#' @param new_col_name String; name of the column to create.
#' @return `df` with the additional column `new_col_name`.
mutate_call_ <- function(df, col1, col2, new_col_name) {
  # Tidy evaluation (.data pronoun + `:=`) replaces the deprecated
  # lazyeval::interp() + mutate_() pair. dplyr::lag() is qualified so that
  # stats::lag() can never be picked up when dplyr is not attached.
  dplyr::mutate(
    df,
    !!new_col_name := round(dplyr::lag(.data[[col1]]) - .data[[col2]], 2)
  )
}
df: data.frame con los resultados.
filtro (NO IMPLEMENTADO): filtro a pasar a ggplot en filter.
m: métrica; para los valores posibles, ver el vector metrics o mirar el dataset.
s: character, nombre de la estación o variable predicha.
#' Plot the "local vs all" difference of one metric for every model.
#'
#' Rows are sorted with `config.vars` descending inside each
#' (dataset, var, config.train, T, alg) group, and the metric of the previous
#' row minus the metric of the current row is computed (rounded to 2
#' decimals). Rows that have no previous row in their group are dropped and the
#' remaining differences are drawn as a horizontal bar chart, one bar per
#' model label.
#'
#' @param df Results data frame; must contain `dataset`, `var`,
#'   `config.train`, `config.vars`, `T`, `alg` and the column named by `m`.
#' @param m String; name of the metric column to compare.
#' @param s String; station / predicted variable name. It is only used in the
#'   plot title; `df` is not filtered by it.
#' @return The ggplot object, invisibly. The plot is also printed as a side
#'   effect, so returning it invisibly avoids a second rendering when the
#'   function is called at top level without assignment.
plot_local_vs_all <- function(df, m, s) {
  df1 <- df %>%
    # all_of() states explicitly that `m` holds a column name; a bare external
    # vector inside select() is ambiguous and deprecated in tidyselect.
    select(dataset, var, config.train, config.vars, T, alg, all_of(m)) %>%
    group_by(dataset, var, config.train, T, alg) %>%
    arrange(dataset, var, config.train, T, alg, desc(config.vars)) %>%
    mutate_call_(m, m, "local_vs_all") %>%
    # The grouping is only needed for the lag above.
    ungroup() %>%
    filter(!is.na(local_vs_all)) %>%
    unite(
      col = label,
      c("dataset", "var", "config.train", "T", "alg"),
      sep = "-",
      remove = FALSE
    )

  p <- ggplot(
    data = df1,
    aes(x = reorder(label, -local_vs_all), y = local_vs_all)
  ) +
    geom_col(fill = "green") +
    geom_text(aes(label = local_vs_all), vjust = 1.3, color = "black", size = 3) +
    coord_flip() +
    theme_minimal() +
    labs(x = "Models", title = paste(s, m, sep = "--"))

  print(p)
  invisible(p)
}
corro para cada estación para cada una de las métricas
(info de las otras estaciones)
# Build one plot per (station, metric) pair for the selected DATASET and keep
# them in `lista`, keyed as "<station>--<metric>".
lista <- NULL
for (s in stations) {
  # Rows of the selected dataset that belong to this station.
  df1 <- filter(df, dataset == DATASET, var == s)
  for (m in metrics) {
    lista[[paste(s, m, sep = "--")]] <- plot_local_vs_all(df1, m, s)
  }
}