library(readr)
library(ggplot2)
library(gridExtra)
library(tidyverse)
## ── Attaching packages ─────────────────────────────── tidyverse 1.2.1 ──
## ✔ tibble 1.4.2 ✔ dplyr 0.7.6
## ✔ tidyr 0.8.1 ✔ stringr 1.3.1
## ✔ purrr 0.2.5 ✔ forcats 0.3.0
## ── Conflicts ────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::combine() masks gridExtra::combine()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lazyeval)
##
## Attaching package: 'lazyeval'
## The following objects are masked from 'package:purrr':
##
## is_atomic, is_formula
# Load the classification results; column types are guessed by readr.
dataset <- read_csv(file = "classification-results.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## dataset = col_character(),
## var = col_character(),
## config.train = col_character(),
## config.vars = col_character(),
## T = col_integer(),
## alg = col_character()
## )
## See spec(...) for full column specifications.
# Round every column from the 8th to the last to three decimals.
# NOTE(review): the hard-coded start index 8 presumably marks the first
# metric column -- confirm against the CSV layout.
dataset[, 8:ncol(dataset)] <- dataset[, 8:ncol(dataset)] %>%
  round(digits = 3)
ESTACIÓN: valores posibles en la columna var. Son las estaciones meteorológicas.
# Show the distinct values of the `var` column.
print(unique(dataset[["var"]]))
## [1] "tunuyan.temp_min" "agua_amarga.temp_min" "junin.temp_min"
## [4] "la_llave.temp_min" "las_paredes.temp_min"
DATASET: valores posibles: “dacc”; faltan casos de “dacc-temp” y “dacc-spring”.
Caso columna o dato que evalúe si colabora o no el enfoque de agregar vecinos.
# Distinct values of the `var` and `alg` columns, used to drive the plot loops.
stations <- unique(dataset[["var"]])
algoritmos <- unique(dataset[["alg"]])

# Names of the metric columns that get plotted.
metrics <- c(
  "FAR", "Sensitivity", "Specificity", "Accuracy",
  "Kappa", "F1", "Precision"
)
Creación de un dataset genérico para los resultados de clasificación
# Build the generic classification-results table.
# `label` joins the six configuration columns with "-"; remove = FALSE keeps
# those source columns alongside the new label. The final select() fixes the
# column order: label, configuration columns, then the seven metrics.
#
# The original call also passed `dataset` as an extra positional argument to
# unite() on top of the piped data. That only worked because the data has a
# column with the same name, so the redundant argument is dropped here.
df <- dataset %>%
  unite(
    col = label,
    c("dataset", "var", "config.train", "config.vars", "T", "alg"),
    sep = "-",
    remove = FALSE
  ) %>%
  select(
    label, dataset, var, config.train, config.vars, T, alg,
    FAR, Sensitivity, Specificity, Accuracy, Kappa, F1, Precision
  )
# Overview box plots of Sensitivity, F1 and Precision per `var` value
# (horizontal boxes via coord_flip()).

# First group: boxes filled by config.vars.
p <- ggplot(df, aes(x = var, y = Sensitivity, fill = config.vars)) +
  geom_boxplot() +
  coord_flip()
print(p)

p <- ggplot(df, aes(x = var, y = F1, fill = config.vars)) +
  geom_boxplot() +
  coord_flip()
print(p)

p <- ggplot(df, aes(x = var, y = Precision, fill = config.vars)) +
  geom_boxplot() +
  coord_flip()
print(p)

# Second group: boxes filled by T, treated as a discrete factor.
# Every plot in this group now sets the legend title to "T"; previously only
# the F1 plot did, and the other two showed the raw expression "as.factor(T)".
p <- ggplot(df, aes(x = var, y = Precision, fill = as.factor(T))) +
  geom_boxplot() +
  coord_flip() +
  labs(fill = "T")
print(p)

p <- ggplot(df, aes(x = var, y = Sensitivity, fill = as.factor(T))) +
  geom_boxplot() +
  coord_flip() +
  labs(fill = "T")
print(p)

p <- ggplot(df, aes(x = var, y = F1, fill = as.factor(T))) +
  geom_boxplot() +
  coord_flip() +
  labs(fill = "T")
print(p)
# For each station, draw three box plots (F1, Sensitivity, Precision) with
# one box group per algorithm, filled by config.vars and titled with the
# station name.
for (s in stations) {
  # Rows belonging to the current station only.
  df1 <- filter(df, var == s)

  p <- ggplot(df1, aes(x = alg, y = F1, fill = config.vars)) +
    geom_boxplot() +
    coord_flip() +
    labs(title = paste("Estación", s))
  print(p)

  p <- ggplot(df1, aes(x = alg, y = Sensitivity, fill = config.vars)) +
    geom_boxplot() +
    coord_flip() +
    labs(title = paste("Estación", s))
  print(p)

  p <- ggplot(df1, aes(x = alg, y = Precision, fill = config.vars)) +
    geom_boxplot() +
    coord_flip() +
    labs(title = paste("Estación", s))
  print(p)
}
library(reshape)
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
## The following objects are masked from 'package:tidyr':
##
## expand, smiths
# Variability of every metric except FAR, per station, for the "dacc" dataset.
df3 <- df %>%
  filter(dataset == "dacc") %>%
  select(-one_of("FAR"))
# melt(df3,id.vars="label")

# Reshape to long format: one row per (run, metric) pair.
# The measured columns are derived by name with setdiff() rather than
# metrics[-1], so the result no longer depends on "FAR" being the first
# element of `metrics` and always agrees with the select() above.
df4 <- melt(
  as.data.frame(df3),
  id.vars = c(
    "label", "dataset", "var", "config.train", "config.vars", "T", "alg"
  ),
  measure.vars = setdiff(metrics, "FAR")
)

p <- ggplot(df4, aes(x = var, y = value, fill = variable)) +
  geom_boxplot() +
  coord_flip() +
  labs(title = "Variabilidad de las métricas por las estaciones")
print(p)
# Long-format table of all metrics (FAR included) for the "dacc" dataset:
# one row per (run, metric) pair, with the metric name in `variable` and its
# value in `value`.
df4 <- df %>%
  filter(dataset == "dacc") %>%
  as.data.frame() %>%
  melt(
    id.vars = c(
      "label", "dataset", "var", "config.train", "config.vars", "T", "alg"
    ),
    measure.vars = metrics
  )
# One box plot per (algorithm, metric) pair: metric value per `var` value,
# filled by config.vars.
for (a in algoritmos) {
  for (m in metrics) {
    # Keep only the rows for the current algorithm and metric.
    df5 <- filter(df4, alg == a, variable == m)
    p <- ggplot(df5, aes(x = var, y = value, fill = config.vars)) +
      geom_boxplot() +
      coord_flip() +
      labs(title = paste0("Comportamiento de ", m, " en modelo ", a))
    print(p)
  }
  # Disabled alternative: one plot per (station, algorithm) pair.
  # for(s in stations){
  #
  # df5 <- df4 %>% filter( var == s & alg == a)
  # p <-ggplot(aes(y = value , x = variable, fill = config.vars), data = df5) +
  # geom_boxplot() + coord_flip() + labs(title=paste("Estacion ",s," y algoritmo ",a,sep=""))
  # print(p)
  # }
}