library(ggplot2)
Warning message:
In readChar(file, size, TRUE) : truncating string with embedded nuls
library(dplyr)
library(tidyr)
library(stringr)

Se normalizó teniendo en cuenta el tiempo.

Parte Uno

# Wide table of means: starts from PRF (minus its sd column) and appends the
# `prom` column of every other experiment, matched purely by row position.
# NOTE(review): the experiment codes look like P/D/U = Puro/Down/Up sampling
# and C/RF = CatBoost/Random Forest — confirm.
Tt <- PRF %>%
  select(-standard_deviation) %>%
  mutate(
    PC = PC$prom,
    DRF = DRF$prom,
    DC = DC$prom,
    URF = URF$prom,
    UC = UC$prom
  )
# Column 2 is assumed to be PRF's own `prom` column — TODO confirm.
names(Tt)[2] <- "PRF"

# Same layout, but holding the standard deviations instead of the means.
Tsd <- PRF %>%
  select(-prom) %>%
  mutate(
    PC = PC$standard_deviation,
    DRF = DRF$standard_deviation,
    DC = DC$standard_deviation,
    URF = URF$standard_deviation,
    UC = UC$standard_deviation
  )
names(Tsd)[2] <- "PRF"

# Reshape both tables to long form (X1, name, value). Both were pivoted from
# identically laid-out tables, so the sd values are attached by row position.
Tsd <- pivot_longer(Tsd, cols = -X1)
Tt <- Tt %>%
  pivot_longer(cols = -X1) %>%
  mutate(sd = Tsd$value)

Tengo mejores resultados luego de aplicar DownSampling. También tengo mejores resultados con los feature vectors sin normalizar, aunque esta mejora es extremadamente mínima.

# Bar chart of the mean value per experiment with +/- one sd error bars,
# restricted to four metrics and drawn as one facet per metric.
ggplot(
  data = filter(
    Tt,
    X1 %in% c("Sensitivity", "Specificity", "F1", "Balanced Accuracy")
  )
) +
  geom_col(mapping = aes(x = name, y = value), fill = "skyblue") +
  geom_errorbar(
    mapping = aes(x = name, ymin = value - sd, ymax = value + sd),
    width = 0.2,
    position = position_dodge(width = 0.9)
  ) +
  facet_wrap(~X1) +
  theme_bw()

# Parte Dos

# Load the six "*V_Time" result tables.
# The first three are read relative to the current working directory; the
# other three come from the CTU19_RandomForest results folder.
# NOTE(review): rf_results_dir is a machine-specific absolute path — point it
# at the local copy of the results before running elsewhere.
rf_results_dir <- file.path(
  "C:/Users/DuzzLogic/Google Drive/Cosas/LABSIN/Botnets",
  "CTU19_RandomForest",
  "Resultados2"
)

UCV <- read.csv(file = "UpV_Time")
DCV <- read.csv(file = "DownV_Time")
PCV <- read.csv(file = "PuroV_Time")
URFV <- read.csv(file = file.path(rf_results_dir, "UpV_Time"))
DRFV <- read.csv(file = file.path(rf_results_dir, "DownV_Time"))
PRFV <- read.csv(file = file.path(rf_results_dir, "PuroV_Time"))
# Tag a raw results table with its experiment label (new `test` column) and
# drop its nine standard-deviation columns, keeping every other column.
#
# results:    data frame as read from one of the "*V_Time" files.
# experiment: label stored in the `test` column (e.g. "PC").
#
# all_of() makes the call fail if any of the nine columns is missing, which
# matches the strictness of naming each column explicitly.
drop_sd_columns <- function(results, experiment) {
  sd_cols <- c("standard_deviation", paste0("standard_deviation.", 1:8))
  results %>%
    mutate(test = experiment) %>%
    select(-all_of(sd_cols))
}

PCv <- drop_sd_columns(PCV, "PC")
DCv <- drop_sd_columns(DCV, "DC")
UCv <- drop_sd_columns(UCV, "UC")
PRFv <- drop_sd_columns(PRFV, "PRF")
DRFv <- drop_sd_columns(DRFV, "DRF")
URFv <- drop_sd_columns(URFV, "URF")
# Build the long-format standard-deviation table for one experiment.
#
# results:    data frame as read from one of the "*V_Time" files.
# experiment: label stored in the `test` column (e.g. "PC").
#
# Steps: tag with the experiment label, drop the nine mean ("prom") columns,
# keep only the four metric rows of interest, drop the metric column X, and
# pivot the remaining columns into (test, name, value) rows.
sd_long_table <- function(results, experiment) {
  mean_cols <- c("prom", paste0("prom.", 1:8))
  kept_metrics <- c("Sensitivity", "Specificity", "F1", "Balanced Accuracy")
  results %>%
    mutate(test = experiment) %>%
    select(-all_of(mean_cols)) %>%
    filter(X %in% kept_metrics) %>%
    select(-X) %>%
    pivot_longer(cols = -test)
}

PCvSD <- sd_long_table(PCV, "PC")
DCvSD <- sd_long_table(DCV, "DC")
UCvSD <- sd_long_table(UCV, "UC")
PRFvSD <- sd_long_table(PRFV, "PRF")
DRFvSD <- sd_long_table(DRFV, "DRF")
URFvSD <- sd_long_table(URFV, "URF")
# Turn one experiment's table of means into long form and attach its sd.
#
# means:   table with columns X, test and the mean columns.
# sd_long: the matching long sd table; only its `value` column is used.
#
# Returns a table with columns test, value and sd. The sd values are attached
# by row position, so both tables must have been filtered and pivoted the
# same way; the row-count check turns a silent mismatch into an early error.
means_long_with_sd <- function(means, sd_long) {
  kept_metrics <- c("Sensitivity", "Specificity", "F1", "Balanced Accuracy")
  long <- means %>%
    filter(X %in% kept_metrics) %>%
    select(-X) %>%
    pivot_longer(cols = -test) %>%
    select(-name)
  stopifnot(nrow(long) == nrow(sd_long))
  mutate(long, sd = sd_long$value)
}

PCv <- means_long_with_sd(PCv, PCvSD)
PRFv <- means_long_with_sd(PRFv, PRFvSD)
DCv <- means_long_with_sd(DCv, DCvSD)
DRFv <- means_long_with_sd(DRFv, DRFvSD)
UCv <- means_long_with_sd(UCv, UCvSD)
URFv <- means_long_with_sd(URFv, URFvSD)
# Row labels "<metric>.<i>" for i = 0..8, grouped metric by metric:
# Sensitivity.0 ... Sensitivity.8, Specificity.0 ..., F1.0 ..., BAccuracy.0 ...
# The order must match the row order of the long tables it is bound to.
name <- paste0(
  rep(c("Sensitivity", "Specificity", "F1", "BAccuracy"), each = 9),
  ".",
  rep(0:8, times = 4)
)
# Append the label vector `name` to each experiment table as a column that is
# also called `name`; rows are matched by position.
PCv <- cbind(PCv, name)
PRFv <- cbind(PRFv, name)
DCv <- cbind(DCv, name)
DRFv <- cbind(DRFv, name)
UCv <- cbind(UCv, name)
URFv <- cbind(URFv, name)

Ahora falta meter todo en un mismo dataset para poder graficar.

# Stack the six per-experiment tables, in this order, into one data set for
# plotting, then print it.
Tsec <- rbind(PCv, PRFv, DCv, DRFv, UCv, URFv)
Tsec
# One facet per label in `name` (9 facets per row): mean value per experiment
# as bars, with +/- one sd error bars on top.
ggplot(data = Tsec) +
  geom_col(mapping = aes(x = test, y = value), fill = "skyblue") +
  geom_errorbar(
    mapping = aes(x = test, ymin = value - sd, ymax = value + sd),
    width = 0.2,
    position = position_dodge(width = 0.9)
  ) +
  facet_wrap(~name, ncol = 9) +
  theme_bw()

Analizamos Puro utilizando CatBoost.

Balanced accuracy

# PC only: rows whose label contains "BA" (the BAccuracy.* labels), drawn as
# bars with +/- one sd error bars.
ggplot(data = filter(PCv, grepl("BA", name))) +
  geom_col(mapping = aes(x = name, y = value), fill = "skyblue") +
  geom_errorbar(
    mapping = aes(x = name, ymin = value - sd, ymax = value + sd),
    width = 0.2,
    position = position_dodge(width = 0.9)
  ) +
  theme_bw()

Comparamos

Balanced accuracy

# PRF vs PC: rows whose label contains "BA" (the BAccuracy.* labels), one
# facet per label, bars per experiment with +/- one sd error bars.
ggplot(data = filter(rbind(PRFv, PCv), grepl("BA", name))) +
  geom_col(mapping = aes(x = test, y = value), fill = "skyblue") +
  geom_errorbar(
    mapping = aes(x = test, ymin = value - sd, ymax = value + sd),
    width = 0.2,
    position = position_dodge(width = 0.9)
  ) +
  facet_wrap(~name) +
  theme_bw()

