library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)

Dadas las diferencias en los resultados que se generan según cómo se toman en cuenta los símbolos de tiempo, se presentan dos resultados: "_Size" se normaliza sin tomar en cuenta los símbolos de tiempo, pero se agrega una columna nueva "size" que sí los toma en cuenta. Por otro lado, "_Time" se normaliza tomando en cuenta los símbolos de tiempo.

Parte Dos

PS: Puro Size; PT: Puro Time; DS: Down Size; DT: Down Time; US: Up Size; UT: Up Time

# Load the six result tables: one per sampling scheme (Up, Down, Puro) and per
# file suffix ("_Size" -> *_N, "_Time" -> *_SN).
# The shared folder is named once so the location only has to change in one
# place; file.path() joins with "/", so the resulting paths are unchanged.
results_dir <- "C:/Users/DuzzLogic/Google Drive/Cosas/LABSIN/Botnets/CTU19_RandomForest/Resultados2"

UpV_N <- read.csv(file = file.path(results_dir, "UpV_Size"))
DownV_N <- read.csv(file = file.path(results_dir, "DownV_Size"))
PuroV_N <- read.csv(file = file.path(results_dir, "PuroV_Size"))
UpV_SN <- read.csv(file = file.path(results_dir, "UpV_Time"))
DownV_SN <- read.csv(file = file.path(results_dir, "DownV_Time"))
PuroV_SN <- read.csv(file = file.path(results_dir, "PuroV_Time"))
# Tag a raw results table and strip its standard-deviation columns.
#
# results: data frame as returned by read.csv for one results file.
# label:   string stored in the new `test` column.
#
# Returns `results` with `test` added and the nine columns
# standard_deviation, standard_deviation.1, ..., standard_deviation.8 removed.
# all_of() is strict, so a table lacking any of these columns is an error,
# exactly as an explicit select(-column) would be.
tag_without_sd <- function(results, label) {
  sd_columns <- c("standard_deviation", paste0("standard_deviation.", 1:8))
  results %>%
    # `!!` injects the argument's value, so a data column that happened to be
    # called `label` can never be picked up instead.
    mutate(test = !!label) %>%
    select(-all_of(sd_columns))
}

PuroN <- tag_without_sd(PuroV_N, "PuroN")
PuroSN <- tag_without_sd(PuroV_SN, "PuroSN")
DownN <- tag_without_sd(DownV_N, "DownN")
DownSN <- tag_without_sd(DownV_SN, "DownSN")
UpN <- tag_without_sd(UpV_N, "UpN")
UpSN <- tag_without_sd(UpV_SN, "UpSN")
# Reduce a tagged results table to a long list of metric values.
#
# results: data frame with a metric-name column `X`, a `test` column and the
#          remaining value columns.
#
# Keeps only the Sensitivity, Specificity, F1 and Balanced Accuracy rows,
# drops `X`, stacks every column except `test` into a single `value` column
# and discards the generated `name` column, leaving `test` and `value`.
# NOTE(review): the metric and column identity of each value is discarded
# here, so any later labelling has to rely on row order - confirm the files
# list the metrics in the expected order.
flatten_metrics <- function(results) {
  results %>%
    filter(X %in% c("Sensitivity", "Specificity", "F1", "Balanced Accuracy")) %>%
    select(-X) %>%
    pivot_longer(cols = -test) %>%
    select(-name)
}

PuroN <- flatten_metrics(PuroN)
PuroSN <- flatten_metrics(PuroSN)
DownN <- flatten_metrics(DownN)
DownSN <- flatten_metrics(DownSN)
UpN <- flatten_metrics(UpN)
UpSN <- flatten_metrics(UpSN)
# Positional labels "<metric>.<k>" with k = 0..8 for the four metrics, in the
# order Sensitivity, Specificity, F1, BAccuracy (36 labels in total).
name <- paste0(
  rep(c("Sensitivity", "Specificity", "F1", "BAccuracy"), each = 9),
  ".",
  0:8
)

# Attach the positional labels to a flattened table and replace its `test`
# tag with a short code.
#
# values:       data frame with columns `test` and `value`.
# label:        short code stored in the new `test` column (e.g. "PS").
# metric_names: labels bound as column `name`, one per row of `values`.
#
# Returns a data frame with columns `value`, `name`, `test`.
relabel_results <- function(values, label, metric_names = name) {
  # Labels are matched purely by position; stop instead of letting cbind()
  # recycle them when the row count is not what the labels were built for.
  stopifnot(nrow(values) == length(metric_names))
  labelled <- cbind(values, name = metric_names)
  labelled %>%
    select(-test) %>%
    mutate(test = !!label)
}

PS <- relabel_results(PuroN, "PS")
PT <- relabel_results(PuroSN, "PT")
DS <- relabel_results(DownN, "DS")
DT <- relabel_results(DownSN, "DT")
US <- relabel_results(UpN, "US")
UT <- relabel_results(UpSN, "UT")

Ahora falta meter todo en un mismo dataset para poder graficar.

# Stack the six labelled tables (PS, PT, DS, DT, US, UT) into one data frame
# so they can be plotted together, then display it.
Tsec <- rbind(PS, PT, DS, DT, US, UT)
Tsec

Si comparamos PS y PT, parece que PS tiene mejores resultados con cadenas más cortas en BAccuracy y Specificity. No sucede lo mismo al usar DownSam o UpSam, donde DT y UT siempre tienen mejores resultados que DS y US, respectivamente. A medida que el tamaño de las cadenas aumenta, se ven resultados similares, sobre todo cuando no se usa ni UpSam ni DownSam.

# Bar chart of every value in Tsec: one panel per `name` label (nine panels
# per row), one bar per `test` code inside each panel.
Tsec %>%
  ggplot(aes(x = test, y = value)) +
  geom_col(fill = "skyblue") +
  facet_wrap(~name, ncol = 9) +
  theme_bw()

Analizando solo Puro Normalizado con tiempo

Balanced accuracy

# Rows of PT whose label matches "BA" (the BAccuracy.* labels), one bar per
# label.
PT %>%
  filter(grepl("BA", name)) %>%
  ggplot(aes(x = name, y = value)) +
  geom_col(fill = "skyblue") +
  theme_bw()

Spec

# Rows of PT whose label matches "Spe" (the Specificity.* labels), one bar
# per label.
PT %>%
  filter(grepl("Spe", name)) %>%
  ggplot(aes(x = name, y = value)) +
  geom_col(fill = "skyblue") +
  theme_bw()

Analizando solo Puro Normalizado sin tiempo con columna Size

Balanced accuracy

# Rows of PS whose label matches "BA" (the BAccuracy.* labels), one bar per
# label.
PS %>%
  filter(grepl("BA", name)) %>%
  ggplot(aes(x = name, y = value)) +
  geom_col(fill = "skyblue") +
  theme_bw()

Spec

# Rows of PS whose label matches "Spe" (the Specificity.* labels), one bar
# per label.
PS %>%
  filter(grepl("Spe", name)) %>%
  ggplot(aes(x = name, y = value)) +
  geom_col(fill = "skyblue") +
  theme_bw()

Comparamos

Balanced accuracy

# PS and PT stacked, restricted to labels matching "BA": one panel per label,
# one bar per `test` code.
rbind(PS, PT) %>%
  filter(grepl("BA", name)) %>%
  ggplot(aes(x = test, y = value)) +
  geom_col(fill = "skyblue") +
  facet_wrap(~name) +
  theme_bw()

Spec

# PS and PT stacked, restricted to labels matching "Spe": one panel per
# label, one bar per `test` code.
rbind(PS, PT) %>%
  filter(grepl("Spe", name)) %>%
  ggplot(aes(x = test, y = value)) +
  geom_col(fill = "skyblue") +
  facet_wrap(~name) +
  theme_bw()

---
title: "Resultados CTU19_RandomForest"
output: html_notebook
---

```{r}
library(ggplot2)
library(dplyr)
library(tidyr)
library(stringr)
```

Dadas las diferencias en los resultados que se generan según cómo se toman en cuenta los símbolos de tiempo, se presentan dos resultados: "_Size" se normaliza sin tomar en cuenta los símbolos de tiempo, pero se agrega una columna nueva "size" que sí los toma en cuenta. Por otro lado, "_Time" se normaliza tomando en cuenta los símbolos de tiempo.


# Parte Dos

- PS: Puro Size
- PT: Puro Time
- DS: Down Size
- DT: Down Time
- US: Up Size
- UT: Up Time

```{r}
# Load the six result tables: one per sampling scheme (Up, Down, Puro) and per
# file suffix ("_Size" -> *_N, "_Time" -> *_SN).
# The shared folder is named once so the location only has to change in one
# place; file.path() joins with "/", so the resulting paths are unchanged.
results_dir <- "C:/Users/DuzzLogic/Google Drive/Cosas/LABSIN/Botnets/CTU19_RandomForest/Resultados2"

UpV_N <- read.csv(file = file.path(results_dir, "UpV_Size"))
DownV_N <- read.csv(file = file.path(results_dir, "DownV_Size"))
PuroV_N <- read.csv(file = file.path(results_dir, "PuroV_Size"))
UpV_SN <- read.csv(file = file.path(results_dir, "UpV_Time"))
DownV_SN <- read.csv(file = file.path(results_dir, "DownV_Time"))
PuroV_SN <- read.csv(file = file.path(results_dir, "PuroV_Time"))
```

```{r} 
# Tag a raw results table and strip its standard-deviation columns.
#
# results: data frame as returned by read.csv for one results file.
# label:   string stored in the new `test` column.
#
# Returns `results` with `test` added and the nine columns
# standard_deviation, standard_deviation.1, ..., standard_deviation.8 removed.
# all_of() is strict, so a table lacking any of these columns is an error,
# exactly as an explicit select(-column) would be.
tag_without_sd <- function(results, label) {
  sd_columns <- c("standard_deviation", paste0("standard_deviation.", 1:8))
  results %>%
    # `!!` injects the argument's value, so a data column that happened to be
    # called `label` can never be picked up instead.
    mutate(test = !!label) %>%
    select(-all_of(sd_columns))
}

PuroN <- tag_without_sd(PuroV_N, "PuroN")
PuroSN <- tag_without_sd(PuroV_SN, "PuroSN")
DownN <- tag_without_sd(DownV_N, "DownN")
DownSN <- tag_without_sd(DownV_SN, "DownSN")
UpN <- tag_without_sd(UpV_N, "UpN")
UpSN <- tag_without_sd(UpV_SN, "UpSN")
```

```{r}
# Reduce a tagged results table to a long list of metric values.
#
# results: data frame with a metric-name column `X`, a `test` column and the
#          remaining value columns.
#
# Keeps only the Sensitivity, Specificity, F1 and Balanced Accuracy rows,
# drops `X`, stacks every column except `test` into a single `value` column
# and discards the generated `name` column, leaving `test` and `value`.
# NOTE(review): the metric and column identity of each value is discarded
# here, so any later labelling has to rely on row order - confirm the files
# list the metrics in the expected order.
flatten_metrics <- function(results) {
  results %>%
    filter(X %in% c("Sensitivity", "Specificity", "F1", "Balanced Accuracy")) %>%
    select(-X) %>%
    pivot_longer(cols = -test) %>%
    select(-name)
}

PuroN <- flatten_metrics(PuroN)
PuroSN <- flatten_metrics(PuroSN)
DownN <- flatten_metrics(DownN)
DownSN <- flatten_metrics(DownSN)
UpN <- flatten_metrics(UpN)
UpSN <- flatten_metrics(UpSN)
```

```{r}
# Positional labels "<metric>.<k>" with k = 0..8 for the four metrics, in the
# order Sensitivity, Specificity, F1, BAccuracy (36 labels in total).
name <- paste0(
  rep(c("Sensitivity", "Specificity", "F1", "BAccuracy"), each = 9),
  ".",
  0:8
)

# Attach the positional labels to a flattened table and replace its `test`
# tag with a short code.
#
# values:       data frame with columns `test` and `value`.
# label:        short code stored in the new `test` column (e.g. "PS").
# metric_names: labels bound as column `name`, one per row of `values`.
#
# Returns a data frame with columns `value`, `name`, `test`.
relabel_results <- function(values, label, metric_names = name) {
  # Labels are matched purely by position; stop instead of letting cbind()
  # recycle them when the row count is not what the labels were built for.
  stopifnot(nrow(values) == length(metric_names))
  labelled <- cbind(values, name = metric_names)
  labelled %>%
    select(-test) %>%
    mutate(test = !!label)
}

PS <- relabel_results(PuroN, "PS")
PT <- relabel_results(PuroSN, "PT")
DS <- relabel_results(DownN, "DS")
DT <- relabel_results(DownSN, "DT")
US <- relabel_results(UpN, "US")
UT <- relabel_results(UpSN, "UT")
```

Ahora falta meter todo en un mismo dataset para poder graficar. 

```{r}
# Stack the six labelled tables (PS, PT, DS, DT, US, UT) into one data frame
# so they can be plotted together, then display it.
Tsec <- rbind(PS, PT, DS, DT, US, UT)
Tsec
```

Si comparamos PS y PT, parece que PS tiene mejores resultados con cadenas más cortas en BAccuracy y Specificity. No sucede lo mismo al usar DownSam o UpSam, donde DT y UT siempre tienen mejores resultados que DS y US, respectivamente. A medida que el tamaño de las cadenas aumenta, se ven resultados similares, sobre todo cuando no se usa ni UpSam ni DownSam.

```{r fig.width=20}
# Bar chart of every value in Tsec: one panel per `name` label (nine panels
# per row), one bar per `test` code inside each panel.
Tsec %>%
  ggplot(aes(x = test, y = value)) +
  geom_col(fill = "skyblue") +
  facet_wrap(~name, ncol = 9) +
  theme_bw()
```

## Analizando solo Puro Normalizado con tiempo

### Balanced accuracy

```{r fig.width=10}
# Rows of PT whose label matches "BA" (the BAccuracy.* labels), one bar per
# label.
PT %>%
  filter(grepl("BA", name)) %>%
  ggplot(aes(x = name, y = value)) +
  geom_col(fill = "skyblue") +
  theme_bw()
```

### Spec

```{r fig.width=10}
# Rows of PT whose label matches "Spe" (the Specificity.* labels), one bar
# per label.
PT %>%
  filter(grepl("Spe", name)) %>%
  ggplot(aes(x = name, y = value)) +
  geom_col(fill = "skyblue") +
  theme_bw()
```

## Analizando solo Puro Normalizado sin tiempo con columna Size

### Balanced accuracy

```{r fig.width=10}
# Rows of PS whose label matches "BA" (the BAccuracy.* labels), one bar per
# label.
PS %>%
  filter(grepl("BA", name)) %>%
  ggplot(aes(x = name, y = value)) +
  geom_col(fill = "skyblue") +
  theme_bw()
```

### Spec

```{r fig.width=10}
# Rows of PS whose label matches "Spe" (the Specificity.* labels), one bar
# per label.
PS %>%
  filter(grepl("Spe", name)) %>%
  ggplot(aes(x = name, y = value)) +
  geom_col(fill = "skyblue") +
  theme_bw()
```

## Comparamos


### Balanced accuracy

```{r fig.width=15}
# PS and PT stacked, restricted to labels matching "BA": one panel per label,
# one bar per `test` code.
rbind(PS, PT) %>%
  filter(grepl("BA", name)) %>%
  ggplot(aes(x = test, y = value)) +
  geom_col(fill = "skyblue") +
  facet_wrap(~name) +
  theme_bw()
```

### Spec

```{r fig.width=15}
# PS and PT stacked, restricted to labels matching "Spe": one panel per
# label, one bar per `test` code.
rbind(PS, PT) %>%
  filter(grepl("Spe", name)) %>%
  ggplot(aes(x = test, y = value)) +
  geom_col(fill = "skyblue") +
  facet_wrap(~name) +
  theme_bw()
```