Ignore this first chunk; it fixes a problem that kept occurring when
trying to knit the document.
replica with half violin
# Read the wide-format F1 scores: one row per method, one column per repeat.
library(readr)
CAM <- read_csv("Violin_Plot_Data.csv")
# View() opens a spreadsheet viewer, which is only meaningful in an
# interactive session; skip it when the document is knitted.
if (interactive()) {
  View(CAM)
}
#load all packages needed for this module
library("ggplot2")
library("readr")
library("tidyverse")
library("dplyr")
library("ggpubr")
library("see")
library("scales")
# Preview the wide-format data as read from the CSV.
head(CAM)
## # A tibble: 2 × 21
## F1Performance Repeat1 Repeat2 Repeat3 Repeat4 Repeat5 Repeat6 Repeat7 Repeat8
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SVMWithGradCA… 0.670 0.702 0.681 0.711 0.649 0.716 0.714 0.685
## 2 SVMWithDeepSh… 0.674 0.610 0.631 0.618 0.663 0.609 0.624 0.643
## # ℹ 12 more variables: Repeat9 <dbl>, Repeat10 <dbl>, Repeat11 <dbl>,
## # Repeat12 <dbl>, Repeat13 <dbl>, Repeat14 <dbl>, Repeat15 <dbl>,
## # Repeat16 <dbl>, Repeat17 <dbl>, Repeat18 <dbl>, Repeat19 <dbl>,
## # Repeat20 <dbl>
# Reshape from wide (one column per repeat) to long (one row per
# method/repeat pair) so ggplot2 can map the scores to a single `values` column.
data_long <- CAM %>%
  pivot_longer(
    cols = starts_with("Repeat"), # every Repeat1..RepeatN column
    names_to = "Repeat",          # former column name
    values_to = "values"          # the F1 score itself
  )
# Preview the reshaped data (data_long, not the untouched wide CAM).
head(data_long)
## # A tibble: 6 × 3
## F1Performance Repeat values
## <chr> <chr> <dbl>
## 1 SVMWithGradCAMMaps Repeat1 0.670
## 2 SVMWithGradCAMMaps Repeat2 0.702
## 3 SVMWithGradCAMMaps Repeat3 0.681
## 4 SVMWithGradCAMMaps Repeat4 0.711
## 5 SVMWithGradCAMMaps Repeat5 0.649
## 6 SVMWithGradCAMMaps Repeat6 0.716
# Score range used for the axis limits. This has to come after the pivot:
# data_long does not exist before it in a fresh knitting session.
min_value <- min(data_long$values, na.rm = TRUE)
max_value <- max(data_long$values, na.rm = TRUE)
# Half-violin replica of Fig. 7: the distribution of F1 scores per method,
# drawn horizontally with the individual repeats overlaid.

# Snap the axis range outward to a 0.02 grid so tick labels are round numbers
# and neither the extreme observations nor the text labels fall outside the
# limits (values outside scale limits are dropped by ggplot2).
axis_step <- 0.02
axis_min <- round(floor(min_value / axis_step) * axis_step, 2)
axis_max <- round(ceiling(max_value / axis_step) * axis_step, 2)

# Resolve the two method names from the data instead of retyping them: a
# misspelled name would silently add an extra, empty category to the axis.
grad_cam_group <- "SVMWithGradCAMMaps"
deep_shap_group <- setdiff(unique(data_long$F1Performance), grad_cam_group)

ggplot(data_long, aes(x = F1Performance, y = values)) +
  # Raw repeats; height = 0 so the F1 values themselves are never displaced.
  geom_jitter(aes(color = F1Performance), size = 6, width = 0.1, height = 0) +
  # One semi-transparent half violin per method. alpha is *set* (outside
  # aes()) rather than mapped, so it really is 20% opacity.
  # NOTE(review): draw_quantiles is carried over from the original call;
  # confirm that see::geom_violinhalf() honours it.
  geom_violinhalf(
    aes(fill = F1Performance),
    alpha = 0.2,
    draw_quantiles = c(0.25, 0.5, 0.75)
  ) +
  # Highlight the median of each method.
  stat_summary(
    fun = median, geom = "point", shape = 21, size = 3,
    fill = "white", color = "black", stroke = 1.5
  ) +
  # Method labels. annotate() draws each label once; geom_text(aes(...))
  # would redraw it for every row of data_long. Both labels use the same
  # vertical offset now that each one is anchored to its own method.
  annotate(
    "text", x = grad_cam_group, y = 0.64, label = "SVM + GRAD-CAM++",
    vjust = -3.5, color = "darkorange2", size = 4.5
  ) +
  annotate(
    "text", x = deep_shap_group, y = 0.6, label = "SVM + Deep SHAP",
    vjust = -3.5, color = "purple4", size = 4.5
  ) +
  # Horizontal layout: methods on the vertical axis, F1 on the horizontal.
  coord_flip() +
  # Purple for the first method in alphabetical order, orange for the second.
  scale_fill_manual(values = c("purple4", "darkorange2")) +
  scale_color_manual(values = c("purple4", "darkorange2")) +
  scale_y_continuous(
    limits = c(axis_min, axis_max),
    breaks = seq(axis_min, axis_max, by = axis_step)
  ) +
  labs(
    title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information.",
    y = "F1"
  ) +
  theme_minimal() +
  theme(
    # After coord_flip() the vertical axis carries the method names; hide it
    # entirely because the in-plot labels identify the methods.
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank(),
    # Heavy baseline for the F1 axis (linewidth replaces the deprecated size).
    axis.line.x = element_line(color = "black", linewidth = 2),
    # Keep only dashed vertical guides at the major F1 breaks.
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_line(
      color = "grey", linetype = "dashed", linewidth = 1.5
    ),
    # Centered bold title; bold is a theme property, not a labs() argument.
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "none"
  )

#that was really difficult
replica with full violins
# Read the wide-format F1 scores: one row per method, one column per repeat.
library(readr)
CAM <- read_csv("Violin_Plot_Data.csv")
# View() opens a spreadsheet viewer, which is only meaningful in an
# interactive session; skip it when the document is knitted.
if (interactive()) {
  View(CAM)
}
#load all packages needed for this module
library("ggplot2")
library("readr")
library("tidyverse")
library("dplyr")
library("ggpubr")
library("see")
library("scales")
# Preview the wide-format data as read from the CSV.
head(CAM)
## # A tibble: 2 × 21
## F1Performance Repeat1 Repeat2 Repeat3 Repeat4 Repeat5 Repeat6 Repeat7 Repeat8
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SVMWithGradCA… 0.670 0.702 0.681 0.711 0.649 0.716 0.714 0.685
## 2 SVMWithDeepSh… 0.674 0.610 0.631 0.618 0.663 0.609 0.624 0.643
## # ℹ 12 more variables: Repeat9 <dbl>, Repeat10 <dbl>, Repeat11 <dbl>,
## # Repeat12 <dbl>, Repeat13 <dbl>, Repeat14 <dbl>, Repeat15 <dbl>,
## # Repeat16 <dbl>, Repeat17 <dbl>, Repeat18 <dbl>, Repeat19 <dbl>,
## # Repeat20 <dbl>
# Reshape from wide (one column per repeat) to long (one row per
# method/repeat pair) so ggplot2 can map the scores to a single `values` column.
data_long <- CAM %>%
  pivot_longer(
    cols = starts_with("Repeat"), # every Repeat1..RepeatN column
    names_to = "Repeat",          # former column name
    values_to = "values"          # the F1 score itself
  )
# Preview the reshaped data (data_long, not the untouched wide CAM).
head(data_long)
## # A tibble: 6 × 3
## F1Performance Repeat values
## <chr> <chr> <dbl>
## 1 SVMWithGradCAMMaps Repeat1 0.670
## 2 SVMWithGradCAMMaps Repeat2 0.702
## 3 SVMWithGradCAMMaps Repeat3 0.681
## 4 SVMWithGradCAMMaps Repeat4 0.711
## 5 SVMWithGradCAMMaps Repeat5 0.649
## 6 SVMWithGradCAMMaps Repeat6 0.716
# Score range used for the axis limits. This has to come after the pivot:
# data_long does not exist before it in a fresh knitting session.
min_value <- min(data_long$values, na.rm = TRUE)
max_value <- max(data_long$values, na.rm = TRUE)
# Full-violin replica of Fig. 7: the distribution of F1 scores per method,
# drawn horizontally with the individual repeats overlaid.

# Snap the axis range outward to a 0.02 grid so tick labels are round numbers
# and neither the extreme observations nor the text labels fall outside the
# limits (values outside scale limits are dropped by ggplot2).
axis_step <- 0.02
axis_min <- round(floor(min_value / axis_step) * axis_step, 2)
axis_max <- round(ceiling(max_value / axis_step) * axis_step, 2)

# Resolve the two method names from the data instead of retyping them: a
# misspelled name would silently add an extra, empty category to the axis.
grad_cam_group <- "SVMWithGradCAMMaps"
deep_shap_group <- setdiff(unique(data_long$F1Performance), grad_cam_group)

ggplot(data_long, aes(x = F1Performance, y = values)) +
  # Raw repeats; height = 0 so the F1 values themselves are never displaced.
  geom_jitter(aes(color = F1Performance), size = 6, width = 0.1, height = 0) +
  # One semi-transparent violin per method with quartile lines. alpha is
  # *set* (outside aes()) rather than mapped, so it really is 20% opacity.
  geom_violin(
    aes(fill = F1Performance),
    alpha = 0.2,
    draw_quantiles = c(0.25, 0.5, 0.75)
  ) +
  # Highlight the median of each method.
  stat_summary(
    fun = median, geom = "point", shape = 21, size = 3,
    fill = "white", color = "black", stroke = 1.5
  ) +
  # Method labels. annotate() draws each label once; geom_text(aes(...))
  # would redraw it for every row of data_long. Both labels use the same
  # vertical offset now that each one is anchored to its own method.
  annotate(
    "text", x = grad_cam_group, y = 0.64, label = "SVM + GRAD-CAM++",
    vjust = -3.5, color = "darkorange2", size = 4.5
  ) +
  annotate(
    "text", x = deep_shap_group, y = 0.6, label = "SVM + Deep SHAP",
    vjust = -3.5, color = "purple4", size = 4.5
  ) +
  # Horizontal layout: methods on the vertical axis, F1 on the horizontal.
  coord_flip() +
  # Purple for the first method in alphabetical order, orange for the second.
  scale_fill_manual(values = c("purple4", "darkorange2")) +
  scale_color_manual(values = c("purple4", "darkorange2")) +
  scale_y_continuous(
    limits = c(axis_min, axis_max),
    breaks = seq(axis_min, axis_max, by = axis_step)
  ) +
  labs(
    title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information.",
    y = "F1"
  ) +
  theme_minimal() +
  theme(
    # After coord_flip() the vertical axis carries the method names; hide it
    # entirely because the in-plot labels identify the methods.
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank(),
    # Heavy baseline for the F1 axis (linewidth replaces the deprecated size).
    axis.line.x = element_line(color = "black", linewidth = 2),
    # Keep only dashed vertical guides at the major F1 breaks.
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_line(
      color = "grey", linetype = "dashed", linewidth = 1.5
    ),
    # Centered bold title; bold is a theme property, not a labs() argument.
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "none"
  )

Module 3 Challenge violin plus box plot
library("rmarkdown")
library("knitr")
# libraries
library("ggplot2")
library("readr")
library("tidyverse")
library("dplyr")
library("ggpubr")
library("see")
library("scales")
library("hrbrthemes")
# Read the wide-format F1 scores: one row per method, one column per repeat.
library(readr)
CAM <- read_csv("Violin_Plot_Data.csv")
# View() opens a spreadsheet viewer, which is only meaningful in an
# interactive session; skip it when the document is knitted.
if (interactive()) {
  View(CAM)
}
# Reshape from wide (one column per repeat) to long (one row per
# method/repeat pair) so ggplot2 can map the scores to a single `values` column.
data_long <- CAM %>%
  pivot_longer(
    cols = starts_with("Repeat"), # every Repeat1..RepeatN column
    names_to = "Repeat",          # former column name
    values_to = "values"          # the F1 score itself
  )
head(data_long)
## # A tibble: 6 × 3
## F1Performance Repeat values
## <chr> <chr> <dbl>
## 1 SVMWithGradCAMMaps Repeat1 0.670
## 2 SVMWithGradCAMMaps Repeat2 0.702
## 3 SVMWithGradCAMMaps Repeat3 0.681
## 4 SVMWithGradCAMMaps Repeat4 0.711
## 5 SVMWithGradCAMMaps Repeat5 0.649
## 6 SVMWithGradCAMMaps Repeat6 0.716
# Score range. This has to come after the pivot: data_long does not exist
# before it in a fresh knitting session.
min_value <- min(data_long$values, na.rm = TRUE)
max_value <- max(data_long$values, na.rm = TRUE)
# Violin + box plot of the F1 scores per method, with the raw repeats and the
# median of each method overlaid.
ggplot(data_long, aes(x = F1Performance, y = values)) +
  # Raw repeats; height = 0 so the F1 values themselves are never displaced.
  geom_jitter(size = 5, width = 0.1, height = 0) +
  # Violin coloured by method. alpha is *set* (outside aes()) so it is a
  # literal 20% opacity instead of a value mapped through an alpha scale.
  geom_violin(aes(fill = F1Performance), alpha = 0.2) +
  # Narrow grey box plot inside each violin. fill is a fixed parameter; the
  # original `color_fill_manual` is not an aesthetic and was ignored.
  geom_boxplot(width = 0.3, fill = "grey", alpha = 0.2) +
  # Highlight the median of each method.
  stat_summary(
    fun = median, geom = "point", shape = 21, size = 3,
    fill = "white", color = "black", stroke = 1.5
  ) +
  theme_classic() +
  theme(
    legend.position = "none",
    axis.title.y = element_blank()
  ) +
  labs(title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information.")

# doing this a second time was a little easier