Ignore this first chunk; it fixes a problem that kept occurring when trying to knit the document.

replica with half violin

#load all packages needed for this module
library("ggplot2")
library("readr")
library("tidyverse")
library("dplyr")
library("ggpubr")
library("see")
library("scales")

#read the wide-format data: one row per model, one column per repeat
CAM <- read_csv("Violin_Plot_Data.csv")
#View() opens an interactive data viewer, which is not available while
#knitting, so only call it when the session is interactive
if (interactive()) {
  View(CAM)
}

#view the data
head(CAM)
## # A tibble: 2 × 21
##   F1Performance  Repeat1 Repeat2 Repeat3 Repeat4 Repeat5 Repeat6 Repeat7 Repeat8
##   <chr>            <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 SVMWithGradCA…   0.670   0.702   0.681   0.711   0.649   0.716   0.714   0.685
## 2 SVMWithDeepSh…   0.674   0.610   0.631   0.618   0.663   0.609   0.624   0.643
## # ℹ 12 more variables: Repeat9 <dbl>, Repeat10 <dbl>, Repeat11 <dbl>,
## #   Repeat12 <dbl>, Repeat13 <dbl>, Repeat14 <dbl>, Repeat15 <dbl>,
## #   Repeat16 <dbl>, Repeat17 <dbl>, Repeat18 <dbl>, Repeat19 <dbl>,
## #   Repeat20 <dbl>
#give your newly formatted data a name you will recognize, in this case "data_long"
#this has to run before min/max below, because they read data_long$values
data_long <- CAM %>%
  #Pivot the data from having many columns to many rows
  pivot_longer(
    cols = starts_with("Repeat"),  # Select columns to pivot
    names_to = "Repeat",           # old column names go into "Repeat"
    values_to = "values"           # cell values go into "values"
  )
#view the resulting (long) data
head(data_long)
## # A tibble: 6 × 3
##   F1Performance      Repeat  values
##   <chr>              <chr>    <dbl>
## 1 SVMWithGradCAMMaps Repeat1  0.670
## 2 SVMWithGradCAMMaps Repeat2  0.702
## 3 SVMWithGradCAMMaps Repeat3  0.681
## 4 SVMWithGradCAMMaps Repeat4  0.711
## 5 SVMWithGradCAMMaps Repeat5  0.649
## 6 SVMWithGradCAMMaps Repeat6  0.716
#calculating min and max of the plotted values (used for the axis limits and breaks)
min_value <- min(data_long$values, na.rm = TRUE)
max_value <- max(data_long$values, na.rm = TRUE)
#writing ggplot2 base data
#half-violin replica of Fig. 7: one row per model, F1 score on the horizontal axis
#fill is mapped once at the plot level so the violin picks it up
ggplot(data_long, aes(x = F1Performance, y = values, fill = F1Performance)) +
  #raw data points coloured by model; jitter only along the category axis
  geom_jitter(aes(color = F1Performance), size = 6, width = 0.1, height = 0) +
  #one half violin with quantile lines; alpha is a constant (outside aes) so
  #the fill is actually 20% opaque instead of being mapped to an alpha scale
  #NOTE(review): confirm the installed version of see honours draw_quantiles
  #in geom_violinhalf
  geom_violinhalf(alpha = 0.2, draw_quantiles = c(0.25, 0.5, 0.75)) +
  #flip the axis
  coord_flip() +
  #changing the color to orange and purple
  #manual values are assigned to the F1Performance values in sorted order
  scale_fill_manual(values = c("purple4", "darkorange2")) +
  scale_color_manual(values = c("purple4", "darkorange2")) +
  #add summary statistic (the median) and highlight it
  stat_summary(
    fun = median, geom = "point", shape = 21, size = 3,
    fill = "white", color = "black", stroke = 1.5
  ) +
  #changing theme
  theme_minimal() +
  theme(
    #hide the category axis entirely; the text labels below name each model
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank(),
    #draw a solid line for the value axis
    axis.line.x = element_line(color = "black", size = 2),
    #remove major grid lines for y axis and minor grid lines for x axis
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    #dashed grey major grid lines for x axis
    panel.grid.major.x = element_line(color = "grey", linetype = "dashed", size = 1.5),
    #centered bold title, no legend
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "none"
  ) +
  #adding text labels
  #annotate() draws each label once (geom_text with constant aesthetics would
  #redraw it for every data row); x must match an F1Performance value exactly,
  #otherwise an extra empty category is added to the axis
  #NOTE(review): verify "SVMWithDeepShapMaps" against the CSV, and tune vjust
  #by eye now that both labels sit on their own model's row
  annotate(
    "text", x = "SVMWithGradCAMMaps", y = 0.64, label = "SVM + GRAD-CAM++",
    vjust = -3.5, color = "darkorange2", size = 4.5
  ) +
  annotate(
    "text", x = "SVMWithDeepShapMaps", y = 0.6, label = "SVM + Deep SHAP",
    vjust = -3.5, color = "purple4", size = 4.5
  ) +
  #limit the value axis to the observed range, with a break every 0.02
  scale_y_continuous(
    limits = c(min_value, max_value),
    breaks = seq(min_value, max_value, by = 0.02)
  ) +
  #adding a title and changing axis name (bold face is set in theme() above)
  labs(
    title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information.",
    y = "F1"
  )

#that was really difficult

replica with full violins

#load all packages needed for this module
library("ggplot2")
library("readr")
library("tidyverse")
library("dplyr")
library("ggpubr")
library("see")
library("scales")

#read the wide-format data: one row per model, one column per repeat
CAM <- read_csv("Violin_Plot_Data.csv")
#View() opens an interactive data viewer, which is not available while
#knitting, so only call it when the session is interactive
if (interactive()) {
  View(CAM)
}

#view the data
head(CAM)
## # A tibble: 2 × 21
##   F1Performance  Repeat1 Repeat2 Repeat3 Repeat4 Repeat5 Repeat6 Repeat7 Repeat8
##   <chr>            <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
## 1 SVMWithGradCA…   0.670   0.702   0.681   0.711   0.649   0.716   0.714   0.685
## 2 SVMWithDeepSh…   0.674   0.610   0.631   0.618   0.663   0.609   0.624   0.643
## # ℹ 12 more variables: Repeat9 <dbl>, Repeat10 <dbl>, Repeat11 <dbl>,
## #   Repeat12 <dbl>, Repeat13 <dbl>, Repeat14 <dbl>, Repeat15 <dbl>,
## #   Repeat16 <dbl>, Repeat17 <dbl>, Repeat18 <dbl>, Repeat19 <dbl>,
## #   Repeat20 <dbl>
#give your newly formatted data a name you will recognize, in this case "data_long"
#this has to run before min/max below, because they read data_long$values
data_long <- CAM %>%
  #Pivot the data from having many columns to many rows
  pivot_longer(
    cols = starts_with("Repeat"),  # Select columns to pivot
    names_to = "Repeat",           # old column names go into "Repeat"
    values_to = "values"           # cell values go into "values"
  )
#view the resulting (long) data
head(data_long)
## # A tibble: 6 × 3
##   F1Performance      Repeat  values
##   <chr>              <chr>    <dbl>
## 1 SVMWithGradCAMMaps Repeat1  0.670
## 2 SVMWithGradCAMMaps Repeat2  0.702
## 3 SVMWithGradCAMMaps Repeat3  0.681
## 4 SVMWithGradCAMMaps Repeat4  0.711
## 5 SVMWithGradCAMMaps Repeat5  0.649
## 6 SVMWithGradCAMMaps Repeat6  0.716
#calculating min and max of the plotted values (used for the axis limits and breaks)
min_value <- min(data_long$values, na.rm = TRUE)
max_value <- max(data_long$values, na.rm = TRUE)
#writing ggplot2 base data
#full-violin replica of Fig. 7: one row per model, F1 score on the horizontal axis
#fill is mapped once at the plot level so the violin picks it up
ggplot(data_long, aes(x = F1Performance, y = values, fill = F1Performance)) +
  #raw data points coloured by model; jitter only along the category axis
  geom_jitter(aes(color = F1Performance), size = 6, width = 0.1, height = 0) +
  #one violin with quantile lines; alpha is a constant (outside aes) so the
  #fill is actually 20% opaque instead of being mapped to an alpha scale
  geom_violin(alpha = 0.2, draw_quantiles = c(0.25, 0.5, 0.75)) +
  #flip the axis
  coord_flip() +
  #changing the color to orange and purple
  #manual values are assigned to the F1Performance values in sorted order
  scale_fill_manual(values = c("purple4", "darkorange2")) +
  scale_color_manual(values = c("purple4", "darkorange2")) +
  #add summary statistic (the median) and highlight it
  stat_summary(
    fun = median, geom = "point", shape = 21, size = 3,
    fill = "white", color = "black", stroke = 1.5
  ) +
  #changing theme
  theme_minimal() +
  theme(
    #hide the category axis entirely; the text labels below name each model
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line.y = element_blank(),
    #draw a solid line for the value axis
    axis.line.x = element_line(color = "black", size = 2),
    #remove major grid lines for y axis and minor grid lines for x axis
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    #dashed grey major grid lines for x axis
    panel.grid.major.x = element_line(color = "grey", linetype = "dashed", size = 1.5),
    #centered bold title, no legend
    plot.title = element_text(hjust = 0.5, face = "bold"),
    legend.position = "none"
  ) +
  #adding text labels
  #annotate() draws each label once (geom_text with constant aesthetics would
  #redraw it for every data row); x must match an F1Performance value exactly,
  #otherwise an extra empty category is added to the axis
  #NOTE(review): verify "SVMWithDeepShapMaps" against the CSV, and tune vjust
  #by eye now that both labels sit on their own model's row
  annotate(
    "text", x = "SVMWithGradCAMMaps", y = 0.64, label = "SVM + GRAD-CAM++",
    vjust = -3.5, color = "darkorange2", size = 4.5
  ) +
  annotate(
    "text", x = "SVMWithDeepShapMaps", y = 0.6, label = "SVM + Deep SHAP",
    vjust = -3.5, color = "purple4", size = 4.5
  ) +
  #limit the value axis to the observed range, with a break every 0.02
  scale_y_continuous(
    limits = c(min_value, max_value),
    breaks = seq(min_value, max_value, by = 0.02)
  ) +
  #adding a title and changing axis name (bold face is set in theme() above)
  labs(
    title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information.",
    y = "F1"
  )

Module 3 Challenge violin plus box plot

library("rmarkdown")
library("knitr")

# libraries
library("ggplot2")
library("readr")
library("tidyverse")
library("dplyr")
library("ggpubr")
library("see")
library("scales")
library("hrbrthemes")

# read the wide-format data: one row per model, one column per repeat
CAM <- read_csv("Violin_Plot_Data.csv")
# View() opens an interactive data viewer, which is not available while
# knitting, so only call it when the session is interactive
if (interactive()) {
  View(CAM)
}

# make the data long format
# this has to run before min/max below, because they read data_long$values
data_long <- CAM %>%
  # pivot the data from having many columns to many rows
  pivot_longer(
    cols = starts_with("Repeat"),  # select columns to pivot
    names_to = "Repeat",           # old column names go into "Repeat"
    values_to = "values"           # cell values go into "values"
  )

# calculate min and max values of the long-format data
min_value <- min(data_long$values, na.rm = TRUE)
max_value <- max(data_long$values, na.rm = TRUE)

# view the resulting (long) data
head(data_long)
## # A tibble: 6 × 3
##   F1Performance      Repeat  values
##   <chr>              <chr>    <dbl>
## 1 SVMWithGradCAMMaps Repeat1  0.670
## 2 SVMWithGradCAMMaps Repeat2  0.702
## 3 SVMWithGradCAMMaps Repeat3  0.681
## 4 SVMWithGradCAMMaps Repeat4  0.711
## 5 SVMWithGradCAMMaps Repeat5  0.649
## 6 SVMWithGradCAMMaps Repeat6  0.716
# create the base ggplot
# violin plus box plot of F1 scores, one violin per model
# create the base ggplot
ggplot(data_long, aes(x = F1Performance, y = values)) +
  # add data points; jitter only along the category axis
  geom_jitter(size = 5, width = 0.1, height = 0) +
  # add violin and boxplot
  # alpha is a constant (outside aes) so the layers are actually 20% opaque
  # instead of being mapped to an alpha scale
  geom_violin(aes(fill = F1Performance), alpha = 0.2) +
  # "color_fill_manual" is not an aesthetic, so the grey styling was ignored;
  # NOTE(review): assumed a grey fill was intended - confirm
  geom_boxplot(width = 0.3, fill = "grey", alpha = 0.2) +
  # add a median point (fun = median, not the mean)
  stat_summary(
    fun = median, geom = "point", shape = 21, size = 3,
    fill = "white", color = "black", stroke = 1.5
  ) +
  # changing theme: classic look, no legend, no title on the value axis
  theme_classic() +
  theme(
    legend.position = "none",
    axis.title.y = element_blank()
  ) +
  labs(title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information.")

# doing this a second time was a little easier