Installing and Checking for Packages

# Packages this document relies on
packages <- c(
  "ggplot2", "readr", "tidyverse", "dplyr",
  "ggpubr", "see", "ggExtra", "rmarkdown"
)

#' Attach a package, installing it first when it is not available.
#'
#' @param pkg Name of a single package, given as a character string.
#' @return `NULL`, invisibly. Called for its side effect of attaching `pkg`.
check_install_packages <- function(pkg) {
  # requireNamespace() only tests whether the package can be loaded; unlike
  # require() it does not warn when the package is missing.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, dependencies = TRUE)
  }
  # library() raises an error if the package still cannot be found, so a
  # failed installation is reported instead of being silently ignored.
  library(pkg, character.only = TRUE)
  invisible(NULL)
}

# Run the check-and-install helper once for every entry in `packages`
sapply(X = packages, FUN = check_install_packages)
## $ggplot2
## NULL
## 
## $readr
## NULL
## 
## $tidyverse
## NULL
## 
## $dplyr
## NULL
## 
## $ggpubr
## NULL
## 
## $see
## NULL
## 
## $ggExtra
## NULL
## 
## $rmarkdown
## NULL

How to View Dataset and Information

data(USArrests) # Loads the data set into your environment
head(USArrests, 3) # Shows the first three rows of the data set
##         Murder Assault UrbanPop Rape
## Alabama   13.2     236       58 21.2
## Alaska    10.0     263       48 44.5
## Arizona    8.1     294       80 31.0
help("USArrests") # Opens the help page with background on the data set

This data set contains arrest statistics for assault, murder, and rape in each of the 50 US states. Each of these variables is the number of arrests per 100,000 residents. There is also a variable giving the percentage of the population living in urban areas. All values are numerical.

Scatter Plots & Customizations

library(ggplot2) # Loads packages into R session

# Scatter plot of fuel efficiency against horsepower for the "mtcars" data set.
# Horsepower is on the x-axis and MPG on the y-axis so that the mapping agrees
# with the title and the axis labels set in labs() below.
# factor(cyl) treats the cylinder count as a category, giving one discrete
# colour per count instead of a continuous colour gradient.
ggplot(mtcars, aes(x = hp, y = mpg, color = factor(cyl))) +
  geom_point(size = 2.4, shape = 8) + # Scatter plot with customized points
  theme_minimal() + # Minimal theme
  theme(legend.position = "bottom") + # Moves legend to the bottom of the plot
  labs(
    title = "Effect of Horsepower on Fuel Efficiency",
    subtitle = "Categorized by Number of Cylinders",
    x = "Horsepower", y = "Fuel Efficiency (MPG)",
    color = "Cylinders" # Legend title; otherwise it would read "factor(cyl)"
  )

data("iris") # Reads in data set

# Scatter plot of petal length against sepal length, coloured by species
ggplot(iris, aes(x = Sepal.Length, y = Petal.Length, color = Species)) +
  geom_point(size = 3, shape = 17) + # Scatter plot with customized points
  theme_classic() + # Classic theme
  theme(legend.position = "right") + # Keeps the legend to the right of the plot
  labs(
    title = "Effect of Sepal Length on Petal Length",
    subtitle = "Categorized by species of iris", # spelling corrected
    x = "Sepal Length (cm)", y = "Petal Length (cm)"
  ) # Title, subtitle, and axis titles

library(ggplot2) # Loads packages into R session

data(USArrests) # Loads the data set

# Assault against murder arrests, with a fitted straight line
ggplot(data = USArrests, mapping = aes(x = Murder, y = Assault)) +
  geom_point(shape = 1, size = 3) + # Customized scatter points
  geom_smooth(method = "lm", se = TRUE) + # Linear regression line with standard error
  labs(
    title = "Scatter Plot of Assault vs. Murder Rates",
    x = "Murder Rate",
    y = "Assault Rate"
  ) + # Title and axis titles
  theme_minimal() # Minimal theme

Line Plots

# Copy the state names (stored as row names) into a regular column
USArrests[["State"]] <- row.names(USArrests)
# Row-wise mean of the "Murder", "Assault", and "Rape" columns, one value per state
USArrests[["AverageCrimeRate"]] <- rowMeans(USArrests[c("Murder", "Assault", "Rape")])

# group = 1 puts every state in one group so a single line connects all points
ggplot(USArrests, aes(x = State, y = AverageCrimeRate, group = 1)) +
  geom_line(color = "darkgreen") + # Line through the data points
  geom_point(color = "red") + # Individual data points
  labs(
    title = "Line Plot of Average Crime Rate by State",
    subtitle = "In Christmas Colors!",
    y = "Average Crime Rate"
  ) +
  theme_minimal() +
  # Rotate the x-axis labels by 90 degrees and right-align them
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

Violin Plots & Boxplots

library(ggplot2) # Loads packages into R session

CAM <- read.csv("C:/Users/Sam/OneDrive/Documents/SMCM/Winter R/Violin_Plot_Data.csv")

# Turn the data into long format: every column whose name starts with "Repeat"
# is stacked into a "Repeat" (column name) / "values" (cell value) pair
data_long <- pivot_longer(
  CAM,
  cols = starts_with("Repeat"),
  names_to = "Repeat",
  values_to = "values"
)

ggplot(data = data_long, aes(x = F1Performance, y = values, fill = F1Performance)) +
  # Raw observations as large, semi-transparent points coloured by category.
  # height = 0 restricts the jitter to the category axis, so the plotted
  # values themselves are never displaced.
  geom_jitter(
    aes(color = F1Performance),
    position = position_jitter(width = 0.1, height = 0),
    size = 5, alpha = 0.8
  ) +
  # manually colors points
  scale_color_manual(values = c("SVMWithDeepShapMaps" = "darkorchid4",
                                "SVMWithGradCAMMaps" = "chocolate2")) +
  # adds violin plot with a specific opacity, outline size, and quartile lines
  geom_violin(trim = TRUE, alpha = 0.4, size = 2, draw_quantiles = c(0.25, 0.5, 0.75)) +
  # makes the violin fill colors specific
  scale_fill_manual(values = c("SVMWithDeepShapMaps" = "darkorchid4",
                               "SVMWithGradCAMMaps" = "chocolate2")) +
  # flips the plot
  coord_flip() +
  # adds the minimal theme
  theme_minimal() +
  theme(axis.title.y = element_blank(), # removes y-axis title
        axis.ticks.y = element_blank(), # removes y-axis ticks
        axis.text.y = element_blank(), # removes y-axis text
        axis.line.x = element_line(color = "black", size = 1.5), # customizes x-axis line
        plot.title = element_text(hjust = 0.5, face = "bold"), # centers the plot title and makes it bold
        panel.grid.major.y = element_blank(), # removes the major grid lines in the y-direction
        panel.grid.minor.x = element_blank(), # removes the minor grid lines in the x-direction
        panel.grid.major.x = element_line(color = "grey", linetype = "dashed", size = 1.5), # customizes major x grid
        legend.position = "none") + # removes legend
  # marks the median of each category with a white-filled, black-bordered circle
  stat_summary(fun = median, geom = "point", shape = 21, size = 3, fill = "white", color = "black", stroke = 1.5) +
  # Category labels above the violins. annotate() draws each label exactly
  # once; geom_text() with constant aesthetics would inherit the plot data and
  # redraw the same label for every row.
  annotate("text", x = "SVMWithGradCAMMaps", y = 0.64,
           label = "SVM + GRAD-CAM++",
           vjust = -4.5, color = "darkorange2", size = 4.5) +
  annotate("text", x = "SVMWithDeepShapMaps", y = 0.59,
           label = "SVM + Deep SHAP",
           vjust = -4.5, color = "darkorchid4", size = 4.5) +
  scale_y_continuous(
    limits = c(0.56, 0.74), # adds limits of values on y-axis
    breaks = seq(0.56, 0.74, by = 0.02), # adds sequential breaks, by 0.02, between the limits
    labels = seq(0.56, 0.74, by = 0.02)) + # adds labels to the y-axis
  labs(
    title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information.", # adds plot title
    y = "F1") # adds y-axis title

# NEW PLOT WITH HALF VIOLINS
library(ggplot2) # Loads packages into R session

CAM <- read.csv("C:/Users/Sam/OneDrive/Documents/SMCM/Winter R/Violin_Plot_Data.csv")

# Long format: stack every column whose name starts with "Repeat"
data_long <- pivot_longer(
  CAM,
  cols = starts_with("Repeat"),
  names_to = "Repeat",
  values_to = "values"
)

ggplot(data = data_long, aes(x = F1Performance, y = values, fill = F1Performance)) +
  # height = 0 keeps the jitter on the category axis only, so the plotted
  # values are not displaced
  geom_jitter(
    aes(color = F1Performance),
    position = position_jitter(width = 0.1, height = 0),
    size = 5, alpha = 0.8
  ) +
  scale_color_manual(values = c("SVMWithDeepShapMaps" = "darkorchid4",
                                "SVMWithGradCAMMaps" = "chocolate2")) +
  geom_violinhalf(trim = TRUE, alpha = 0.4, size = 2, draw_quantiles = c(0.25, 0.5, 0.75)) + # Makes the plot a HALF VIOLIN
  scale_fill_manual(values = c("SVMWithDeepShapMaps" = "darkorchid4",
                               "SVMWithGradCAMMaps" = "chocolate2")) +
  coord_flip() +
  theme_minimal() +
  theme(axis.title.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank(),
        axis.line.x = element_line(color = "black", size = 1.5),
        plot.title = element_text(hjust = 0.5, face = "bold"),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.major.x = element_line(color = "grey", linetype = "dashed", size = 1.5),
        legend.position = "none") +
  # median of each category as a white-filled circle
  stat_summary(fun = median, geom = "point", shape = 21, size = 3, fill = "white", color = "black", stroke = 1.5) +
  # annotate() draws each label once; geom_text() with constant aesthetics
  # would redraw it for every row of the inherited data
  annotate("text", x = "SVMWithGradCAMMaps", y = 0.64,
           label = "SVM + GRAD-CAM++",
           vjust = -4.5, color = "darkorange2", size = 4.5) +
  annotate("text", x = "SVMWithDeepShapMaps", y = 0.59,
           label = "SVM + Deep SHAP",
           vjust = -4.5, color = "darkorchid4", size = 4.5) +
  scale_y_continuous(
    limits = c(0.56, 0.74),
    breaks = seq(0.56, 0.74, by = 0.02),
    labels = seq(0.56, 0.74, by = 0.02)) +
  labs(
    title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information.",
    y = "F1")

# NEW PLOT WITH VIOLIN + BOX PLOTS
library(ggplot2) # Loads packages into R session

CAM <- read.csv("C:/Users/Sam/OneDrive/Documents/SMCM/Winter R/Violin_Plot_Data.csv")

# Long format: stack every column whose name starts with "Repeat"
data_long <- pivot_longer(
  CAM,
  cols = starts_with("Repeat"),
  names_to = "Repeat",
  values_to = "values"
)

ggplot(data = data_long, aes(x = F1Performance, y = values, fill = F1Performance)) +
  geom_violin(trim = FALSE, alpha = 0.6, size = 2) + # Untrimmed violins
  scale_fill_manual(values = c("SVMWithDeepShapMaps" = "darkorchid4",
                               "SVMWithGradCAMMaps" = "chocolate2")) +
  geom_boxplot(width = 0.15, fill = "white", color = "black", size = 1) + # Adds boxplots on top of the violins for extra information
  coord_flip() +
  theme_minimal() +
  theme(axis.title.y = element_blank(),
        axis.ticks.y = element_blank(),
        axis.text.y = element_blank(),
        axis.line.x = element_line(color = "black", size = 1.5),
        plot.title = element_text(hjust = 0.5, face = "bold"),
        panel.grid.major.y = element_blank(),
        panel.grid.minor.x = element_blank(),
        panel.grid.major.x = element_line(color = "grey", linetype = "dashed", size = 1.5),
        legend.position = "none") +
  # median of each category as an orange-red circle on top of the boxplot
  stat_summary(fun = median, geom = "point", shape = 21, size = 3, fill = "orangered", color = "black", stroke = 1.5) +
  # annotate() draws each label once; geom_text() with constant aesthetics
  # would redraw it for every row of the inherited data
  annotate("text", x = "SVMWithGradCAMMaps", y = 0.64,
           label = "SVM + GRAD-CAM++",
           vjust = -4.5, color = "darkorange2", size = 4.5) +
  annotate("text", x = "SVMWithDeepShapMaps", y = 0.59,
           label = "SVM + Deep SHAP",
           vjust = -4.5, color = "darkorchid4", size = 4.5) +
  scale_y_continuous(
    limits = c(0.56, 0.74),
    breaks = seq(0.56, 0.74, by = 0.02),
    labels = seq(0.56, 0.74, by = 0.02)) +
  labs(
    title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information.",
    y = "F1")

Density Plots

library(ggplot2) # Loads packages into R session

population_data <- read.csv("C:/Users/Sam/OneDrive/Documents/SMCM/Winter R/log_population_data.csv")

ggplot(data = population_data, aes(x = Log10_Current_Population, y = Log10_Past_Population)) +
  # 2D density drawn as filled contour polygons. after_stat(level) refers to the
  # density level computed by the stat; it replaces the deprecated `..level..`.
  stat_density_2d(aes(fill = after_stat(level)), geom = "polygon", colour = "white") +
  scale_fill_distiller(palette = "YlOrRd", direction = 1) + # Uses custom palette for continuous data and reverses color direction
  theme_minimal() +
  labs(title = "2D Density Plot of Population Sizes",
       x = "Log10(Current population size N0)",
       y = "Log10(Past population size N1)")

library(ggplot2) # Loads packages into R session

longevity_data <- read.csv("C:/Users/Sam/OneDrive/Documents/SMCM/Winter R/longevity_data.csv")

# Build "long": the original data plus the derived columns used by the plot
long <- longevity_data %>%
  mutate(
    log_mass = log10(mass_g), # log-transformed mass
    log_lifespan = log10(maximum_lifespan_yr) # log-transformed lifespan
  ) %>%
  group_by(order) %>% # the next step is computed separately for each order
  mutate(order_size = n()) %>% # sample size of each order
  ungroup() # drop the grouping so later operations on "long" are not silently per-order

# Now you have a sample size for each order, and you have transformed each mass and lifespan value to log form.

long$class <- as.factor(long$class) # Turns class into a categorical variable
long$order_size <- as.numeric(long$order_size) # Turns order_size into a numeric variable

p <- ggplot(long, aes(x = log_mass, y = log_lifespan, size = order_size, color = class)) +
  geom_point(alpha = 0.3) + # bubbles sized by order sample size
  geom_smooth(method = "lm", se = FALSE) + # one linear fit per class, no error band
  scale_color_manual(values = c("Aves" = "lightgreen", "Mammalia" = "darkslategrey")) +
  labs(title = "Bubble Chart of Longevity and Body Mass",
       x = "Log (Body Mass [g])",
       y = "Log (Maximum Lifespan [yr])") +
  guides(color = "none", size = "none") + # Removes legends
  theme_minimal() +
  theme(plot.title = element_text(size = 14, face = "bold"),
        axis.title.x = element_text(size = 12, face = "bold"), # Customizes axis titles
        axis.title.y = element_text(size = 12, face = "bold")) + # Customizes axis titles
  annotate("text", x = 5.1, y = 1.9, label = "Aves", size = 5, color = "lightgreen", fontface = "bold") + # Adds annotation
  annotate("text", x = 6.5, y = 1.48, label = "Mammals", size = 5, color = "darkslategrey", fontface = "bold") # Adds annotation

ggExtra::ggMarginal(p, type = "density", groupFill = TRUE, alpha = 0.4) # adds the marginal density plots to the top and side

Interpretations:

  1. The addition of the density plots makes the density levels across each axis clear, and it's also easier to compare aves and mammals.

  2. The body mass and lifespan of individual aves are shown as the light green bubbles, and the body mass and lifespan of individual mammals are shown as the dark grey bubbles. The average trends for both aves and mammals can be visualized with the light green and dark grey regression lines. Lastly, the densities of the aves’ and mammals’ body masses can be visualized with the density plots on top of the graph, and the densities of the aves’ and mammals’ lifespans can be visualized with the density plots on the right side of the graph.

  3. Both aves and mammals have a positive relationship between longevity and body mass, but the increase is more extreme in aves.

  4. The data is more biased toward average lifespans in both aves and mammals, smaller masses in aves, and slightly larger masses in mammals. I believe the smaller masses in aves are necessary for consistent flight, while mass doesn’t matter as much for mammals due to reproduction and the ability to get around to feed.

  5. Adding standard error to the regression lines is always an aspect I look for in graphs, and I would have liked it here as well.

# Look at previous plot for annotations
library(ggplot2)

data("airquality")

# Wind against temperature, coloured by month.
# The point size is a fixed value, so it is set on the layer rather than inside
# aes(): inside aes() it is treated as data, rescaled by the size scale, and
# given a legend that then has to be hidden.
p <- ggplot(airquality, aes(x = Temp, y = Wind, color = factor(Month))) +
  geom_point(size = 3, alpha = 0.5) +
  scale_color_manual(values = c("5" = "lightblue2",
                                "6" = "steelblue1",
                                "7" = "steelblue4",
                                "8" = "midnightblue",
                                "9" = "gray18")) +
  theme_minimal() +
  guides(color = guide_legend(title = "Month")) + # Legend title
  labs(title = "Air Quality Values Over the Span of 5 Months",
       x = "Temperature (F)",
       y = "Wind (mph)")

# Adds density plots for each month to the top and side of the scatter plot
ggExtra::ggMarginal(p, type = "density", groupFill = TRUE, alpha = 0.4)

Multiple Panel Plots

library(ggplot2)

data(ChickWeight)

# Weight over time, one coloured line per chick, split into one panel per diet
ggplot(ChickWeight, aes(x = Time, y = weight, colour = Chick)) +
  geom_line(alpha = 0.8) +
  # Black LOESS (curved) smoother with its standard-error band
  geom_smooth(method = "loess", se = TRUE, colour = "black", size = 1.2) +
  facet_wrap(facets = ~ Diet, ncol = 4) + # Panels by "Diet", laid out in 4 columns
  guides(colour = "none") + # Removes legend
  labs(
    title = "Chick Growth by Diet Type",
    x = "Time (Days)",
    y = "Weight (Grams)"
  ) +
  theme_minimal()

library(ggplot2)
library(ggpubr)

data("CO2")

# VIOLIN PLOT
# CO2 uptake per treatment, with violins and boxplots filled by plant origin
violin <- ggplot(data = CO2, mapping = aes(x = Treatment, y = uptake, fill = Type)) +
  geom_violin(alpha = 0.4, size = 0.5, trim = FALSE) +
  # Narrow boxplots dodged by 0.9 so they sit side by side within each treatment
  geom_boxplot(position = position_dodge(width = 0.9), width = 0.15) +
  scale_fill_manual(
    values = c("Quebec" = "darkslategray3", "Mississippi" = "orange2")
  ) +
  labs(
    y = "CO2 Uptake (μmol/m2sec)",
    fill = "Origin"
  ) +
  theme_bw() +
  theme(axis.title.y = element_text(size = 5)) # Small y-axis title

# LINE PLOT
# Uptake against ambient CO2 concentration, coloured by treatment.
# The lines are grouped by Plant so that each plant gets its own curve.
# Grouping by Treatment alone joins the readings of every plant in a panel
# into one zig-zag path.
line <- ggplot(CO2, aes(x = conc, y = uptake, color = Treatment)) +
  geom_line(aes(group = Plant)) +
  geom_point() +
  scale_color_manual(values = c("nonchilled" = "darkolivegreen3",
                                "chilled" = "orchid")) +
  facet_wrap(~ Type) + # Multiple panels by "Type" variable
  labs(y = "CO2 Uptake (μmol/m2sec)",
       x = "Ambient Carbon Dioxide Concentration (mL/L)") +
  theme_bw() +
  theme(axis.title.y = element_text(size = 5)) # Small y-axis title

# SCATTER PLOT
# Uptake against ambient CO2 concentration with one linear fit per treatment
scatter <- ggplot(data = CO2, mapping = aes(x = conc, y = uptake, color = Treatment)) +
  geom_point(size = 4, shape = 18) +
  geom_smooth(method = "lm", se = TRUE, size = 1) + # Linear fit with standard error
  scale_color_manual(
    values = c("nonchilled" = "darkolivegreen3", "chilled" = "orchid")
  ) +
  labs(
    x = "Ambient Carbon Dioxide Concentration (mL/L)",
    y = "CO2 Uptake (μmol/m2sec)"
  ) +
  theme_bw() +
  theme(axis.title.y = element_text(size = 5)) # Small y-axis title

# COMBINED PLOT
# Stack the three panels in a single column; the x-axis text is removed from
# the scatter panel only
combined_plots <- ggarrange(
  line, violin, scatter + rremove("x.text"),
  nrow = 3, ncol = 1, # Three rows, one column
  heights = c(2, 2.5, 2.2), # Relative heights of the panels
  labels = c("A", "B", "C") # Panel labels
)

# Add a bold title above the whole figure
combined_plot_with_title <- annotate_figure(
  combined_plots,
  top = text_grob(
    "Cold-Tolerance Effects on CO2 Uptake in Grass Plants",
    face = "bold", size = 16
  )
)

combined_plot_with_title # Views final combined plot

Plots From Public Dataset

alcohol_data <- read.csv("C:/Users/Sam/OneDrive/Documents/SMCM/Winter R/Consumption of alcoholic beverages in Russia 1998-2023.csv")

# Line plot of consumption per year, one coloured line per beverage type
linep <- ggplot(
  data = alcohol_data,
  mapping = aes(x = Year, y = Consumption.of.alcoholic.beverages.L, color = Type)
) +
  geom_line(size = 1) +
  labs(
    title = "Consumption of Alcohol in Russia",
    x = "Year",
    y = "Consumption (L per capita)"
  ) +
  theme_minimal() +
  theme(legend.position = "right") # Legend on the right

linep

alcohol_data <- read.csv("C:/Users/Sam/OneDrive/Documents/SMCM/Winter R/Consumption of alcoholic beverages in Russia 1998-2023.csv")

# Scatter plot of pure-alcohol consumption per year with a linear fit per type
scatterp <- ggplot(
  data = alcohol_data,
  mapping = aes(x = Year, y = Consumption.of.alcoholic.beverages.L.Pure, color = Type)
) +
  geom_point(shape = 19, size = 1) +
  # The colour mapping is inherited from ggplot(), giving one fit per type
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "Consumption of Pure Alcohol in Russia",
    x = "Year",
    y = "Consumption of Pure Alcohol (L per capita)"
  ) +
  theme_minimal() +
  theme(legend.position = "right") # Legend on the right

scatterp

alcohol_data <- read.csv("C:/Users/Sam/OneDrive/Documents/SMCM/Winter R/Consumption of alcoholic beverages in Russia 1998-2023.csv")

# Treat the beverage type as a categorical variable
alcohol_data[["Type"]] <- factor(alcohol_data[["Type"]])

# One boxplot of consumption per beverage type
boxp <- ggplot(
  data = alcohol_data,
  mapping = aes(x = Type, y = Consumption.of.alcoholic.beverages.L)
) +
  geom_boxplot(color = "black", fill = "lightblue", size = 0.7, notch = FALSE) +
  labs(
    title = "Consumption of Alcohol in Russia",
    x = "Types of Alcohol",
    y = "Consumption of Alcohol (L per capita)"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

boxp

# Stack the three alcohol plots in a single column; the x-axis text is removed
# from the scatter panel only
combined_plots <- ggarrange(
  linep, boxp, scatterp + rremove("x.text"),
  nrow = 3, ncol = 1, # Three rows, one column
  heights = c(2, 2.5, 2.2), # Relative heights of the panels
  labels = c("A", "B", "C") # Panel labels
)

# Add a bold title above the whole figure
combined_plot_with_title <- annotate_figure(
  combined_plots,
  top = text_grob("Consumption of Alcohol in Russia", face = "bold", size = 16)
)

combined_plot_with_title