Scatter Plots and Bar Graphs

#Install and Load necessary Packages
packages <- c("ggplot2", "readr", "tidyverse", "dplyr", "ggpubr", "gridExtra", "grid")
check_install_packages <- function(pkg){
  if (!require(pkg, character.only = TRUE)) {
    install.packages(pkg, dependencies = TRUE)
    library(pkg, character.only = TRUE)
  }
}
sapply(packages, check_install_packages)
## Loading required package: ggplot2
## Loading required package: readr
## Loading required package: tidyverse
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ stringr   1.5.1
## ✔ forcats   1.0.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: ggpubr
## 
## Loading required package: gridExtra
## 
## 
## Attaching package: 'gridExtra'
## 
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
## 
## 
## Loading required package: grid
## $ggplot2
## NULL
## 
## $readr
## NULL
## 
## $tidyverse
## NULL
## 
## $dplyr
## NULL
## 
## $ggpubr
## NULL
## 
## $gridExtra
## NULL
## 
## $grid
## NULL
# Load the USArrests dataset
data("USArrests")
head(USArrests)
##            Murder Assault UrbanPop Rape
## Alabama      13.2     236       58 21.2
## Alaska       10.0     263       48 44.5
## Arizona       8.1     294       80 31.0
## Arkansas      8.8     190       50 19.5
## California    9.0     276       91 40.6
## Colorado      7.9     204       78 38.7
# Simple scatter plot
ggplot(mtcars, aes(x = mpg, y = hp)) +
  geom_point() +
  labs(
    title = "Basic Scatter Plot",
    x = "Miles Per Gallon (MPG)",
    y = "Horsepower (HP)"
  )

# More Customized Scatterplot
ggplot(mtcars, aes(x = hp, y = mpg, color = as.factor(cyl))) +
  geom_point(size = 2.4, shape = 8) + # Customize point size and shape
  theme_minimal() +                   # Apply theme
  theme(legend.position = "bottom") + # Position legend at the bottom
  labs(
    title = "Effect of Horsepower on Fuel Efficiency",
    subtitle = "Categorized by Number of Cylinders",
    x = "Horsepower",
    y = "Fuel Efficiency (MPG)",
    color = "Number of Cylinders"
  ) +
  scale_color_brewer(palette = "Blues") #color palette of scatter plot

# Create a bar plot for murder arrests by state
ggplot(USArrests, aes(x = reorder(rownames(USArrests), Murder), y = Murder)) +
  geom_bar(stat = "identity", fill = "orchid") +  # modifly the bar colors
  labs(
    title = "Murder Arrests per 100,000 Residents by State", # Add title
    x = "State",                                             # Label for x-axis
    y = "Murder Arrests (per 100,000 residents)"             # Label for y-axis
  ) +
  theme_minimal() +                                          # Apply theme
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),       # Rotate x-axis labels for readability
    plot.title = element_text(size = 14, face = "bold", hjust = 0.5), # Customize title appearance
    axis.title.x = element_text(size = 12),                 # Customize x-axis title font size
    axis.title.y = element_text(size = 12),                 # Customize y-axis title font size
    axis.text = element_text(size = 10)                     # Customize axis text size
  )

Violin and Boxplots

# Load required packages
#packages <- c("ggplot2", "tidyverse", "dplyr", "ggpubr", "see")
#lapply(packages, function(x) if (!require(x, character.only = TRUE)) install.packages(x))
library(ggplot2)
library(tidyverse)
library(dplyr)
library(ggpubr)
library(see)
## Warning: package 'see' was built under R version 4.3.3
#install.packages("gghalves") 
library(gghalves)

#Import and load data
CAM<-read.csv("Violin_Plot_Data.csv")
## Warning in read.table(file = file, header = header, sep = sep, quote = quote, :
## incomplete final line found by readTableHeader on 'Violin_Plot_Data.csv'
print(CAM)
##         F1Performance  Repeat1  Repeat2  Repeat3  Repeat4  Repeat5  Repeat6
## 1  SVMWithGradCAMMaps 0.670051 0.701571 0.680628 0.710660 0.648649 0.715686
## 2 SVMWithDeepShapMaps 0.673913 0.610390 0.630872 0.618357 0.662577 0.608696
##    Repeat7  Repeat8  Repeat9 Repeat10 Repeat11 Repeat12 Repeat13 Repeat14
## 1 0.713568 0.684932 0.699029 0.687500 0.720812 0.716418 0.666667 0.683417
## 2 0.623529 0.642857 0.607477 0.645833 0.631579 0.660099 0.662420 0.610778
##   Repeat15 Repeat16 Repeat17 Repeat18 Repeat19 Repeat20
## 1 0.666667 0.663317 0.691943 0.680412 0.686869 0.686551
## 2 0.701754 0.659091 0.577540 0.666667 0.678571 0.596685
head(CAM)
##         F1Performance  Repeat1  Repeat2  Repeat3  Repeat4  Repeat5  Repeat6
## 1  SVMWithGradCAMMaps 0.670051 0.701571 0.680628 0.710660 0.648649 0.715686
## 2 SVMWithDeepShapMaps 0.673913 0.610390 0.630872 0.618357 0.662577 0.608696
##    Repeat7  Repeat8  Repeat9 Repeat10 Repeat11 Repeat12 Repeat13 Repeat14
## 1 0.713568 0.684932 0.699029 0.687500 0.720812 0.716418 0.666667 0.683417
## 2 0.623529 0.642857 0.607477 0.645833 0.631579 0.660099 0.662420 0.610778
##   Repeat15 Repeat16 Repeat17 Repeat18 Repeat19 Repeat20
## 1 0.666667 0.663317 0.691943 0.680412 0.686869 0.686551
## 2 0.701754 0.659091 0.577540 0.666667 0.678571 0.596685
data_long <- CAM %>%
  pivot_longer(
    cols = starts_with("Repeat"),
    names_to = "Repeat", 
    values_to = "values")
head(data_long)
## # A tibble: 6 × 3
##   F1Performance      Repeat  values
##   <chr>              <chr>    <dbl>
## 1 SVMWithGradCAMMaps Repeat1  0.670
## 2 SVMWithGradCAMMaps Repeat2  0.702
## 3 SVMWithGradCAMMaps Repeat3  0.681
## 4 SVMWithGradCAMMaps Repeat4  0.711
## 5 SVMWithGradCAMMaps Repeat5  0.649
## 6 SVMWithGradCAMMaps Repeat6  0.716
##Creating the Violin Plots
ggplot(data_long, aes(x = F1Performance, y = values, fill = F1Performance)) +
  #add and customize jitter points 
  geom_jitter(
    position = position_jitter(0.1),
    aes(color = F1Performance),
    size = 6,
    alpha = 0.8
  ) +
    # Add violin plot with quantile lines
  geom_violin(
    size = 2,
    alpha = 0.5,
    draw_quantiles = c(0.25, 0.5, 0.75),
    quantile.size = 2
  ) +
    # Flip coordinates
  coord_flip() +
    # Customize fill and color scales
  scale_fill_manual(values = c("magenta4", "darkorange2")) +
  scale_color_manual(values = c("magenta4", "darkorange2")) +
    # Add summary statistics (median points)
  stat_summary(
    fun = median,
    geom = "point",
    shape = 21,
    size = 3,
    fill = "white",
    color = "black",
    stroke = 1.5
  ) +
    # Customize y-axis scale
  scale_y_continuous(
    limits = c(min(data_long$values), max(data_long$values)),
    breaks = seq(min(data_long$values), max(data_long$values), by = 0.02),
    labels = scales::number_format(accuracy = 0.02)
  ) +
  #customize visuals
    theme_minimal() +
  theme(
    legend.title = element_text(face = "bold", size = 14),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank(),
    axis.line.x = element_line(size = 2, color = "black"),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_line(color = "grey", linetype = "dashed", size = 1.5),
    legend.position = "none"
  ) +
    # Add text annotations
  geom_text(
    aes(x = "SVMWithGradCAMMaps", label = "SVM + GRAD-CAM++", y = 0.64),
    vjust = -4.5,
    color = "darkorange2",
    size = 4.5
  ) +
  geom_text(
    aes(x = "SVMWithDeepShapMaps", y = 0.6, label = "SVM + Deep SHAP"),
    vjust = -3.5,
    color = "magenta4",
    size = 4.5
  ) +
  # Add axis label and plot title
  ylab("F1") +
  ggtitle("Fig. 7. Grad-CAM++ saliency maps capture unique predictive information.")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in geom_violin(size = 2, alpha = 0.5, draw_quantiles = c(0.25, 0.5, :
## Ignoring unknown parameters: `quantile.size`
## Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

## Create Half Violin Plots
ggplot(data_long, aes(x = F1Performance, y = values, fill = F1Performance)) +
# add jitter points
   geom_jitter(
    position = position_jitter(0.1),
    aes(color = F1Performance),
    size = 6,
    alpha = 0.8
  ) +
  #add half violin plot
    geom_violinhalf(
    size = 2,
    alpha = 0.5,
    draw_quantiles = c(0.25, 0.5, 0.75),
    quantile.size = 2
  ) +
  #flip coordinates
  coord_flip() +
  #customize colors
    scale_fill_manual(values = c("magenta4", "darkorange2")) +
  scale_color_manual(values = c("magenta4", "darkorange2")) +
  #add summary statistics
    stat_summary(
    fun = median,
    geom = "point",
    shape = 21,
    size = 3,
    fill = "white",
    color = "black",
    stroke = 1.5
  ) +
  #Customize y-axis
    scale_y_continuous(
    limits = c(min(data_long$values), max(data_long$values)),
    breaks = seq(min(data_long$values), max(data_long$values), by = 0.02),
    labels = scales::number_format(accuracy = 0.02)
  ) +
  #Customize visuals
    theme_minimal() +
  theme(
    legend.title = element_text(face = "bold", size = 14),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank(),
    axis.line.x = element_line(size = 2, color = "black"),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_line(color = "grey", linetype = "dashed", size = 1.5),
    legend.position = "none"
  ) +
  #Add text
  geom_text(
    aes(
      x = "SVMWithGradCAMMaps",
      label = "SVM + GRAD-CAM++",
      y = 0.64
    ),
    vjust = -4.5,
    color = "darkorange2",
    size = 4.5
  ) +
  geom_text(
    aes(
      x = "SVMWithDeepShapMaps",
      y = 0.6,
      label = "SVM + Deep SHAP"
    ),
    vjust = -3.5,
    color = "magenta4",
    size = 4.5
  ) +
  # Add y-axis label and plot title
  ylab("F1") +
  ggtitle("Fig. 7. Grad-CAM++ saliency maps capture unique predictive information.")
## Warning in geom_violinhalf(size = 2, alpha = 0.5, draw_quantiles = c(0.25, :
## Ignoring unknown parameters: `size`, `draw_quantiles`, and `quantile.size`
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).

#Half violin plot with box plot
ggplot(data_long, aes(x = F1Performance, y = values, fill = F1Performance)) +
  geom_jitter(
    position = position_jitter(0.1),
    aes(color = F1Performance),
    size = 6,
    alpha = 0.8
  ) +
  # Add half-violin plot
  geom_violinhalf(
    size = 2,
    alpha = 0.5,
    draw_quantiles = c(0.25, 0.5, 0.75),
    quantile.size = 2
  ) +
  # Add boxplot 
  geom_boxplot(
    aes(color = F1Performance),
    width = 0.4,
    alpha = 0.3,
    outlier.shape = NA # Suppress outliers for cleaner visualization
  ) +
  coord_flip() +
  scale_fill_manual(values = c("magenta4", "darkorange2")) +
  scale_color_manual(values = c("magenta4", "darkorange2")) +
  stat_summary(
    fun = median,
    geom = "point",
    shape = 21,
    size = 3,
    fill = "white",
    color = "black",
    stroke = 1.5
  ) +
  scale_y_continuous(
    limits = c(min(data_long$values), max(data_long$values)),
    breaks = seq(min(data_long$values), max(data_long$values), by = 0.02),
    labels = scales::number_format(accuracy = 0.02)
  ) +
  theme_minimal() +
  theme(
    legend.title = element_text(face = "bold", size = 14),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank(),
    axis.line.x = element_line(size = 2, color = "black"),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_line(color = "grey", linetype = "dashed", size = 1.5),
    legend.position = "none"
  ) +
  geom_text(
    aes(
      x = "SVMWithGradCAMMaps",
      label = "SVM + GRAD-CAM++",
      y = 0.64
    ),
    vjust = -4.5,
    color = "darkorange2",
    size = 4.5
  ) +
  geom_text(
    aes(
      x = "SVMWithDeepShapMaps",
      y = 0.6,
      label = "SVM + Deep SHAP"
    ),
    vjust = -3.5,
    color = "magenta4",
    size = 4.5
  ) +
  ylab("F1") +
  ggtitle("Fig. 7. Grad-CAM++ saliency maps capture unique predictive information.")
## Warning in geom_violinhalf(size = 2, alpha = 0.5, draw_quantiles = c(0.25, :
## Ignoring unknown parameters: `size`, `draw_quantiles`, and `quantile.size`
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).

Density Plot

# Load necessary libraries
library(ggExtra)
library(tidyverse)
library(dplyr)
library(ggplot2)

##Making a Density Plot 
# Load population data
population_data <- read.csv("log_population_data.csv")
head(population_data)
##   Log10_Current_Population Log10_Past_Population
## 1                 4.288032              5.674204
## 2                 3.817497              5.908109
## 3                 4.671286              6.095078
## 4                 3.538305              5.200114
## 5                 4.602143              6.388435
## 6                 4.839555              6.187712
ggplot(population_data, aes(x = Log10_Current_Population, y = Log10_Past_Population)) +
  stat_density_2d(geom = "polygon", aes(fill = ..level..), color = "white") + 
  scale_fill_viridis_c(option = "viridis") +# Choose colors
  ggtitle("2D Density Plot of Population Sizes") +  # Add title
  xlab("Log10(Current population size N0)") + # Label for x-axis
  ylab("Log10(Past population size N1)") +  # Label for y-axis
  theme_minimal() 
## Warning: The dot-dot notation (`..level..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(level)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Bubble Plot with a Marginal Density Plot

# Load data
longevity_data <- read.csv("longevity_data.csv")
head(longevity_data)
##                     species    class           order maximum_lifespan_yr mass_g
## 1 Dicrostonyx_groenlandicus Mammalia        Rodentia                 3.3   66.0
## 2      Didelphis_virginiana Mammalia Didelphimorphia                 6.6 3000.0
## 3         Diphylla_ecaudata Mammalia      Chiroptera                 8.0   28.0
## 4     Dipodillus_campestris Mammalia        Rodentia                 7.3   28.4
## 5        Dipodomys_merriami Mammalia        Rodentia                 9.7   42.0
## 6   Dendrolagus_goodfellowi Mammalia   Diprotodontia                23.6 7400.0
##     volancy fossoriallity foraging_environment daily_activity
## 1 nonvolant semifossorial          terrestrial     cathemeral
## 2 nonvolant  nonfossorial         semiarboreal      nocturnal
## 3    volant  nonfossorial          terrestrial      nocturnal
## 4 nonvolant semifossorial          terrestrial      nocturnal
## 5 nonvolant semifossorial          terrestrial      nocturnal
## 6 nonvolant  nonfossorial         semiarboreal     cathemeral
#prepare data for analysis
long <- longevity_data %>%
  mutate(
    log_mass = log10(mass_g),                 # Log-transform mass values
    log_lifespan = log10(maximum_lifespan_yr) # Log-transform lifespan values
  ) %>%
  group_by(order) %>%      # Group data by order
  mutate(order_size = n())  # Count the number of observations per order
head(long)
## # A tibble: 6 × 12
## # Groups:   order [4]
##   species           class order maximum_lifespan_yr mass_g volancy fossoriallity
##   <chr>             <chr> <chr>               <dbl>  <dbl> <chr>   <chr>        
## 1 Dicrostonyx_groe… Mamm… Rode…                 3.3   66   nonvol… semifossorial
## 2 Didelphis_virgin… Mamm… Dide…                 6.6 3000   nonvol… nonfossorial 
## 3 Diphylla_ecaudata Mamm… Chir…                 8     28   volant  nonfossorial 
## 4 Dipodillus_campe… Mamm… Rode…                 7.3   28.4 nonvol… semifossorial
## 5 Dipodomys_merria… Mamm… Rode…                 9.7   42   nonvol… semifossorial
## 6 Dendrolagus_good… Mamm… Dipr…                23.6 7400   nonvol… nonfossorial 
## # ℹ 5 more variables: foraging_environment <chr>, daily_activity <chr>,
## #   log_mass <dbl>, log_lifespan <dbl>, order_size <int>
p <- ggplot(long, aes(x = log_mass, y = log_lifespan, color = class)) +
  #points with size based on order size
  geom_point(aes(size = order_size), alpha = 0.3) + 
  geom_smooth(method = "lm", aes(group = class),    # Add regression lines
              se = FALSE, linetype = "solid") +
  scale_color_manual(values = c("lightgreen", "darkslategray")) + # choose colors
  ggtitle("Bubble Chart of Longevity and Body Mass") +  # Add title
  xlab("Log(Body Mass [g])") +                          # Label for x-axis
  ylab("Log(Maximum Lifespan [yr])") +                  # Label for y-axis
  theme_minimal() +                                     # Apply minimal theme
  theme(
    legend.position = "none",                           # Remove legend
    plot.title = element_text(size = 14, face = "bold"),# Customize title
    axis.title = element_text(size = 12, face = "bold") # Customize axis titles
  ) +
  annotate("text", x = 5.5, y = 1.9, label = "Aves",  # Add annotation for Aves
           color = "lightgreen", size = 5, fontface = "bold") +
  annotate("text", x = 6, y = 1.2, label = "Mammals", # Add annotation for Mammals
           color = "darkslategray", size = 5, fontface = "bold")
# Add marginal density plot
ggExtra::ggMarginal(p, type = "density", groupFill = TRUE, alpha = 0.4)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

Histogram with a Density Overlay

# Load diamonds dataset
data("diamonds")
head(diamonds)
## # A tibble: 6 × 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
## 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
## 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
## 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
summary(diamonds)
##      carat               cut        color        clarity          depth      
##  Min.   :0.2000   Fair     : 1610   D: 6775   SI1    :13065   Min.   :43.00  
##  1st Qu.:0.4000   Good     : 4906   E: 9797   VS2    :12258   1st Qu.:61.00  
##  Median :0.7000   Very Good:12082   F: 9542   SI2    : 9194   Median :61.80  
##  Mean   :0.7979   Premium  :13791   G:11292   VS1    : 8171   Mean   :61.75  
##  3rd Qu.:1.0400   Ideal    :21551   H: 8304   VVS2   : 5066   3rd Qu.:62.50  
##  Max.   :5.0100                     I: 5422   VVS1   : 3655   Max.   :79.00  
##                                     J: 2808   (Other): 2531                  
##      table           price             x                y         
##  Min.   :43.00   Min.   :  326   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:56.00   1st Qu.:  950   1st Qu.: 4.710   1st Qu.: 4.720  
##  Median :57.00   Median : 2401   Median : 5.700   Median : 5.710  
##  Mean   :57.46   Mean   : 3933   Mean   : 5.731   Mean   : 5.735  
##  3rd Qu.:59.00   3rd Qu.: 5324   3rd Qu.: 6.540   3rd Qu.: 6.540  
##  Max.   :95.00   Max.   :18823   Max.   :10.740   Max.   :58.900  
##                                                                   
##        z         
##  Min.   : 0.000  
##  1st Qu.: 2.910  
##  Median : 3.530  
##  Mean   : 3.539  
##  3rd Qu.: 4.040  
##  Max.   :31.800  
## 
ggplot(diamonds, aes(x = price)) +
  geom_histogram(aes(y = ..density..), bins = 30, 
                 fill = "lavender", alpha = 0.7) +
  geom_density(color = "orchid4", size = 1) + # Overlay density curve
  ggtitle("Diamond Prices") +                  # Add title
  xlab("Price") +                              # Label for x-axis
  ylab("Density") +                            # Label for y-axis
  theme_minimal() 

Multifaceted Plot

# ChickWeight Plot

chi <- ggplot(ChickWeight, aes(x = Time, y = weight, color = factor(Chick))) +
  geom_line(alpha = 0.9, size = 0.8) +
  geom_smooth(aes(group = factor(Diet)), se = TRUE, fill = "darkgray", color = "black", size = 1.2) +
  facet_wrap(~Diet, ncol = 4) +
  labs(
    title = "Chick Growth by Diet Type",
    x = "Time (Days)",
    y = "Weight (Grams)"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 14),
    axis.title = element_text(size = 12),
    strip.text = element_text(size = 12, face = "bold")
  )

print(chi)
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Load and Prepare CO2 Data
data("CO2")
CO2$TreatmentType <- interaction(CO2$Treatment, CO2$Type)
CO2$PlantType <- interaction(CO2$Plant, CO2$Type)

# Violin Plot
vn <- ggplot(CO2, aes(x = uptake, y = Treatment, fill = TreatmentType)) +
  geom_jitter(position = position_jitter(0.1), aes(color = TreatmentType)) +
  geom_violin(alpha = 0.5) +
  scale_fill_manual(values = c("thistle", "bisque", "lightsalmon", "lightgreen")) +
  scale_color_manual(values = c("thistle", "bisque", "lightsalmon", "lightgreen")) +
  labs(
    title = "CO2 Uptake of Grass Plants in Chilled and Unchilled Conditions",
    x = "Uptake Rates (μmol/m2sec)"
  ) +
  theme_minimal() +
  theme(
    plot.title.position = "plot",
    plot.margin = margin(t = 50, r = 20, b = 20, l = 20)
  )

print(vn)

# Line Plot
ln <- ggplot(CO2, aes(x = conc, y = uptake, fill = Treatment)) +
  geom_point(aes(color = Treatment)) +
  geom_smooth(aes(color = Treatment), method = "lm", alpha = 0.3) +
  facet_wrap(~Type, ncol = 4) +
  scale_fill_manual(values = c("red", "blue")) +
  scale_color_manual(values = c("red", "blue")) +
  labs(
    title = "Ambient CO2 Concentration by Uptake Rates and Treatment",
    x = "CO2 Concentration (mL/L)",
    y = "Uptake Rates (μmol/m2sec)"
  ) +
  theme_minimal()

print(ln)
## `geom_smooth()` using formula = 'y ~ x'

# Boxplot
bx <- ggplot(CO2, aes(x = Treatment, y = uptake, fill = Treatment)) +
  geom_boxplot() +
  scale_fill_manual(values = c("nonchilled" = "red", "chilled" = "blue")) +
  labs(
    title = "CO2 Uptake Rates by Treatment Type",
    x = "Treatment",
    y = "Uptake Rates (μmol/m2sec)"
  ) +
  theme_minimal()

print(bx)

# Customize Themes for Combined Plot
vn <- vn +
  theme(
    plot.margin = margin(5, 5, 5, 5),
    aspect.ratio = 1,
    axis.title.y = element_blank(),
    axis.title.x = element_text(size = 7),
    plot.title = element_blank(),
    legend.position = "bottom",
    legend.key.size = unit(0.3, "cm"),
    legend.text = element_text(size = 5),
    legend.title = element_blank(),
    legend.box = "horizontal",
    legend.direction = "horizontal",
    legend.key.height = unit(0.3, "cm"),
    legend.key.width = unit(1, "cm"),
    legend.box.spacing = unit(0.2, "cm")
  ) +
  guides(fill = guide_legend(ncol = 2))

ln <- ln +
  theme(
    plot.margin = margin(5, 5, 5, 5),
    aspect.ratio = 1,
    plot.title = element_blank(),
    legend.position = "bottom",
    axis.title.y = element_text(size = 7),
    axis.text.x = element_text(size = 7),
    axis.text.y = element_text(size = 7),
    legend.key.size = unit(0.3, "cm"),
    legend.text = element_text(size = 5),
    legend.title = element_blank(),
    legend.box = "horizontal",
    legend.direction = "horizontal",
    legend.key.height = unit(0.3, "cm"),
    legend.key.width = unit(0.5, "cm"),
    legend.box.spacing = unit(0.2, "cm")
  ) +
  guides(fill = guide_legend(ncol = 2))

bx <- bx +
  theme(
    plot.margin = margin(5, 5, 5, 5),
    aspect.ratio = 1,
    plot.title = element_blank(),
    axis.title.y = element_text(size = 5),
    axis.title.x = element_text(size = 7),
    legend.position = "right",
    legend.key.size = unit(0.3, "cm"),
    legend.text = element_text(size = 5),
    legend.title = element_blank(),
    legend.box = "horizontal",
    legend.direction = "horizontal",
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(0.5, "cm"),
    legend.box.spacing = unit(0.3, "cm")
  ) +
  guides(fill = guide_legend(ncol = 2))

# Combine Plots into Multi-Panel
combined <- ggarrange(
  bx, ln, vn,
  labels = c("A", "B", "C"),
  ncol = 2, nrow = 2,
  heights = c(1, 1),
  widths = c(1, 1),
  align = "hv"
)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Graphs cannot be vertically aligned unless the axis parameter is set.
## Placing graphs unaligned.
## Warning: Graphs cannot be horizontally aligned unless the axis parameter is
## set. Placing graphs unaligned.
# Add Title to Combined Plot
grid.arrange(
  combined,
  top = textGrob(
    "Carbon Dioxide Uptake in Grass Plants by Origin and in Chilled and Nonchilled Conditions",
    gp = gpar(fontsize = 10, fontface = "bold")
  )
)

Analysis of the Dataset Palmers Penguins

###install and load necessary packages
#install.packages("rmarkdown")
library(rmarkdown)
## Warning: package 'rmarkdown' was built under R version 4.3.3
#install.packages("knitr")
library(knitr)
## Warning: package 'knitr' was built under R version 4.3.3
#install.packages("tidyverse")
library(tidyverse)
#install.packages("ggplot2")
library(ggplot2)
#install.packages("hexbin")
library(hexbin)
## Warning: package 'hexbin' was built under R version 4.3.3
#install.packages("ggridges")
library(ggridges)
#install.packages("patchwork")
library(patchwork)
## Warning: package 'patchwork' was built under R version 4.3.3
#install.packages("ggpubr")
library(ggpubr)
#install.packages("gridExtra")
library(gridExtra)
library(grid)

###import data
#install.packages("palmerpenguins")
library(palmerpenguins)
data("penguins")
head(penguins)
## # A tibble: 6 × 8
##   species island    bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
##   <fct>   <fct>              <dbl>         <dbl>             <int>       <int>
## 1 Adelie  Torgersen           39.1          18.7               181        3750
## 2 Adelie  Torgersen           39.5          17.4               186        3800
## 3 Adelie  Torgersen           40.3          18                 195        3250
## 4 Adelie  Torgersen           NA            NA                  NA          NA
## 5 Adelie  Torgersen           36.7          19.3               193        3450
## 6 Adelie  Torgersen           39.3          20.6               190        3650
## # ℹ 2 more variables: sex <fct>, year <int>
### Plot 1:Penguin Counts Across Islands by Species 
plot1 <- ggplot(data = penguins, aes(x = island, fill = species)) + 
  geom_bar(position = "dodge") +  # Creates grouped bars
  labs(
    title = "Penguin Counts Across Islands by Species",
    x = "Island",
    y = "Total Count",
    fill = "Species"
  ) + 
  scale_fill_manual(values = c("Adelie" = "tan", "Chinstrap" = "coral", "Gentoo" = "lightblue")) +  # Custom colors for species
  theme_minimal() + 
  theme(
    plot.title = element_text(hjust = 0.5, size = 13, face = "bold"),  # Title styling
    axis.title.x = element_text(size = 12),  # X-axis label styling
    axis.title.y = element_text(size = 12),  # Y-axis label styling
    legend.title = element_text(size = 10),  # Legend title styling
    legend.text = element_text(size = 8)     # Legend text styling
  )
print(plot1)

### Plot2:Body Mass vs. Flipper Length by Penguin Species 
plot2 <- ggplot(data = penguins, aes(x = flipper_length_mm, y = body_mass_g, color = species)) +
  geom_point(size = 3, alpha = 0.8) +  # Add points with size and transparency
  labs(
    title = "Body Mass vs. Flipper Length by Penguin Species",
    x = "Flipper Length (mm)",
    y = "Body Mass (g)",
    color = "Species"
  ) +
  scale_color_manual(values = c("Adelie" = "tan", "Chinstrap" = "coral", "Gentoo" = "lightblue")) + #customize colors
  theme_classic() + #erases gridlines
  theme(
    plot.title = element_text(hjust = 0.3, size = 14, face = "bold"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )

### Plot3:Bill Length vs. Bill Depth by Species 
plot3 <- ggplot(data = penguins, aes(x = bill_length_mm, y = bill_depth_mm, fill = species)) +
  geom_hex(alpha = 0.8) +  # Add hexbin layer with slight transparency
  scale_fill_manual(values = c("Adelie" = "tan", "Chinstrap" = "coral", "Gentoo" = "lightblue")) +  # Custom species colors
  labs(
    title = "Bill Length vs. Bill Depth by Species",
    x = "Bill Length (mm)",
    y = "Bill Depth (mm)",
    fill = "Species"
  ) +
  theme_classic() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14, face = "bold"),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    legend.title = element_text(size = 10),
    legend.text = element_text(size = 8)
  )
print(plot3)
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_binhex()`).

### Plot4: Multifaceted Plot 

# Customize plot1
plot1 <- plot1 +
  labs(
    title = "Population Size per Island",
    x = "Island",
    y = "Population"
  ) +
  theme(
    plot.margin = margin(5, 5, 5, 5),
    aspect.ratio = 1,
    axis.title.y = element_text(size = 7),
    axis.title.x = element_text(size = 7),
    axis.text.x = element_text(size = 6, angle = 45, hjust = 1),  # Adjust x-axis labels
    plot.title = element_text(hjust = 0.5, size = 10, face = "italic"),
    legend.position = "none"  # Remove legend
  )

# Customize plot2 
plot2 <- ggplot(data = penguins, aes(x = flipper_length_mm, y = body_mass_g, color = species)) +
  geom_point(size = 3, alpha = 0.8) +  
  labs(
    title = "Body Mass vs. Flipper Length",
    x = "Flipper Length (mm)",
    y = "Body Mass (g)",
    color = "Penguin Species" 
  ) +
  scale_color_manual(values = c("Adelie" = "tan", "Chinstrap" = "coral", "Gentoo" = "lightblue")) +  
  theme_classic() +  
  theme(
    plot.title = element_text(hjust = 0.3, size = 10, face = "italic"),  # Style the title
    axis.title.x = element_text(size = 7),  # Style x-axis title
    axis.title.y = element_text(size = 7),  # Style y-axis title
    axis.text.x = element_text(size = 7, angle = 90, hjust = 1, vjust = 0.5),  # Rotate x-axis labels
    legend.position = "bottom",  # Place legend at the bottom for extraction
    legend.box = "vertical",
    legend.direction = "vertical", # Make the legend vertical
    legend.key.size = unit(0.5, "cm"),  # Adjust size of legend keys
    legend.text = element_text(size = 8),  # Style legend text
    legend.title = element_text(size = 9, face = "bold")  # Style legend title
  )

# Extract the legend from plot2
legend_space <- get_legend(plot2)
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Remove legend from plot2 
plot2 <- plot2 + theme(legend.position = "none")

# Customize plot3 
plot3 <- plot3 +
  labs(
    title = "Bill Length vs Bill Depth"  # Add the title
  ) +
  theme(
    plot.margin = margin(5, 5, 5, 5),
    aspect.ratio = 1,
    plot.title = element_text(hjust = 0.5, size = 10, face = "italic"),  # Style the title
    axis.title.y = element_text(size = 5),
    axis.title.x = element_text(size = 7),
    legend.position = "none"  # Remove legend
  )

# Combine the plots and place the legend
combined_plot <- (plot1 | plot2) / (plot3 | wrap_elements(full = legend_space)) +
  plot_layout(
    heights = c(1, 1), #make sure heights and widths are equal
    widths = c(1, 1)
  ) +
  plot_annotation(
    title = "Data on the Penguin Species of the Palmer Archipelago",
    theme = theme(
      plot.title = element_text(hjust = 0.5, size = 14, face = "bold")
    )
  )

# Display the final plot
print(combined_plot)
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 2 rows containing non-finite outside the scale range
## (`stat_binhex()`).