packages <- c("ggplot2", "readr", "tidyverse", "dplyr", "ggpubr", "see", "rmarkdown", "knitr", "tinytex", "ggExtra")

# Attach a package, installing it first when it is not available.
#
# pkg: package name as a length-one character string.
# Returns NULL invisibly on every path; called for its side effect of
# attaching `pkg` to the search path.
check_install_packages <- function(pkg) {
  # requireNamespace() only probes for the package, so a missing one does not
  # produce the warning that require() emits before the install starts.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, dependencies = TRUE)
  }
  # library() stops with an error if the package is still unavailable.
  library(pkg, character.only = TRUE)
  invisible(NULL)
}

sapply(packages, check_install_packages)
## $ggplot2
## NULL
## 
## $readr
## NULL
## 
## $tidyverse
## NULL
## 
## $dplyr
## NULL
## 
## $ggpubr
## NULL
## 
## $see
## NULL
## 
## $rmarkdown
## NULL
## 
## $knitr
## NULL
## 
## $tinytex
## NULL
## 
## $ggExtra
## NULL

Graphic Assignment 4

#head(ChickWeight)

# Chick growth over time: one faint line per chick, a smoothed trend on top,
# and one panel per diet.
ggplot(ChickWeight, aes(x = Time, y = weight)) +
  # alpha = .4 because the overlapping per-chick lines were unreadable otherwise
  geom_line(aes(color = Chick), alpha = .4) +
  # Line thickness is set with `linewidth`; `size` for lines is deprecated
  # since ggplot2 3.4.0.
  geom_smooth(color = "black", linewidth = 1.2, se = TRUE) +
  facet_wrap(~Diet, ncol = 4) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    strip.text = element_text(size = 12, face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1.5, vjust = 1.3),
    panel.spacing.x = unit(.5, "lines")
  ) +
  labs(
    title = "Chick Growth by Diet Type",
    x = "Time (Days)", y = "Weight (Grams)"
  )

#head(CO2, n = 84)

# Make theme_minimal() the default theme for every plot built after this point.
theme_set(theme_minimal())

# Panel A: CO2 uptake per treatment as violins split by plant Type, with the
# individual observations overlaid as centred dots.
P1 <- ggplot(CO2, aes(x = Treatment, y = uptake)) +
  geom_violin(aes(fill = Type), trim = TRUE) +
  geom_dotplot(
    aes(color = Type),
    binaxis = "y",
    stackdir = "center",
    position = position_dodge()
  ) +
  # Both dot outlines are black; the colour mapping is still what splits the
  # dots by Type.
  scale_fill_manual(values = c("#8000bb", "darkorange3")) +
  scale_color_manual(values = c("#000", "#000")) +
  labs(y = "CO2 Uptake") +
  theme(axis.title = element_text(face = "bold", size = 9))
  
# Panel C: uptake against ambient CO2 concentration, one line per plant,
# coloured by treatment and faceted by plant Type.
P2 <- ggplot(CO2, aes(x = conc, y = uptake, color = Treatment)) +
  # Each Treatment/Type combination holds several plants measured at the same
  # concentrations; without a per-plant group they are joined into a single
  # zig-zag line.
  geom_line(aes(group = Plant)) +
  geom_point(aes(shape = Treatment)) +
  scale_color_manual(values = c("#298c8c", "#800074")) +
  theme(axis.title = element_text(face = "bold", size = 9)) +
  labs(
    x = "CO2 Concentration",
    y = "CO2 Uptake"
  ) +
  facet_wrap(~Type)

# Panel B: uptake against CO2 concentration as dodged columns, with outline
# and fill both keyed to treatment, faceted by plant Type.
P3 <- ggplot(CO2, aes(x = conc, y = uptake)) +
  geom_col(
    aes(colour = Treatment, fill = Treatment),
    position = position_dodge()
  ) +
  facet_wrap(~Type) +
  scale_fill_manual(values = c("#298c8c", "#800074")) +
  scale_color_manual(values = c("#298c8c", "#800074")) +
  labs(x = "CO2 Concentration", y = "CO2 Uptake") +
  theme(axis.title = element_text(face = "bold", size = 9))
  
# Assemble the figure: P1 fills the top row (panel A); P3 and P2 sit side by
# side underneath (panels B and C) and share one legend at the bottom.
plot <- ggarrange(
  P1,
  ggarrange(
    P3, P2,
    ncol = 2,
    labels = c("B", "C"),
    common.legend = TRUE,
    legend = "bottom"
  ),
  nrow = 2,
  labels = "A"
)

# Title corrected: "CO2" with a letter O (was a zero) and "Treatment".
annotate_figure(
  plot,
  top = text_grob("CO2 Uptake by Treatment Group", size = 15, face = "bold")
)


Graphic Assignment 3

# NOTE(review): absolute, machine-specific path; the script only runs where
# the file exists at exactly this location.
population_data <- read.csv("C:/Users/seank/Downloads/R_Coding_Course/Data/log_population_data.csv")

#head(population_data)

# Filled 2D density contours of current versus past population size.
ggplot(population_data, aes(x = Log10_Current_Population, y = Log10_Past_Population)) +
  # after_stat(level) replaces the `..level..` notation, which is deprecated
  # since ggplot2 3.4.0.
  stat_density_2d(aes(fill = after_stat(level)), geom = "polygon", colour = "white") +
  scale_fill_distiller(palette = 9, direction = 1) +
  theme_minimal() +
  labs(
    title = "2D Density Plot of Population Sizes",
    x = "Log10(Current population size N0)",
    y = "Log10(past population size N1)",
    fill = "Density"
  )

# NOTE(review): absolute, machine-specific path; the script only runs where
# the file exists at exactly this location.
longevity_data <- read.csv("C:/Users/seank/Downloads/R_Coding_Course/Data/longevity_data.csv")

#head(longevity_data)

# Add log10-transformed mass and lifespan, plus `order_size`: the number of
# rows that share each row's `order` value.
long <- longevity_data %>%
  mutate(
    log_mass = log10(mass_g),
    log_lifespan = log10(maximum_lifespan_yr)
  ) %>%
  group_by(order) %>%
  mutate(order_size = n()) %>%
  # Drop the grouping so later verbs applied to `long` are not silently
  # evaluated per order.
  ungroup()

#head(long)

# Bubble chart of log lifespan against log body mass: colour marks the class,
# bubble size the number of rows in the point's order, with one linear fit per
# class. The legend is hidden, so the two classes are labelled on the plot.
p <- ggplot(long, aes(x = log_mass, y = log_lifespan)) +
  geom_point(aes(color = class, size = order_size), alpha = 0.3) +
  geom_smooth(
    aes(color = class),
    method = lm,
    se = FALSE,
    linetype = "solid"
  ) +
  annotate(
    "text",
    x = 5, y = 1.8, hjust = -0.4, vjust = -0.9,
    label = "Aves", color = "lightgreen", size = 5, fontface = "bold"
  ) +
  annotate(
    "text",
    x = 6, y = 1.2,
    label = "Mammals", color = "darkslategray", size = 5, fontface = "bold"
  ) +
  scale_color_manual(values = c("lightgreen", "darkslategray")) +
  labs(
    title = "Bubble Chart of Longevity and Body Mass",
    x = "Log (Body Mass [g])",
    y = "Log (Maximum Lifespan [yr])"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12, face = "bold")
  )

# Add per-class density curves in the top and right margins.
ggExtra::ggMarginal(p, type = "density", groupFill = TRUE, alpha = 0.4)

Interpretation Questions:

What is the benefit to adding density plots in the margin of your graphics? They show the distribution of each variable and help reveal potential bias.

Explain how you were able to depict 6 different measures in a single graphic. Be sure to clearly list the element and how it was depicted. (1) Scatter (points): geom_point draws each data point as a translucent circle. (2) Density: ggExtra::ggMarginal adds marginal density plots that show the distribution of the data for mass and lifespan. (3) Bubble size: size is set to order_size to show how many samples were taken from each order. (4) Regression: geom_smooth shows the relationship between lifespan and mass for each group. (5) Color: color is set to class so the two groups can be differentiated. (6) Log values: the log10 function was used to compute the log values for the x and y axes so the data could be shown in a more linear way.

What is the relationship between longevity and body mass? Is it more extreme in mammals or aves? There is a positive correlation between longevity and body mass, and it is more extreme in aves because their slope is steeper.

Is the data more biased toward smaller/larger or long/short lived animals? How do you know and why do you think that is? It is more biased toward smaller/larger animals. In the density plot on the top, the curves are skewed to the left or right, which indicates potential bias, while the curves in the density plot on the right are closer to a normal distribution and have coverage across the range. In the top plot, the aves group is missing coverage on the right side and is condensed on the left side.

Is there an element missing from this graphic that you feel should be there? Hint: There is one that could be helpful if added that is not depicted currently in any other way on the graphic. A legend describing what the size of the point represents (sample size).

# Read the height data, then derive height in metres and BMI (kg / m^2).
Height <- read.csv("C:/Users/seank/Downloads/R_Coding_Course/Data/height_data.csv")

Height[["height_m"]] <- Height[["height_cm"]] / 100

Height[["BMI"]] <- Height[["weight_kg"]] / (Height[["height_m"]]^2)

#head(Height)

# Bubble chart of BMI against weight: colour marks sex, bubble size marks
# height, with one linear fit per sex.
p <- ggplot(Height, aes(x = weight_kg, y = BMI)) +
  geom_point(aes(color = sex, size = height_cm), alpha = 0.5) +
  geom_smooth(aes(color = sex), method = lm, se = FALSE) +
  scale_color_manual(values = c("#ff73b6", "#008dff")) +
  labs(
    title = "Bubble Chart of Weight and BMI",
    x = "Weight(kg)",
    y = "BMI",
    color = "Sex",
    size = "Height(cm)"
  ) +
  theme_minimal() +
  theme(legend.position = "right")

# Add per-sex density curves in the top and right margins.
ggExtra::ggMarginal(p, type = "density", groupFill = TRUE, alpha = 0.3)

# Distribution of BMI by sex: dodged density-scaled histogram with a
# translucent density curve drawn over it.
ggplot(Height, aes(x = BMI)) +
  # after_stat(density) replaces the `..density..` notation, which is
  # deprecated since ggplot2 3.4.0.
  geom_histogram(
    aes(y = after_stat(density), color = sex, fill = sex),
    alpha = 1, position = "dodge"
  ) +
  geom_density(aes(color = sex, fill = sex), alpha = 0.3) +
  scale_color_manual(values = c("#ff73b6", "#008dff")) +
  scale_fill_manual(values = c("#ff73b6", "#008dff")) +
  theme_minimal() +
  theme(legend.title.position = "right") +
  # The colour legend is suppressed here, so it needs no title in labs();
  # element_blank() is a theme element, not a label.
  guides(color = "none") +
  labs(
    title = "Distribution of BMI", subtitle = "Categorized by Sex",
    x = "BMI", y = "Density",
    fill = "Sex"
  )

I can’t figure out how to get the legend to be on the outside of the marginal plots


Graphic Assignment 2

## Replica Violin and Half-Violin Plot

# Read the violin-plot data and stack every column whose name starts with
# "Repeat" into name/value pairs (`Repeat`, `values`).
CAM <- read.csv("C:/Users/seank/Downloads/R_Coding_Course/Data/Violin_Plot_Data.csv")

data_long <- pivot_longer(
  CAM,
  cols = starts_with("Repeat"),
  names_to = "Repeat",
  values_to = "values"
)

#head(data_long, 40)

# Replica figure: jittered F1 scores per model with a violin, quartile lines
# and a median marker, drawn horizontally.
ggplot(data_long, aes(x = F1Performance, y = values)) +
  geom_jitter(
    aes(color = F1Performance),
    alpha = 0.8, size = 5,
    # A fixed seed makes the jitter identical on every run; change the seed to
    # get a different but still reproducible arrangement.
    position = position_jitter(width = 0.1, seed = 1)
  ) +
  scale_color_manual(values = c("darkorchid4", "darkorange1")) +
  geom_violin(
    aes(fill = F1Performance),
    alpha = 0.5,
    # `linewidth` sets the outline and quantile line thickness (`size` for
    # lines is deprecated since ggplot2 3.4.0). `quantile.size` is not a
    # geom_violin() parameter and was being ignored, so it is removed.
    linewidth = 2,
    # Quantiles are probabilities and must be numeric, not strings.
    draw_quantiles = c(0.25, 0.50, 0.75)
  ) +
  stat_summary(
    fun = median, geom = "point", shape = 21, size = 3,
    fill = "white", color = "black", stroke = 1.5
  ) +
  scale_fill_manual(values = c("darkorchid4", "darkorange1")) +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "none",
    axis.line.x.bottom = element_line(color = "black", linewidth = 1.5),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_line(colour = "grey", linewidth = 1.5, linetype = "dashed")
  ) +
  # annotate() draws each label once; geom_text() with constant aesthetics
  # redraws it for every row of the data.
  annotate(
    "text",
    x = "SVMWithGradCAMMaps", y = 0.64, label = "SVM + GRAD-CAM++",
    vjust = -4.5, color = "darkorange1", size = 4.5
  ) +
  annotate(
    "text",
    x = "SVMWithDeepShapMaps", y = 0.59, label = "SVM + Deep SHAP",
    vjust = -4.5, color = "darkorchid4", size = 4.5
  ) +
  scale_y_continuous(
    limits = c(0.56, 0.74),
    breaks = seq(0.56, 0.74, by = 0.02),
    labels = seq(0.56, 0.74, by = 0.02)
  ) +
  labs(
    title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information",
    y = "F1"
  )

# Replica figure, half-violin variant: jittered F1 scores per model with a
# half violin and a median marker, drawn horizontally.
ggplot(data_long, aes(x = F1Performance, y = values)) +
  geom_jitter(
    aes(color = F1Performance),
    alpha = 0.8, size = 5,
    # A fixed seed makes the jitter identical on every run; change the seed to
    # get a different but still reproducible arrangement.
    position = position_jitter(width = 0.1, seed = 1)
  ) +
  scale_color_manual(values = c("darkorchid4", "darkorange1")) +
  # NOTE(review): arguments kept exactly as written; confirm against the
  # geom_violinhalf() documentation which of them it actually honours.
  geom_violinhalf(aes(fill = F1Performance), alpha = 0.5, size = 2,
                  draw_quantiles = c("0.25", "0.50", "0.75"), quantile.size = 2) +
  stat_summary(
    fun = median, geom = "point", shape = 21, size = 3,
    fill = "white", color = "black", stroke = 1.5
  ) +
  scale_fill_manual(values = c("darkorchid4", "darkorange1")) +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "none",
    # element_line() takes `linewidth`; its `size` argument is deprecated
    # since ggplot2 3.4.0.
    axis.line.x.bottom = element_line(color = "black", linewidth = 1.5),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_line(colour = "grey", linewidth = 1.5, linetype = "dashed")
  ) +
  # annotate() draws each label once; geom_text() with constant aesthetics
  # redraws it for every row of the data.
  annotate(
    "text",
    x = "SVMWithGradCAMMaps", y = 0.64, label = "SVM + GRAD-CAM++",
    vjust = -4.5, color = "darkorange1", size = 4.5
  ) +
  annotate(
    "text",
    x = "SVMWithDeepShapMaps", y = 0.59, label = "SVM + Deep SHAP",
    vjust = -4.5, color = "darkorchid4", size = 4.5
  ) +
  scale_y_continuous(
    limits = c(0.56, 0.74),
    breaks = seq(0.56, 0.74, by = 0.02),
    labels = seq(0.56, 0.74, by = 0.02)
  ) +
  labs(
    title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information",
    y = "F1"
  )

Before flipping, it seems like quantile.size isn’t doing anything, but size is making the lines thicker. Then, when adding the jitter points, I had to run the code multiple times to get my points to look like yours. Is there a way to make it look like yours from the beginning, or is it random?

## Revised Replica Violin Plot with Box Plot

# Revised figure: violin per model with an unfilled box plot on top, drawn
# horizontally.
ggplot(data_long, aes(x = F1Performance, y = values)) +
  # `linewidth` sets line thickness; `size` for lines is deprecated since
  # ggplot2 3.4.0.
  geom_violin(aes(fill = F1Performance), alpha = 0.5, linewidth = 1) +
  scale_fill_manual(values = c("#298c8c", "#800074")) +
  # fill = NA (the missing value, not the string "NA") leaves the box unfilled
  geom_boxplot(width = 0.3, color = "black", fill = NA, linewidth = 1) +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "none",
    axis.line.x.bottom = element_line(color = "black", linewidth = 1.5),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_line(colour = "grey", linewidth = 1.5, linetype = "dashed")
  ) +
  # annotate() draws each label once; geom_text() with constant aesthetics
  # redraws it for every row of the data.
  annotate(
    "text",
    x = "SVMWithGradCAMMaps", y = 0.64, label = "SVM + GRAD-CAM++",
    vjust = -5.8, color = "#800074", size = 4.5
  ) +
  annotate(
    "text",
    x = "SVMWithDeepShapMaps", y = 0.59, label = "SVM + Deep SHAP",
    vjust = -5.2, color = "#298c8c", size = 4.5
  ) +
  scale_y_continuous(
    limits = c(0.56, 0.74),
    breaks = seq(0.56, 0.74, by = 0.02),
    labels = seq(0.56, 0.74, by = 0.02)
  ) +
  labs(
    title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information",
    y = "F1"
  )


Graphic Assignment 1

# Load the built-in USArrests data set so the plots below can refer to it.
data("USArrests")
# Uncomment to inspect all 50 rows.
#head(USArrests, n = 50)

Discussion:

What are the variables available? The type of felony they were arrested for and the percent urban population for each state

How is each variable defined or calculated? Murder, Assault, and Rape are all arrests per 100,000 and Urban population is a percentage

Is each one numerical or categorical? they are all numerical

## GGplot Graphic Code

# Fuel efficiency against horsepower, coloured by number of cylinders.
# Horsepower goes on x and mpg on y so the data agree with the axis labels
# and the title.
ggplot(mtcars, aes(x = hp, y = mpg)) +
  # cyl is numeric; a manual (discrete) colour scale needs a factor
  geom_point(aes(color = factor(cyl)), size = 2.4, shape = 8) +
  # Colours go in `values`; `aesthetics` only names which aesthetics the scale
  # applies to. Only the 8-cylinder colour was given originally; the colours
  # for 4 and 6 are placeholders, so adjust them to taste.
  scale_color_manual(
    values = c("4" = "#800074", "6" = "darkorange3", "8" = "#298c9c")
  ) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  labs(
    title = "Effect of Horsepower on Fuel Efficiency",
    subtitle = "Categorized by Number of Cylinders",
    x = "Horsepower", y = "Fuel Efficiency (MPG)",
    color = "Cylinders"
  )

## GGplot Graphic Code

# Murder arrest rate against percent urban population, with a linear fit and
# its 95% confidence band. String literals use plain ASCII quotes; the
# typographic quotes in the rendered text do not parse as R.
ggplot(USArrests, aes(x = UrbanPop, y = Murder)) +
  geom_point(color = "purple", size = 2.5, shape = 19) +
  geom_smooth(method = lm, se = TRUE, color = "gold", level = 0.95) +
  theme_gray() +
  labs(
    title = "The Effect of Urban Population on Murder Arrest Rates",
    x = "Percent Urban Population (By State)",
    y = "Murder Arrests (per 100,000)"
  )

# Assault arrest rate against percent urban population, with a linear fit and
# its 95% confidence band. String literals use plain ASCII quotes; the
# typographic quotes in the rendered text do not parse as R.
ggplot(USArrests, aes(x = UrbanPop, y = Assault)) +
  geom_point(color = "purple", size = 2.5, shape = 19) +
  geom_smooth(method = lm, se = TRUE, color = "gold", level = 0.95) +
  theme_gray() +
  labs(
    title = "The Effect of Urban Population on Assault Arrest Rates",
    x = "Percent Urban Population (By State)",
    y = "Assault Arrests (per 100,000)"
  )

# Rape arrest rate against percent urban population, with a linear fit and
# its 95% confidence band. String literals use plain ASCII quotes; the
# typographic quotes in the rendered text do not parse as R.
ggplot(USArrests, aes(x = UrbanPop, y = Rape)) +
  geom_point(color = "purple", size = 2.5, shape = 19) +
  geom_smooth(method = lm, se = TRUE, color = "gold", level = 0.95) +
  theme_gray() +
  labs(
    title = "The Effect of Urban Population on Rape Arrest Rates",
    x = "Percent Urban Population (By State)",
    y = "Rape Arrests (per 100,000)"
  )

# Assault against murder arrest rates, points shaded by percent urban
# population, with a linear fit. String literals use plain ASCII quotes, and
# the stray Markdown code fence that trailed the call is removed; neither
# parses as R.
ggplot(USArrests, aes(x = Murder, y = Assault)) +
  geom_point(aes(color = UrbanPop), size = 2.5, shape = 19) +
  geom_smooth(method = lm, se = TRUE, color = "blue") +
  theme_gray() +
  labs(
    title = "The Effect of Murder Arrest Rates on Assault Arrest Rates",
    subtitle = "Categorized by Urban Population Percentage",
    x = "Murder Arrests (per 100,000)",
    y = "Assault Arrests (per 100,000)"
  )