mod 5: 2D density graphics

# load packages 
library(readr)
library(ggplot2)
library(dplyr)
# read in csv and rename it via import dataset function
library(readr)
population_data <- read_csv("log_population_data.csv") 
View(population_data) 
head(population_data)
## # A tibble: 6 × 2
##   Log10_Current_Population Log10_Past_Population
##                      <dbl>                 <dbl>
## 1                     4.29                  5.67
## 2                     3.82                  5.91
## 3                     4.67                  6.10
## 4                     3.54                  5.20
## 5                     4.60                  6.39
## 6                     4.84                  6.19
# create the density plot
ggplot(population_data, aes(x = Log10_Current_Population, y = Log10_Past_Population)) +
  stat_density_2d(aes(fill = after_stat(level)), geom = "polygon", color = "white") +
  scale_fill_gradient(low = "darkblue", high = "skyblue", name = "level") + 
  # I couldn't figure out how to use the scale_fill_distiller function so I used a differnt one
  
  theme_minimal() + 
  labs(title = "2D Density Plot of Population Sizes", x = "Log10(Current population size N0", y = "Log10(Past population ize N1)") 

## Adding density to margins

library(readr)
library(ggplot2)
library(dplyr)
library(ggExtra)
library(readr)
longevity_data <- read_csv("longevity_data.csv")
View(longevity_data)
head(longevity_data)
## # A tibble: 6 × 9
##   species           class order maximum_lifespan_yr mass_g volancy fossoriallity
##   <chr>             <chr> <chr>               <dbl>  <dbl> <chr>   <chr>        
## 1 Dicrostonyx_groe… Mamm… Rode…                 3.3   66   nonvol… semifossorial
## 2 Didelphis_virgin… Mamm… Dide…                 6.6 3000   nonvol… nonfossorial 
## 3 Diphylla_ecaudata Mamm… Chir…                 8     28   volant  nonfossorial 
## 4 Dipodillus_campe… Mamm… Rode…                 7.3   28.4 nonvol… semifossorial
## 5 Dipodomys_merria… Mamm… Rode…                 9.7   42   nonvol… semifossorial
## 6 Dendrolagus_good… Mamm… Dipr…                23.6 7400   nonvol… nonfossorial 
## # ℹ 2 more variables: foraging_environment <chr>, daily_activity <chr>
# put the data into log format
long <- longevity_data %>% #create a new dataframe called "long" that contains all your newly calculated variables
  mutate( #mutate tells the program to perform new calculations
    log_mass = log10(mass_g),                          # create a new column called "log_mass" which Log-transforms mass values
    log_lifespan = log10(maximum_lifespan_yr))  %>%          # create a new colummn called "log_lifespan" that Log-transforms lifespan value
   group_by(order) %>%        # this tells it that after "mutate", you are going to start a new function. for each "order" or group of animals    
  mutate(order_size = n())      #calculate the sample size of each order and put it in a column called "order_size". 

#Now you have a sample size for each order, and you have transformed each mass and lifespan value to log form. 

head(long)
## # A tibble: 6 × 12
## # Groups:   order [4]
##   species           class order maximum_lifespan_yr mass_g volancy fossoriallity
##   <chr>             <chr> <chr>               <dbl>  <dbl> <chr>   <chr>        
## 1 Dicrostonyx_groe… Mamm… Rode…                 3.3   66   nonvol… semifossorial
## 2 Didelphis_virgin… Mamm… Dide…                 6.6 3000   nonvol… nonfossorial 
## 3 Diphylla_ecaudata Mamm… Chir…                 8     28   volant  nonfossorial 
## 4 Dipodillus_campe… Mamm… Rode…                 7.3   28.4 nonvol… semifossorial
## 5 Dipodomys_merria… Mamm… Rode…                 9.7   42   nonvol… semifossorial
## 6 Dendrolagus_good… Mamm… Dipr…                23.6 7400   nonvol… nonfossorial 
## # ℹ 5 more variables: foraging_environment <chr>, daily_activity <chr>,
## #   log_mass <dbl>, log_lifespan <dbl>, order_size <int>
# create a dotplot 
p = ggplot(long, aes(x =log_mass, y = log_lifespan, color = class, size = order_size)) + 
  #makign the points transparent by 30%
  geom_point(alpha = 0.3) +
  #regression line
  geom_smooth(method = "lm", se = FALSE, linetype = "solid", aes(color = class)) +
  # change the color scheme
  scale_color_manual(values = c("lightgreen", "darkslategray")) + 
  # change labels and theme
  labs(title = "Bubble Chart of Longevity and Body Mass", x = "Log (Body Mass [g])", y = "Log (Maximum Lifespan [yr])") + 
  theme_minimal() + 
  # remove legends 
  theme(legend.position = "none") +
  # changing axis and title labels theme 
  theme(plot.title = element_text(size = 12, face = "bold"), 
        axis.title.x = element_text(size = 12, face = "bold"), 
        axis.title.y = element_text(size = 12, face = "bold")) + 
  # add text annotations inside the graph
  annotate("text", label = "Aves", x = 5.6, y = 1.9, color = "lightgreen", size = 5, face = "bold") + 
  annotate("text", label = "Mammals", x = 6.5, y = 1.4, color = "darkslategray", size = 5, face = "bold")

ggExtra::ggMarginal(p, type = "density", groupFill = TRUE, alpha = 0.4)

interpretation questions:

  1. adding the density plots in the margins adds another visualization tool that shows how the different classes are distributed in the x and y axis.

  2. 1: log mass plotted on the x axis. 2: log lifespan plotted on the y axis. 3: making the classes differnet colors. 4: order size depicts the sample size of the populations. 5: transparency adds a visual aid allowing you to see more dense populations. 6: marginal density plots add another visual to detemrine where populations are the most dense. 6 1/2: regression lines show the trend of the two classes.

  3. the more body mass = longer lifespan. More extreme in Aves as seen by the trendlines.

  4. for Aves, there are more samples in the lower left of the graph (low body mass and short lifespan), compared to other areas of the graph. This could be a possible bias since most of the data collected was of that low body mass and short lifespan category. The same can be sais for the Mammals where the data is more accumulated towards a high lifespan and body mass.

  5. We could use the dodge function to create 2 side-by-side graphs that might make the graph easier to read. Or, we could change “se = TRUE” to show error along the trend lines.

Create Your Own Graphic

# load packages 
library(ggplot2)
library(dplyr)
library(ggExtra)
# using CO2 dataset 
data("CO2")
head(CO2)
## Grouped Data: uptake ~ conc | Plant
##   Plant   Type  Treatment conc uptake
## 1   Qn1 Quebec nonchilled   95   16.0
## 2   Qn1 Quebec nonchilled  175   30.4
## 3   Qn1 Quebec nonchilled  250   34.8
## 4   Qn1 Quebec nonchilled  350   37.2
## 5   Qn1 Quebec nonchilled  500   35.3
## 6   Qn1 Quebec nonchilled  675   39.2
p1 = ggplot(CO2, aes(x = conc, y = uptake, color = Plant, fill = Treatment, linetype = Treatment)) +
  # Line plot for trends
  geom_point(aes(group = interaction(Plant, Treatment)), size = 1, alpha = 0.8) + geom_line() +
  # set theme 
  theme_minimal() + theme(legend.position = "bottom") +
  # set labels 
  labs(title = "CO2 Uptake Across Plant Origin, Treatment, and Concentration", x = "Concentration of CO2(mL/L)", y = "Uptake of CO2 (μmol/m^2 sec)") + 
  theme(plot.title = element_text(face = "bold"), 
        plot.title.x = element_text(face = "bold"), 
        plot.title.y = element_text(face = "bold"))  
  ggMarginal(p1, type = "density", groupFill = TRUE, alpha = 0.4)