library(ggExtra)
library(tidyverse)
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)

#First Plot
# Read the log10 population-size table from disk and preview its first rows.
population_data <- read.csv(file = "log_population_data.csv")
head(x = population_data)
##   Log10_Current_Population Log10_Past_Population
## 1                 4.288032              5.674204
## 2                 3.817497              5.908109
## 3                 4.671286              6.095078
## 4                 3.538305              5.200114
## 5                 4.602143              6.388435
## 6                 4.839555              6.187712
# 2D density plot of current vs. past population size (both columns are
# already log10-transformed). Contour polygons are filled by density level.
ggplot(
  population_data,
  aes(x = Log10_Current_Population, y = Log10_Past_Population)
) +
  # after_stat(level) replaces the `..level..` dot-dot notation, which was
  # deprecated in ggplot2 3.4.0.
  stat_density_2d(
    geom = "polygon",
    aes(fill = after_stat(level)),
    color = "white"
  ) +
  scale_fill_viridis_c(option = "viridis") +
  ggtitle("2D Density Plot of Population Sizes") +
  # Axis labels: the closing parenthesis was missing from both labels.
  xlab("Log10(Current population size N0)") +
  ylab("Log10(Past population size N1)") +
  theme_minimal()
## Warning: The dot-dot notation (`..level..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(level)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#Second Plot
# Read the species longevity table from disk and preview its first rows.
longevity_data <- read.csv(file = "longevity_data.csv")
head(x = longevity_data)
##                     species    class           order maximum_lifespan_yr mass_g
## 1 Dicrostonyx_groenlandicus Mammalia        Rodentia                 3.3   66.0
## 2      Didelphis_virginiana Mammalia Didelphimorphia                 6.6 3000.0
## 3         Diphylla_ecaudata Mammalia      Chiroptera                 8.0   28.0
## 4     Dipodillus_campestris Mammalia        Rodentia                 7.3   28.4
## 5        Dipodomys_merriami Mammalia        Rodentia                 9.7   42.0
## 6   Dendrolagus_goodfellowi Mammalia   Diprotodontia                23.6 7400.0
##     volancy fossoriallity foraging_environment daily_activity
## 1 nonvolant semifossorial          terrestrial     cathemeral
## 2 nonvolant  nonfossorial         semiarboreal      nocturnal
## 3    volant  nonfossorial          terrestrial      nocturnal
## 4 nonvolant semifossorial          terrestrial      nocturnal
## 5 nonvolant semifossorial          terrestrial      nocturnal
## 6 nonvolant  nonfossorial         semiarboreal     cathemeral
# Build `long`: the longevity data plus the derived columns used for plotting.
long <- longevity_data %>%
  mutate(
    log_mass = log10(mass_g),                  # log10 of body mass (g)
    log_lifespan = log10(maximum_lifespan_yr)  # log10 of maximum lifespan (yr)
  ) %>%
  group_by(order) %>%            # evaluate the next mutate() within each order
  mutate(order_size = n()) %>%   # number of rows belonging to that order
  ungroup()                      # drop the grouping so `long` is not left grouped
head(long)
## # A tibble: 6 × 12
## # Groups:   order [4]
##   species           class order maximum_lifespan_yr mass_g volancy fossoriallity
##   <chr>             <chr> <chr>               <dbl>  <dbl> <chr>   <chr>        
## 1 Dicrostonyx_groe… Mamm… Rode…                 3.3   66   nonvol… semifossorial
## 2 Didelphis_virgin… Mamm… Dide…                 6.6 3000   nonvol… nonfossorial 
## 3 Diphylla_ecaudata Mamm… Chir…                 8     28   volant  nonfossorial 
## 4 Dipodillus_campe… Mamm… Rode…                 7.3   28.4 nonvol… semifossorial
## 5 Dipodomys_merria… Mamm… Rode…                 9.7   42   nonvol… semifossorial
## 6 Dendrolagus_good… Mamm… Dipr…                23.6 7400   nonvol… nonfossorial 
## # ℹ 5 more variables: foraging_environment <chr>, daily_activity <chr>,
## #   log_mass <dbl>, log_lifespan <dbl>, order_size <int>
# Bubble chart of log body mass vs. log maximum lifespan, coloured by class.
# Point size follows order_size; one straight-line (lm) fit is drawn per class.
p <- ggplot(long, aes(x = log_mass, y = log_lifespan, color = class)) +
  geom_point(aes(size = order_size), alpha = 0.3) +
  geom_smooth(
    mapping = aes(group = class),
    method = "lm",
    se = FALSE,
    linetype = "solid"
  ) +
  scale_color_manual(values = c("lightgreen", "darkslategray")) +
  labs(
    title = "Bubble Chart of Longevity and Body Mass",
    x = "Log(Body Mass[g])",
    y = "Log(Maximum Lifespan [yr])"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12, face = "bold")
  ) +
  # The legend is hidden above, so the two classes are labelled directly on
  # the panel in their matching colours.
  annotate(
    "text",
    x = 5.5, y = 1.9,
    label = "Aves",
    color = "lightgreen",
    size = 5,
    fontface = "bold"
  ) +
  annotate(
    "text",
    x = 6, y = 1.2,
    label = "Mammals",
    color = "darkslategray",
    size = 5,
    fontface = "bold"
  )

# Add per-class density curves in the top and right margins of the scatter plot.
ggExtra::ggMarginal(p, type = "density", groupFill = TRUE, alpha = 0.4)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

#Create your own
# Attach ggplot2, load the `diamonds` dataset into the workspace, and preview it.
library(ggplot2)
data("diamonds")
head(diamonds)
## # A tibble: 6 × 10
##   carat cut       color clarity depth table price     x     y     z
##   <dbl> <ord>     <ord> <ord>   <dbl> <dbl> <int> <dbl> <dbl> <dbl>
## 1  0.23 Ideal     E     SI2      61.5    55   326  3.95  3.98  2.43
## 2  0.21 Premium   E     SI1      59.8    61   326  3.89  3.84  2.31
## 3  0.23 Good      E     VS1      56.9    65   327  4.05  4.07  2.31
## 4  0.29 Premium   I     VS2      62.4    58   334  4.2   4.23  2.63
## 5  0.31 Good      J     SI2      63.3    58   335  4.34  4.35  2.75
## 6  0.24 Very Good J     VVS2     62.8    57   336  3.94  3.96  2.48
# Print per-column summary statistics for the diamonds data.
summary(diamonds)
##      carat               cut        color        clarity          depth      
##  Min.   :0.2000   Fair     : 1610   D: 6775   SI1    :13065   Min.   :43.00  
##  1st Qu.:0.4000   Good     : 4906   E: 9797   VS2    :12258   1st Qu.:61.00  
##  Median :0.7000   Very Good:12082   F: 9542   SI2    : 9194   Median :61.80  
##  Mean   :0.7979   Premium  :13791   G:11292   VS1    : 8171   Mean   :61.75  
##  3rd Qu.:1.0400   Ideal    :21551   H: 8304   VVS2   : 5066   3rd Qu.:62.50  
##  Max.   :5.0100                     I: 5422   VVS1   : 3655   Max.   :79.00  
##                                     J: 2808   (Other): 2531                  
##      table           price             x                y         
##  Min.   :43.00   Min.   :  326   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.:56.00   1st Qu.:  950   1st Qu.: 4.710   1st Qu.: 4.720  
##  Median :57.00   Median : 2401   Median : 5.700   Median : 5.710  
##  Mean   :57.46   Mean   : 3933   Mean   : 5.731   Mean   : 5.735  
##  3rd Qu.:59.00   3rd Qu.: 5324   3rd Qu.: 6.540   3rd Qu.: 6.540  
##  Max.   :95.00   Max.   :18823   Max.   :10.740   Max.   :58.900  
##                                                                   
##        z         
##  Min.   : 0.000  
##  1st Qu.: 2.910  
##  Median : 3.530  
##  Mean   : 3.539  
##  3rd Qu.: 4.040  
##  Max.   :31.800  
## 
# Histogram of diamond prices scaled to density, with a kernel density curve
# drawn on top so both layers share the same y-axis.
ggplot(diamonds, aes(x = price)) +
  # after_stat(density) replaces the `..density..` dot-dot notation, which was
  # deprecated in ggplot2 3.4.0.
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30,
    fill = "skyblue",
    alpha = 0.7
  ) +
  # Line thickness is set with `linewidth`; `size` for lines was deprecated in
  # ggplot2 3.4.0.
  geom_density(color = "darkblue", linewidth = 1) +
  ggtitle("Diamond Prices") +
  xlab("Price") +
  ylab("Density") +
  theme_minimal()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#Interpretation Questions

#1 Density plots in the margins show the distribution of each individual variable. This makes it easier to see patterns, clusters, or outliers without overcrowding the main graphic.

#2 The x-axis represents the logarithmic scale of body mass, in grams, for different species. The y-axis represents the logarithmic scale of maximum lifespan, in years, for different species. Each bubble's color tells the viewer whether the point belongs to a mammal or a bird (Aves). The top marginal density plot shows the distribution of the logarithmic body mass for both classes. The right marginal density plot shows the distribution of the logarithmic maximum lifespan for both classes. The point size represents the number of species in each order.

#3 The relationship is positive, indicating that larger animals tend to live longer. Aves have a steeper slope than mammals, meaning the relationship between longevity and body mass is stronger in birds (lifespan rises faster with body mass).

#4 The data is biased towards smaller, shorter-lived animals. This is apparent because the points are clustered more densely in the lower-left corner. This is most likely because smaller, shorter-lived animals, like mice, tend to have more offspring than larger, longer-lived animals.

#5 The plot is missing a legend. A legend would make it easier to decipher what the graphics on the plot represent.