KLUMP Data Visualization Code

Packages Installer Code

# Names of every package this report uses, collected in one vector so the
# installer code can loop over it instead of repeating each name.
packages <- c(
  "ggplot2", "readr", "tidyverse", "dplyr", "ggpubr",
  "see", "rmarkdown", "knitr", "tinytex", "ggExtra"
)

# Installs one package if it is not available yet, then attaches it.
#   pkg: package name as a single character string.
# Returns NULL invisibly in every case; the function is called for its side
# effect of attaching the package.
check_install_packages <- function(pkg) {
  # requireNamespace() only probes for the package, so a missing package does
  # not produce the warning that require() would emit before installation.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, dependencies = TRUE)
  }
  # library() raises an error if the package still cannot be loaded, so a
  # failed installation is not silently ignored.
  library(pkg, character.only = TRUE)
  invisible(NULL)
}

# Installs (when missing) and attaches every package named in `packages`.
# The call is made only for its side effects, so lapply() is wrapped in
# invisible() to keep the per-package return values out of the report.
invisible(lapply(packages, check_install_packages))

## $ggplot2
## NULL
## 
## $readr
## NULL
## 
## $tidyverse
## NULL
## 
## $dplyr
## NULL
## 
## $ggpubr
## NULL
## 
## $see
## NULL
## 
## $rmarkdown
## NULL
## 
## $knitr
## NULL
## 
## $tinytex
## NULL
## 
## $ggExtra
## NULL

Final Assignment

Canadian Asylum Claims Data Sets

# Asylum claimants by age bracket: one row per year and bracket
# (10 years x 6 brackets = 60 rows).
Asylum_Age <- data.frame(
  # Each year label appears six times in a row, once per age bracket
  Year = rep(as.character(2015:2024), each = 6),
  # The six brackets, cycled once for every year
  Age_Range = rep(c("0-14", "15-29", "30-44", "45-59", "60-74", "75+"), times = 10),
  # Claimant counts; each line below holds the six brackets of one year
  Claimants = c(
    3365, 4925, 5300, 1735, 580, 130,        # 2015
    5425, 7120, 7800, 2515, 815, 165,        # 2016
    13185, 11400, 18215, 5415, 1290, 200,    # 2017
    14195, 13030, 19685, 6375, 1460, 265,    # 2018
    13640, 17445, 22945, 7920, 1745, 315,    # 2019
    3805, 7560, 7965, 3050, 1030, 240,       # 2020
    4105, 8250, 7945, 3090, 1180, 255,       # 2021
    18095, 29180, 32125, 9620, 2130, 460,    # 2022
    21115, 49235, 53210, 16020, 3160, 590,   # 2023
    21270, 55060, 59190, 19360, 3450, 705    # 2024
  )
)
# Turn the year labels into numbers so Year is treated as a continuous variable
Asylum_Age[["Year"]] <- as.numeric(as.character(Asylum_Age[["Year"]]))

# Share of each year's claimants that falls in each age bracket, plus a
# ready-to-print percentage label for the pie charts.
normalized_data_Age <- Asylum_Age %>%
  # Work within one year at a time so sum() is that year's total
  group_by(Year) %>%
  # Fraction of the year's claimants in this bracket
  mutate(Proportion_Age = Claimants / sum(Claimants)) %>%
  # Percentage rounded to one decimal place, followed by a % sign
  mutate(Percent_Label_Age = paste0(round(100 * Proportion_Age, 1), "%")) %>%
  # Drop the grouping so later steps see a plain table
  ungroup()

# Asylum claimants by gender: one row per year and gender
# (10 years x 2 genders = 20 rows).
Asylum_Gender <- data.frame(
  # Each year label appears twice in a row, once per gender
  Year = rep(as.character(2015:2024), each = 2),
  # Female/Male, cycled once for every year
  Gender = rep(c("Female", "Male"), times = 10),
  # Claimant counts; each line below is one year (Female, Male)
  Claimants = c(
    7205, 8830,      # 2015
    10445, 13395,    # 2016
    22540, 27805,    # 2017
    24855, 30165,    # 2018
    28365, 35640,    # 2019
    10355, 13310,    # 2020
    10710, 14145,    # 2021
    37815, 53755,    # 2022
    56295, 86995,    # 2023
    58095, 100890    # 2024
  )
)
# Turn the year labels into numbers so Year is treated as a continuous variable
Asylum_Gender[["Year"]] <- as.numeric(as.character(Asylum_Gender[["Year"]]))

# Share of each year's claimants by gender, plus a ready-to-print percentage
# label for the pie charts.
normalized_data_Gender <- Asylum_Gender %>%
  # Work within one year at a time so sum() is that year's total
  group_by(Year) %>%
  # Fraction of the year's claimants with this gender
  mutate(Proportion_Gender = Claimants / sum(Claimants)) %>%
  # Percentage rounded to one decimal place, followed by a % sign
  mutate(Percent_Label_Gender = paste0(round(100 * Proportion_Gender, 1), "%")) %>%
  # Drop the grouping so later steps see a plain table
  ungroup()

# Show up to the first 100 rows of the gender data frame
head(x = Asylum_Gender, n = 100)

##    Year Gender Claimants
## 1  2015 Female      7205
## 2  2015   Male      8830
## 3  2016 Female     10445
## 4  2016   Male     13395
## 5  2017 Female     22540
## 6  2017   Male     27805
## 7  2018 Female     24855
## 8  2018   Male     30165
## 9  2019 Female     28365
## 10 2019   Male     35640
## 11 2020 Female     10355
## 12 2020   Male     13310
## 13 2021 Female     10710
## 14 2021   Male     14145
## 15 2022 Female     37815
## 16 2022   Male     53755
## 17 2023 Female     56295
## 18 2023   Male     86995
## 19 2024 Female     58095
## 20 2024   Male    100890

# Show up to the first 100 rows of the age data frame
head(x = Asylum_Age, n = 100)

##    Year Age_Range Claimants
## 1  2015      0-14      3365
## 2  2015     15-29      4925
## 3  2015     30-44      5300
## 4  2015     45-59      1735
## 5  2015     60-74       580
## 6  2015       75+       130
## 7  2016      0-14      5425
## 8  2016     15-29      7120
## 9  2016     30-44      7800
## 10 2016     45-59      2515
## 11 2016     60-74       815
## 12 2016       75+       165
## 13 2017      0-14     13185
## 14 2017     15-29     11400
## 15 2017     30-44     18215
## 16 2017     45-59      5415
## 17 2017     60-74      1290
## 18 2017       75+       200
## 19 2018      0-14     14195
## 20 2018     15-29     13030
## 21 2018     30-44     19685
## 22 2018     45-59      6375
## 23 2018     60-74      1460
## 24 2018       75+       265
## 25 2019      0-14     13640
## 26 2019     15-29     17445
## 27 2019     30-44     22945
## 28 2019     45-59      7920
## 29 2019     60-74      1745
## 30 2019       75+       315
## 31 2020      0-14      3805
## 32 2020     15-29      7560
## 33 2020     30-44      7965
## 34 2020     45-59      3050
## 35 2020     60-74      1030
## 36 2020       75+       240
## 37 2021      0-14      4105
## 38 2021     15-29      8250
## 39 2021     30-44      7945
## 40 2021     45-59      3090
## 41 2021     60-74      1180
## 42 2021       75+       255
## 43 2022      0-14     18095
## 44 2022     15-29     29180
## 45 2022     30-44     32125
## 46 2022     45-59      9620
## 47 2022     60-74      2130
## 48 2022       75+       460
## 49 2023      0-14     21115
## 50 2023     15-29     49235
## 51 2023     30-44     53210
## 52 2023     45-59     16020
## 53 2023     60-74      3160
## 54 2023       75+       590
## 55 2024      0-14     21270
## 56 2024     15-29     55060
## 57 2024     30-44     59190
## 58 2024     45-59     19360
## 59 2024     60-74      3450
## 60 2024       75+       705

Canadian Data Origin and Description

https://open.canada.ca/data/en/dataset/b6cbcf4d-f763-4924-a2fb-8cc4a06e3de4?_gl=1*1gf1pvt*_ga*MTE5OTYyNDgzOS4xNzM2MDkyMjI1*_ga_S9JG8CZVYZ*MTczNjA5MjIyNC4xLjAuMTczNjA5MjIyNC42MC4wLjA.
The data for this section came from the Government of Canada. The data was in a very complex horizontal (wide) format, so I had to manually build a vertical (long) data frame containing the same data. My graphs show two key things. The first is a line graph showing how the number of asylum claimants in Canada in each age group and gender has changed since 2015. The second is a set of pie charts, one per year, showing how the percentage of each age group or gender changes from year to year.

Canadian Asylum Claim Plots by Age Group

# Line chart of yearly claimant counts, with one colored line per age bracket
ggplot(Asylum_Age, aes(x = Year, y = Claimants)) +
  # Line thickness is set with linewidth; using size for lines is deprecated
  # since ggplot2 3.4.0
  geom_line(aes(color = Age_Range), linewidth = 1) +
  # Points in the matching bracket color, all drawn at size 3
  geom_point(aes(color = Age_Range), size = 3) +
  # X axis runs from 2015 to 2024 with a labelled break at every year
  scale_x_continuous(
    limits = c(2015, 2024),
    breaks = seq(2015, 2024, by = 1),
    labels = seq(2015, 2024, by = 1)
  ) +
  # One manual color per age bracket, shared by the lines and the points
  scale_color_manual(values = c("#b5d1ae", "#80ae9a", "#568b87", "#326b77", "#1b485e", "#122740")) +
  theme_minimal() +
  # Rotate the year labels 45 degrees and adjust their position
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1.3)) +
  # Title, subtitle, axis titles, and legend title
  labs(
    title = "Canadian Asylum Claimants Since 2015",
    subtitle = "Categorized by Age Groups",
    x = "Year",
    y = "Number of Claimants",
    color = "Age Groups"
  )

# One pie chart per year showing each age bracket's share of claimants
ggplot(normalized_data_Age, aes(x = "", y = Proportion_Age, fill = Age_Range)) +
  # A single stacked bar per year (x is one empty category), full width
  geom_col(width = 1) +
  # Wrapping the y axis around a circle turns the stacked bar into a pie
  coord_polar(theta = "y") +
  # Percentage labels centred within each slice; x = 1.13 sets how far from
  # the centre of the pie the label is drawn
  geom_text(
    aes(x = 1.13, label = Percent_Label_Age),
    position = position_stack(vjust = 0.5),
    size = 3.2,
    color = "white"
  ) +
  # Separate panel (pie) for every year
  facet_wrap(~Year) +
  # One manual fill color per age bracket
  scale_fill_manual(values = c("#b5d1ae", "#80ae9a", "#568b87", "#326b77", "#1b485e", "#122740")) +
  # Plot title only; no axis titles
  labs(
    title = "Asylum Claimants by Age Groups",
    x = NULL,
    y = NULL
  ) +
  # Blank theme: no axes, grid lines, or background
  theme_void() +
  # Bold, size-10 panel titles (the year above each pie)
  theme(strip.text = element_text(size = 10, face = "bold"))

Canadian Asylum Claim Plots by Gender

# Line chart of yearly claimant counts, with one colored line per gender
ggplot(Asylum_Gender, aes(x = Year, y = Claimants)) +
  # Line thickness is set with linewidth; using size for lines is deprecated
  # since ggplot2 3.4.0
  geom_line(aes(color = Gender), linewidth = 1) +
  # Points colored by gender, with a different point shape per gender
  geom_point(aes(color = Gender, shape = Gender), size = 3) +
  # X axis runs from 2015 to 2024 with a labelled break at every year
  scale_x_continuous(
    limits = c(2015, 2024),
    breaks = seq(2015, 2024, by = 1),
    labels = seq(2015, 2024, by = 1)
  ) +
  # Manual colors for the two genders, shared by the lines and the points
  scale_color_manual(values = c("pink", "lightblue")) +
  theme_minimal() +
  # Rotate the year labels 45 degrees and adjust their position
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1.3)) +
  # Title, subtitle, axis titles, and legend title
  labs(
    title = "Canadian Asylum Claimants Since 2015",
    subtitle = "Categorized by Gender",
    x = "Year",
    y = "Number of Claimants",
    color = "Gender"
  )

# One pie chart per year showing each gender's share of claimants
ggplot(normalized_data_Gender, aes(x = "", y = Proportion_Gender, fill = Gender)) +
  # A single stacked bar per year (x is one empty category), full width
  geom_col(width = 1) +
  # Wrapping the y axis around a circle turns the stacked bar into a pie
  coord_polar(theta = "y") +
  # Percentage labels centred within each slice, text size 3
  geom_text(
    aes(label = Percent_Label_Gender),
    position = position_stack(vjust = 0.5),
    size = 3
  ) +
  # Separate panel (pie) for every year
  facet_wrap(~Year) +
  # Manual fill colors for the two genders
  scale_fill_manual(values = c("pink", "lightblue")) +
  # Plot title only; no axis titles
  labs(
    title = "Asylum Claimants by Gender",
    x = NULL,
    y = NULL
  ) +
  # Blank theme: no axes, grid lines, or background
  theme_void() +
  # Bold, size-10 panel titles (the year above each pie)
  theme(strip.text = element_text(size = 10, face = "bold"))

Graphic Assignment 4

Chick Growth Data

# Show the first six rows of the ChickWeight data
head(ChickWeight, n = 6L)

##   weight Time Chick Diet
## 1     42    0     1    1
## 2     51    2     1    1
## 3     59    4     1    1
## 4     64    6     1    1
## 5     76    8     1    1
## 6     93   10     1    1

Line Plot of Chick Growth by Diet Type

# Growth curves of individual chicks, one panel per diet, with an overall
# smoothed trend drawn on top
ggplot(ChickWeight, aes(x = Time, y = weight)) +
  # One faint line per chick (alpha = 0.4), colored by chick ID
  geom_line(aes(color = Chick), alpha = .4) +
  # Black smoothed trend with its standard-error band; line thickness is set
  # with linewidth because size is deprecated for lines since ggplot2 3.4.0
  geom_smooth(color = "black", linewidth = 1.2, se = TRUE) +
  # One panel per diet, all four in a single row
  facet_wrap(~Diet, ncol = 4) +
  theme_minimal() +
  theme(
    legend.position = "none",                              # no legend
    plot.title = element_text(face = "bold"),              # bold plot title
    axis.title = element_text(face = "bold"),              # bold axis titles
    strip.text = element_text(size = 12, face = "bold"),   # bold panel labels (the diet numbers)
    axis.text.x = element_text(angle = 45, hjust = 1.5, vjust = 1.3),  # rotated x tick labels
    panel.spacing.x = unit(.5, "lines")                    # horizontal gap between panels
  ) +
  # Plot and axis titles
  labs(
    title = "Chick Growth by Diet Type",
    x = "Time (Days)",
    y = "Weight (Grams)"
  )

Plant CO2 Uptake Data

# Show the first 84 rows of the CO2 data
head(x = CO2, n = 84)

## Grouped Data: uptake ~ conc | Plant
##    Plant        Type  Treatment conc uptake
## 1    Qn1      Quebec nonchilled   95   16.0
## 2    Qn1      Quebec nonchilled  175   30.4
## 3    Qn1      Quebec nonchilled  250   34.8
## 4    Qn1      Quebec nonchilled  350   37.2
## 5    Qn1      Quebec nonchilled  500   35.3
## 6    Qn1      Quebec nonchilled  675   39.2
## 7    Qn1      Quebec nonchilled 1000   39.7
## 8    Qn2      Quebec nonchilled   95   13.6
## 9    Qn2      Quebec nonchilled  175   27.3
## 10   Qn2      Quebec nonchilled  250   37.1
## 11   Qn2      Quebec nonchilled  350   41.8
## 12   Qn2      Quebec nonchilled  500   40.6
## 13   Qn2      Quebec nonchilled  675   41.4
## 14   Qn2      Quebec nonchilled 1000   44.3
## 15   Qn3      Quebec nonchilled   95   16.2
## 16   Qn3      Quebec nonchilled  175   32.4
## 17   Qn3      Quebec nonchilled  250   40.3
## 18   Qn3      Quebec nonchilled  350   42.1
## 19   Qn3      Quebec nonchilled  500   42.9
## 20   Qn3      Quebec nonchilled  675   43.9
## 21   Qn3      Quebec nonchilled 1000   45.5
## 22   Qc1      Quebec    chilled   95   14.2
## 23   Qc1      Quebec    chilled  175   24.1
## 24   Qc1      Quebec    chilled  250   30.3
## 25   Qc1      Quebec    chilled  350   34.6
## 26   Qc1      Quebec    chilled  500   32.5
## 27   Qc1      Quebec    chilled  675   35.4
## 28   Qc1      Quebec    chilled 1000   38.7
## 29   Qc2      Quebec    chilled   95    9.3
## 30   Qc2      Quebec    chilled  175   27.3
## 31   Qc2      Quebec    chilled  250   35.0
## 32   Qc2      Quebec    chilled  350   38.8
## 33   Qc2      Quebec    chilled  500   38.6
## 34   Qc2      Quebec    chilled  675   37.5
## 35   Qc2      Quebec    chilled 1000   42.4
## 36   Qc3      Quebec    chilled   95   15.1
## 37   Qc3      Quebec    chilled  175   21.0
## 38   Qc3      Quebec    chilled  250   38.1
## 39   Qc3      Quebec    chilled  350   34.0
## 40   Qc3      Quebec    chilled  500   38.9
## 41   Qc3      Quebec    chilled  675   39.6
## 42   Qc3      Quebec    chilled 1000   41.4
## 43   Mn1 Mississippi nonchilled   95   10.6
## 44   Mn1 Mississippi nonchilled  175   19.2
## 45   Mn1 Mississippi nonchilled  250   26.2
## 46   Mn1 Mississippi nonchilled  350   30.0
## 47   Mn1 Mississippi nonchilled  500   30.9
## 48   Mn1 Mississippi nonchilled  675   32.4
## 49   Mn1 Mississippi nonchilled 1000   35.5
## 50   Mn2 Mississippi nonchilled   95   12.0
## 51   Mn2 Mississippi nonchilled  175   22.0
## 52   Mn2 Mississippi nonchilled  250   30.6
## 53   Mn2 Mississippi nonchilled  350   31.8
## 54   Mn2 Mississippi nonchilled  500   32.4
## 55   Mn2 Mississippi nonchilled  675   31.1
## 56   Mn2 Mississippi nonchilled 1000   31.5
## 57   Mn3 Mississippi nonchilled   95   11.3
## 58   Mn3 Mississippi nonchilled  175   19.4
## 59   Mn3 Mississippi nonchilled  250   25.8
## 60   Mn3 Mississippi nonchilled  350   27.9
## 61   Mn3 Mississippi nonchilled  500   28.5
## 62   Mn3 Mississippi nonchilled  675   28.1
## 63   Mn3 Mississippi nonchilled 1000   27.8
## 64   Mc1 Mississippi    chilled   95   10.5
## 65   Mc1 Mississippi    chilled  175   14.9
## 66   Mc1 Mississippi    chilled  250   18.1
## 67   Mc1 Mississippi    chilled  350   18.9
## 68   Mc1 Mississippi    chilled  500   19.5
## 69   Mc1 Mississippi    chilled  675   22.2
## 70   Mc1 Mississippi    chilled 1000   21.9
## 71   Mc2 Mississippi    chilled   95    7.7
## 72   Mc2 Mississippi    chilled  175   11.4
## 73   Mc2 Mississippi    chilled  250   12.3
## 74   Mc2 Mississippi    chilled  350   13.0
## 75   Mc2 Mississippi    chilled  500   12.5
## 76   Mc2 Mississippi    chilled  675   13.7
## 77   Mc2 Mississippi    chilled 1000   14.4
## 78   Mc3 Mississippi    chilled   95   10.6
## 79   Mc3 Mississippi    chilled  175   18.0
## 80   Mc3 Mississippi    chilled  250   17.9
## 81   Mc3 Mississippi    chilled  350   17.9
## 82   Mc3 Mississippi    chilled  500   17.9
## 83   Mc3 Mississippi    chilled  675   18.9
## 84   Mc3 Mississippi    chilled 1000   19.9

Plant CO2 Uptake by Treatment Type

# Make the minimal theme the default for every plot drawn after this point
theme_set(theme_minimal())

# P1: violin plot of CO2 uptake for each treatment, split by plant origin (Type)
P1 <- ggplot(CO2, aes(x = Treatment, y = uptake)) +
  # Violins filled by Type, with the tails trimmed to the range of the data
  geom_violin(aes(fill = Type), trim = TRUE) +
  # Dots binned along the y axis and stacked outward from the centre, dodged
  # so the two Types sit side by side
  geom_dotplot(aes(color = Type), binaxis = "y", stackdir = "center", position = position_dodge()) +
  # Both Types get black dot outlines. Written as full six-digit hex, which
  # every R version accepts (three-digit shorthand may be rejected by older
  # R releases -- verify against the R version in use).
  scale_color_manual(values = c("#000000", "#000000")) +
  # Fill colors for the two Types
  scale_fill_manual(values = c("#8000bb", "darkorange3")) +
  # Bold, size-9 axis titles
  theme(axis.title = element_text(face = "bold", size = 9)) +
  # Y-axis title
  labs(y = "CO2 Uptake")
  
# P2: CO2 uptake against concentration, colored by treatment, one panel per Type
P2 <- ggplot(CO2, aes(x = conc, y = uptake, color = Treatment)) +
  # Draw one line per plant. Without an explicit group, all plants that share
  # a treatment and panel are joined into a single zig-zag line, because they
  # were measured at the same concentrations.
  geom_line(aes(group = Plant)) +
  # Points whose shape differs by treatment
  geom_point(aes(shape = Treatment)) +
  # Colors for the two treatments
  scale_color_manual(values = c("#298c8c", "#800074")) +
  # Bold, size-9 axis titles
  theme(axis.title = element_text(face = "bold", size = 9)) +
  # Axis titles
  labs(
    x = "CO2 Concentration",
    y = "CO2 Uptake"
  ) +
  # One panel per plant origin
  facet_wrap(~Type)

# P3: bar chart of CO2 uptake against concentration, one panel per Type
P3 <- ggplot(CO2, aes(x = conc, y = uptake)) +
  # Bars outlined and filled by treatment, placed side by side
  geom_col(
    aes(color = Treatment, fill = Treatment),
    position = position_dodge()
  ) +
  # Outline colors for the two treatments
  scale_color_manual(values = c("#298c8c", "#800074")) +
  # Matching fill colors for the two treatments
  scale_fill_manual(values = c("#298c8c", "#800074")) +
  # Bold, size-9 axis titles
  theme(axis.title = element_text(face = "bold", size = 9)) +
  # Axis titles
  labs(x = "CO2 Concentration", y = "CO2 Uptake") +
  # One panel per plant origin
  facet_wrap(~Type)
  
# Combined figure in two rows: P1 on top (label A); P3 and P2 side by side
# underneath (labels B and C), sharing one legend placed at the bottom.
plot <- ggarrange(
  P1,
  ggarrange(P3, P2, ncol = 2, labels = c("B", "C"), common.legend = TRUE, legend = "bottom"),
  nrow = 2,
  labels = "A"
)

# Overall title for the combined figure (spelling corrected: "CO2" with the
# letter O, and "Treatment")
annotate_figure(plot, top = text_grob("CO2 Uptake by Treatment Group", size = 15, face = "bold"))

Graphic Assignment 3

Population Data and 2D Density Plot

# Read the population data from a CSV file on the local machine
# NOTE(review): the absolute path is specific to one computer
population_data <- read.csv(file = "C:/Users/seank/Downloads/R_Coding_Course/Data/log_population_data.csv")

# Show the first six rows
head(population_data, n = 6L)

##   Log10_Current_Population Log10_Past_Population
## 1                 4.288032              5.674204
## 2                 3.817497              5.908109
## 3                 4.671286              6.095078
## 4                 3.538305              5.200114
## 5                 4.602143              6.388435
## 6                 4.839555              6.187712

# Filled 2D density contours of current versus past population size
ggplot(population_data, aes(x = Log10_Current_Population, y = Log10_Past_Population)) +
  # Contour polygons filled by density level, outlined in white.
  # after_stat(level) replaces the ..level.. notation deprecated in
  # ggplot2 3.4.0.
  stat_density_2d(aes(fill = after_stat(level)), geom = "polygon", colour = "white") +
  # Fill gradient from palette 9, with direction = 1
  scale_fill_distiller(palette = 9, direction = 1) +
  theme_minimal() +
  # Plot title, axis titles, and legend title
  labs(
    title = "2D Density Plot of Population Sizes",
    x = "Log10(Current population size N0)",
    y = "Log10(past population size N1)",
    fill = "Density"
  )

Longevity Data and Bubble Plot with Density Margin Plots

# Read the longevity data from a CSV file on the local machine
# NOTE(review): the absolute path is specific to one computer
longevity_data <- read.csv("C:/Users/seank/Downloads/R_Coding_Course/Data/longevity_data.csv")

# Add log10-transformed mass and lifespan, plus the number of rows in each
# taxonomic order
long <- longevity_data %>%
  mutate(
    log_mass = log10(mass_g),                   # log10 of body mass
    log_lifespan = log10(maximum_lifespan_yr)   # log10 of maximum lifespan
  ) %>%
  group_by(order) %>%            # count rows within each order
  mutate(order_size = n()) %>%   # n() is the number of rows in the current group
  ungroup()                      # drop the grouping so it does not carry into later steps

# Show the first six rows
head(long)

## # A tibble: 6 × 12
## # Groups:   order [4]
##   species           class order maximum_lifespan_yr mass_g volancy fossoriallity
##   <chr>             <chr> <chr>               <dbl>  <dbl> <chr>   <chr>        
## 1 Dicrostonyx_groe… Mamm… Rode…                 3.3   66   nonvol… semifossorial
## 2 Didelphis_virgin… Mamm… Dide…                 6.6 3000   nonvol… nonfossorial 
## 3 Diphylla_ecaudata Mamm… Chir…                 8     28   volant  nonfossorial 
## 4 Dipodillus_campe… Mamm… Rode…                 7.3   28.4 nonvol… semifossorial
## 5 Dipodomys_merria… Mamm… Rode…                 9.7   42   nonvol… semifossorial
## 6 Dendrolagus_good… Mamm… Dipr…                23.6 7400   nonvol… nonfossorial 
## # ℹ 5 more variables: foraging_environment <chr>, daily_activity <chr>,
## #   log_mass <dbl>, log_lifespan <dbl>, order_size <int>

# Bubble chart of log lifespan against log body mass, stored as p so the
# marginal density plots can be attached afterwards
p <- ggplot(long, aes(x = log_mass, y = log_lifespan)) +
  # Points colored by class and sized by order_size, drawn at alpha = 0.3
  geom_point(aes(color = class, size = order_size), alpha = 0.3) +
  # One solid linear-regression line per class, without a standard-error band
  geom_smooth(aes(color = class), method = "lm", se = FALSE, linetype = "solid") +
  # Manual colors for the two classes
  scale_color_manual(values = c("lightgreen", "darkslategray")) +
  # Plot and axis titles
  labs(
    title = "Bubble Chart of Longevity and Body Mass",
    x = "Log (Body Mass [g])",
    y = "Log (Maximum Lifespan [yr])"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",                             # no legend; classes are labelled on the plot
    plot.title = element_text(size = 14, face = "bold"),  # larger, bold plot title
    axis.title = element_text(size = 12, face = "bold")   # larger, bold axis titles
  ) +
  # Text label "Aves" in the Aves color, positioned from the given coordinates
  annotate("text", x = 5, y = 1.8, hjust = -0.4, vjust = -0.9,
           label = "Aves", color = "lightgreen", size = 5, fontface = "bold") +
  # Text label "Mammals" in the Mammals color
  annotate("text", x = 6, y = 1.2, label = "Mammals", color = "darkslategray", size = 5, fontface = "bold")

# Add per-class density plots in the margins, drawn at alpha = 0.4
ggExtra::ggMarginal(p, type = "density", groupFill = TRUE, alpha = 0.4)

Interpretation Questions:

What is the benefit to adding density plots in the margin of your graphics? 
*To show the distribution of each variable and to find potential bias.

Explain how you were able to depict 6 different measures in a single graphic. Be sure to clearly list the element and how it was depicted. 
*scatter (points)-using geom_point, the points appear as translucent circles to show the data points 
*density- used ggExtra::ggMarginal to add density plots in the margins, showing the distribution of the mass and lifespan data in the bubble chart 
*size- set size to order_size, to show how many samples were taken from each order. 
*regression- used geom_smooth to show the relationship between lifespan and mass for each group. 
*color- set the color to class so the two groups could be differentiated. 
*Log values- used log10 function to find the log values for the x and y axis so the data could be shown in a more linear way.

What is the relationship between longevity and body mass? Is it more extreme in mammals or aves? 
*There is a positive correlation between longevity and body mass, and it is more extreme in Aves, as the slope is steeper.

Is the data more biased toward smaller/larger or long/short lived animals? How do you know and why do you think that is? 
*Smaller/larger animals. The density curves in the top margin (body mass) are skewed to the left or right, which indicates potential bias, while the density curves in the right margin (lifespan) are closer to a normal distribution and cover the full range. In the top plot, the Aves group is missing coverage on the right side and is condensed on the left side.

Is there an element missing from this graphic that you feel should be there? Hint: There is one that could be helpful if added that is not depicted currently in any other way on the graphic. 
*A legend describing what the size of the point represents (sample size).

BMI Data

# Read the height/weight data from a CSV file on the local machine
# NOTE(review): the absolute path is specific to one computer
Height <- read.csv(file = "C:/Users/seank/Downloads/R_Coding_Course/Data/height_data.csv")

# Height converted from centimetres to metres
Height[["height_m"]] <- Height[["height_cm"]] / 100

# BMI = weight in kg divided by the square of height in metres
Height[["BMI"]] <- Height[["weight_kg"]] / Height[["height_m"]]^2

# Show the first six rows
head(Height, n = 6L)

##   sex height_cm weight_kg shoe_size_EU height_m      BMI
## 1   M       180        79           42     1.80 24.38272
## 2   M       165        65           41     1.65 23.87511
## 3   M       178        72           42     1.78 22.72440
## 4   M       160        53           43     1.60 20.70312
## 5   M       182        78           36     1.82 23.54788
## 6   F       158        55           38     1.58 22.03173

BMI Bubble Plot and Histogram by Sex

# Bubble chart of BMI against weight, stored as p so the marginal density
# plots can be attached afterwards
p <- ggplot(Height, aes(x = weight_kg, y = BMI)) +
  # Points colored by sex and sized by height, drawn at alpha = 0.5
  geom_point(aes(color = sex, size = height_cm), alpha = 0.5) +
  # One linear-regression line per sex, without a standard-error band
  geom_smooth(aes(color = sex), method = "lm", se = FALSE) +
  # Manual colors for the two sexes
  scale_color_manual(values = c("#ff73b6", "#008dff")) +
  theme_minimal() +
  # Legend on the left-hand side of the plot
  theme(legend.position = "left") +
  # Plot title, axis titles, and legend titles
  labs(
    title = "Bubble Chart of Weight and BMI",
    x = "Weight(kg)", y = "BMI",
    color = "Sex", size = "Height(cm)"
  )

# Add per-sex density plots in the margins, drawn at alpha = 0.3
ggExtra::ggMarginal(p, type = "density", groupFill = TRUE, alpha = 0.3)

# Histogram of BMI by sex on a density scale, with density curves overlaid
ggplot(Height, aes(x = BMI)) +
  # Bars for the two sexes placed side by side. after_stat(density) replaces
  # the ..density.. notation deprecated in ggplot2 3.4.0.
  geom_histogram(aes(y = after_stat(density), color = sex, fill = sex), alpha = 1, position = "dodge") +
  # Density curve per sex, outlined and filled by sex, drawn at alpha = 0.3
  geom_density(aes(color = sex, fill = sex), alpha = 0.3) +
  # Matching outline and fill colors for the two sexes
  scale_color_manual(values = c("#ff73b6", "#008dff")) +
  scale_fill_manual(values = c("#ff73b6", "#008dff")) +
  theme_minimal() +
  # Put the legend title to the right of the legend keys
  theme(legend.title.position = "right") +
  # Hide the color legend so only the fill legend is shown
  guides(color = "none") +
  # Titles; no color title is set because that legend is hidden above
  # (the original passed element_blank(), a theme element, as a label)
  labs(
    title = "Distribution of BMI", subtitle = "Categorized by Sex",
    x = "BMI", y = "Density",
    fill = "Sex"
  )

Graphic Assignment 2

Replica Violin and Half-Violin Data and Plot

# Read the violin-plot data from a CSV file on the local machine
# NOTE(review): the absolute path is specific to one computer
CAM <- read.csv(file = "C:/Users/seank/Downloads/R_Coding_Course/Data/Violin_Plot_Data.csv")

# Reshape from wide to long: every column whose name starts with "Repeat"
# becomes rows, with the column name stored in `Repeat` and the cell value
# stored in `values`
data_long <- pivot_longer(
  CAM,
  cols = starts_with("Repeat"),
  names_to = "Repeat",
  values_to = "values"
)

# Show the first 40 rows
head(data_long, n = 40)

## # A tibble: 40 × 3
##    F1Performance      Repeat   values
##    <chr>              <chr>     <dbl>
##  1 SVMWithGradCAMMaps Repeat1   0.670
##  2 SVMWithGradCAMMaps Repeat2   0.702
##  3 SVMWithGradCAMMaps Repeat3   0.681
##  4 SVMWithGradCAMMaps Repeat4   0.711
##  5 SVMWithGradCAMMaps Repeat5   0.649
##  6 SVMWithGradCAMMaps Repeat6   0.716
##  7 SVMWithGradCAMMaps Repeat7   0.714
##  8 SVMWithGradCAMMaps Repeat8   0.685
##  9 SVMWithGradCAMMaps Repeat9   0.699
## 10 SVMWithGradCAMMaps Repeat10  0.688
## # ℹ 30 more rows

# Horizontal violin plot of the F1 values for each F1Performance category
ggplot(data_long, aes(x = F1Performance, y = values)) +
  # Individual values as large points, jittered sideways (width 0.1) and
  # drawn at alpha = 0.8
  geom_jitter(aes(color = F1Performance), alpha = 0.8, size = 5,
              position = position_jitter(width = 0.1)) +
  # Point colors for the two categories
  scale_color_manual(values = c("darkorchid4", "darkorange1")) +
  # Half-transparent violins over the points with lines at the quartiles.
  # The outline thickness uses linewidth (size is deprecated for lines since
  # ggplot2 3.4.0) and the quantiles are given as numbers rather than strings.
  # NOTE(review): quantile.size is not among geom_violin()'s documented
  # arguments -- confirm it has any effect.
  geom_violin(aes(fill = F1Performance), alpha = 0.5, linewidth = 2,
              draw_quantiles = c(0.25, 0.50, 0.75), quantile.size = 2) +
  # White circle with a black outline marking the median of each category
  stat_summary(fun = median, geom = "point", shape = 21, size = 3, fill = "white", color = "black",
               stroke = 1.5) +
  # Violin fill colors for the two categories
  scale_fill_manual(values = c("darkorchid4", "darkorange1")) +
  # Swap the axes so the violins lie horizontally
  coord_flip() +
  theme_minimal() +
  theme(
    # No title, labels, or ticks on the y axis
    axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
    legend.position = "none",                                              # no legend
    axis.line.x.bottom = element_line(color = "black", linewidth = 1.5),   # thick bottom axis line
    plot.title = element_text(hjust = 0.5, face = "bold"),                 # centred, bold title
    panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_line(colour = "grey", linewidth = 1.5, linetype = "dashed")
  ) +
  # Category labels drawn once each with annotate(); geom_text() with constant
  # aesthetics would redraw the same label for every row of the data
  annotate("text", x = "SVMWithGradCAMMaps", y = 0.64, label = "SVM + GRAD-CAM++",
           vjust = -4.5, color = "darkorange1", size = 4.5) +
  annotate("text", x = "SVMWithDeepShapMaps", y = 0.59, label = "SVM + Deep SHAP",
           vjust = -4.5, color = "darkorchid4", size = 4.5) +
  # Value axis (shown horizontally after the flip): 0.56 to 0.74 in steps of 0.02
  scale_y_continuous(
    limits = c(0.56, 0.74),
    breaks = seq(0.56, 0.74, by = 0.02),
    labels = seq(0.56, 0.74, by = 0.02)
  ) +
  # Plot title (spelling of "predictive" corrected) and value-axis title
  labs(title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information", y = "F1")

# Half-violin version of the replica plot: same data, colours, labels and
# theme as the full violin plot, but the densities are drawn with
# geom_violinhalf().
ggplot(data_long, aes(x = F1Performance, y = values)) +
  # Raw observations, 20% transparent and coloured by model. Jitter is applied
  # only along the categorical axis (height = 0) so the plotted F1 values are
  # not perturbed.
  geom_jitter(aes(color = F1Performance), alpha = 0.8, size = 5,
              position = position_jitter(width = 0.1, height = 0)) +
  # Half violins, 50% transparent. Quantile positions are given as numbers.
  # NOTE(review): confirm that the installed geom_violinhalf() honours
  # `draw_quantiles` and `quantile.size`; unknown parameters are ignored with
  # a warning.
  geom_violinhalf(aes(fill = F1Performance), alpha = 0.5, size = 2,
                  draw_quantiles = c(0.25, 0.50, 0.75), quantile.size = 2) +
  # White point with a black outline at the median of each group
  stat_summary(fun = median, geom = "point", shape = 21, size = 3,
               fill = "white", color = "black", stroke = 1.5) +
  # In-plot labels replace the legend. annotate() draws each label exactly
  # once; geom_text(aes(...)) with constants would redraw it for every data row.
  annotate("text", x = "SVMWithGradCAMMaps", y = 0.64,
           label = "SVM + GRAD-CAM++", vjust = -4.5,
           color = "darkorange1", size = 4.5) +
  annotate("text", x = "SVMWithDeepShapMaps", y = 0.59,
           label = "SVM + Deep SHAP", vjust = -4.5,
           color = "darkorchid4", size = 4.5) +
  # Manual colours, assigned to the models in the order of the discrete scale
  scale_color_manual(values = c("darkorchid4", "darkorange1")) +
  scale_fill_manual(values = c("darkorchid4", "darkorange1")) +
  # F1 axis (shown horizontally after the flip): 0.56 to 0.74 in steps of 0.02
  scale_y_continuous(limits = c(0.56, 0.74),
                     breaks = seq(0.56, 0.74, by = 0.02),
                     labels = seq(0.56, 0.74, by = 0.02)) +
  coord_flip() + # swap x and y so the half violins lie horizontally
  theme_minimal() +
  theme(
    # hide the model axis entirely; the in-plot labels identify the groups
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "none",
    # thick black baseline along the bottom
    axis.line.x.bottom = element_line(color = "black", linewidth = 1.5),
    plot.title = element_text(hjust = 0.5, face = "bold"), # centred, bold
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    # thick dashed grey major gridlines
    panel.grid.major.x = element_line(colour = "grey", linewidth = 1.5,
                                      linetype = "dashed")
  ) +
  labs(
    title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information",
    y = "F1"
  )

Revised Replica Violin Plot with Boxplot

# Revised replica: violin plot of F1 scores per model with a boxplot overlaid,
# drawn horizontally and using a different colour pair than the first replica.
ggplot(data_long, aes(x = F1Performance, y = values)) +
  # Violins, 50% transparent, filled by model
  geom_violin(aes(fill = F1Performance), alpha = 0.5, linewidth = 1) +
  # Boxplot on top of each violin: black outline, no fill so the violin shows
  # through
  geom_boxplot(width = 0.3, color = "black", fill = NA, linewidth = 1) +
  # In-plot labels replace the legend, coloured to match the fills.
  # annotate() draws each label exactly once; geom_text(aes(...)) with
  # constants would redraw it for every data row.
  annotate("text", x = "SVMWithGradCAMMaps", y = 0.64,
           label = "SVM + GRAD-CAM++", vjust = -5.8,
           color = "#800074", size = 4.5) +
  annotate("text", x = "SVMWithDeepShapMaps", y = 0.59,
           label = "SVM + Deep SHAP", vjust = -5.2,
           color = "#298c8c", size = 4.5) +
  # Manual fills, assigned to the models in the order of the discrete scale
  scale_fill_manual(values = c("#298c8c", "#800074")) +
  # F1 axis (shown horizontally after the flip): 0.56 to 0.74 in steps of 0.02
  scale_y_continuous(limits = c(0.56, 0.74),
                     breaks = seq(0.56, 0.74, by = 0.02),
                     labels = seq(0.56, 0.74, by = 0.02)) +
  coord_flip() + # swap x and y so the violins lie horizontally
  theme_minimal() +
  theme(
    # hide the model axis entirely; the in-plot labels identify the groups
    axis.title.y = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "none",
    # thick black baseline along the bottom
    axis.line.x.bottom = element_line(color = "black", linewidth = 1.5),
    plot.title = element_text(hjust = 0.5, face = "bold"), # centred, bold
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    # thick dashed grey major gridlines
    panel.grid.major.x = element_line(colour = "grey", linewidth = 1.5,
                                      linetype = "dashed")
  ) +
  labs(
    title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information",
    y = "F1"
  )

Graphic Assignment 1

USA Arrest Data

# Load the USArrests data set into the workspace
data(USArrests)
# Display the first 50 rows of USArrests
head(x = USArrests, n = 50L)

##                Murder Assault UrbanPop Rape
## Alabama          13.2     236       58 21.2
## Alaska           10.0     263       48 44.5
## Arizona           8.1     294       80 31.0
## Arkansas          8.8     190       50 19.5
## California        9.0     276       91 40.6
## Colorado          7.9     204       78 38.7
## Connecticut       3.3     110       77 11.1
## Delaware          5.9     238       72 15.8
## Florida          15.4     335       80 31.9
## Georgia          17.4     211       60 25.8
## Hawaii            5.3      46       83 20.2
## Idaho             2.6     120       54 14.2
## Illinois         10.4     249       83 24.0
## Indiana           7.2     113       65 21.0
## Iowa              2.2      56       57 11.3
## Kansas            6.0     115       66 18.0
## Kentucky          9.7     109       52 16.3
## Louisiana        15.4     249       66 22.2
## Maine             2.1      83       51  7.8
## Maryland         11.3     300       67 27.8
## Massachusetts     4.4     149       85 16.3
## Michigan         12.1     255       74 35.1
## Minnesota         2.7      72       66 14.9
## Mississippi      16.1     259       44 17.1
## Missouri          9.0     178       70 28.2
## Montana           6.0     109       53 16.4
## Nebraska          4.3     102       62 16.5
## Nevada           12.2     252       81 46.0
## New Hampshire     2.1      57       56  9.5
## New Jersey        7.4     159       89 18.8
## New Mexico       11.4     285       70 32.1
## New York         11.1     254       86 26.1
## North Carolina   13.0     337       45 16.1
## North Dakota      0.8      45       44  7.3
## Ohio              7.3     120       75 21.4
## Oklahoma          6.6     151       68 20.0
## Oregon            4.9     159       67 29.3
## Pennsylvania      6.3     106       72 14.9
## Rhode Island      3.4     174       87  8.3
## South Carolina   14.4     279       48 22.5
## South Dakota      3.8      86       45 12.8
## Tennessee        13.2     188       59 26.9
## Texas            12.7     201       80 25.5
## Utah              3.2     120       80 22.9
## Vermont           2.2      48       32 11.2
## Virginia          8.5     156       63 20.7
## Washington        4.0     145       73 26.2
## West Virginia     5.7      81       39  9.3
## Wisconsin         2.6      53       66 10.8
## Wyoming           6.8     161       60 15.6

# Copy the row names (the state names) into a regular column
USArrests$State <- row.names(USArrests)
# Per-state mean of the Murder, Assault and Rape columns (computed row by row,
# ignoring missing values), rounded to 2 decimal places
USArrests$AverageCrimeRate <- round(
  rowMeans(USArrests[, c("Murder", "Assault", "Rape")], na.rm = TRUE),
  digits = 2
)

Discussion

What are the variables available? 
*The arrest rates for three types of violent crime (murder, assault, and rape) and the percent urban population for each state

How is each variable defined or calculated? 
*Murder, Assault, and Rape are each the number of arrests per 100,000 residents, and UrbanPop is the percentage of the state's population living in urban areas

Is each one numerical or categorical? 
*They are all numerical

USA Arrest Plots

# Scatter plot of Assault against Murder from USArrests: black points with a
# red linear-regression line and no standard-error band, in the classic theme
ggplot(data = USArrests, mapping = aes(x = Murder, y = Assault)) +
  geom_point(colour = "black") +
  geom_smooth(method = "lm", se = FALSE, colour = "red") +
  theme_classic() +
  ggtitle("Scatter Plot of Assault vs. Murder Rates") + # plot title
  xlab("Murder Rate") + # x axis title
  ylab("Assault Rate") # y axis title

# Line plot of the average crime rate for every state, using the State and
# AverageCrimeRate columns of USArrests.
# group = 1 joins all states into a single line even though x is discrete.
ggplot(USArrests, aes(x = State, y = AverageCrimeRate, group = 1)) +
  # line widths are set with `linewidth`; `size` is deprecated for lines
  geom_line(color = "#298c9c", linewidth = 1) +
  geom_point(color = "#800074", size = 2.5) +
  theme_classic() +
  theme(
    # rotate the state names 90 degrees so they do not overlap
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.3),
    # add major gridlines in both directions on top of the classic theme
    panel.grid.major.x = element_line(),
    panel.grid.major.y = element_line()
  ) +
  labs(
    title = "Line Plot of Average Crime Rate by State",
    x = "State", y = "Average Crime Rate"
  )

Scatter Plot of Horsepower on Fuel Efficiency Categorized by Number of Cylinders

# Scatter plot of fuel efficiency against horsepower from mtcars.
# Horsepower (hp) goes on x and MPG (mpg) on y so the mapping agrees with the
# axis titles and the plot title.
ggplot(mtcars, aes(x = hp, y = mpg)) +
  # cyl is converted to a factor so each cylinder count gets its own discrete
  # colour instead of a continuous gradient
  geom_point(aes(color = factor(cyl)), size = 2.4, shape = 8) +
  theme_minimal() +
  theme(legend.position = "bottom") + # legend underneath the plot
  labs(
    title = "Effect of Horsepower on Fuel Efficiency",
    subtitle = "Categorized by Number of Cylinders",
    x = "Horsepower", y = "Fuel Efficiency (MPG)",
    color = "Cylinders" # legend title
  )

KLUMP Data Visualization Code

Sean Klump

01/05/2025

Packages Installer Code

Final Assignment

Canadian Asylum Claims Data Sets

Canadian Data Origin and Description

Canadian Asylum Claim Plots by Age Group

Canadian Asylum Claim Plots by Gender

Graphic Assignment 4

Chick Growth Data

Line Plot of Chick Growth by Diet Type

Plant CO2 Uptake Data

Plant CO2 Uptake by Treatment Type

Graphic Assignment 3

Population Data and 2D Density Plot

Longevity Data and Bubble Plot with Density Margin Plots

Interpretation Questions:

BMI Data

BMI Bubble Plot and Histogram by Sex

Graphic Assignment 2

Replica Violin and Half-Violin Data and Plot

Revised Replica Violin Plot with Boxplot

Graphic Assignment 1

USA Arrest Data

Discussion

USA Arrest Plots

Scatter Plot of Horsepower on Fuel Efficiency Categorized by Number of Cylinders