packages <- c("ggplot2", "readr", "tidyverse", "dplyr", "ggpubr", "see", "rmarkdown", "knitr", "tinytex", "ggExtra") #puts all of the packages into one word to be used later rather than repeating long text
# Make sure a package is installed and attached.
#
# pkg: a single package name, given as a character string.
#
# The package is installed (with its dependencies) only when it is not
# already available. It is then attached with library() rather than
# require(), so a package that still cannot be loaded stops with an error
# instead of quietly returning FALSE.
#
# Returns NULL invisibly; the function is called for its side effects.
check_install_packages <- function(pkg) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg, dependencies = TRUE)
  }
  library(pkg, character.only = TRUE)
  invisible(NULL)
}
#Downloads and reads packages in library if needed
sapply(packages, check_install_packages)
## $ggplot2
## NULL
##
## $readr
## NULL
##
## $tidyverse
## NULL
##
## $dplyr
## NULL
##
## $ggpubr
## NULL
##
## $see
## NULL
##
## $rmarkdown
## NULL
##
## $knitr
## NULL
##
## $tinytex
## NULL
##
## $ggExtra
## NULL
# Asylum claimants by age group: one row per year (2015-2024) and age range.
Asylum_Age <- data.frame(
  # each year is repeated six times, once per age range; stored as a double
  # so the plots can treat it as a continuous variable
  Year = rep(as.numeric(2015:2024), each = 6),
  # the six age ranges, cycled once for every year
  Age_Range = rep(c("0-14", "15-29", "30-44", "45-59", "60-74", "75+"), times = 10),
  # claimant counts, one line per year in the same age-range order
  Claimants = c(
    3365, 4925, 5300, 1735, 580, 130,
    5425, 7120, 7800, 2515, 815, 165,
    13185, 11400, 18215, 5415, 1290, 200,
    14195, 13030, 19685, 6375, 1460, 265,
    13640, 17445, 22945, 7920, 1745, 315,
    3805, 7560, 7965, 3050, 1030, 240,
    4105, 8250, 7945, 3090, 1180, 255,
    18095, 29180, 32125, 9620, 2130, 460,
    21115, 49235, 53210, 16020, 3160, 590,
    21270, 55060, 59190, 19360, 3450, 705
  )
)
# Within each year, work out what share of that year's claimants falls in
# each age range, and build a printable percentage label (one decimal place
# followed by "%") for the pie charts.
normalized_data_Age <- ungroup(
  mutate(
    group_by(Asylum_Age, Year), # sum(Claimants) below is a per-year total
    Proportion_Age = Claimants / sum(Claimants),
    Percent_Label_Age = paste0(round(Proportion_Age * 100, 1), "%")
  )
)
# Asylum claimants by gender: one row per year (2015-2024) and gender.
Asylum_Gender <- data.frame(
  # each year appears twice (Female, Male); stored as a double so the plots
  # can treat it as a continuous variable
  Year = rep(as.numeric(2015:2024), each = 2),
  Gender = rep(c("Female", "Male"), times = 10),
  # claimant counts as Female/Male pairs, one pair per year
  Claimants = c(
    7205, 8830,
    10445, 13395,
    22540, 27805,
    24855, 30165,
    28365, 35640,
    10355, 13310,
    10710, 14145,
    37815, 53755,
    56295, 86995,
    58095, 100890
  )
)
# Within each year, work out what share of that year's claimants each gender
# makes up, and build a printable percentage label (one decimal place
# followed by "%") for the pie charts.
normalized_data_Gender <- ungroup(
  mutate(
    group_by(Asylum_Gender, Year), # sum(Claimants) below is a per-year total
    Proportion_Gender = Claimants / sum(Claimants),
    Percent_Label_Gender = paste0(round(Proportion_Gender * 100, 1), "%")
  )
)
#does the same thing as the Asylum_Age dataframe
head(Asylum_Gender,100) #displays first 100 rows of data frame
## Year Gender Claimants
## 1 2015 Female 7205
## 2 2015 Male 8830
## 3 2016 Female 10445
## 4 2016 Male 13395
## 5 2017 Female 22540
## 6 2017 Male 27805
## 7 2018 Female 24855
## 8 2018 Male 30165
## 9 2019 Female 28365
## 10 2019 Male 35640
## 11 2020 Female 10355
## 12 2020 Male 13310
## 13 2021 Female 10710
## 14 2021 Male 14145
## 15 2022 Female 37815
## 16 2022 Male 53755
## 17 2023 Female 56295
## 18 2023 Male 86995
## 19 2024 Female 58095
## 20 2024 Male 100890
head(Asylum_Age, 100) #displays first 100 rows of data frame
## Year Age_Range Claimants
## 1 2015 0-14 3365
## 2 2015 15-29 4925
## 3 2015 30-44 5300
## 4 2015 45-59 1735
## 5 2015 60-74 580
## 6 2015 75+ 130
## 7 2016 0-14 5425
## 8 2016 15-29 7120
## 9 2016 30-44 7800
## 10 2016 45-59 2515
## 11 2016 60-74 815
## 12 2016 75+ 165
## 13 2017 0-14 13185
## 14 2017 15-29 11400
## 15 2017 30-44 18215
## 16 2017 45-59 5415
## 17 2017 60-74 1290
## 18 2017 75+ 200
## 19 2018 0-14 14195
## 20 2018 15-29 13030
## 21 2018 30-44 19685
## 22 2018 45-59 6375
## 23 2018 60-74 1460
## 24 2018 75+ 265
## 25 2019 0-14 13640
## 26 2019 15-29 17445
## 27 2019 30-44 22945
## 28 2019 45-59 7920
## 29 2019 60-74 1745
## 30 2019 75+ 315
## 31 2020 0-14 3805
## 32 2020 15-29 7560
## 33 2020 30-44 7965
## 34 2020 45-59 3050
## 35 2020 60-74 1030
## 36 2020 75+ 240
## 37 2021 0-14 4105
## 38 2021 15-29 8250
## 39 2021 30-44 7945
## 40 2021 45-59 3090
## 41 2021 60-74 1180
## 42 2021 75+ 255
## 43 2022 0-14 18095
## 44 2022 15-29 29180
## 45 2022 30-44 32125
## 46 2022 45-59 9620
## 47 2022 60-74 2130
## 48 2022 75+ 460
## 49 2023 0-14 21115
## 50 2023 15-29 49235
## 51 2023 30-44 53210
## 52 2023 45-59 16020
## 53 2023 60-74 3160
## 54 2023 75+ 590
## 55 2024 0-14 21270
## 56 2024 15-29 55060
## 57 2024 30-44 59190
## 58 2024 45-59 19360
## 59 2024 60-74 3450
## 60 2024 75+ 705
https://open.canada.ca/data/en/dataset/b6cbcf4d-f763-4924-a2fb-8cc4a06e3de4?_gl=1*1gf1pvt*_ga*MTE5OTYyNDgzOS4xNzM2MDkyMjI1*_ga_S9JG8CZVYZ*MTczNjA5MjIyNC4xLjAuMTczNjA5MjIyNC42MC4wLjA.
The data for this section came from the Government of Canada. The data was in a very complex horizontal format, so I had to manually build a vertical (long-format) data frame containing the same data. My graphs show two key things. The first is a line graph showing how the number of asylum claimants in Canada in each age group and gender has changed since 2015. The second is a set of pie charts, one per year, showing how the percentage of each age group or gender changes from year to year.
# Line chart of asylum claimants per year, one coloured line per age group.
ggplot(Asylum_Age, aes(x = Year, y = Claimants)) +
  # line thickness is set with `linewidth`; `size` is deprecated for lines
  # since ggplot2 3.4.0
  geom_line(aes(color = Age_Range), linewidth = 1) +
  geom_point(aes(color = Age_Range), size = 3) + # size-3 marker on every yearly value
  scale_x_continuous(
    limits = c(2015, 2024), # x axis runs from 2015 to 2024
    breaks = seq(2015, 2024, by = 1), # one tick per year
    labels = seq(2015, 2024, by = 1) # each tick labelled with its year
  ) +
  scale_color_manual(values = c("#b5d1ae", "#80ae9a", "#568b87", "#326b77", "#1b485e", "#122740")) + # one colour per age group
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1.3)) + # tilt the year labels 45 degrees
  labs(
    title = "Canadian Asylum Claimants Since 2015",
    subtitle = "Categorized by Age Groups",
    x = "Year",
    y = "Number of Claimants",
    color = "Age Groups" # legend title
  )
# One pie chart per year showing each age group's share of that year's claimants.
ggplot(normalized_data_Age, aes(x = "", y = Proportion_Age, fill = Age_Range)) +
  geom_col(width = 1) + # a single stacked bar of the precomputed proportions ...
  coord_polar(theta = "y") + # ... wrapped around the y axis so it reads as a pie
  geom_text(
    aes(x = 1.13, label = Percent_Label_Age), # x sets how far from the centre the label sits
    position = position_stack(vjust = 0.5), # centre each label within its own slice
    size = 3.2,
    color = "white"
  ) +
  scale_fill_manual(values = c("#b5d1ae", "#80ae9a", "#568b87", "#326b77", "#1b485e", "#122740")) + # one fill colour per age group
  facet_wrap(~Year) + # one panel per year
  labs(title = "Asylum Claimants by Age Groups", x = NULL, y = NULL) + # plot title only, no axis titles
  theme_void() + # strip axes, grid lines and background
  theme(strip.text = element_text(size = 10, face = "bold")) # bold year heading above each pie
# Line chart of asylum claimants per year, one line per gender.
ggplot(Asylum_Gender, aes(x = Year, y = Claimants)) +
  # line thickness is set with `linewidth`; `size` is deprecated for lines
  # since ggplot2 3.4.0
  geom_line(aes(color = Gender), linewidth = 1) +
  geom_point(aes(color = Gender, shape = Gender), size = 3) + # marker colour and shape both vary by gender
  scale_x_continuous(
    limits = c(2015, 2024), # x axis runs from 2015 to 2024
    breaks = seq(2015, 2024, by = 1), # one tick per year
    labels = seq(2015, 2024, by = 1) # each tick labelled with its year
  ) +
  scale_color_manual(values = c("pink", "lightblue")) + # one colour per gender
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1.3)) + # tilt the year labels 45 degrees
  labs(
    title = "Canadian Asylum Claimants Since 2015",
    subtitle = "Categorized by Gender",
    x = "Year",
    y = "Number of Claimants",
    color = "Gender" # legend title
  )
# One pie chart per year showing each gender's share of that year's claimants.
ggplot(normalized_data_Gender, aes(x = "", y = Proportion_Gender, fill = Gender)) +
  geom_col(width = 1) + # a single stacked bar of the precomputed proportions ...
  coord_polar(theta = "y") + # ... wrapped around the y axis so it reads as a pie
  geom_text(
    aes(label = Percent_Label_Gender),
    position = position_stack(vjust = 0.5), # centre each label within its own slice
    size = 3
  ) +
  scale_fill_manual(values = c("pink", "lightblue")) + # one fill colour per gender
  facet_wrap(~Year) + # one panel per year
  labs(title = "Asylum Claimants by Gender", x = NULL, y = NULL) + # plot title only, no axis titles
  theme_void() + # strip axes, grid lines and background
  theme(strip.text = element_text(size = 10, face = "bold")) # bold year heading above each pie
head(ChickWeight) #prints the data in a table
## weight Time Chick Diet
## 1 42 0 1 1
## 2 51 2 1 1
## 3 59 4 1 1
## 4 64 6 1 1
## 5 76 8 1 1
## 6 93 10 1 1
# Growth curves for every chick, split into one panel per diet, with a
# smoothed trend line and its standard-error band on top.
ggplot(ChickWeight, aes(x = Time, y = weight)) +
  geom_line(aes(color = Chick), alpha = .4) + # one faint line per chick
  # `linewidth` replaces `size`, which is deprecated for lines since ggplot2 3.4.0
  geom_smooth(color = "black", linewidth = 1.2, se = TRUE) +
  facet_wrap(~Diet, ncol = 4) + # four diet panels in a single row
  theme_minimal() +
  theme(
    legend.position = "none", # hide the per-chick colour legend
    plot.title = element_text(face = "bold"),
    axis.title = element_text(face = "bold"),
    strip.text = element_text(size = 12, face = "bold"), # diet number above each panel
    axis.text.x = element_text(angle = 45, hjust = 1.5, vjust = 1.3), # tilted x tick labels
    panel.spacing.x = unit(.5, "lines") # horizontal gap between panels
  ) +
  labs(
    title = "Chick Growth by Diet Type",
    x = "Time (Days)", y = "Weight (Grams)"
  )
head(CO2, n = 84) #pastes first 84 row of data in a table
## Grouped Data: uptake ~ conc | Plant
## Plant Type Treatment conc uptake
## 1 Qn1 Quebec nonchilled 95 16.0
## 2 Qn1 Quebec nonchilled 175 30.4
## 3 Qn1 Quebec nonchilled 250 34.8
## 4 Qn1 Quebec nonchilled 350 37.2
## 5 Qn1 Quebec nonchilled 500 35.3
## 6 Qn1 Quebec nonchilled 675 39.2
## 7 Qn1 Quebec nonchilled 1000 39.7
## 8 Qn2 Quebec nonchilled 95 13.6
## 9 Qn2 Quebec nonchilled 175 27.3
## 10 Qn2 Quebec nonchilled 250 37.1
## 11 Qn2 Quebec nonchilled 350 41.8
## 12 Qn2 Quebec nonchilled 500 40.6
## 13 Qn2 Quebec nonchilled 675 41.4
## 14 Qn2 Quebec nonchilled 1000 44.3
## 15 Qn3 Quebec nonchilled 95 16.2
## 16 Qn3 Quebec nonchilled 175 32.4
## 17 Qn3 Quebec nonchilled 250 40.3
## 18 Qn3 Quebec nonchilled 350 42.1
## 19 Qn3 Quebec nonchilled 500 42.9
## 20 Qn3 Quebec nonchilled 675 43.9
## 21 Qn3 Quebec nonchilled 1000 45.5
## 22 Qc1 Quebec chilled 95 14.2
## 23 Qc1 Quebec chilled 175 24.1
## 24 Qc1 Quebec chilled 250 30.3
## 25 Qc1 Quebec chilled 350 34.6
## 26 Qc1 Quebec chilled 500 32.5
## 27 Qc1 Quebec chilled 675 35.4
## 28 Qc1 Quebec chilled 1000 38.7
## 29 Qc2 Quebec chilled 95 9.3
## 30 Qc2 Quebec chilled 175 27.3
## 31 Qc2 Quebec chilled 250 35.0
## 32 Qc2 Quebec chilled 350 38.8
## 33 Qc2 Quebec chilled 500 38.6
## 34 Qc2 Quebec chilled 675 37.5
## 35 Qc2 Quebec chilled 1000 42.4
## 36 Qc3 Quebec chilled 95 15.1
## 37 Qc3 Quebec chilled 175 21.0
## 38 Qc3 Quebec chilled 250 38.1
## 39 Qc3 Quebec chilled 350 34.0
## 40 Qc3 Quebec chilled 500 38.9
## 41 Qc3 Quebec chilled 675 39.6
## 42 Qc3 Quebec chilled 1000 41.4
## 43 Mn1 Mississippi nonchilled 95 10.6
## 44 Mn1 Mississippi nonchilled 175 19.2
## 45 Mn1 Mississippi nonchilled 250 26.2
## 46 Mn1 Mississippi nonchilled 350 30.0
## 47 Mn1 Mississippi nonchilled 500 30.9
## 48 Mn1 Mississippi nonchilled 675 32.4
## 49 Mn1 Mississippi nonchilled 1000 35.5
## 50 Mn2 Mississippi nonchilled 95 12.0
## 51 Mn2 Mississippi nonchilled 175 22.0
## 52 Mn2 Mississippi nonchilled 250 30.6
## 53 Mn2 Mississippi nonchilled 350 31.8
## 54 Mn2 Mississippi nonchilled 500 32.4
## 55 Mn2 Mississippi nonchilled 675 31.1
## 56 Mn2 Mississippi nonchilled 1000 31.5
## 57 Mn3 Mississippi nonchilled 95 11.3
## 58 Mn3 Mississippi nonchilled 175 19.4
## 59 Mn3 Mississippi nonchilled 250 25.8
## 60 Mn3 Mississippi nonchilled 350 27.9
## 61 Mn3 Mississippi nonchilled 500 28.5
## 62 Mn3 Mississippi nonchilled 675 28.1
## 63 Mn3 Mississippi nonchilled 1000 27.8
## 64 Mc1 Mississippi chilled 95 10.5
## 65 Mc1 Mississippi chilled 175 14.9
## 66 Mc1 Mississippi chilled 250 18.1
## 67 Mc1 Mississippi chilled 350 18.9
## 68 Mc1 Mississippi chilled 500 19.5
## 69 Mc1 Mississippi chilled 675 22.2
## 70 Mc1 Mississippi chilled 1000 21.9
## 71 Mc2 Mississippi chilled 95 7.7
## 72 Mc2 Mississippi chilled 175 11.4
## 73 Mc2 Mississippi chilled 250 12.3
## 74 Mc2 Mississippi chilled 350 13.0
## 75 Mc2 Mississippi chilled 500 12.5
## 76 Mc2 Mississippi chilled 675 13.7
## 77 Mc2 Mississippi chilled 1000 14.4
## 78 Mc3 Mississippi chilled 95 10.6
## 79 Mc3 Mississippi chilled 175 18.0
## 80 Mc3 Mississippi chilled 250 17.9
## 81 Mc3 Mississippi chilled 350 17.9
## 82 Mc3 Mississippi chilled 500 17.9
## 83 Mc3 Mississippi chilled 675 18.9
## 84 Mc3 Mississippi chilled 1000 19.9
theme_set(
theme_minimal()
) #makes the theme minimal for all plots
# P1: violin plot of CO2 uptake per treatment, filled by plant type, with the
# individual observations overlaid as a centred dot plot.
P1 <- ggplot(CO2, aes(x = Treatment, y = uptake)) +
  geom_violin(aes(fill = Type), trim = TRUE) + # violins cut off at the data range
  geom_dotplot(aes(color = Type), binaxis = 'y', stackdir = 'center', position = position_dodge()) + # dots binned along y, dodged by type
  # both types get black dot outlines; written as the full six-digit hex code
  # because the three-digit short form ("#000") is only understood by newer R
  # releases
  scale_color_manual(values = c("#000000", "#000000")) +
  scale_fill_manual(values = c("#8000bb", "darkorange3")) + # violin fill per type
  theme(axis.title = element_text(face = "bold", size = 9)) +
  labs(y = "CO2 Uptake")
# P2: CO2 uptake against concentration as lines plus points, coloured by
# treatment, with one panel per plant type.
P2 <- ggplot(CO2, aes(x = conc, y = uptake)) +
  geom_line(aes(color = Treatment)) +
  geom_point(aes(color = Treatment, shape = Treatment)) + # marker shape also varies by treatment
  facet_wrap(~Type) +
  scale_color_manual(values = c("#298c8c", "#800074")) + # one colour per treatment
  labs(x = "CO2 Concentration", y = "CO2 Uptake") +
  theme(axis.title = element_text(face = "bold", size = 9))
# P3: CO2 uptake against concentration as side-by-side bars per treatment,
# with one panel per plant type.
P3 <- ggplot(CO2, aes(x = conc, y = uptake, colour = Treatment, fill = Treatment)) +
  geom_col(position = position_dodge()) + # treatments are dodged rather than stacked
  facet_wrap(~Type) +
  scale_color_manual(values = c("#298c8c", "#800074")) + # bar outline per treatment
  scale_fill_manual(values = c("#298c8c", "#800074")) + # bar fill per treatment
  labs(x = "CO2 Concentration", y = "CO2 Uptake") +
  theme(axis.title = element_text(face = "bold", size = 9))
# Combined figure: P1 fills the top row (panel A); P3 and P2 sit side by side
# in the bottom row (panels B and C) and share one legend placed beneath them.
# NOTE(review): the object is named `plot`, the same name as the base
# graphics function; the name is kept so existing references still work.
plot <- ggarrange(
  P1,
  ggarrange(P3, P2, ncol = 2, labels = c("B", "C"), common.legend = TRUE, legend = "bottom"),
  nrow = 2,
  labels = "A"
)
# Overall title for the combined figure (spelling fixed: "CO2", "Treatment").
annotate_figure(plot, top = text_grob("CO2 Uptake by Treatment Group", size = 15, face = "bold"))
population_data <- read.csv("C:/Users/seank/Downloads/R_Coding_Course/Data/log_population_data.csv") #reading CSV file from computer files
head(population_data) #pasting as a data table
## Log10_Current_Population Log10_Past_Population
## 1 4.288032 5.674204
## 2 3.817497 5.908109
## 3 4.671286 6.095078
## 4 3.538305 5.200114
## 5 4.602143 6.388435
## 6 4.839555 6.187712
# Filled 2D density contours of past vs. current population size (log10 columns).
ggplot(population_data, aes(x = Log10_Current_Population, y = Log10_Past_Population)) +
  # after_stat(level) is the supported way to reference the computed contour
  # level; the older `..level..` notation is deprecated
  stat_density_2d(aes(fill = after_stat(level)), geom = "polygon", colour = "white") + # white outline between contour bands
  scale_fill_distiller(palette = 9, direction = 1) + # palette number 9 with direction = 1
  theme_minimal() +
  labs(
    title = "2D Density Plot of Population Sizes",
    x = "Log10(Current population size N0)",
    y = "Log10(past population size N1)",
    fill = "Density" # legend title
  )
longevity_data <- read.csv("C:/Users/seank/Downloads/R_Coding_Course/Data/longevity_data.csv") #reading csv file from computer
# Add log10 versions of body mass and maximum lifespan, plus `order_size`,
# the number of rows that share each row's taxonomic order.
long <- longevity_data %>%
  mutate(
    log_mass = log10(mass_g),
    log_lifespan = log10(maximum_lifespan_yr)
  ) %>%
  group_by(order) %>% # n() below counts rows within each order
  mutate(order_size = n()) %>%
  ungroup() # drop the grouping so later steps do not silently run per order
head(long) #pastes data in a table
## # A tibble: 6 × 12
## species class order maximum_lifespan_yr mass_g volancy fossoriallity
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr>
## 1 Dicrostonyx_groe… Mamm… Rode… 3.3 66 nonvol… semifossorial
## 2 Didelphis_virgin… Mamm… Dide… 6.6 3000 nonvol… nonfossorial
## 3 Diphylla_ecaudata Mamm… Chir… 8 28 volant nonfossorial
## 4 Dipodillus_campe… Mamm… Rode… 7.3 28.4 nonvol… semifossorial
## 5 Dipodomys_merria… Mamm… Rode… 9.7 42 nonvol… semifossorial
## 6 Dendrolagus_good… Mamm… Dipr… 23.6 7400 nonvol… nonfossorial
## # ℹ 5 more variables: foraging_environment <chr>, daily_activity <chr>,
## # log_mass <dbl>, log_lifespan <dbl>, order_size <int>
# Bubble chart of log lifespan against log body mass, coloured by class and
# sized by order_size, with marginal density curves added afterwards.
p <- ggplot(long, aes(x = log_mass, y = log_lifespan)) +
  geom_point(aes(color = class, size = order_size), alpha = 0.3) + # translucent bubbles
  geom_smooth(aes(color = class), method = lm, se = FALSE, linetype = "solid") + # straight-line fit per class, no error band
  scale_color_manual(values = c("lightgreen", "darkslategray")) + # one colour per class
  # in-plot class names take the place of the hidden legend
  annotate(
    "text", x = 5, y = 1.8, hjust = -0.4, vjust = -0.9,
    label = "Aves", color = "lightgreen", size = 5, fontface = "bold"
  ) +
  annotate(
    "text", x = 6, y = 1.2,
    label = "Mammals", color = "darkslategray", size = 5, fontface = "bold"
  ) +
  labs(
    title = "Bubble Chart of Longevity and Body Mass",
    x = "Log (Body Mass [g])",
    y = "Log (Maximum Lifespan [yr])"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none", # no legend
    plot.title = element_text(size = 14, face = "bold"),
    axis.title = element_text(size = 12, face = "bold")
  )
# density curves for each class along the top and right margins
ggExtra::ggMarginal(p, type = "density", groupFill = TRUE, alpha = 0.4)
What is the benefit to adding density plots in the margin of your graphics?
*To show the distribution of the data and to find potential bias.
Explain how you were able to depict 6 different measures in a single graphic. Be sure to clearly list the element and how it was depicted.
*scatter (points)-using geom_point, the points appear as translucent circles to show the data points
*density- used ggExtra::ggMarginal to add marginal density plots that show the distribution of the mass and lifespan data
*size- set size to order_size, to show how many samples were taken from each order.
*regression- used geom_smooth to show the relationship between lifespan and mass for each group.
*color- set the color to class so the two groups could be differentiated.
*Log values- used log10 function to find the log values for the x and y axis so the data could be shown in a more linear way.
What is the relationship between longevity and body mass? Is it more extreme in mammals or aves?
*There is a positive correlation between longevity and body mass and is more extreme in aves as the slope is steeper.
Is the data more biased toward smaller/larger or long/short lived animals? How do you know and why do you think that is?
*Smaller/larger animals: the density plots along the top are skewed to the left or right, which indicates potential bias, while the density plots on the right are closer to a normal distribution and have good coverage. In the top plot, the Aves group is missing coverage on the right side and is condensed on the left side.
Is there an element missing from this graphic that you feel should be there? Hint: There is one that could be helpful if added that is not depicted currently in any other way on the graphic.
*A legend describing what the size of the point represents (sample size).
# Load the height data, then derive height in metres and BMI
# (weight in kg divided by height in metres squared).
Height <- read.csv("C:/Users/seank/Downloads/R_Coding_Course/Data/height_data.csv")
Height[["height_m"]] <- Height[["height_cm"]] / 100
Height[["BMI"]] <- Height[["weight_kg"]] / Height[["height_m"]]^2
head(Height) #paste data frame in a table
## sex height_cm weight_kg shoe_size_EU height_m BMI
## 1 M 180 79 42 1.80 24.38272
## 2 M 165 65 41 1.65 23.87511
## 3 M 178 72 42 1.78 22.72440
## 4 M 160 53 43 1.60 20.70312
## 5 M 182 78 36 1.82 23.54788
## 6 F 158 55 38 1.58 22.03173
# Bubble chart of BMI against weight, coloured by sex and sized by height,
# with marginal density curves added afterwards.
p <- ggplot(Height, aes(x = weight_kg, y = BMI)) +
  geom_point(aes(color = sex, size = height_cm), alpha = 0.5) + # half-transparent bubbles
  geom_smooth(aes(color = sex), method = lm, se = FALSE) + # straight-line fit per sex, no error band
  scale_color_manual(values = c("#ff73b6", "#008dff")) + # one colour per sex
  labs(
    title = "Bubble Chart of Weight and BMI",
    x = "Weight(kg)", y = "BMI",
    color = "Sex", size = "Height(cm)" # legend titles
  ) +
  theme_minimal() +
  theme(legend.position = "left")
# density curves for each sex along the top and right margins
ggExtra::ggMarginal(p, type = "density", groupFill = TRUE, alpha = 0.3)
# Distribution of BMI by sex: dodged density-scaled histogram with a
# translucent density curve drawn over it.
ggplot(Height, aes(x = BMI)) +
  # after_stat(density) puts the bars on the density scale; the older
  # `..density..` notation is deprecated
  geom_histogram(aes(y = after_stat(density), color = sex, fill = sex), alpha = 1, position = "dodge") +
  geom_density(aes(color = sex, fill = sex), alpha = 0.3) +
  scale_color_manual(values = c("#ff73b6", "#008dff")) + # outline colour per sex
  scale_fill_manual(values = c("#ff73b6", "#008dff")) + # fill colour per sex
  theme_minimal() +
  theme(legend.title.position = "right") + # legend title placed to the right of the keys
  guides(color = "none") + # only the fill legend is shown
  labs(
    title = "Distribution of BMI", subtitle = "Categorized by Sex",
    x = "BMI", y = "Density",
    # labs() expects a label or NULL; element_blank() is a theme element and
    # does not belong here
    color = NULL, fill = "Sex"
  )
CAM <- read.csv("C:/Users/seank/Downloads/R_Coding_Course/Data/Violin_Plot_Data.csv") #reads csv file from computer and names it CAM
# Reshape CAM from wide to long: every column whose name starts with "Repeat"
# becomes a row, with the column name stored in `Repeat` and its value in `values`.
data_long <- pivot_longer(
  CAM,
  cols = starts_with("Repeat"),
  names_to = "Repeat",
  values_to = "values"
)
head(data_long, 40) #pastes the first 40 rows
## # A tibble: 40 × 3
## F1Performance Repeat values
## <chr> <chr> <dbl>
## 1 SVMWithGradCAMMaps Repeat1 0.670
## 2 SVMWithGradCAMMaps Repeat2 0.702
## 3 SVMWithGradCAMMaps Repeat3 0.681
## 4 SVMWithGradCAMMaps Repeat4 0.711
## 5 SVMWithGradCAMMaps Repeat5 0.649
## 6 SVMWithGradCAMMaps Repeat6 0.716
## 7 SVMWithGradCAMMaps Repeat7 0.714
## 8 SVMWithGradCAMMaps Repeat8 0.685
## 9 SVMWithGradCAMMaps Repeat9 0.699
## 10 SVMWithGradCAMMaps Repeat10 0.688
## # ℹ 30 more rows
# Horizontal violin plot of F1 values per model, with jittered points behind
# the violins, quartile lines, and a white dot marking each median.
ggplot(data_long, aes(x = F1Performance, y = values)) +
  geom_jitter(
    aes(color = F1Performance), alpha = 0.8, size = 5,
    position = position_jitter(width = 0.1) # small sideways scatter so points do not overlap
  ) +
  scale_color_manual(values = c("darkorchid4", "darkorange1")) + # point colour per model
  # outline thickness uses `linewidth` (`size` is deprecated for lines), and
  # draw_quantiles takes numeric probabilities, not strings. The original
  # `quantile.size` argument is not a geom_violin() parameter and was dropped.
  geom_violin(
    aes(fill = F1Performance), alpha = 0.5, linewidth = 2,
    draw_quantiles = c(0.25, 0.5, 0.75)
  ) +
  stat_summary(
    fun = median, geom = "point", shape = 21, size = 3,
    fill = "white", color = "black", stroke = 1.5 # white median marker with black outline
  ) +
  scale_fill_manual(values = c("darkorchid4", "darkorange1")) + # violin fill per model
  coord_flip() + # draw horizontally; the F1 scale ends up along the bottom
  theme_minimal() +
  theme(
    axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
    legend.position = "none",
    axis.line.x.bottom = element_line(color = "black", linewidth = 1.5), # heavy baseline
    plot.title = element_text(hjust = 0.5, face = "bold"), # centred bold title
    panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_line(colour = "grey", linewidth = 1.5, linetype = "dashed")
  ) +
  # annotate() draws each label exactly once; geom_text() with constant
  # aesthetics would redraw it for every row of data_long
  annotate(
    "text", x = "SVMWithGradCAMMaps", y = 0.64, label = "SVM + GRAD-CAM++",
    vjust = -4.5, color = "darkorange1", size = 4.5
  ) +
  annotate(
    "text", x = "SVMWithDeepShapMaps", y = 0.59, label = "SVM + Deep SHAP",
    vjust = -4.5, color = "darkorchid4", size = 4.5
  ) +
  scale_y_continuous(
    limits = c(0.56, 0.74), # range of the F1 axis
    breaks = seq(0.56, 0.74, by = 0.02),
    labels = seq(0.56, 0.74, by = 0.02)
  ) +
  labs(title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information", y = "F1")
# Same figure as the full violin plot, but drawn with half violins
# (geom_violinhalf).
ggplot(data_long, aes(x = F1Performance, y = values)) +
  geom_jitter(
    aes(color = F1Performance), alpha = 0.8, size = 5,
    position = position_jitter(width = 0.1) # small sideways scatter so points do not overlap
  ) +
  scale_color_manual(values = c("darkorchid4", "darkorange1")) + # point colour per model
  # NOTE(review): the geom_violinhalf() arguments are left exactly as written;
  # confirm against its documentation which of `size`, `draw_quantiles` and
  # `quantile.size` it actually honours.
  geom_violinhalf(aes(fill = F1Performance), alpha = 0.5, size = 2,
                  draw_quantiles = c("0.25", "0.50", "0.75"), quantile.size = 2) +
  stat_summary(
    fun = median, geom = "point", shape = 21, size = 3,
    fill = "white", color = "black", stroke = 1.5 # white median marker with black outline
  ) +
  scale_fill_manual(values = c("darkorchid4", "darkorange1")) + # violin fill per model
  coord_flip() + # draw horizontally; the F1 scale ends up along the bottom
  theme_minimal() +
  theme(
    axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
    legend.position = "none",
    # element_line() thickness is set with `linewidth`; `size` is deprecated
    axis.line.x.bottom = element_line(color = "black", linewidth = 1.5),
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_line(colour = "grey", linewidth = 1.5, linetype = "dashed")
  ) +
  # annotate() draws each label exactly once; geom_text() with constant
  # aesthetics would redraw it for every row of data_long
  annotate(
    "text", x = "SVMWithGradCAMMaps", y = 0.64, label = "SVM + GRAD-CAM++",
    vjust = -4.5, color = "darkorange1", size = 4.5
  ) +
  annotate(
    "text", x = "SVMWithDeepShapMaps", y = 0.59, label = "SVM + Deep SHAP",
    vjust = -4.5, color = "darkorchid4", size = 4.5
  ) +
  scale_y_continuous(
    limits = c(0.56, 0.74), # range of the F1 axis
    breaks = seq(0.56, 0.74, by = 0.02),
    labels = seq(0.56, 0.74, by = 0.02)
  ) +
  labs(title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information", y = "F1")
# Horizontal violin plot of F1 values per model with an unfilled boxplot
# drawn on top of each violin.
ggplot(data_long, aes(x = F1Performance, y = values)) +
  # outline thickness uses `linewidth`; `size` is deprecated for lines
  geom_violin(aes(fill = F1Performance), alpha = 0.5, linewidth = 1) +
  scale_fill_manual(values = c("#298c8c", "#800074")) + # violin fill per model
  # fill = NA leaves the boxes see-through so the violins stay visible
  geom_boxplot(width = 0.3, color = "black", fill = NA, linewidth = 1) +
  coord_flip() + # draw horizontally; the F1 scale ends up along the bottom
  theme_minimal() +
  theme(
    axis.title.y = element_blank(), axis.text.y = element_blank(), axis.ticks.y = element_blank(),
    legend.position = "none",
    axis.line.x.bottom = element_line(color = "black", linewidth = 1.5), # heavy baseline
    plot.title = element_text(hjust = 0.5, face = "bold"),
    panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank(),
    panel.grid.major.x = element_line(colour = "grey", linewidth = 1.5, linetype = "dashed")
  ) +
  # annotate() draws each label exactly once; geom_text() with constant
  # aesthetics would redraw it for every row of data_long. Label colours
  # match the violin fills.
  annotate(
    "text", x = "SVMWithGradCAMMaps", y = 0.64, label = "SVM + GRAD-CAM++",
    vjust = -5.8, color = "#800074", size = 4.5
  ) +
  annotate(
    "text", x = "SVMWithDeepShapMaps", y = 0.59, label = "SVM + Deep SHAP",
    vjust = -5.2, color = "#298c8c", size = 4.5
  ) +
  scale_y_continuous(
    limits = c(0.56, 0.74), # range of the F1 axis
    breaks = seq(0.56, 0.74, by = 0.02),
    labels = seq(0.56, 0.74, by = 0.02)
  ) +
  labs(title = "Fig. 7. Grad-CAM++ saliency maps capture unique predictive information", y = "F1")
data("USArrests") #loads the data
head(USArrests, n = 50) #pastes the first 50 rows
## Murder Assault UrbanPop Rape
## Alabama 13.2 236 58 21.2
## Alaska 10.0 263 48 44.5
## Arizona 8.1 294 80 31.0
## Arkansas 8.8 190 50 19.5
## California 9.0 276 91 40.6
## Colorado 7.9 204 78 38.7
## Connecticut 3.3 110 77 11.1
## Delaware 5.9 238 72 15.8
## Florida 15.4 335 80 31.9
## Georgia 17.4 211 60 25.8
## Hawaii 5.3 46 83 20.2
## Idaho 2.6 120 54 14.2
## Illinois 10.4 249 83 24.0
## Indiana 7.2 113 65 21.0
## Iowa 2.2 56 57 11.3
## Kansas 6.0 115 66 18.0
## Kentucky 9.7 109 52 16.3
## Louisiana 15.4 249 66 22.2
## Maine 2.1 83 51 7.8
## Maryland 11.3 300 67 27.8
## Massachusetts 4.4 149 85 16.3
## Michigan 12.1 255 74 35.1
## Minnesota 2.7 72 66 14.9
## Mississippi 16.1 259 44 17.1
## Missouri 9.0 178 70 28.2
## Montana 6.0 109 53 16.4
## Nebraska 4.3 102 62 16.5
## Nevada 12.2 252 81 46.0
## New Hampshire 2.1 57 56 9.5
## New Jersey 7.4 159 89 18.8
## New Mexico 11.4 285 70 32.1
## New York 11.1 254 86 26.1
## North Carolina 13.0 337 45 16.1
## North Dakota 0.8 45 44 7.3
## Ohio 7.3 120 75 21.4
## Oklahoma 6.6 151 68 20.0
## Oregon 4.9 159 67 29.3
## Pennsylvania 6.3 106 72 14.9
## Rhode Island 3.4 174 87 8.3
## South Carolina 14.4 279 48 22.5
## South Dakota 3.8 86 45 12.8
## Tennessee 13.2 188 59 26.9
## Texas 12.7 201 80 25.5
## Utah 3.2 120 80 22.9
## Vermont 2.2 48 32 11.2
## Virginia 8.5 156 63 20.7
## Washington 4.0 145 73 26.2
## West Virginia 5.7 81 39 9.3
## Wisconsin 2.6 53 66 10.8
## Wyoming 6.8 161 60 15.6
# Copy the state names out of the row names into a regular column.
USArrests$State <- row.names(USArrests)
# Per-state mean of the Murder, Assault and Rape columns, rounded to two
# decimal places.
USArrests$AverageCrimeRate <- round(
  rowMeans(USArrests[, c("Murder", "Assault", "Rape")], na.rm = TRUE),
  digits = 2
)
What are the variables available?
*The type of felony they were arrested for and the percent urban population for each state
How is each variable defined or calculated?
*Murder, Assault, and Rape are all arrests per 100,000 and Urban population is a percentage
Is each one numerical or categorical?
*they are all numerical
# Scatter plot of assault rate against murder rate with a red straight-line fit.
ggplot(USArrests, aes(x = Murder, y = Assault)) +
  theme_classic() +
  labs(
    title = "Scatter Plot of Assault vs. Murder Rates",
    x = "Murder Rate",
    y = "Assault Rate"
  ) +
  geom_point(color = "black") + # one black point per state
  geom_smooth(method = lm, se = FALSE, color = "red") # linear fit, no error band
# Average crime rate for every state, drawn as points joined by one line.
# State is a discrete axis, so group = 1 is needed to join all the points
# into a single line.
ggplot(USArrests, aes(x = State, y = AverageCrimeRate, group = 1)) +
  # line thickness is set with `linewidth`; `size` is deprecated for lines
  geom_line(color = "#298c9c", linewidth = 1) +
  geom_point(color = "#800074", size = 2.5) +
  theme_classic() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.3), # vertical state names
    panel.grid.major.x = element_line(), # restore major grid lines that theme_classic() removes
    panel.grid.major.y = element_line()
  ) +
  labs(
    title = "Line Plot of Average Crime Rate by State",
    x = "State", y = "Average Crime Rate"
  )
# Fuel efficiency against horsepower, coloured by number of cylinders.
# Horsepower goes on x and mpg on y so the mapped variables match the axis
# labels and the title.
ggplot(mtcars, aes(x = hp, y = mpg)) +
  # factor(cyl) gives one distinct colour per cylinder count; the raw numeric
  # column would produce a continuous colour gradient
  geom_point(aes(color = factor(cyl)), size = 2.4, shape = 8) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  labs(
    title = "Effect of Horsepower on Fuel Efficiency",
    subtitle = "Categorized by Number of Cylinders",
    x = "Horsepower", y = "Fuel Efficiency (MPG)",
    color = "Cylinders" # legend title (otherwise it would read "factor(cyl)")
  )