Homework 4
Setup Code
#==============================================================================#
# Setup Options
#==============================================================================#
# Clear every object from the workspace so a re-run starts without leftovers.
# NOTE(review): this removes objects only; attached packages and options stay.
rm(list = ls())
# Printing options used by the rest of the document.
options(tibble.width = Inf) # print all tibble columns
options(scipen = 999) # prefer fixed notation over scientific notation
#==============================================================================#
# Install Packages
#==============================================================================#
# In R, we first have to download the packages we want from the online
# repository called CRAN.
# Once installed, you have to load it in each session with the library()
# function.
# download package
#install.packages("DT")
#install.packages("lubridate")
#install.packages("tidyverse")
#==============================================================================#
# Packages
#==============================================================================#
# Here we must load the packages for the current environment to make them
# accessible.
# load libraries
library(DT)
library(lubridate)
Warning: package 'lubridate' was built under R version 4.1.3
Attaching package: 'lubridate'
The following objects are masked from 'package:base':
date, intersect, setdiff, union
library(tidyverse)
Warning: package 'tidyverse' was built under R version 4.1.3
Warning: package 'ggplot2' was built under R version 4.1.3
Warning: package 'tibble' was built under R version 4.1.3
Warning: package 'tidyr' was built under R version 4.1.3
Warning: package 'readr' was built under R version 4.1.3
Warning: package 'purrr' was built under R version 4.1.3
Warning: package 'dplyr' was built under R version 4.1.3
Warning: package 'stringr' was built under R version 4.1.3
Warning: package 'forcats' was built under R version 4.1.3
-- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
v dplyr 1.1.2 v readr 2.1.4
v forcats 1.0.0 v stringr 1.5.0
v ggplot2 3.4.2 v tibble 3.2.1
v purrr 1.0.1 v tidyr 1.3.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#==============================================================================#
# Set Paths
#==============================================================================#
# set all paths
# Root folder of the project. This has to be a directory (with a trailing
# slash), not the CSV file itself, because the sub-folder paths below are
# built by appending to it.
path_main <- "C:/Users/anune/Downloads/ANS500AB/AndreaNunez/"
# Sub-folders for data, plots, and scripts.
path_data <- str_c(path_main, "Data/")
path_plots <- str_c(path_main, "Plots/")
path_scripts <- str_c(path_main, "Scripts/")
# NOTE: str_c() is from the stringr package, which is a part of 'tidyverse'
# this is equivalent to paste() in base R, but we'll try to use tidyverse
# functions when possible in this class.
# set working directory
#setwd(path_main)
# NOTE: We cannot set the working directory in quarto with this method
# as we do in an R script. Use the root.dir option in YAML.
#==============================================================================#
# Set Inputs
#==============================================================================#
# set file names here
# Name of the input CSV (file name only, without a directory).
data_file <- "swine_data_MSU.csv"
#==============================================================================#
# Check session info again
#==============================================================================#
# check sessionInfo
sessionInfo()
R version 4.1.2 (2021-11-01)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 22621)
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252
[2] LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] forcats_1.0.0 stringr_1.5.0 dplyr_1.1.2 purrr_1.0.1
[5] readr_2.1.4 tidyr_1.3.0 tibble_3.2.1 ggplot2_3.4.2
[9] tidyverse_2.0.0 lubridate_1.9.2 DT_0.20
loaded via a namespace (and not attached):
[1] compiler_4.1.2 pillar_1.9.0 tools_4.1.2 digest_0.6.29
[5] jsonlite_1.8.4 evaluate_0.14 lifecycle_1.0.3 gtable_0.3.0
[9] timechange_0.2.0 pkgconfig_2.0.3 rlang_1.1.0 cli_3.6.1
[13] rstudioapi_0.15.0 yaml_2.2.1 xfun_0.29 fastmap_1.1.0
[17] withr_2.5.0 knitr_1.37 hms_1.1.3 generics_0.1.1
[21] vctrs_0.6.1 htmlwidgets_1.5.4 grid_4.1.2 tidyselect_1.2.0
[25] glue_1.6.2 R6_2.5.1 fansi_0.5.0 rmarkdown_2.11
[29] tzdb_0.2.0 magrittr_2.0.3 scales_1.2.1 htmltools_0.5.2
[33] colorspace_2.0-2 utf8_1.2.2 stringi_1.7.6 munsell_0.5.0
Question 1
Read in the MSU swine dataset. You can find it in the Data/ folder.
library(tidyverse)
library(car)
Warning: package 'car' was built under R version 4.1.3
Loading required package: carData
Attaching package: 'car'
The following object is masked from 'package:dplyr':
recode
The following object is masked from 'package:purrr':
some
library(readr)
library(DT)
library(readr)
# Import the MSU swine data. The file's own header row is skipped and replaced
# by the column names given here.
# NOTE(review): types are declared for the first ten columns only; `litter`
# and `slgdt_cd` are left to readr's type guessing. Run problems(data_MSU) to
# inspect the parsing issues reported below.
data_MSU <- read_delim(
  file = "C:\\Users\\anune\\Downloads\\ANS500AB\\AndreaNunez\\swine_data_MSU.csv",
  delim = ",",
  skip = 1,
  col_names = c(
    "ID", "wt_birth", "car_wt", "num_ribs", "car_bf10", "car_lma",
    "sex", "perc_duroc", "sire", "dam", "litter", "slgdt_cd"
  ),
  col_types = cols(
    ID = col_character(),
    wt_birth = col_double(),
    car_wt = col_double(),
    num_ribs = col_integer(),
    car_bf10 = col_number(),
    car_lma = col_double(),
    sex = col_factor(),
    perc_duroc = col_integer(),
    sire = col_integer(),
    dam = col_character()
  )
)
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
datatable(data_MSU)
Question 2
Part a
Start with a simple bar chart counting the number of offspring per sire. Change all the bar colors to your favorite color.
# Bar chart of the number of offspring per sire, every bar in one color.
data_MSU %>%
  # Sire is an ID, not a quantity: make it categorical so each sire gets its
  # own evenly spaced bar on a discrete x axis.
  mutate(sire = factor(sire)) %>%
  count(sire) %>% # one row (one bar) per sire
  ggplot(aes(x = sire, y = n)) +
  geom_col(fill = "#93ABE1") # constant color, so it is set outside aes()
Part b
Copy part a and change the fill color by Sex.
# Offspring counts per sire, with the bars filled by sex.
data_MSU %>%
  # Treat sire IDs as categories so the x axis is discrete.
  mutate(sire = factor(sire)) %>%
  count(sire, sex) %>% # one row per sire-by-sex combination
  ggplot(aes(x = sire, y = n, fill = sex)) +
  geom_col() +
  scale_fill_manual(values = c("M" = "red", "F" = "blue"))
Part c
Copy part b and change the fill color to your two favorite colors (by sex).
# Offspring counts per sire, filled by sex with two custom colors.
data_MSU %>%
  # Treat sire IDs as categories so the x axis is discrete.
  mutate(sire = factor(sire)) %>%
  count(sire, sex) %>%
  ggplot(aes(x = sire, y = n, fill = sex)) +
  geom_col() +
  scale_fill_manual(values = c("M" = "#93ABE1", "F" = "#FFA07A"))
Part d
Copy part c and now facet by Sex (along with the fill by Sex).
# Offspring counts per sire, filled by sex and split into one panel per sex.
data_MSU %>%
  # Treat sire IDs as categories so the x axis is discrete.
  mutate(sire = factor(sire)) %>%
  count(sire, sex) %>%
  ggplot(aes(x = sire, y = n, fill = sex)) +
  geom_col() +
  scale_fill_manual(values = c("M" = "#93ABE1", "F" = "#FFA07A")) +
  facet_wrap(~ sex)
Part e
Copy part d and now add all 5 labels to the plot.
# Faceted offspring counts per sire with all five plot labels added.
data_MSU %>%
  # Treat sire IDs as categories so the x axis is discrete.
  mutate(sire = factor(sire)) %>%
  count(sire, sex) %>%
  ggplot(aes(x = sire, y = n, fill = sex)) +
  geom_col() +
  scale_fill_manual(values = c("M" = "#537a59", "F" = "#FFA07A")) +
  facet_wrap(~ sex) +
  labs(
    title = "Bar chart of offspring per sire",
    subtitle = "MSU swine data set",
    x = "Sire",
    y = "Count (n)",
    caption = "Count the number of offspring per sire utilizing the MSU swine data set"
  )
Part f
Copy part e and change the color of the title to your favorite color. The theme option you need is plot.title.
# Same labelled, faceted bar chart, now with a restyled title.
data_MSU %>%
  # Treat sire IDs as categories so the x axis is discrete.
  mutate(sire = factor(sire)) %>%
  count(sire, sex) %>%
  ggplot(aes(x = sire, y = n, fill = sex)) +
  geom_col() +
  scale_fill_manual(values = c("M" = "#537a59", "F" = "#FFA07A")) +
  facet_wrap(~ sex) +
  labs(
    title = "Bar chart of offspring per sire",
    subtitle = "MSU swine data set",
    x = "Sire",
    y = "Count (n)",
    caption = "Count the number of offspring per sire utilizing the MSU swine data set"
  ) +
  # plot.title controls the appearance of the main title only.
  theme(plot.title = element_text(size = 12, color = "darkblue"))
Question 3
Part a
Please create a simple scatterplot of carcass LMA on carcass BF.
# Scatterplot of carcass LMA (y) against carcass backfat (x).
data_MSU %>%
  # Drop animals missing either trait. tidyr::fill() would instead copy the
  # previous row's value into every NA, plotting measurements that were never
  # taken.
  drop_na(car_bf10, car_lma) %>%
  ggplot(aes(x = car_bf10, y = car_lma)) +
  geom_point()
Part b
Copy part a and now color the points by sex.
# Scatterplot of carcass LMA against carcass backfat, points colored by sex.
data_MSU %>%
  # Drop animals missing either trait. tidyr::fill() would instead copy the
  # previous row's value into every NA, plotting measurements that were never
  # taken.
  drop_na(car_bf10, car_lma) %>%
  ggplot(aes(x = car_bf10, y = car_lma, color = sex)) +
  geom_point() +
  scale_color_manual(values = c("M" = "#537a59", "F" = "#FFA07A"))
Part c
Copy part b and add a smooth line (will be 2 separate lines).
# Points colored by sex plus one smoothed trend line per sex.
data_MSU %>%
  ggplot(aes(x = car_bf10, y = car_lma, color = sex)) +
  geom_point() +
  geom_smooth(
    aes(group = sex), # one line per sex
    formula = y ~ x,
    se = FALSE, # no confidence band
    color = "purple", # constant line color, overrides the sex color mapping
    linewidth = 1, # `size` for lines is deprecated since ggplot2 3.4.0
    linetype = "solid"
  ) +
  scale_color_manual(values = c("M" = "#537a59", "F" = "#FFA07A"))
`geom_smooth()` using method = 'loess'
Warning: Removed 13 rows containing non-finite values (`stat_smooth()`).
Warning: Removed 13 rows containing missing values (`geom_point()`).
Part d
Copy part c and change the lines to linear (straight) lines.
# Points colored by sex plus one straight (linear-model) trend line per sex.
data_MSU %>%
  ggplot(aes(x = car_bf10, y = car_lma, color = sex)) +
  geom_point() +
  geom_smooth(
    aes(group = sex), # one line per sex
    method = "lm", # linear fit instead of the default smoother
    formula = y ~ x,
    se = FALSE, # no confidence band
    color = "purple", # constant line color, overrides the sex color mapping
    linewidth = 1, # `size` for lines is deprecated since ggplot2 3.4.0
    linetype = "solid"
  ) +
  scale_color_manual(values = c("M" = "#537a59", "F" = "#FFA07A"))
Warning: Removed 13 rows containing non-finite values (`stat_smooth()`).
Warning: Removed 13 rows containing missing values (`geom_point()`).
Part e
Copy part d and change the point shapes by Sire.
# Shape needs a discrete variable, so sire is converted to a factor. This
# changes data_MSU itself and therefore also applies to the code further down.
data_MSU$sire <- as.factor(data_MSU$sire)
# Points colored by sex and shaped by sire, plus one linear trend line per sex.
data_MSU %>%
  ggplot(aes(x = car_bf10, y = car_lma, color = sex)) +
  # Shape is mapped on the points only: the trend lines are grouped by sex, so
  # a plot-wide shape mapping cannot be carried through stat_smooth().
  geom_point(aes(shape = sire)) +
  stat_smooth(
    aes(group = sex), # one line per sex
    method = "lm",
    formula = y ~ x,
    se = FALSE, # no confidence band
    color = "purple", # constant line color, overrides the sex color mapping
    linewidth = 2, # `size` for lines is deprecated since ggplot2 3.4.0
    linetype = "solid"
  ) +
  scale_color_manual(values = c("M" = "#537a59", "F" = "#FFA07A"))
Warning: Removed 13 rows containing non-finite values (`stat_smooth()`).
Warning: Removed 13 rows containing missing values (`geom_point()`).
Question 4
Part a
Create a boxplot of birth weight on sire.
# Boxplot of birth weight for each sire.
ggplot(data = data_MSU, mapping = aes(x = sire, y = wt_birth)) +
  geom_boxplot()
Part b
Copy part a and change the fill and outline colors of the boxplots.
# Boxplot of birth weight for each sire with custom fill and outline colors.
ggplot(data = data_MSU, mapping = aes(x = sire, y = wt_birth)) +
  geom_boxplot(
    color = "#2E86C1", # outline
    fill = "#6B4226" # box interior
  )
Part c
Copy part b and facet by sex.
# Boxplot of birth weight for each sire, one panel per sex.
ggplot(data = data_MSU, mapping = aes(x = sire, y = wt_birth)) +
  geom_boxplot(
    color = "#2E86C1", # outline
    fill = "#6B4226" # box interior
  ) +
  facet_wrap(vars(sex))
Question 5
Part a
Start by making a table of the average backfat by sire as well as the SD.
# Mean and standard deviation of backfat for each sire; missing values are
# ignored in both statistics.
summarise(
  group_by(data_MSU, sire),
  AvgBF = mean(car_bf10, na.rm = TRUE),
  SD = sd(car_bf10, na.rm = TRUE)
)
# A tibble: 6 x 3
sire AvgBF SD
<fct> <dbl> <dbl>
1 311 27.1 7.66
2 326 24.7 8.00
3 328 23.8 7.20
4 330 24.9 6.34
5 331 25.9 7.70
6 335 20.8 5.74
Part b
Now plot the average with SD in a bar chart.
# Bar chart of mean backfat per sire with error bars of plus/minus one SD.
summarise(
  group_by(data_MSU, sire),
  AvgBF = mean(car_bf10, na.rm = TRUE),
  SD = sd(car_bf10, na.rm = TRUE)
) %>%
  ggplot(mapping = aes(x = sire, y = AvgBF)) +
  # geom_col() draws bar heights straight from the y values.
  geom_col(width = 0.6, color = "black", fill = "#537a59") +
  geom_errorbar(
    mapping = aes(ymin = AvgBF - SD, ymax = AvgBF + SD),
    color = "blue",
    width = 0.2
  )