Homework 5
Setup Code
#==============================================================================#
# Setup Options
#==============================================================================#
# remove all objects if restarting script
rm(list=ls())
# options
options(
tibble.width = Inf, # print all columns
scipen = 999 # remove scientific notation
)
#==============================================================================#
# Install Packages
#==============================================================================#
# In R, we first have to download the packages we want from the online
# repository called CRAN.
# Once installed, you have to load it in each session with the library()
# function.
# download package
#install.packages("DT")
#install.packages("lubridate")
#install.packages("tidyverse")
#==============================================================================#
# Packages
#==============================================================================#
# Here we must load the packages for the current environment to make them
# accessible.
# load libraries
library(DT)
library(plotly)Loading required package: ggplot2
Warning: package 'ggplot2' was built under R version 4.1.3
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
library(cowplot)Warning: package 'cowplot' was built under R version 4.1.3
library(lubridate)Warning: package 'lubridate' was built under R version 4.1.3
Attaching package: 'lubridate'
The following object is masked from 'package:cowplot':
stamp
The following objects are masked from 'package:base':
date, intersect, setdiff, union
library(tidyverse)Warning: package 'tidyverse' was built under R version 4.1.3
Warning: package 'tibble' was built under R version 4.1.3
Warning: package 'tidyr' was built under R version 4.1.3
Warning: package 'readr' was built under R version 4.1.3
Warning: package 'purrr' was built under R version 4.1.3
Warning: package 'dplyr' was built under R version 4.1.3
Warning: package 'stringr' was built under R version 4.1.3
Warning: package 'forcats' was built under R version 4.1.3
-- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
v dplyr 1.1.2 v stringr 1.5.0
v forcats 1.0.0 v tibble 3.2.1
v purrr 1.0.1 v tidyr 1.3.0
v readr 2.1.4
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks plotly::filter(), stats::filter()
x dplyr::lag() masks stats::lag()
x lubridate::stamp() masks cowplot::stamp()
i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#==============================================================================#
# Set Paths
#==============================================================================#
# set all paths
path_main <- "C:\\Users\\anune\\Downloads\\ANS500AB\\AndreaNunez"
path_data <- str_c(path_main, "Data/", sep="")
path_plots <- str_c(path_main, "Plots/", sep="")
path_scripts <- str_c(path_main, "Scripts/", sep="")
# NOTE: str_c() is from the stringr package, which is a part of 'tidyverse'
# this is equivalent to paste() in base R, but we'll try to use tidyverse
# functions when possible in this class.
# set working directory
#setwd(path_main)
# NOTE: We cannot set the working directory in quarto with this method
# as we do in an R script. Use the root.dir option in YAML.
#==============================================================================#
# Set Inputs
#==============================================================================#
# set file names here
data_file_milk <- "Sheep_Milk_Wide.csv"
data_file_wt <- "Sheep_Weight_Wide.csv"
#==============================================================================#
# Check session info again
#==============================================================================#
# check sessionInfo
sessionInfo()R version 4.1.2 (2021-11-01)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 22621)
Matrix products: default
locale:
[1] LC_COLLATE=English_United States.1252
[2] LC_CTYPE=English_United States.1252
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C
[5] LC_TIME=English_United States.1252
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] forcats_1.0.0 stringr_1.5.0 dplyr_1.1.2 purrr_1.0.1
[5] readr_2.1.4 tidyr_1.3.0 tibble_3.2.1 tidyverse_2.0.0
[9] lubridate_1.9.2 cowplot_1.1.1 plotly_4.10.2 ggplot2_3.4.2
[13] DT_0.20
loaded via a namespace (and not attached):
[1] compiler_4.1.2 pillar_1.9.0 tools_4.1.2 digest_0.6.29
[5] timechange_0.2.0 viridisLite_0.4.0 jsonlite_1.8.4 evaluate_0.14
[9] lifecycle_1.0.3 gtable_0.3.0 pkgconfig_2.0.3 rlang_1.1.0
[13] cli_3.6.1 rstudioapi_0.15.0 yaml_2.2.1 xfun_0.29
[17] fastmap_1.1.0 httr_1.4.6 withr_2.5.0 knitr_1.37
[21] hms_1.1.3 generics_0.1.1 vctrs_0.6.1 htmlwidgets_1.5.4
[25] grid_4.1.2 tidyselect_1.2.0 glue_1.6.2 data.table_1.14.2
[29] R6_2.5.1 fansi_0.5.0 rmarkdown_2.11 tzdb_0.2.0
[33] magrittr_2.0.3 scales_1.2.1 htmltools_0.5.2 colorspace_2.0-2
[37] utf8_1.2.2 stringi_1.7.6 lazyeval_0.2.2 munsell_0.5.0
Question 1
Use read_delim() (must use) to read in both the milk and weight data from the sheep dataset we worked on in week 1. Please read them in how they are and do not alter them prior to reading in.
library(tidyverse)
library(car)Warning: package 'car' was built under R version 4.1.3
Loading required package: carData
Attaching package: 'car'
The following object is masked from 'package:dplyr':
recode
The following object is masked from 'package:purrr':
some
library(readr)
library(DT)
library(readr)
data_weigth <- read_delim(
file = "C:\\Users\\anune\\Downloads\\ANS500AB\\AndreaNunez\\Sheep_Weight_Wide.csv",
delim = ",",
skip = 1,
col_names = c("EweType","Diet","Wk0","Wk1","Wk2", "Wk3","Wk4","Wk5","Wk6","Wk7"),
col_types = "ffnnnnnnnn", # Set Types
na = c(""))
datatable(data_weigth)data_milk <- read_delim(
file = "C:\\Users\\anune\\Downloads\\ANS500AB\\AndreaNunez\\Sheep_Milk_Wide.csv",
delim = ",",
skip = 1,
col_names = c("EweType","Diet","Wk0","Wk1","Wk2", "Wk3","Wk4","Wk5","Wk6","Wk7"),
col_types = "ffnnnnnnnn", # Set Types
na = c(""))
datatable(data_milk)Question 2
Part a
We do not have a unique ID in this dataset, but later we will need to join the datasets.
Create a unique identifier for each row in the wide format datasets. I suggest issing an operator such as 1:nrow(data) to get a unique sequence and then convert to a factor or character variable.
Also use relocate() to move this new ID column to the 1st column.
data_weigth <- data_weigth %>%
mutate(ID = as.character(1:nrow(data_weigth)))
data_weigth <- data_weigth %>%
relocate(ID, .before = 1)
#now with the milk data set
data_milk <- data_milk %>%
mutate(ID = as.character(1:nrow(data_milk)))
data_milk <- data_weigth %>%
relocate(ID, .before = 1)Part b
Please pivot the milk dataset to the current ‘wide’ format to the ‘long’ format. I suggest saving this dataset as another name for later and then printing.
data_long_milk <- data_milk %>%
pivot_longer(
cols = Wk0:Wk7,
names_to = "Week",
values_to = "MilkProduction"
)
print(data_long_milk)# A tibble: 240 x 5
ID EweType Diet Week MilkProduction
<chr> <fct> <fct> <chr> <dbl>
1 1 1 1 Wk0 67.5
2 1 1 1 Wk1 68
3 1 1 1 Wk2 68
4 1 1 1 Wk3 66.5
5 1 1 1 Wk4 69.5
6 1 1 1 Wk5 69.5
7 1 1 1 Wk6 72
8 1 1 1 Wk7 77.5
9 2 1 1 Wk0 86
10 2 1 1 Wk1 88
# i 230 more rows
Part c
Please pivot the weight dataset to the current ‘wide’ format to the ‘long’ format. I suggest saving this dataset as another name for later and then printing.
data_long_weigth <- data_weigth %>%
pivot_longer(
cols = Wk0:Wk7,
names_to = "Week",
values_to = "Weight"
)
print(data_long_weigth)# A tibble: 240 x 5
ID EweType Diet Week Weight
<chr> <fct> <fct> <chr> <dbl>
1 1 1 1 Wk0 67.5
2 1 1 1 Wk1 68
3 1 1 1 Wk2 68
4 1 1 1 Wk3 66.5
5 1 1 1 Wk4 69.5
6 1 1 1 Wk5 69.5
7 1 1 1 Wk6 72
8 1 1 1 Wk7 77.5
9 2 1 1 Wk0 86
10 2 1 1 Wk1 88
# i 230 more rows
Question 3
Part a
Take the long milk data produced in question 2 and make histogram of Milk for week 0. Change the bar color. Save this plot for later.
Print with ggplotly() from the plotly package.
data_long_milk0 <- data_long_milk %>%
filter(Week == "Wk0") %>%
ggplot(aes(x = MilkProduction)) +
geom_histogram(fill = "#4D6291", color = "white")
ggplotly(data_long_milk0)`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Part b
Take the long milk data produced in question 2 and make histogram of Milk for week 7. Change the bar color to another unique color. Save this plot for later.
Print with ggplotly() from the plotly package.
data_long_milk7 <- data_long_milk %>%
filter(Week == "Wk7") %>%
ggplot(aes(x = MilkProduction)) +
geom_histogram(fill = "#BD6C99", color = "#F3A469")
ggplotly(data_long_milk7)`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Part c
Take the long weight data produced in question 2 and make histogram of Milk for week 0. Change the bar color to another unique color. Save this plot for later.
Print with ggplotly() from the plotly package.
data_long_weigth0 <- data_long_weigth %>%
filter(Week == "Wk0") %>%
ggplot(aes(x=Weight)) +
geom_histogram(fill = "#CCEDB1", color = "black")
ggplotly(data_long_weigth0)`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Part d
y Take the long weight data produced in question 2 and make histogram of Milk for week 7. Change the bar color to another unique color. Save this plot for later.
Print with ggplotly() from the plotly package.
data_long_weigth7 <- data_long_weigth %>%
filter(Week == "Wk7") %>%
ggplot(aes(x=Weight)) +
geom_histogram(fill = "#EEC9E5", color = "black")
ggplotly(data_long_weigth7)`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Part e
Use cowplot to plot all 4 into one plot. All 4 should have different bar colors. Use labels “A”, “B”, “C”, “D” for parts A-D respectively in the plot. A and B are from milk and C and D are from weight.
Don’t use ggplotly()
library(ggplot2)
library(cowplot)
pA <- data_long_milk0
pB <- data_long_milk7
pC <- data_long_weigth0
pD <- data_long_weigth7
plot_grid(pA, pB, pC, pD,
nrow = 2,
labels = c('A', 'B', 'C','D'))`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Question 4
Part a
Please join the 2 long datasets you created in question 2. Use what columns you need.
data_long_milk1 <- data_long_milk %>%
select(ID, EweType, Diet, Week, MilkProduction)
datatable(data_long_milk1)data_long_wg1 <- data_long_weigth %>%
select(ID, EweType, Diet, Week, Weight)
datatable(data_long_wg1)data_joined <- inner_join(data_long_milk1, data_long_wg1, by = c("ID", "EweType", "Diet", "Week"))
data_joined# A tibble: 240 x 6
ID EweType Diet Week MilkProduction Weight
<chr> <fct> <fct> <chr> <dbl> <dbl>
1 1 1 1 Wk0 67.5 67.5
2 1 1 1 Wk1 68 68
3 1 1 1 Wk2 68 68
4 1 1 1 Wk3 66.5 66.5
5 1 1 1 Wk4 69.5 69.5
6 1 1 1 Wk5 69.5 69.5
7 1 1 1 Wk6 72 72
8 1 1 1 Wk7 77.5 77.5
9 2 1 1 Wk0 86 86
10 2 1 1 Wk1 88 88
# i 230 more rows
Question 5
Part a
Use the dataset from Question 4 (should have milk and weight data in long format).
Use a for loop to loop over all 3 diets.
- Print a message with the current diet number
- Print a scatterplot of Milk production on Weight for each diet
- Add a linear line to the scatterplot
- Add at minimum a title and subtitle, the subtitle should change with each diet (1,2,3)
unique_diets <- unique(data_joined$Diet)
scatterplots <- list()
for (diet in unique_diets) {
diet_data <- subset(data.joined, Diet == diet)
plot <- ggplot(diet_data, aes(x = Weight, y = MilkProduction)) +
geom_point() +
geom_smooth(method = "lm", se = FALSE) +
labs(
title = paste("Scatterplot of Milk Production on Weight (Diet", diet, ")"),
subtitle = paste("Diet Number:", diet)
)
print(plot)
scatterplots[[diet]] <- plot
}Error in subset(data.joined, Diet == diet): object 'data.joined' not found