Homework 3

Subtitle Here

Author

Your Name

Published

September 20, 2023

Setup Code

#==============================================================================#
# Setup Options
#==============================================================================#

# NOTE: rm(list = ls()) was removed on purpose. It only deletes objects from
# the global environment; attached packages and changed options survive it,
# so it does not give a clean session. Restart R (or re-render the document)
# to start from a clean state instead.

# global options used for the rest of the document
options(
  tibble.width = Inf,  # print all columns of a tibble
  scipen = 999         # strongly prefer fixed over scientific notation
)

#==============================================================================#
# Install Packages
#==============================================================================#

# In R, we first have to download the packages we want from the online 
# repository called CRAN. 

# Once installed, you have to load it in each session with the library() 
# function. 

# download package
#install.packages("DT")
#install.packages("lubridate")
#install.packages("tidyverse")

#==============================================================================#
# Packages
#==============================================================================#

# Here we must load the packages for the current environment to make them
# accessible. 

# load libraries
library(DT)
library(lubridate)

Warning: package 'lubridate' was built under R version 4.1.3


Attaching package: 'lubridate'

The following objects are masked from 'package:base':

    date, intersect, setdiff, union

library(tidyverse)

Warning: package 'tidyverse' was built under R version 4.1.3

Warning: package 'ggplot2' was built under R version 4.1.3

Warning: package 'tibble' was built under R version 4.1.3

Warning: package 'tidyr' was built under R version 4.1.3

Warning: package 'readr' was built under R version 4.1.3

Warning: package 'purrr' was built under R version 4.1.3

Warning: package 'dplyr' was built under R version 4.1.3

Warning: package 'stringr' was built under R version 4.1.3

Warning: package 'forcats' was built under R version 4.1.3

-- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
v dplyr   1.1.2     v readr   2.1.4
v forcats 1.0.0     v stringr 1.5.0
v ggplot2 3.4.2     v tibble  3.2.1
v purrr   1.0.1     v tidyr   1.3.0

-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

#==============================================================================#
# Set Paths
#==============================================================================#

# set all paths
# path_main has no trailing separator, so each sub-folder is joined with an
# explicit "/" (otherwise the result would be "...ANS500ABData/").
path_main    <- "C:\\Users\\anune\\Downloads\\ANS500AB"
path_data    <- str_c(path_main, "/Data/")
path_plots   <- str_c(path_main, "/Plots/")
path_scripts <- str_c(path_main, "/Scripts/")

# NOTE: str_c() is from the stringr package, which is a part of 'tidyverse'.
# It is equivalent to paste0() in base R (no separator by default), but we'll
# try to use tidyverse functions when possible in this class.

# set working directory
#setwd(path_main)

# NOTE: We cannot set the working directory in quarto with this method
# as we do in an R script. Use the root.dir option in YAML.

#==============================================================================#
# Set Inputs
#==============================================================================#



#==============================================================================#
# Session info
#==============================================================================#

# print the R version, platform, locale and the versions of all attached and
# loaded packages, so the rendered document records the environment it ran in
print(sessionInfo())

R version 4.1.2 (2021-11-01)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 22621)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252 
[2] LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] forcats_1.0.0   stringr_1.5.0   dplyr_1.1.2     purrr_1.0.1    
 [5] readr_2.1.4     tidyr_1.3.0     tibble_3.2.1    ggplot2_3.4.2  
 [9] tidyverse_2.0.0 lubridate_1.9.2 DT_0.20        

loaded via a namespace (and not attached):
 [1] compiler_4.1.2    pillar_1.9.0      tools_4.1.2       digest_0.6.29    
 [5] jsonlite_1.8.4    evaluate_0.14     lifecycle_1.0.3   gtable_0.3.0     
 [9] timechange_0.2.0  pkgconfig_2.0.3   rlang_1.1.0       cli_3.6.1        
[13] rstudioapi_0.15.0 yaml_2.2.1        xfun_0.29         fastmap_1.1.0    
[17] withr_2.5.0       knitr_1.37        hms_1.1.3         generics_0.1.1   
[21] vctrs_0.6.1       htmlwidgets_1.5.4 grid_4.1.2        tidyselect_1.2.0 
[25] glue_1.6.2        R6_2.5.1          fansi_0.5.0       rmarkdown_2.11   
[29] tzdb_0.2.0        magrittr_2.0.3    scales_1.2.1      htmltools_0.5.2  
[33] colorspace_2.0-2  utf8_1.2.2        stringi_1.7.6     munsell_0.5.0

Question 1

Part a

library(tidyverse)
library(car)

Warning: package 'car' was built under R version 4.1.3

Loading required package: carData


Attaching package: 'car'

The following object is masked from 'package:dplyr':

    recode

The following object is masked from 'package:purrr':

    some

library(readr)
library(DT)

# Part a: CowManager export for 2022-07-26.
# The path is defined once here and reused in read_delim() below.
file_path <- "C:\\Users\\anune\\Downloads\\ANS500AB\\AndreaNunez\\CowManager_Iowa_State_University_20220726.csv"

# When rendered, readr reported parsing issues for this file;
# inspect them with problems(data_20220726).
data_20220726 <- read_delim(
  file      = file_path,    # path to file
  delim     = "|",          # | separated (fields/columns)
  skip      = 1,            # skip the first line (we supply the col names)
  col_names = c("id", "Cow", "time", "count", "notactive", "ruminating",
                "eating", "active", "high_active", "temp"),
  col_types = "nncnnnnnnn", # n = number, c = character (the time column)
  na        = "missing",    # the literal text "missing" is read as NA
  locale    = locale(encoding = "UTF-8")  # without this only one column was printed
)

Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
  dat <- vroom(...)
  problems(dat)

# Parse the character timestamps as UTC date-times, then shift them back by
# six hours.
# NOTE(review): a fixed 6-hour offset ignores daylight saving time -- confirm
# this is the intended local-time conversion.
data_20220726 <- mutate(
  data_20220726,
  time = as.POSIXct(time, format = "%Y%m%d %H:%M:%OS", tz = "UTC") - hours(6)
)

# show the first rows to check the result
head(data_20220726)

# A tibble: 6 x 10
     id   Cow time                count notactive ruminating eating active
  <dbl> <dbl> <dttm>              <dbl>     <dbl>      <dbl>  <dbl>  <dbl>
1 12608 12608 NA                     60         1          0     16      7
2 12609 12609 NA                     59        18         10      8     12
3 12624 12624 2022-07-25 15:00:00    60         1          4     28      3
4 12537 12537 2022-07-25 15:00:00    60         0          1     38      2
5 12181 12181 2022-07-25 15:00:00    61         3          8     40      4
6 12567 12567 2022-07-25 15:00:00    60         5          2     18      7
  high_active  temp
        <dbl> <dbl>
1          36  NA  
2          11  NA  
3          24  32  
4          19  31.1
5           6  31.8
6          28  31.1

# interactive HTML table of the 2022-07-26 data (DT package)
DT::datatable(data_20220726)

Part b

library(readr)

# Part b: CowManager export for 2022-07-25.
# The path is defined once here and reused in read_delim() below.
file_path <- "C:\\Users\\anune\\Downloads\\ANS500AB\\AndreaNunez\\CowManager_Iowa_State_University_20220725.csv"

data_20220725 <- read_delim(
  file      = file_path,    # path to file
  delim     = "|",          # | separated (fields/columns)
  # no `skip` for this file: every line is read as data
  col_names = c("id", "Cow", "time", "count", "notactive", "ruminating",
                "eating", "active", "high_active", "temp"),
  col_types = "nncnnnnnnn", # n = number, c = character (the time column)
  na        = "missing",    # the literal text "missing" is read as NA
  locale    = locale(encoding = "UTF-8")  # without this only one column was printed
)

# Parse the character timestamps as UTC date-times and shift them back 6 hours.
# NOTE(review): a fixed 6-hour offset ignores daylight saving time -- confirm.
data_20220725 <- data_20220725 %>%
  mutate(time = as.POSIXct(time, tz = "UTC", format = "%Y%m%d %H:%M:%OS") - hours(6))

# interactive HTML table of the result
datatable(data_20220725)

Part c

library(readr)

# Part c: CowManager export for 2022-07-24 (this file is comma-delimited).
# The path is defined once here and reused in read_delim() below.
file_path <- "C:\\Users\\anune\\Downloads\\ANS500AB\\AndreaNunez\\CowManager_Iowa_State_University_20220724.csv"

# NOTE(review): when rendered, readr reported parsing issues and the first row
# came back all NA -- possibly a header line being read as data. Check
# problems(data_20220724) and confirm whether `skip = 1` is needed here.
data_20220724 <- read_delim(
  file      = file_path,    # path to file
  delim     = ",",          # comma separated (fields/columns)
  # no `skip` for this file: every line is read as data
  col_names = c("id", "Cow", "time", "count", "notactive", "ruminating",
                "eating", "active", "high_active", "temp"),
  col_types = "nncnnnnnnn", # n = number, c = character (the time column)
  na        = "missing",    # the literal text "missing" is read as NA
  locale    = locale(encoding = "UTF-8")  # without this only one column was printed
)

Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
  dat <- vroom(...)
  problems(dat)

# Parse the character timestamps as UTC date-times, then shift them back by
# six hours.
# NOTE(review): a fixed 6-hour offset ignores daylight saving time -- confirm
# this is the intended local-time conversion.
data_20220724 <- mutate(
  data_20220724,
  time = as.POSIXct(time, format = "%Y%m%d %H:%M:%OS", tz = "UTC") - hours(6)
)

# show the first rows to check the result
head(data_20220724)

# A tibble: 6 x 10
     id   Cow time                count notactive ruminating eating active
  <dbl> <dbl> <dttm>              <dbl>     <dbl>      <dbl>  <dbl>  <dbl>
1    NA    NA NA                     NA        NA         NA     NA     NA
2 11643 11643 2022-07-16 16:00:00    60        60          0      0      0
3 11643 11643 2022-07-17 06:00:00    60        60          0      0      0
4 11643 11643 2022-07-17 07:00:00    60        60          0      0      0
5 12558 12558 2022-07-23 05:00:00    61        21         30      0      9
6 12615 12615 2022-07-23 05:00:00    60        12         42      0      5
  high_active  temp
        <dbl> <dbl>
1          NA  NA  
2           0  23.0
3           0  22.6
4           0  22.7
5           1  27.7
6           1  29.8

# interactive HTML table of the 2022-07-24 data (DT package)
DT::datatable(data_20220724)

Part d

library(readr)

# Part d: CowManager export for 2022-07-23.
# The path is defined once here and reused in read_delim() below.
file_path <- "C:\\Users\\anune\\Downloads\\ANS500AB\\AndreaNunez\\CowManager_Iowa_State_University_20220723.csv"

data_20220723 <- read_delim(
  file      = file_path,    # path to file
  delim     = "|",          # | separated (fields/columns)
  skip      = 6,            # skip the first 6 lines of the file
  col_names = c("id", "Cow", "time", "count", "notactive", "ruminating",
                "eating", "active", "high_active", "temp"),
  col_types = "nncnnnnnnn", # n = number, c = character (the time column)
  na        = "missing",    # the literal text "missing" is read as NA
  locale    = locale(encoding = "UTF-8")  # without this only one column was printed
)

# Parse the character timestamps as UTC date-times and shift them back 6 hours.
# NOTE(review): a fixed 6-hour offset ignores daylight saving time -- confirm.
data_20220723 <- data_20220723 %>%
  mutate(time = as.POSIXct(time, tz = "UTC", format = "%Y%m%d %H:%M:%OS") - hours(6))

# interactive HTML table of the result
datatable(data_20220723)

Part e

library(readr)

# Part e: CowManager export for 2022-07-22.
# The path is defined once here and reused in read_delim() below.
file_path <- "C:\\Users\\anune\\Downloads\\ANS500AB\\AndreaNunez\\CowManager_Iowa_State_University_20220722.csv"

data_20220722 <- read_delim(
  file      = file_path,    # path to file
  delim     = "|",          # | separated (fields/columns)
  skip      = 1,            # skip the first line (we supply the col names)
  col_names = c("id", "Cow", "time", "count", "notactive", "ruminating",
                "eating", "active", "high_active", "temp"),
  col_types = "nncnnnnnnn", # n = number, c = character (the time column)
  na        = "missing",    # the literal text "missing" is read as NA
  locale    = locale(encoding = "UTF-8")  # without this only one column was printed
)

# Parse the character timestamps as UTC date-times and shift them back 6 hours.
# NOTE(review): a fixed 6-hour offset ignores daylight saving time -- confirm.
data_20220722 <- data_20220722 %>%
  mutate(time = as.POSIXct(time, tz = "UTC", format = "%Y%m%d %H:%M:%OS") - hours(6))

# interactive HTML table of the result
datatable(data_20220722)

Question 2

Part a

library(ggplot2)

# Bar chart of the number of rows recorded per cow.
# count() tallies the rows for each Cow value into column `n`;
# geom_col() then draws one bar per Cow with height n.
data_20220722 %>%
  count(Cow) %>%
  ggplot(mapping = aes(x = Cow, y = n)) +
  geom_col()

Part b

# Same per-cow bar chart as before, with narrower bars (width = 0.5).
data_20220722 %>%
  count(Cow) %>%
  ggplot(mapping = aes(x = Cow, y = n)) +
  geom_col(width = 0.5)

Part c

# Bar chart of how often each value of the `count` column occurs.
# count(count) tallies the rows per distinct `count` value into column `n`.
data_20220722 %>%
  count(count) %>%
  ggplot(mapping = aes(x = count, y = n)) +
  geom_col()

Part d

# Same bar chart of `count` values, with the bars filled in "dodgerblue3".
data_20220722 %>%
  count(count) %>%
  ggplot(mapping = aes(x = count, y = n)) +
  geom_col(fill = "dodgerblue3")

Part e

# Bar chart of `count` values with titles, a dark theme and custom text sizes.
data_20220722 %>%
  count(count) %>%                  # rows per distinct `count` value -> n
  ggplot(aes(x = count, y = n)) +
  geom_col(width = 0.4, fill = "dodgerblue3") +
  labs(
    title    = "Measurement per hour",
    subtitle = "Ames, Iowa, ISU Dairy, July 22, 2022",
    x        = "Measurement Count",
    y        = "n",
    caption  = "Number of measurements where the hour behavior is based on."
  ) +
  theme_dark() +
  theme(
    axis.title = element_text(size = 12),
    axis.text  = element_text(size = 9, color = "darkgray"),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0 and triggered a warning when rendered
    axis.ticks = element_line(linewidth = 5),
    plot.title = element_text(size = 20, color = "darkblue")
  )

Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
i Please use the `linewidth` argument instead.

Part f

# Histogram of the `temp` column, using the default binning
# (light blue bars with white outlines).
ggplot(data_20220722, aes(x = temp)) +
  geom_histogram(fill = "#93ABE1", color = "white")

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Part g

# Histogram of the `temp` column with titles, a minimal theme and custom
# text sizes.
data_20220722 %>%
  ggplot(aes(x = temp)) +
  geom_histogram(fill = "#BD67B1", color = "white") + # refer to R colors palettes
  theme_minimal() + # start from the minimal theme, then override below
  labs(
    title    = "Temperature histogram",
    subtitle = "Ames, Iowa, ISU Dairy, July 22, 2022",
    x        = "Temperature in Celcius degrees",
    y        = "n",
    caption  = "Average ear temperature of the animal per hour, checking normalized data for Iowa State University."
  ) +
  theme(
    axis.title = element_text(size = 12),
    axis.text  = element_text(size = 9, color = "darkgray"),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0
    axis.ticks = element_line(linewidth = 5),
    plot.title = element_text(size = 20, color = "darkblue")
  )

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.