#==============================================================================#
# Setup Options
#==============================================================================#

# NOTE: the former `rm(list = ls())` call was removed. A rendered document
# already starts in a fresh session, and when the code is run interactively
# the call would silently delete every object in the user's workspace.

# options
options(
  tibble.width = Inf, # print all columns
  scipen = 999        # remove scientific notation
)

#==============================================================================#
# Install Packages
#==============================================================================#

# In R, we first have to download the packages we want from the online
# repository called CRAN.
# Once installed, you have to load it in each session with the library()
# function.

# download package (run once, then leave commented out)
# install.packages("DT")
# install.packages("lubridate")
# install.packages("tidyverse")

#==============================================================================#
# Packages
#==============================================================================#

# Here we must load the packages for the current environment to make them
# accessible.

# load libraries
library(DT)
library(lubridate)
Warning: package 'lubridate' was built under R version 4.1.3
Attaching package: 'lubridate'
The following objects are masked from 'package:base':
date, intersect, setdiff, union
library(tidyverse)
Warning: package 'tidyverse' was built under R version 4.1.3
Warning: package 'ggplot2' was built under R version 4.1.3
Warning: package 'tibble' was built under R version 4.1.3
Warning: package 'tidyr' was built under R version 4.1.3
Warning: package 'readr' was built under R version 4.1.3
Warning: package 'purrr' was built under R version 4.1.3
Warning: package 'dplyr' was built under R version 4.1.3
Warning: package 'stringr' was built under R version 4.1.3
Warning: package 'forcats' was built under R version 4.1.3
-- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
v dplyr 1.1.2 v readr 2.1.4
v forcats 1.0.0 v stringr 1.5.0
v ggplot2 3.4.2 v tibble 3.2.1
v purrr 1.0.1 v tidyr 1.3.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag() masks stats::lag()
i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#==============================================================================#
# Set Paths
#==============================================================================#

# set all paths
path_main <- "C:\\Users\\anune\\Downloads\\ANS500AB"

# path_main has no trailing separator, so one is supplied explicitly here.
# Without it the pieces were glued together as "...ANS500ABData/" etc.
# Each sub-path keeps its trailing "/" so a file name can be appended directly.
path_data <- str_c(path_main, "/Data/")
path_plots <- str_c(path_main, "/Plots/")
path_scripts <- str_c(path_main, "/Scripts/")

# NOTE: str_c() is from the stringr package, which is a part of 'tidyverse'
# this is equivalent to paste0() in base R, but we'll try to use tidyverse
# functions when possible in this class.

# set working directory
# setwd(path_main)
# NOTE: We cannot set the working directory in quarto with this method
# as we do in an R script. Use the root.dir option in YAML.

#==============================================================================#
# Set Inputs
#==============================================================================#

#==============================================================================#
# Check session info again
#==============================================================================#

# check sessionInfo
sessionInfo()
Warning: package 'car' was built under R version 4.1.3
Loading required package: carData
Attaching package: 'car'
The following object is masked from 'package:dplyr':
recode
The following object is masked from 'package:purrr':
some
library(readr)
library(DT)

# Full path of the 2022-07-26 CowManager export.
file_path <- "C:\\Users\\anune\\Downloads\\ANS500AB\\AndreaNunez\\CowManager_Iowa_State_University_20220726.csv"

# Earlier base-R attempt, kept for reference:
# data_20220726 <- read.csv("CowManager_Iowa_State_University_20220722.csv", colClasses = c(NA, "numeric"))

# Read the pipe-delimited file. The path comes from `file_path` (previously the
# variable was assigned but the literal was repeated inside a one-argument
# paste0() call).
data_20220726 <- read_delim(
  file = file_path,           # path to file
  delim = "|",                # | separated (fields/columns)
  skip = 1,                   # skip the first line (column names are given below)
  col_names = c(
    "id", "Cow", "time", "count", "notactive",
    "ruminating", "eating", "active", "high_active", "temp"
  ),
  col_types = "nncnnnnnnn",   # readr compact spec: n = number, c = character
  na = c("missing"),          # the text "missing" is read as NA
  # author's note: without the locale only one column was printed
  locale = locale(encoding = "UTF-8")
)
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
library(readr)

# Full path of the 2022-07-25 CowManager export.
file_path <- "C:\\Users\\anune\\Downloads\\ANS500AB\\AndreaNunez\\CowManager_Iowa_State_University_20220725.csv"

# Read the pipe-delimited file. The path comes from `file_path` (previously the
# variable was assigned but the literal was repeated inside a one-argument
# paste0() call).
data_20220725 <- read_delim(
  file = file_path,           # path to file
  delim = "|",                # | separated (fields/columns)
  # skip = 1,                 # left disabled: no leading line is skipped
  col_names = c(
    "id", "Cow", "time", "count", "notactive",
    "ruminating", "eating", "active", "high_active", "temp"
  ),
  col_types = "nncnnnnnnn",   # readr compact spec: n = number, c = character
  na = c("missing"),          # the text "missing" is read as NA
  # author's note: without the locale only one column was printed
  locale = locale(encoding = "UTF-8")
)

# Parse the character `time` column as a UTC date-time, then shift it back by
# six hours.
# NOTE(review): presumably a fixed UTC-6 offset to local time; a fixed offset
# ignores daylight saving time -- confirm this is intended.
data_20220725 <- data_20220725 %>%
  mutate(
    time = as.POSIXct(time, tz = "UTC", format = "%Y%m%d %H:%M:%OS") - hours(6)
  )

# Show the result as an interactive table.
datatable(data_20220725)
Part c
library(readr)

# Full path of the 2022-07-24 CowManager export.
file_path <- "C:\\Users\\anune\\Downloads\\ANS500AB\\AndreaNunez\\CowManager_Iowa_State_University_20220724.csv"

# Read the comma-delimited file. The path comes from `file_path` (previously
# the variable was assigned but the literal was repeated inside a one-argument
# paste0() call).
# NOTE(review): the preview printed for this data set shows an all-NA first
# row, which may be a header line read as data -- confirm whether `skip = 1`
# should be enabled.
data_20220724 <- read_delim(
  file = file_path,           # path to file
  delim = ",",                # comma separated (fields/columns)
  # skip = 1,                 # left disabled: no leading line is skipped
  col_names = c(
    "id", "Cow", "time", "count", "notactive",
    "ruminating", "eating", "active", "high_active", "temp"
  ),
  col_types = "nncnnnnnnn",   # readr compact spec: n = number, c = character
  na = c("missing"),          # the text "missing" is read as NA
  # author's note: without the locale only one column was printed
  locale = locale(encoding = "UTF-8")
)
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
# A tibble: 6 x 10
id Cow time count notactive ruminating eating active
<dbl> <dbl> <dttm> <dbl> <dbl> <dbl> <dbl> <dbl>
1 NA NA NA NA NA NA NA NA
2 11643 11643 2022-07-16 16:00:00 60 60 0 0 0
3 11643 11643 2022-07-17 06:00:00 60 60 0 0 0
4 11643 11643 2022-07-17 07:00:00 60 60 0 0 0
5 12558 12558 2022-07-23 05:00:00 61 21 30 0 9
6 12615 12615 2022-07-23 05:00:00 60 12 42 0 5
high_active temp
<dbl> <dbl>
1 NA NA
2 0 23.0
3 0 22.6
4 0 22.7
5 1 27.7
6 1 29.8
# Show the 2022-07-24 data as an interactive DT table.
datatable(data = data_20220724)
Part d
library(readr)

# Full path of the 2022-07-23 CowManager export.
file_path <- "C:\\Users\\anune\\Downloads\\ANS500AB\\AndreaNunez\\CowManager_Iowa_State_University_20220723.csv"

# Read the pipe-delimited file. The path comes from `file_path` (previously the
# variable was assigned but the literal was repeated inside a one-argument
# paste0() call).
data_20220723 <- read_delim(
  file = file_path,           # path to file
  delim = "|",                # | separated (the old comment said "comma")
  skip = 6,                   # skip the first 6 lines
  col_names = c(
    "id", "Cow", "time", "count", "notactive",
    "ruminating", "eating", "active", "high_active", "temp"
  ),
  col_types = "nncnnnnnnn",   # readr compact spec: n = number, c = character
  na = c("missing"),          # the text "missing" is read as NA
  # author's note: without the locale only one column was printed
  locale = locale(encoding = "UTF-8")
)

# Parse the character `time` column as a UTC date-time, then shift it back by
# six hours.
# NOTE(review): presumably a fixed UTC-6 offset to local time; a fixed offset
# ignores daylight saving time -- confirm this is intended.
data_20220723 <- data_20220723 %>%
  mutate(
    time = as.POSIXct(time, tz = "UTC", format = "%Y%m%d %H:%M:%OS") - hours(6)
  )

# Show the result as an interactive table.
datatable(data_20220723)
Part e
library(readr)

# Full path of the 2022-07-22 CowManager export.
file_path <- "C:\\Users\\anune\\Downloads\\ANS500AB\\AndreaNunez\\CowManager_Iowa_State_University_20220722.csv"

# Earlier base-R attempt, kept for reference:
# data1 <- read.csv("CowManager_Iowa_State_University_20220722.csv", colClasses = c(NA, "numeric"))

# Read the pipe-delimited file. The path comes from `file_path` (previously the
# variable was assigned but the literal was repeated inside a one-argument
# paste0() call).
data_20220722 <- read_delim(
  file = file_path,           # path to file
  delim = "|",                # | separated (fields/columns)
  skip = 1,                   # skip the first line (column names are given below)
  col_names = c(
    "id", "Cow", "time", "count", "notactive",
    "ruminating", "eating", "active", "high_active", "temp"
  ),
  col_types = "nncnnnnnnn",   # readr compact spec: n = number, c = character
  na = c("missing"),          # the text "missing" is read as NA
  # author's note: without the locale only one column was printed
  locale = locale(encoding = "UTF-8")
)

# Parse the character `time` column as a UTC date-time, then shift it back by
# six hours.
# NOTE(review): presumably a fixed UTC-6 offset to local time; a fixed offset
# ignores daylight saving time -- confirm this is intended.
data_20220722 <- data_20220722 %>%
  mutate(
    time = as.POSIXct(time, tz = "UTC", format = "%Y%m%d %H:%M:%OS") - hours(6)
  )

# Show the result as an interactive table.
datatable(data_20220722)
Question 2
Part a
library(ggplot2)

# Simple bar chart: tally the rows for each value of `Cow`, then draw one bar
# per cow whose height is that tally (`n`).
ggplot(
  data = count(data_20220722, Cow),
  mapping = aes(x = Cow, y = n)
) +
  geom_col()
Part b
# Same per-cow bar chart, drawn with bars of width 0.5.
ggplot(
  data = count(data_20220722, Cow),
  mapping = aes(x = Cow, y = n)
) +
  geom_col(width = 0.5)
Part c
# Simple bar chart: tally the rows for each value of the `count` column, then
# draw one bar per distinct value whose height is that tally (`n`).
ggplot(
  data = count(data_20220722, count),
  mapping = aes(x = count, y = n)
) +
  geom_col()
Part d
# Same bar chart of the `count` column tallies, with the bars filled in
# "dodgerblue3".
ggplot(
  data = count(data_20220722, count),
  mapping = aes(x = count, y = n)
) +
  geom_col(fill = "dodgerblue3")
Part e
# Bar chart of how often each value of the `count` column occurs, with titles,
# a dark theme and customised text/tick styling.
data_20220722 %>%
  count(count) %>% # tally rows per distinct `count` value
  ggplot(aes(x = count, y = n)) +
  geom_col(width = 0.4, fill = "dodgerblue3") +
  labs(
    title = "Measurement per hour",
    subtitle = "Ames, Iowa, ISU Dairy, July 22, 2022",
    x = "Measurement Count",
    y = "n",
    caption = "Number of measurements where the hour behavior is based on."
  ) +
  theme_dark() +
  theme(
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 9, color = "darkgray"),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0 and raised a warning here.
    axis.ticks = element_line(linewidth = 5),
    plot.title = element_text(size = 20, color = "darkblue")
  )
Warning: The `size` argument of `element_line()` is deprecated as of ggplot2 3.4.0.
i Please use the `linewidth` argument instead.
Part f
# Simple histogram of the `temp` column.
data_20220722 %>%
  ggplot(aes(x = temp)) + # aes maps the x axis to temp
  geom_histogram(
    bins = 30, # the default made explicit; avoids the stat_bin() message
    fill = "#93ABE1", # refer to R colors palettes
    color = "white"
  )
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Part g
# Histogram of the `temp` column with titles, a minimal theme and customised
# text/tick styling.
data_20220722 %>%
  ggplot(aes(x = temp)) +
  geom_histogram(
    bins = 30, # the default made explicit; avoids the stat_bin() message
    fill = "#BD67B1", # refer to R colors palettes
    color = "white"
  ) +
  theme_minimal() + # use a minimal theme for the plot
  labs(
    title = "Temperature histogram",
    subtitle = "Ames, Iowa, ISU Dairy, July 22, 2022",
    x = "Temperature in Celsius degrees", # spelling fixed (was "Celcius")
    y = "n",
    caption = "Average ear temperature of the animal per hour, checking normalized data for Iowa State University."
  ) +
  theme(
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 9, color = "darkgray"),
    # `linewidth` replaces the `size` argument of element_line(), which is
    # deprecated as of ggplot2 3.4.0.
    axis.ticks = element_line(linewidth = 5),
    plot.title = element_text(size = 20, color = "darkblue")
  )
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.