Import data

# excel file
my_data <- read_excel("myData.xlsx")
my_data
## # A tibble: 224 × 7
##    observed_month prod_type     prod_process   n_hens n_eggs source Eggs_Per_Hen
##    <chr>          <chr>         <chr>           <dbl>  <dbl> <chr>         <dbl>
##  1 42582          hatching eggs all          57975000 1.15e9 ChicE…         19.8
##  2 42613          hatching eggs all          57595000 1.14e9 ChicE…         19.8
##  3 42643          hatching eggs all          57161000 1.09e9 ChicE…         19.1
##  4 42674          hatching eggs all          56857000 1.13e9 ChicE…         19.8
##  5 42704          hatching eggs all          57116000 1.10e9 ChicE…         19.2
##  6 42735          hatching eggs all          57750000 1.13e9 ChicE…         19.6
##  7 42766          hatching eggs all          57991000 1.12e9 ChicE…         19.4
##  8 42794          hatching eggs all          58286000 1.01e9 ChicE…         17.4
##  9 42825          hatching eggs all          58735000 1.13e9 ChicE…         19.2
## 10 42855          hatching eggs all          59072000 1.10e9 ChicE…         18.6
## # ℹ 214 more rows

Changed the months column data from the numeric data form to date form

library(readxl)
myData <- read_excel("myData.xlsx", col_types = c("date", 
    "text", "text", "numeric", "numeric", 
    "text", "numeric"))
myData
## # A tibble: 224 × 7
##    observed_month      prod_type  prod_process n_hens n_eggs source Eggs_Per_Hen
##    <dttm>              <chr>      <chr>         <dbl>  <dbl> <chr>         <dbl>
##  1 2016-07-31 00:00:00 hatching … all          5.80e7 1.15e9 ChicE…         19.8
##  2 2016-08-31 00:00:00 hatching … all          5.76e7 1.14e9 ChicE…         19.8
##  3 2016-09-30 00:00:00 hatching … all          5.72e7 1.09e9 ChicE…         19.1
##  4 2016-10-31 00:00:00 hatching … all          5.69e7 1.13e9 ChicE…         19.8
##  5 2016-11-30 00:00:00 hatching … all          5.71e7 1.10e9 ChicE…         19.2
##  6 2016-12-31 00:00:00 hatching … all          5.77e7 1.13e9 ChicE…         19.6
##  7 2017-01-31 00:00:00 hatching … all          5.80e7 1.12e9 ChicE…         19.4
##  8 2017-02-28 00:00:00 hatching … all          5.83e7 1.01e9 ChicE…         17.4
##  9 2017-03-31 00:00:00 hatching … all          5.87e7 1.13e9 ChicE…         19.2
## 10 2017-04-30 00:00:00 hatching … all          5.91e7 1.10e9 ChicE…         18.6
## # ℹ 214 more rows

Apply the following dplyr verbs to your data

Filter rows

filter(myData, prod_type=="table eggs")
## # A tibble: 165 × 7
##    observed_month      prod_type  prod_process n_hens n_eggs source Eggs_Per_Hen
##    <dttm>              <chr>      <chr>         <dbl>  <dbl> <chr>         <dbl>
##  1 2016-07-31 00:00:00 table eggs all          3.00e8 7.35e9 ChicE…         24.5
##  2 2016-08-31 00:00:00 table eggs all          3.01e8 7.41e9 ChicE…         24.6
##  3 2016-09-30 00:00:00 table eggs all          3.03e8 7.20e9 ChicE…         23.8
##  4 2016-10-31 00:00:00 table eggs all          3.06e8 7.53e9 ChicE…         24.6
##  5 2016-11-30 00:00:00 table eggs all          3.11e8 7.47e9 ChicE…         24.0
##  6 2016-12-31 00:00:00 table eggs all          3.19e8 7.95e9 ChicE…         24.9
##  7 2017-01-31 00:00:00 table eggs all          3.18e8 7.90e9 ChicE…         24.8
##  8 2017-02-28 00:00:00 table eggs all          3.17e8 7.08e9 ChicE…         22.3
##  9 2017-03-31 00:00:00 table eggs all          3.16e8 7.84e9 ChicE…         24.8
## 10 2017-04-30 00:00:00 table eggs all          3.15e8 7.55e9 ChicE…         24.0
## # ℹ 155 more rows

Arrange rows

arrange(myData, desc(Eggs_Per_Hen))
## # A tibble: 224 × 7
##    observed_month      prod_type  prod_process n_hens n_eggs source Eggs_Per_Hen
##    <dttm>              <chr>      <chr>         <dbl>  <dbl> <chr>         <dbl>
##  1 2020-12-31 00:00:00 table eggs all          3.27e8 8.36e9 ChicE…         25.6
##  2 2020-10-31 00:00:00 table eggs all          3.23e8 8.25e9 ChicE…         25.6
##  3 2020-07-31 00:00:00 table eggs all          3.15e8 8.01e9 ChicE…         25.5
##  4 2020-08-31 00:00:00 table eggs all          3.16e8 8.04e9 ChicE…         25.5
##  5 2021-01-31 00:00:00 table eggs all          3.27e8 8.26e9 ChicE…         25.3
##  6 2019-12-31 00:00:00 table eggs all          3.41e8 8.60e9 ChicE…         25.2
##  7 2019-10-31 00:00:00 table eggs all          3.36e8 8.45e9 ChicE…         25.2
##  8 2020-01-31 00:00:00 table eggs all          3.38e8 8.49e9 ChicE…         25.1
##  9 2020-03-31 00:00:00 table eggs all          3.31e8 8.29e9 ChicE…         25.1
## 10 2020-12-31 00:00:00 table eggs cage-free (… 1.71e7 4.29e8 PY202…         25.0
## # ℹ 214 more rows

Select columns

select(myData, observed_month, n_hens, n_eggs, Eggs_Per_Hen)
## # A tibble: 224 × 4
##    observed_month        n_hens     n_eggs Eggs_Per_Hen
##    <dttm>                 <dbl>      <dbl>        <dbl>
##  1 2016-07-31 00:00:00 57975000 1147000000         19.8
##  2 2016-08-31 00:00:00 57595000 1142700000         19.8
##  3 2016-09-30 00:00:00 57161000 1093300000         19.1
##  4 2016-10-31 00:00:00 56857000 1126700000         19.8
##  5 2016-11-30 00:00:00 57116000 1096600000         19.2
##  6 2016-12-31 00:00:00 57750000 1132900000         19.6
##  7 2017-01-31 00:00:00 57991000 1123400000         19.4
##  8 2017-02-28 00:00:00 58286000 1014500000         17.4
##  9 2017-03-31 00:00:00 58735000 1128500000         19.2
## 10 2017-04-30 00:00:00 59072000 1097200000         18.6
## # ℹ 214 more rows

Add columns

select(myData, n_eggs) %>%
    
    mutate(total_eggs= cumsum(n_eggs))
## # A tibble: 224 × 2
##        n_eggs  total_eggs
##         <dbl>       <dbl>
##  1 1147000000  1147000000
##  2 1142700000  2289700000
##  3 1093300000  3383000000
##  4 1126700000  4509700000
##  5 1096600000  5606300000
##  6 1132900000  6739200000
##  7 1123400000  7862600000
##  8 1014500000  8877100000
##  9 1128500000 10005600000
## 10 1097200000 11102800000
## # ℹ 214 more rows

Summarize by groups

# average eggs per month
summarise(myData, average = mean(Eggs_Per_Hen, na.rm= TRUE))
## # A tibble: 1 × 1
##   average
##     <dbl>
## 1    22.4