Import data
# excel file
my_data <- read_excel("myData.xlsx")
my_data
## # A tibble: 224 × 7
## observed_month prod_type prod_process n_hens n_eggs source Eggs_Per_Hen
## <chr> <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 42582 hatching eggs all 57975000 1.15e9 ChicE… 19.8
## 2 42613 hatching eggs all 57595000 1.14e9 ChicE… 19.8
## 3 42643 hatching eggs all 57161000 1.09e9 ChicE… 19.1
## 4 42674 hatching eggs all 56857000 1.13e9 ChicE… 19.8
## 5 42704 hatching eggs all 57116000 1.10e9 ChicE… 19.2
## 6 42735 hatching eggs all 57750000 1.13e9 ChicE… 19.6
## 7 42766 hatching eggs all 57991000 1.12e9 ChicE… 19.4
## 8 42794 hatching eggs all 58286000 1.01e9 ChicE… 17.4
## 9 42825 hatching eggs all 58735000 1.13e9 ChicE… 19.2
## 10 42855 hatching eggs all 59072000 1.10e9 ChicE… 18.6
## # ℹ 214 more rows
Apply the following dplyr verbs to your data
Filter rows
filter(myData, prod_type=="table eggs")
## # A tibble: 165 × 7
## observed_month prod_type prod_process n_hens n_eggs source Eggs_Per_Hen
## <dttm> <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 2016-07-31 00:00:00 table eggs all 3.00e8 7.35e9 ChicE… 24.5
## 2 2016-08-31 00:00:00 table eggs all 3.01e8 7.41e9 ChicE… 24.6
## 3 2016-09-30 00:00:00 table eggs all 3.03e8 7.20e9 ChicE… 23.8
## 4 2016-10-31 00:00:00 table eggs all 3.06e8 7.53e9 ChicE… 24.6
## 5 2016-11-30 00:00:00 table eggs all 3.11e8 7.47e9 ChicE… 24.0
## 6 2016-12-31 00:00:00 table eggs all 3.19e8 7.95e9 ChicE… 24.9
## 7 2017-01-31 00:00:00 table eggs all 3.18e8 7.90e9 ChicE… 24.8
## 8 2017-02-28 00:00:00 table eggs all 3.17e8 7.08e9 ChicE… 22.3
## 9 2017-03-31 00:00:00 table eggs all 3.16e8 7.84e9 ChicE… 24.8
## 10 2017-04-30 00:00:00 table eggs all 3.15e8 7.55e9 ChicE… 24.0
## # ℹ 155 more rows
Arrange rows
arrange(myData, desc(Eggs_Per_Hen))
## # A tibble: 224 × 7
## observed_month prod_type prod_process n_hens n_eggs source Eggs_Per_Hen
## <dttm> <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 2020-12-31 00:00:00 table eggs all 3.27e8 8.36e9 ChicE… 25.6
## 2 2020-10-31 00:00:00 table eggs all 3.23e8 8.25e9 ChicE… 25.6
## 3 2020-07-31 00:00:00 table eggs all 3.15e8 8.01e9 ChicE… 25.5
## 4 2020-08-31 00:00:00 table eggs all 3.16e8 8.04e9 ChicE… 25.5
## 5 2021-01-31 00:00:00 table eggs all 3.27e8 8.26e9 ChicE… 25.3
## 6 2019-12-31 00:00:00 table eggs all 3.41e8 8.60e9 ChicE… 25.2
## 7 2019-10-31 00:00:00 table eggs all 3.36e8 8.45e9 ChicE… 25.2
## 8 2020-01-31 00:00:00 table eggs all 3.38e8 8.49e9 ChicE… 25.1
## 9 2020-03-31 00:00:00 table eggs all 3.31e8 8.29e9 ChicE… 25.1
## 10 2020-12-31 00:00:00 table eggs cage-free (… 1.71e7 4.29e8 PY202… 25.0
## # ℹ 214 more rows
Select columns
select(myData, observed_month, n_hens, n_eggs, Eggs_Per_Hen)
## # A tibble: 224 × 4
## observed_month n_hens n_eggs Eggs_Per_Hen
## <dttm> <dbl> <dbl> <dbl>
## 1 2016-07-31 00:00:00 57975000 1147000000 19.8
## 2 2016-08-31 00:00:00 57595000 1142700000 19.8
## 3 2016-09-30 00:00:00 57161000 1093300000 19.1
## 4 2016-10-31 00:00:00 56857000 1126700000 19.8
## 5 2016-11-30 00:00:00 57116000 1096600000 19.2
## 6 2016-12-31 00:00:00 57750000 1132900000 19.6
## 7 2017-01-31 00:00:00 57991000 1123400000 19.4
## 8 2017-02-28 00:00:00 58286000 1014500000 17.4
## 9 2017-03-31 00:00:00 58735000 1128500000 19.2
## 10 2017-04-30 00:00:00 59072000 1097200000 18.6
## # ℹ 214 more rows
Add columns
select(myData, n_eggs) %>%
mutate(total_eggs= cumsum(n_eggs))
## # A tibble: 224 × 2
## n_eggs total_eggs
## <dbl> <dbl>
## 1 1147000000 1147000000
## 2 1142700000 2289700000
## 3 1093300000 3383000000
## 4 1126700000 4509700000
## 5 1096600000 5606300000
## 6 1132900000 6739200000
## 7 1123400000 7862600000
## 8 1014500000 8877100000
## 9 1128500000 10005600000
## 10 1097200000 11102800000
## # ℹ 214 more rows
Summarize by groups
# average eggs per month
summarise(myData, average = mean(Eggs_Per_Hen, na.rm= TRUE))
## # A tibble: 1 × 1
## average
## <dbl>
## 1 22.4