Exercício 2 - Manipulação de Dados

Chegou a hora de fixar o nosso aprendizado das funções aprendidas em aula do tidyr e do dplyr!

Para isso, vamos usar um conjunto de dados do tidytuesday

E os dados escolhidos por mim foram os de produção de ovos nos EUA

Então a primeira coisa que vocês terão que fazer é ler sobre os dados no link acima, baixar os dados de acordo com as instruções, carregar o tidyverse e seguir as orientações abaixo!

Carregando o conjunto de dados

# Download the TidyTuesday (2023-04-11) egg-production CSV into a tibble.
eggproduction <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-04-11/egg-production.csv"
)

Carregando as bibliotecas necessárias para a resolução dos exercícios

library(tidyverse)

Exercício 1 - removendo colunas de um conjunto de dados

Remover a última coluna do dataset (a coluna chamada source)

# dplyr approach: drop the `source` column by name with select().
eggproduction_lastcolumn <- eggproduction %>%
  select(-source)
str(eggproduction_lastcolumn)

tibble [220 x 5] (S3: tbl_df/tbl/data.frame)
 $ observed_month: Date[1:220], format: "2016-07-31" "2016-08-31" ...
 $ prod_type     : chr [1:220] "hatching eggs" "hatching eggs" "hatching eggs" "hatching eggs" ...
 $ prod_process  : chr [1:220] "all" "all" "all" "all" ...
 $ n_hens        : num [1:220] 57975000 57595000 57161000 56857000 57116000 ...
 $ n_eggs        : num [1:220] 1.15e+09 1.14e+09 1.09e+09 1.13e+09 1.10e+09 ...

# Same removal by position: last_col() targets whichever column comes last,
# so the call keeps working if the number of columns ever changes
# (the previous hard-coded index 6 would not).
eggproduction_lastcolumn2 <- select(eggproduction, -last_col())
str(eggproduction_lastcolumn2)

tibble [220 x 5] (S3: tbl_df/tbl/data.frame)
 $ observed_month: Date[1:220], format: "2016-07-31" "2016-08-31" ...
 $ prod_type     : chr [1:220] "hatching eggs" "hatching eggs" "hatching eggs" "hatching eggs" ...
 $ prod_process  : chr [1:220] "all" "all" "all" "all" ...
 $ n_hens        : num [1:220] 57975000 57595000 57161000 56857000 57116000 ...
 $ n_eggs        : num [1:220] 1.15e+09 1.14e+09 1.09e+09 1.13e+09 1.10e+09 ...

# Base R approach (indexing syntax is data[rows, columns]): a negative column
# index drops that column, and ncol() locates the last one without
# hard-coding its position.
eggproduction_lastcolumn3 <- eggproduction[, -ncol(eggproduction)]
str(eggproduction_lastcolumn3)

tibble [220 x 5] (S3: tbl_df/tbl/data.frame)
 $ observed_month: Date[1:220], format: "2016-07-31" "2016-08-31" ...
 $ prod_type     : chr [1:220] "hatching eggs" "hatching eggs" "hatching eggs" "hatching eggs" ...
 $ prod_process  : chr [1:220] "all" "all" "all" "all" ...
 $ n_hens        : num [1:220] 57975000 57595000 57161000 56857000 57116000 ...
 $ n_eggs        : num [1:220] 1.15e+09 1.14e+09 1.09e+09 1.13e+09 1.10e+09 ...

Exercício 2 - selecionando variáveis numéricas de um conjunto de dados

Selecionar apenas as variáveis numéricas

# dplyr approach: select_if() keeps the columns for which the predicate
# (here is.numeric) returns TRUE.
eggproduction_numeric <- eggproduction_lastcolumn %>%
  select_if(is.numeric)
str(eggproduction_numeric)

tibble [220 x 2] (S3: tbl_df/tbl/data.frame)
 $ n_hens: num [1:220] 57975000 57595000 57161000 56857000 57116000 ...
 $ n_eggs: num [1:220] 1.15e+09 1.14e+09 1.09e+09 1.13e+09 1.10e+09 ...

# Current dplyr idiom: where() is a selection helper meant for select(),
# so the superseded select_if() wrapper is not needed.
eggproduction_numeric2 <- select(eggproduction_lastcolumn, where(is.numeric))
str(eggproduction_numeric2)

tibble [220 x 2] (S3: tbl_df/tbl/data.frame)
 $ n_hens: num [1:220] 57975000 57595000 57161000 56857000 57116000 ...
 $ n_eggs: num [1:220] 1.15e+09 1.14e+09 1.09e+09 1.13e+09 1.10e+09 ...

# Base R approach: Filter() keeps the columns for which is.numeric() is TRUE,
# so the selection follows the column types instead of hard-coded positions.
eggproduction_numeric3 <- Filter(is.numeric, eggproduction_lastcolumn)
str(eggproduction_numeric3)

tibble [220 x 2] (S3: tbl_df/tbl/data.frame)
 $ n_hens: num [1:220] 57975000 57595000 57161000 56857000 57116000 ...
 $ n_eggs: num [1:220] 1.15e+09 1.14e+09 1.09e+09 1.13e+09 1.10e+09 ...

Exercício 3 - criando subconjuntos a partir de um conjunto de dados

Criar um dataset só com hatching e outro dataset apenas com table eggs

# dplyr approach: slice() with negative positions drops rows 56 through 220,
# leaving the first 55 rows.
# NOTE(review): this relies on the hatching-egg rows being exactly rows 1-55
# of the data -- confirm against the source file.
eggproduction_hatching <- eggproduction_lastcolumn %>%
  slice(-(56:220))
str(eggproduction_hatching)

tibble [55 x 5] (S3: tbl_df/tbl/data.frame)
 $ observed_month: Date[1:55], format: "2016-07-31" "2016-08-31" ...
 $ prod_type     : chr [1:55] "hatching eggs" "hatching eggs" "hatching eggs" "hatching eggs" ...
 $ prod_process  : chr [1:55] "all" "all" "all" "all" ...
 $ n_hens        : num [1:55] 57975000 57595000 57161000 56857000 57116000 ...
 $ n_eggs        : num [1:55] 1.15e+09 1.14e+09 1.09e+09 1.13e+09 1.10e+09 ...

# Base R alternative: subset() keeps rows by condition rather than by position.
eggproduction_hatching2 <- subset(
  eggproduction_lastcolumn,
  subset = prod_type == "hatching eggs"
)
str(eggproduction_hatching2)

tibble [55 x 5] (S3: tbl_df/tbl/data.frame)
 $ observed_month: Date[1:55], format: "2016-07-31" "2016-08-31" ...
 $ prod_type     : chr [1:55] "hatching eggs" "hatching eggs" "hatching eggs" "hatching eggs" ...
 $ prod_process  : chr [1:55] "all" "all" "all" "all" ...
 $ n_hens        : num [1:55] 57975000 57595000 57161000 56857000 57116000 ...
 $ n_eggs        : num [1:55] 1.15e+09 1.14e+09 1.09e+09 1.13e+09 1.10e+09 ...

# Complement of the hatching slice: drop rows 1 through 55 and keep the rest.
# NOTE(review): assumes the table-egg rows start at row 56 -- confirm.
eggproduction_table_eggs <- eggproduction_lastcolumn %>%
  slice(-(1:55))
str(eggproduction_table_eggs)

tibble [165 x 5] (S3: tbl_df/tbl/data.frame)
 $ observed_month: Date[1:165], format: "2016-07-31" "2016-08-31" ...
 $ prod_type     : chr [1:165] "table eggs" "table eggs" "table eggs" "table eggs" ...
 $ prod_process  : chr [1:165] "all" "all" "all" "all" ...
 $ n_hens        : num [1:165] 3.00e+08 3.01e+08 3.03e+08 3.06e+08 3.11e+08 ...
 $ n_eggs        : num [1:165] 7.35e+09 7.41e+09 7.20e+09 7.53e+09 7.47e+09 ...

# Base R alternative: select the table-egg rows by condition with subset().
eggproduction_table_eggs2 <- subset(
  eggproduction_lastcolumn,
  subset = prod_type == "table eggs"
)
str(eggproduction_table_eggs2)

tibble [165 x 5] (S3: tbl_df/tbl/data.frame)
 $ observed_month: Date[1:165], format: "2016-07-31" "2016-08-31" ...
 $ prod_type     : chr [1:165] "table eggs" "table eggs" "table eggs" "table eggs" ...
 $ prod_process  : chr [1:165] "all" "all" "all" "all" ...
 $ n_hens        : num [1:165] 3.00e+08 3.01e+08 3.03e+08 3.06e+08 3.11e+08 ...
 $ n_eggs        : num [1:165] 7.35e+09 7.41e+09 7.20e+09 7.53e+09 7.47e+09 ...

Exercício 4 - criando subconjuntos a partir de fatores específicos dentro do conjunto de dados

Criar um dataset só com table eggs e todos os processos (all)

# Drop rows 56 through 165 of the table-egg subset, keeping its first 55 rows.
# NOTE(review): assumes those first 55 rows are the "all" process -- confirm.
eggproduction_table_eggs_all <- slice(eggproduction_table_eggs, -seq(56, 165))
str(eggproduction_table_eggs_all)

tibble [55 x 5] (S3: tbl_df/tbl/data.frame)
 $ observed_month: Date[1:55], format: "2016-07-31" "2016-08-31" ...
 $ prod_type     : chr [1:55] "table eggs" "table eggs" "table eggs" "table eggs" ...
 $ prod_process  : chr [1:55] "all" "all" "all" "all" ...
 $ n_hens        : num [1:55] 3.00e+08 3.01e+08 3.03e+08 3.06e+08 3.11e+08 ...
 $ n_eggs        : num [1:55] 7.35e+09 7.41e+09 7.20e+09 7.53e+09 7.47e+09 ...

# Inspect the S3 class vector of the sliced result.
class(eggproduction_table_eggs_all)

[1] "tbl_df"     "tbl"        "data.frame"

# The same positional slice, written with the pipe.
eggproduction_table_eggs_all2 <- eggproduction_table_eggs %>%
  slice(-(56:165))
str(eggproduction_table_eggs_all2)

tibble [55 x 5] (S3: tbl_df/tbl/data.frame)
 $ observed_month: Date[1:55], format: "2016-07-31" "2016-08-31" ...
 $ prod_type     : chr [1:55] "table eggs" "table eggs" "table eggs" "table eggs" ...
 $ prod_process  : chr [1:55] "all" "all" "all" "all" ...
 $ n_hens        : num [1:55] 3.00e+08 3.01e+08 3.03e+08 3.06e+08 3.11e+08 ...
 $ n_eggs        : num [1:55] 7.35e+09 7.41e+09 7.20e+09 7.53e+09 7.47e+09 ...

# dplyr approach: filter() by value; comma-separated conditions are combined
# with a logical AND.
eggproduction_table_eggs_all3 <- eggproduction_table_eggs %>%
  filter(prod_process == "all", prod_type == "table eggs")

# The same selection in base R, first without subset() and then with it.

# Alternative versions
eggproduction_table_eggs_all4 <- eggproduction_table_eggs[seq_len(55), ]
eggproduction_table_eggs_all5 <- subset(
  eggproduction_table_eggs,
  subset = prod_process == "all" & prod_type == "table eggs"
)

# all.equal() checks whether the two objects are nearly identical.
all.equal(eggproduction_table_eggs_all2, eggproduction_table_eggs_all)

[1] TRUE

identical(eggproduction_table_eggs_all2, eggproduction_table_eggs_all3) # checks whether the two objects are exactly equal

[1] TRUE

Exercício 5 - mudando nome das variáveis

Mudar os nomes das variáveis para português

# ls() on a data frame lists its column names in alphabetical order.
ls(eggproduction_lastcolumn)

[1] "n_eggs"         "n_hens"         "observed_month" "prod_process"  
[5] "prod_type"

# Rename every column to Portuguese (new_name = old_name). Names containing
# spaces or accents are non-syntactic, so they are wrapped in backticks --
# used consistently here instead of mixing backticks and quoted strings.
eggproduction_lastcolumn_port <- rename(
  eggproduction_lastcolumn,
  n_ovos = n_eggs,
  n_galinhas = n_hens,
  `Mês observado` = observed_month,
  `Processo de produção` = prod_process,
  `Tipo de produção` = prod_type
)

# The same renaming, written with the pipe.
eggproduction_lastcolumn_port2 <- eggproduction_lastcolumn %>%
  rename(
    n_ovos = n_eggs,
    n_galinhas = n_hens,
    `Mês observado` = observed_month,
    `Processo de produção` = prod_process,
    `Tipo de produção` = prod_type
  )

# List the renamed columns (ls() returns them in alphabetical order).
ls(eggproduction_lastcolumn_port)

[1] "Mês observado"        "n_galinhas"           "n_ovos"              
[4] "Processo de produção" "Tipo de produção"

# List the columns of the piped version for comparison.
ls(eggproduction_lastcolumn_port2)

[1] "Mês observado"        "n_galinhas"           "n_ovos"              
[4] "Processo de produção" "Tipo de produção"

Exercício 6 - mudando nomes de fatores dentro de variáveis específicas

Mudar os nomes dos fatores para português

# Translate the category labels to Portuguese with recode() (old = new).
# The old value must match the data exactly: the label is "table eggs"
# (a misspelled key is silently ignored and leaves the value untranslated).
eggproduction_lastcolumn_port_final <- mutate(
  eggproduction_lastcolumn_port,
  `Tipo de produção` = recode(
    `Tipo de produção`,
    "hatching eggs" = "ovos para incubação",
    "table eggs" = "ovos de mesa"
  ),
  `Processo de produção` = recode(
    `Processo de produção`,
    "all" = "todos",
    "cage-free (non-organic)" = "livre de gaiolas (não orgânico)",
    "cage-free (organic)" = "livre de gaiolas (orgânico)"
  )
)

# Because the recoded columns reuse the existing names, they overwrite the columns holding the old labels

str(eggproduction_lastcolumn_port_final)

tibble [220 x 5] (S3: tbl_df/tbl/data.frame)
 $ Mês observado       : Date[1:220], format: "2016-07-31" "2016-08-31" ...
 $ Tipo de produção    : chr [1:220] "ovos para incubação" "ovos para incubação" "ovos para incubação" "ovos para incubação" ...
 $ Processo de produção: chr [1:220] "todos" "todos" "todos" "todos" ...
 $ n_galinhas          : num [1:220] 57975000 57595000 57161000 56857000 57116000 ...
 $ n_ovos              : num [1:220] 1.15e+09 1.14e+09 1.09e+09 1.13e+09 1.10e+09 ...

# The same recoding, written with the pipe.
eggproduction_lastcolumn_port_final2 <- eggproduction_lastcolumn_port %>%
  mutate(
    `Tipo de produção` = recode(
      `Tipo de produção`,
      "hatching eggs" = "ovos para incubação",
      "table eggs" = "ovos de mesa"
    ),
    `Processo de produção` = recode(
      `Processo de produção`,
      "all" = "todos",
      "cage-free (non-organic)" = "livre de gaiolas (não orgânico)",
      "cage-free (organic)" = "livre de gaiolas (orgânico)"
    )
  )
str(eggproduction_lastcolumn_port_final2)

tibble [220 x 5] (S3: tbl_df/tbl/data.frame)
 $ Mês observado       : Date[1:220], format: "2016-07-31" "2016-08-31" ...
 $ Tipo de produção    : chr [1:220] "ovos para incubação" "ovos para incubação" "ovos para incubação" "ovos para incubação" ...
 $ Processo de produção: chr [1:220] "todos" "todos" "todos" "todos" ...
 $ n_galinhas          : num [1:220] 57975000 57595000 57161000 56857000 57116000 ...
 $ n_ovos              : num [1:220] 1.15e+09 1.14e+09 1.09e+09 1.13e+09 1.10e+09 ...

# Confirm that the piped and non-piped recodings produce exactly the same object.
identical(eggproduction_lastcolumn_port_final2, eggproduction_lastcolumn_port_final)

[1] TRUE

Exercício 7 - criando nova variável a partir da combinação de variáveis pré-existentes

Criar uma nova variável chamada ‘produtividade’ com a razão entre número de ovos (n_eggs) e número de galinhas (n_hens)

# dplyr approach: mutate() appends `produtividade`, the ratio of eggs to hens.
eggproduction_new_variables <- mutate(
  eggproduction_lastcolumn,
  produtividade = n_eggs / n_hens
)
str(eggproduction_new_variables)

tibble [220 x 6] (S3: tbl_df/tbl/data.frame)
 $ observed_month: Date[1:220], format: "2016-07-31" "2016-08-31" ...
 $ prod_type     : chr [1:220] "hatching eggs" "hatching eggs" "hatching eggs" "hatching eggs" ...
 $ prod_process  : chr [1:220] "all" "all" "all" "all" ...
 $ n_hens        : num [1:220] 57975000 57595000 57161000 56857000 57116000 ...
 $ n_eggs        : num [1:220] 1.15e+09 1.14e+09 1.09e+09 1.13e+09 1.10e+09 ...
 $ produtividade : num [1:220] 19.8 19.8 19.1 19.8 19.2 ...

# Base R approach: transform() adds the same column but returns a plain
# data.frame rather than a tibble.
eggproduction_new_variables2 <- transform(
  eggproduction_lastcolumn,
  produtividade = n_eggs / n_hens
)
str(eggproduction_new_variables2)

'data.frame':   220 obs. of  6 variables:
 $ observed_month: Date, format: "2016-07-31" "2016-08-31" ...
 $ prod_type     : chr  "hatching eggs" "hatching eggs" "hatching eggs" "hatching eggs" ...
 $ prod_process  : chr  "all" "all" "all" "all" ...
 $ n_hens        : num  57975000 57595000 57161000 56857000 57116000 ...
 $ n_eggs        : num  1.15e+09 1.14e+09 1.09e+09 1.13e+09 1.10e+09 ...
 $ produtividade : num  19.8 19.8 19.1 19.8 19.2 ...

# The mutate() version again, this time written with the pipe.
eggproduction_new_variables3 <- eggproduction_lastcolumn %>%
  mutate(produtividade = n_eggs / n_hens)
str(eggproduction_new_variables3)

tibble [220 x 6] (S3: tbl_df/tbl/data.frame)
 $ observed_month: Date[1:220], format: "2016-07-31" "2016-08-31" ...
 $ prod_type     : chr [1:220] "hatching eggs" "hatching eggs" "hatching eggs" "hatching eggs" ...
 $ prod_process  : chr [1:220] "all" "all" "all" "all" ...
 $ n_hens        : num [1:220] 57975000 57595000 57161000 56857000 57116000 ...
 $ n_eggs        : num [1:220] 1.15e+09 1.14e+09 1.09e+09 1.13e+09 1.10e+09 ...
 $ produtividade : num [1:220] 19.8 19.8 19.1 19.8 19.2 ...

# Confirm that the two mutate() variants yield exactly the same tibble.
identical(eggproduction_new_variables, eggproduction_new_variables3)

[1] TRUE

Exercício 8 - criando subconjunto de dados a partir de uma seleção específica das variáveis

Criar um dataset só de produtos cage free e criar novas variáveis separando n_eggs e n_hens por orgânicos e não orgânicos

# Recall the available column names before reshaping.
ls(eggproduction_lastcolumn)

[1] "n_eggs"         "n_hens"         "observed_month" "prod_process"  
[5] "prod_type"

# Drop the aggregated "all" rows (leaving the cage-free ones), split
# `prod_process` at the space into `process` and `type`, then spread the hen
# and egg counts into one column per `type`.
eggproduction_cage_free_organics <- eggproduction_lastcolumn %>%
  filter(prod_process != "all") %>%
  separate(col = prod_process, into = c("process", "type"), sep = " ") %>%
  pivot_wider(names_from = type, values_from = c(n_hens, n_eggs))

str(eggproduction_cage_free_organics)

tibble [55 x 7] (S3: tbl_df/tbl/data.frame)
 $ observed_month      : Date[1:55], format: "2016-08-31" "2016-09-30" ...
 $ prod_type           : chr [1:55] "table eggs" "table eggs" "table eggs" "table eggs" ...
 $ process             : chr [1:55] "cage-free" "cage-free" "cage-free" "cage-free" ...
 $ n_hens_(non-organic): num [1:55] 17000000 17000000 23500000 23500000 23500000 23500000 24400000 26100000 26600000 27300000 ...
 $ n_hens_(organic)    : num [1:55] 13500000 13500000 14100000 14100000 14100000 14100000 14100000 14500000 14600000 14600000 ...
 $ n_eggs_(non-organic): num [1:55] 3.98e+08 3.84e+08 5.46e+08 5.31e+08 5.43e+08 ...
 $ n_eggs_(organic)    : num [1:55] 3.16e+08 3.05e+08 3.28e+08 3.19e+08 3.26e+08 ...

# Preview the first rows of the reshaped table.
head(eggproduction_cage_free_organics)

# A tibble: 6 x 7
  observed_month prod_type  process   `n_hens_(non-organic)` `n_hens_(organic)`
  <date>         <chr>      <chr>                      <dbl>              <dbl>
1 2016-08-31     table eggs cage-free               17000000           13500000
2 2016-09-30     table eggs cage-free               17000000           13500000
3 2016-10-31     table eggs cage-free               23500000           14100000
4 2016-11-30     table eggs cage-free               23500000           14100000
5 2016-12-31     table eggs cage-free               23500000           14100000
6 2017-01-31     table eggs cage-free               23500000           14100000
# i 2 more variables: `n_eggs_(non-organic)` <dbl>, `n_eggs_(organic)` <dbl>

# Variant that names the two cage-free processes explicitly with %in%
# instead of excluding "all"; the reshaping steps are the same.
eggproduction_cage_free_organics2 <- eggproduction_lastcolumn %>%
  filter(
    prod_process %in% c("cage-free (non-organic)", "cage-free (organic)")
  ) %>%
  separate(col = prod_process, into = c("process", "type"), sep = " ") %>%
  pivot_wider(names_from = type, values_from = c(n_hens, n_eggs))
str(eggproduction_cage_free_organics2)

tibble [55 x 7] (S3: tbl_df/tbl/data.frame)
 $ observed_month      : Date[1:55], format: "2016-08-31" "2016-09-30" ...
 $ prod_type           : chr [1:55] "table eggs" "table eggs" "table eggs" "table eggs" ...
 $ process             : chr [1:55] "cage-free" "cage-free" "cage-free" "cage-free" ...
 $ n_hens_(non-organic): num [1:55] 17000000 17000000 23500000 23500000 23500000 23500000 24400000 26100000 26600000 27300000 ...
 $ n_hens_(organic)    : num [1:55] 13500000 13500000 14100000 14100000 14100000 14100000 14100000 14500000 14600000 14600000 ...
 $ n_eggs_(non-organic): num [1:55] 3.98e+08 3.84e+08 5.46e+08 5.31e+08 5.43e+08 ...
 $ n_eggs_(organic)    : num [1:55] 3.16e+08 3.05e+08 3.28e+08 3.19e+08 3.26e+08 ...

# Preview the first rows of the %in%-based variant.
head(eggproduction_cage_free_organics2)

# A tibble: 6 x 7
  observed_month prod_type  process   `n_hens_(non-organic)` `n_hens_(organic)`
  <date>         <chr>      <chr>                      <dbl>              <dbl>
1 2016-08-31     table eggs cage-free               17000000           13500000
2 2016-09-30     table eggs cage-free               17000000           13500000
3 2016-10-31     table eggs cage-free               23500000           14100000
4 2016-11-30     table eggs cage-free               23500000           14100000
5 2016-12-31     table eggs cage-free               23500000           14100000
6 2017-01-31     table eggs cage-free               23500000           14100000
# i 2 more variables: `n_eggs_(non-organic)` <dbl>, `n_eggs_(organic)` <dbl>

# Confirm that both filtering strategies yield exactly the same table.
identical(eggproduction_cage_free_organics2, eggproduction_cage_free_organics)

[1] TRUE

Exercício 9 - combinando duas variáveis numéricas em uma única coluna

Criar uma variável categórica com os fatores n_eggs e n_hens, combinando seus valores em uma única coluna

# Stack columns 4 and 5 (n_hens, n_eggs) into a name/value pair of columns,
# selecting them by position.
eggproduction_cage_free_organics_merged <- eggproduction_lastcolumn %>%
  pivot_longer(
    cols = 4:5,
    names_to = "n_hens_eggs",
    values_to = "valor"
  )
head(eggproduction_cage_free_organics_merged)

# A tibble: 6 x 5
  observed_month prod_type     prod_process n_hens_eggs      valor
  <date>         <chr>         <chr>        <chr>            <dbl>
1 2016-07-31     hatching eggs all          n_hens        57975000
2 2016-07-31     hatching eggs all          n_eggs      1147000000
3 2016-08-31     hatching eggs all          n_hens        57595000
4 2016-08-31     hatching eggs all          n_eggs      1142700000
5 2016-09-30     hatching eggs all          n_hens        57161000
6 2016-09-30     hatching eggs all          n_eggs      1093300000

# Same reshape, selecting the columns by their shared "n_" prefix.
eggproduction_cage_free_organics_merged2 <- pivot_longer(
  eggproduction_lastcolumn,
  cols = starts_with("n_"),
  names_to = "n_hens_eggs",
  values_to = "valor"
)
head(eggproduction_cage_free_organics_merged2)

# A tibble: 6 x 5
  observed_month prod_type     prod_process n_hens_eggs      valor
  <date>         <chr>         <chr>        <chr>            <dbl>
1 2016-07-31     hatching eggs all          n_hens        57975000
2 2016-07-31     hatching eggs all          n_eggs      1147000000
3 2016-08-31     hatching eggs all          n_hens        57595000
4 2016-08-31     hatching eggs all          n_eggs      1142700000
5 2016-09-30     hatching eggs all          n_hens        57161000
6 2016-09-30     hatching eggs all          n_eggs      1093300000

# Confirm that positional and prefix-based selection give exactly the same result.
identical(eggproduction_cage_free_organics_merged, eggproduction_cage_free_organics_merged2)

[1] TRUE

# Same reshape, naming the two columns explicitly.
eggproduction_cage_free_organics_merged3 <- pivot_longer(
  eggproduction_lastcolumn,
  cols = c(n_hens, n_eggs),
  names_to = "n_hens_eggs",
  values_to = "valor"
)
head(eggproduction_cage_free_organics_merged3)

# A tibble: 6 x 5
  observed_month prod_type     prod_process n_hens_eggs      valor
  <date>         <chr>         <chr>        <chr>            <dbl>
1 2016-07-31     hatching eggs all          n_hens        57975000
2 2016-07-31     hatching eggs all          n_eggs      1147000000
3 2016-08-31     hatching eggs all          n_hens        57595000
4 2016-08-31     hatching eggs all          n_eggs      1142700000
5 2016-09-30     hatching eggs all          n_hens        57161000
6 2016-09-30     hatching eggs all          n_eggs      1093300000

Exercício 10 - sumarizando dados (média, desvio, etc) de acordo com fatores específicos de cada variável

Faça um sumário dos dados com as médias de n_hens e n_eggs por ano, por produto e por processo

# Recall the available column names before summarising.
ls(eggproduction_lastcolumn)

[1] "n_eggs"         "n_hens"         "observed_month" "prod_process"  
[5] "prod_type"

# Overall summary: grand mean of eggs and hens across the whole table.
medias_geral <- summarise(
  eggproduction_lastcolumn,
  mean_eggs = mean(n_eggs),
  mean_hens = mean(n_hens)
)

medias_geral

# A tibble: 1 x 2
    mean_eggs  mean_hens
        <dbl>      <dbl>
1 2606667580. 110839873.

# Mean of n_hens and n_eggs per year.
# separate() splits the date at "-" into text columns; only the year is kept.
# across() replaces the superseded summarise_all(), and na.rm is spelled TRUE
# because the shorthand T is an ordinary variable that can be reassigned.
eggproduction_mean_values_ano <- eggproduction_lastcolumn %>%
  separate(observed_month, into = c("ano", "mes", "dia"), sep = "-") %>%
  select(-mes, -dia, -prod_type, -prod_process) %>%
  group_by(ano) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
head(eggproduction_mean_values_ano)

# A tibble: 6 x 3
  ano       n_hens      n_eggs
  <chr>      <dbl>       <dbl>
1 2016  107159545. 2529583907.
2 2017  104577833. 2441134654.
3 2018  110287312. 2560586974.
4 2019  115879688. 2729080714.
5 2020  116478896. 2777331511.
6 2021   93420500  2237599447.

# Mean of n_hens and n_eggs per product type.
# across() replaces the superseded summarise_all(), and na.rm is spelled TRUE
# because the shorthand T is an ordinary variable that can be reassigned.
eggproduction_mean_values_prodtype <- eggproduction_lastcolumn %>%
  select(-observed_month, -prod_process) %>%
  group_by(prod_type) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
head(eggproduction_mean_values_prodtype)

# A tibble: 2 x 3
  prod_type         n_hens      n_eggs
  <chr>              <dbl>       <dbl>
1 hatching eggs  61575200  1168747273.
2 table eggs    127261430. 3085974349.

# Mean of n_hens and n_eggs per production process.
# across() replaces the superseded summarise_all(), and na.rm is spelled TRUE
# because the shorthand T is an ordinary variable that can be reassigned.
eggproduction_mean_values_process <- eggproduction_lastcolumn %>%
  select(-observed_month, -prod_type) %>%
  group_by(prod_process) %>%
  summarise(across(everything(), function(x) mean(x, na.rm = TRUE)))
head(eggproduction_mean_values_process)

# A tibble: 3 x 3
  prod_process                n_hens      n_eggs
  <chr>                        <dbl>       <dbl>
1 all                     192233291. 4529145455.
2 cage-free (non-organic)  43494636. 1011675130.
3 cage-free (organic)      15398273.  356704280.

# Means by year, product type and production process at the same time.
# across() replaces the superseded summarise_all(); .groups = "drop_last"
# states explicitly that the result stays grouped by ano and prod_type.
# na.rm is spelled TRUE because the shorthand T can be reassigned.
eggproduction_mean_values_geral <- eggproduction_lastcolumn %>%
  separate(observed_month, into = c("ano", "mes", "dia"), sep = "-") %>%
  select(-mes, -dia) %>%
  group_by(ano, prod_type, prod_process) %>%
  summarise(
    across(everything(), function(x) mean(x, na.rm = TRUE)),
    .groups = "drop_last"
  )
head(eggproduction_mean_values_geral)

# A tibble: 6 x 5
# Groups:   ano, prod_type [4]
  ano   prod_type     prod_process                n_hens      n_eggs
  <chr> <chr>         <chr>                        <dbl>       <dbl>
1 2016  hatching eggs all                      57409000  1123200000 
2 2016  table eggs    all                     306542667. 7486216667.
3 2016  table eggs    cage-free (non-organic)  20900000   480326307.
4 2016  table eggs    cage-free (organic)      13860000   318542883.
5 2017  hatching eggs all                      59521833. 1123725000 
6 2017  table eggs    all                     314964500  7637800000

# The same three-way summary without dropping the text columns first:
# across(where(is.numeric)) restricts the mean to the numeric columns and
# replaces the superseded summarise_if(). .groups = "drop_last" states
# explicitly that the result stays grouped by ano and prod_type, and na.rm
# is spelled TRUE because the shorthand T can be reassigned.
eggproduction_mean_values_geral2 <- eggproduction_lastcolumn %>%
  separate(observed_month, into = c("ano", "mes", "dia"), sep = "-") %>%
  group_by(ano, prod_type, prod_process) %>%
  summarise(
    across(where(is.numeric), function(x) mean(x, na.rm = TRUE)),
    .groups = "drop_last"
  )
head(eggproduction_mean_values_geral2)

# A tibble: 6 x 5
# Groups:   ano, prod_type [4]
  ano   prod_type     prod_process                n_hens      n_eggs
  <chr> <chr>         <chr>                        <dbl>       <dbl>
1 2016  hatching eggs all                      57409000  1123200000 
2 2016  table eggs    all                     306542667. 7486216667.
3 2016  table eggs    cage-free (non-organic)  20900000   480326307.
4 2016  table eggs    cage-free (organic)      13860000   318542883.
5 2017  hatching eggs all                      59521833. 1123725000 
6 2017  table eggs    all                     314964500  7637800000