Tarefa_2_final

Author

Licet Fernanda Calambás Trochez

Published

April 5, 2024

Tip

#####Subir tabela de dados

# Read the egg-production CSV from the TidyTuesday GitHub repository
# (2023-04-11 data folder) into a tibble.
eggproduction <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-04-11/egg-production.csv"
)

Rows: 220 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (3): prod_type, prod_process, source
dbl  (2): n_hens, n_eggs
date (1): observed_month

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# Auto-print the tibble to inspect its columns and first rows.
eggproduction

# A tibble: 220 × 6
   observed_month prod_type     prod_process   n_hens     n_eggs source         
   <date>         <chr>         <chr>           <dbl>      <dbl> <chr>          
 1 2016-07-31     hatching eggs all          57975000 1147000000 ChicEggs-09-23…
 2 2016-08-31     hatching eggs all          57595000 1142700000 ChicEggs-10-21…
 3 2016-09-30     hatching eggs all          57161000 1093300000 ChicEggs-11-22…
 4 2016-10-31     hatching eggs all          56857000 1126700000 ChicEggs-12-23…
 5 2016-11-30     hatching eggs all          57116000 1096600000 ChicEggs-01-24…
 6 2016-12-31     hatching eggs all          57750000 1132900000 ChicEggs-02-28…
 7 2017-01-31     hatching eggs all          57991000 1123400000 ChicEggs-03-21…
 8 2017-02-28     hatching eggs all          58286000 1014500000 ChicEggs-04-21…
 9 2017-03-31     hatching eggs all          58735000 1128500000 ChicEggs-05-22…
10 2017-04-30     hatching eggs all          59072000 1097200000 ChicEggs-06-23…
# ℹ 210 more rows

Note

###Exercício 1-Remover a última coluna do dataset (a coluna chamada source)

library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.2     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.2     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
# Drop the `source` column: inside select(), a leading minus sign excludes
# the named column and keeps every other one.
eggproduction2 <- eggproduction %>%
  select(-source)
head(eggproduction2)

# A tibble: 6 × 5
  observed_month prod_type     prod_process   n_hens     n_eggs
  <date>         <chr>         <chr>           <dbl>      <dbl>
1 2016-07-31     hatching eggs all          57975000 1147000000
2 2016-08-31     hatching eggs all          57595000 1142700000
3 2016-09-30     hatching eggs all          57161000 1093300000
4 2016-10-31     hatching eggs all          56857000 1126700000
5 2016-11-30     hatching eggs all          57116000 1096600000
6 2016-12-31     hatching eggs all          57750000 1132900000

#Outras formas de fazer
#eggproduction3 <- eggproduction[, -6]
#eggproduction4 <- eggproduction[1:5]

Note

###Exercício 2-Selecionar apenas as variáveis numéricas

# Keep only the two numeric columns, n_hens and n_eggs.
numericas <- eggproduction2 %>%
  select(n_hens, n_eggs)
head(numericas)

# A tibble: 6 × 2
    n_hens     n_eggs
     <dbl>      <dbl>
1 57975000 1147000000
2 57595000 1142700000
3 57161000 1093300000
4 56857000 1126700000
5 57116000 1096600000
6 57750000 1132900000

###outra forma de fazer
#num<-eggproduction2%>%select_if(is.numeric) # select_if() foi substituída por select(where(...))
#num2<-select(eggproduction2,where(is.numeric))

Note

###Exercício 3-Criar um dataset só com hatching e outro dataset apenas com table eggs

# Keep only the rows whose product type is "hatching eggs".
hatching <- eggproduction2 %>%
  filter(prod_type == "hatching eggs")
head(hatching)

# A tibble: 6 × 5
  observed_month prod_type     prod_process   n_hens     n_eggs
  <date>         <chr>         <chr>           <dbl>      <dbl>
1 2016-07-31     hatching eggs all          57975000 1147000000
2 2016-08-31     hatching eggs all          57595000 1142700000
3 2016-09-30     hatching eggs all          57161000 1093300000
4 2016-10-31     hatching eggs all          56857000 1126700000
5 2016-11-30     hatching eggs all          57116000 1096600000
6 2016-12-31     hatching eggs all          57750000 1132900000

##outra forma de fazer
#hatch<-eggproduction2%>%filter(prod_type=="hatching eggs")#filter é do tidyverse
#hatch2<-eggproduction2%>%slice(1:55)

# Keep only the rows whose product type is "table eggs".
table_eg <- eggproduction2 %>%
  filter(prod_type == "table eggs")
head(table_eg)

# A tibble: 6 × 5
  observed_month prod_type  prod_process    n_hens     n_eggs
  <date>         <chr>      <chr>            <dbl>      <dbl>
1 2016-07-31     table eggs all          299669000 7350500000
2 2016-08-31     table eggs all          300917000 7409000000
3 2016-09-30     table eggs all          303270000 7204200000
4 2016-10-31     table eggs all          305852000 7534700000
5 2016-11-30     table eggs all          310728000 7468500000
6 2016-12-31     table eggs all          318820000 7950400000

Note

###Exercício 4-Criar um dataset só com table eggs e todos os processos (all)

# Table-egg rows whose production process is "all".
# Conditions separated by commas in filter() are combined with AND.
new <- eggproduction2 %>%
  filter(
    prod_process == "all",
    prod_type == "table eggs"
  )
new

# A tibble: 55 × 5
   observed_month prod_type  prod_process    n_hens     n_eggs
   <date>         <chr>      <chr>            <dbl>      <dbl>
 1 2016-07-31     table eggs all          299669000 7350500000
 2 2016-08-31     table eggs all          300917000 7409000000
 3 2016-09-30     table eggs all          303270000 7204200000
 4 2016-10-31     table eggs all          305852000 7534700000
 5 2016-11-30     table eggs all          310728000 7468500000
 6 2016-12-31     table eggs all          318820000 7950400000
 7 2017-01-31     table eggs all          318109000 7903500000
 8 2017-02-28     table eggs all          316807000 7079800000
 9 2017-03-31     table eggs all          316147000 7842000000
10 2017-04-30     table eggs all          314918000 7545900000
# ℹ 45 more rows

#outra forma de fazer
#new2<-eggproduction2%>%subset(prod_process=="all"& prod_type!="hatching eggs")
#new2

Note

###Exercício 5-Mudar os nomes das variáveis para português

ls(eggproduction2)# lists the column names; ls() returns them in alphabetical order

[1] "n_eggs"         "n_hens"         "observed_month" "prod_process"  
[5] "prod_type"

## Build a new dataset with Portuguese column names (new_name = old_name).
## A name made of more than one word has to be wrapped in backticks or quotes.
## NOTE: the spellings `numero_galilnhas` and `produto_procesado` are kept
## as-is because later chunks refer to these exact column names.
port <- eggproduction2 %>%
  rename(
    mes_observacao = observed_month,
    tipo_produto = prod_type,
    produto_procesado = prod_process,
    numero_galilnhas = n_hens,
    numero_ovos = n_eggs
  )
head(port)

# A tibble: 6 × 5
  mes_observacao tipo_produto  produto_procesado numero_galilnhas numero_ovos
  <date>         <chr>         <chr>                        <dbl>       <dbl>
1 2016-07-31     hatching eggs all                       57975000  1147000000
2 2016-08-31     hatching eggs all                       57595000  1142700000
3 2016-09-30     hatching eggs all                       57161000  1093300000
4 2016-10-31     hatching eggs all                       56857000  1126700000
5 2016-11-30     hatching eggs all                       57116000  1096600000
6 2016-12-31     hatching eggs all                       57750000  1132900000

Note

###Exercício 6-Mudar os nomes dos fatores para português

# Translate the category labels of both character columns to Portuguese.
# The recode() keys must match the data values exactly: the cage-free levels
# are written with a space before the parenthesis, e.g. "cage-free (organic)".
# recode() leaves any value without a matching key unchanged, so a key
# without that space silently translates nothing.
port2 <- port %>%
  mutate(
    tipo_produto = recode(
      tipo_produto,
      `hatching eggs` = "incubados",
      `table eggs` = "Ovos de mesa"
    ),
    produto_procesado = recode(
      produto_procesado,
      all = "Gaiolas + Livres",
      `cage-free (non-organic)` = "Livres:Não-organicos",
      `cage-free (organic)` = "Livres: organicos"
    )
  )
head(port2)

# A tibble: 6 × 5
  mes_observacao tipo_produto produto_procesado numero_galilnhas numero_ovos
  <date>         <chr>        <chr>                        <dbl>       <dbl>
1 2016-07-31     incubados    Gaiolas + Livres          57975000  1147000000
2 2016-08-31     incubados    Gaiolas + Livres          57595000  1142700000
3 2016-09-30     incubados    Gaiolas + Livres          57161000  1093300000
4 2016-10-31     incubados    Gaiolas + Livres          56857000  1126700000
5 2016-11-30     incubados    Gaiolas + Livres          57116000  1096600000
6 2016-12-31     incubados    Gaiolas + Livres          57750000  1132900000

Note

###Exercício 7-Criar uma nova variável chamada ‘produtividade’ com a razão entre número de ovos (n_eggs) e número de galinhas

# Add `produtividade`, the ratio of eggs to hens, as a new column.
# mutate() is the tidyverse way to create a derived column.
produ <- port %>%
  mutate(produtividade = numero_ovos / numero_galilnhas)
head(produ)

# A tibble: 6 × 6
  mes_observacao tipo_produto  produto_procesado numero_galilnhas numero_ovos
  <date>         <chr>         <chr>                        <dbl>       <dbl>
1 2016-07-31     hatching eggs all                       57975000  1147000000
2 2016-08-31     hatching eggs all                       57595000  1142700000
3 2016-09-30     hatching eggs all                       57161000  1093300000
4 2016-10-31     hatching eggs all                       56857000  1126700000
5 2016-11-30     hatching eggs all                       57116000  1096600000
6 2016-12-31     hatching eggs all                       57750000  1132900000
# ℹ 1 more variable: produtividade <dbl>

##outra forma de fazer
#port3<-port%>%transform(Produtividade=numero_ovos/numero_galilnhas)
#port3

Note

###Exercício 8-Criar um dataset só de produtos cage free e criar novas variáveis separando n_eggs e n_hens por orgânicos e não orgânicos

# Keep every row whose process is not "all", split prod_process at the space
# into `process` and `type` (the parenthesised tag), then spread n_hens and
# n_eggs into one column per `type` value.
sep <- eggproduction %>%
  filter(prod_process != "all") %>%
  separate(
    col = prod_process,
    into = c("process", "type"),
    sep = " "
  ) %>%
  pivot_wider(
    names_from = type,
    values_from = c(n_hens, n_eggs)
  )
head(sep)

# A tibble: 6 × 8
  observed_month prod_type  process   source              `n_hens_(non-organic)`
  <date>         <chr>      <chr>     <chr>                                <dbl>
1 2016-08-31     table eggs cage-free PY20160919MCAGEFRE…               17000000
2 2016-09-30     table eggs cage-free PY20161004MCAGEFRE…               17000000
3 2016-10-31     table eggs cage-free PY20161102MCAGEFRE…               23500000
4 2016-11-30     table eggs cage-free PY20161205MCAGEFRE…               23500000
5 2016-12-31     table eggs cage-free PY20170109MCAGEFRE…               23500000
6 2017-01-31     table eggs cage-free PY20170206MCAGEFRE…               23500000
# ℹ 3 more variables: `n_hens_(organic)` <dbl>, `n_eggs_(non-organic)` <dbl>,
#   `n_eggs_(organic)` <dbl>

Note

###Exercício 9-Juntar as variáveis n_eggs e n_hens em uma única coluna

# Stack n_hens and n_eggs into a single value column: `tipo` records which
# original column each value came from and `Value` holds the number.
junto <- eggproduction2 %>%
  pivot_longer(
    cols = c(n_hens, n_eggs),
    names_to = "tipo",
    values_to = "Value"
  )
head(junto)

# A tibble: 6 × 5
  observed_month prod_type     prod_process tipo        Value
  <date>         <chr>         <chr>        <chr>       <dbl>
1 2016-07-31     hatching eggs all          n_hens   57975000
2 2016-07-31     hatching eggs all          n_eggs 1147000000
3 2016-08-31     hatching eggs all          n_hens   57595000
4 2016-08-31     hatching eggs all          n_eggs 1142700000
5 2016-09-30     hatching eggs all          n_hens   57161000
6 2016-09-30     hatching eggs all          n_eggs 1093300000

#junto2<-eggproduction2 %>% pivot_longer(cols = 4:5, names_to = "tipo",values_to = "Value")
#head(junto2)

Note

###Exercício 10- Faça um sumário dos dados com as médias de n_hens e n_eggs por ano, por produto e por processo

# Mean number of eggs and hens per year, product type and process.
# observed_month is split on "-" into year / month / day (as text); month,
# day and source are then dropped before grouping.
# The result stays grouped by year and prod_type, since summarise() only
# removes the last grouping level by default.
# NOTE(review): the object name `sum` is the same as base R's sum() function.
sum <- eggproduction %>%
  separate(
    col = observed_month,
    into = c("year", "month", "day"),
    sep = "-"
  ) %>%
  select(-c(month, day, source)) %>%
  group_by(year, prod_type, prod_process) %>%
  summarise(
    Média_ovos = mean(n_eggs),
    Média_galinha = mean(n_hens)
  )

`summarise()` has grouped output by 'year', 'prod_type'. You can override using
the `.groups` argument.

# Auto-print the summary table.
sum

# A tibble: 24 × 5
# Groups:   year, prod_type [12]
   year  prod_type     prod_process             Média_ovos Média_galinha
   <chr> <chr>         <chr>                         <dbl>         <dbl>
 1 2016  hatching eggs all                     1123200000      57409000 
 2 2016  table eggs    all                     7486216667.    306542667.
 3 2016  table eggs    cage-free (non-organic)  480326307.     20900000 
 4 2016  table eggs    cage-free (organic)      318542883.     13860000 
 5 2017  hatching eggs all                     1123725000      59521833.
 6 2017  table eggs    all                     7637800000     314964500 
 7 2017  table eggs    cage-free (non-organic)  672053117.     29358333.
 8 2017  table eggs    cage-free (organic)      330960497.     14466667.
 9 2018  hatching eggs all                     1158825000      61591167.
10 2018  table eggs    all                     7834850000     324930250 
# ℹ 14 more rows

# Show the first six rows of the summary table.
head(sum)

# A tibble: 6 × 5
# Groups:   year, prod_type [4]
  year  prod_type     prod_process             Média_ovos Média_galinha
  <chr> <chr>         <chr>                         <dbl>         <dbl>
1 2016  hatching eggs all                     1123200000      57409000 
2 2016  table eggs    all                     7486216667.    306542667.
3 2016  table eggs    cage-free (non-organic)  480326307.     20900000 
4 2016  table eggs    cage-free (organic)      318542883.     13860000 
5 2017  hatching eggs all                     1123725000      59521833.
6 2017  table eggs    all                     7637800000     314964500

##Outras formas de fazer
#sum2<-eggproduction%>%separate(observed_month, into = c("year","month","day"),sep="-")%>%group_by(year,prod_type,prod_process)%>%summarise(Média_ovos=mean(n_eggs),Média_galinha=mean(n_hens))
#sum2
#head(sum2)

#sum3<-eggproduction%>%separate(observed_month, into = c("year","month","day"),sep="-")%>%group_by(year,prod_type,prod_process)%>%summarise_if(is.numeric, mean)
#sum3
#head(sum3)

Tarefa finalizada