Introducción

library(tidyverse)
library(visdat)
library(cowplot)
library(ggrepel)
library(mapproj)
library(ggthemes) 
library(here)
library(extrafont)
library(extrafont)
library(knitr)
library(magick)

Conjunto de datos

# Load the TidyTuesday (2020-07-28) penguins CSV straight from GitHub into a
# tibble; readr guesses the column types and reports them as a message.
penguins <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-28/penguins.csv"
)
## Parsed with column specification:
## cols(
##   species = col_character(),
##   island = col_character(),
##   bill_length_mm = col_double(),
##   bill_depth_mm = col_double(),
##   flipper_length_mm = col_double(),
##   body_mass_g = col_double(),
##   sex = col_character(),
##   year = col_double()
## )
# Render the first 15 rows as a captioned table.
penguins %>%
  head(n = 15) %>%
  kable(caption = "Penguins data. Source: Gorman, Williams and Fraser, 2014 ")
Penguins data. Source: Gorman, Williams and Fraser, 2014
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex year
Adelie Torgersen 39.1 18.7 181 3750 male 2007
Adelie Torgersen 39.5 17.4 186 3800 female 2007
Adelie Torgersen 40.3 18.0 195 3250 female 2007
Adelie Torgersen NA NA NA NA NA 2007
Adelie Torgersen 36.7 19.3 193 3450 female 2007
Adelie Torgersen 39.3 20.6 190 3650 male 2007
Adelie Torgersen 38.9 17.8 181 3625 female 2007
Adelie Torgersen 39.2 19.6 195 4675 male 2007
Adelie Torgersen 34.1 18.1 193 3475 NA 2007
Adelie Torgersen 42.0 20.2 190 4250 NA 2007
Adelie Torgersen 37.8 17.1 186 3300 NA 2007
Adelie Torgersen 37.8 17.3 180 3700 NA 2007
Adelie Torgersen 41.1 17.6 182 3200 female 2007
Adelie Torgersen 38.6 21.2 191 3800 male 2007
Adelie Torgersen 34.6 21.1 198 4400 male 2007

str

# Print the internal structure of the tibble (column types and first values).
penguins %>% str()
## tibble [344 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ species          : chr [1:344] "Adelie" "Adelie" "Adelie" "Adelie" ...
##  $ island           : chr [1:344] "Torgersen" "Torgersen" "Torgersen" "Torgersen" ...
##  $ bill_length_mm   : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
##  $ bill_depth_mm    : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
##  $ flipper_length_mm: num [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
##  $ body_mass_g      : num [1:344] 3750 3800 3250 NA 3450 ...
##  $ sex              : chr [1:344] "male" "female" "female" NA ...
##  $ year             : num [1:344] 2007 2007 2007 2007 2007 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   species = col_character(),
##   ..   island = col_character(),
##   ..   bill_length_mm = col_double(),
##   ..   bill_depth_mm = col_double(),
##   ..   flipper_length_mm = col_double(),
##   ..   body_mass_g = col_double(),
##   ..   sex = col_character(),
##   ..   year = col_double()
##   .. )
# Same structure dump after grouping by year: the data are unchanged, but a
# "groups" attribute listing the row indices of each year is attached.
penguins %>%
  group_by(year) %>%
  str()
## tibble [344 × 8] (S3: grouped_df/tbl_df/tbl/data.frame)
##  $ species          : chr [1:344] "Adelie" "Adelie" "Adelie" "Adelie" ...
##  $ island           : chr [1:344] "Torgersen" "Torgersen" "Torgersen" "Torgersen" ...
##  $ bill_length_mm   : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
##  $ bill_depth_mm    : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
##  $ flipper_length_mm: num [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
##  $ body_mass_g      : num [1:344] 3750 3800 3250 NA 3450 ...
##  $ sex              : chr [1:344] "male" "female" "female" NA ...
##  $ year             : num [1:344] 2007 2007 2007 2007 2007 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   species = col_character(),
##   ..   island = col_character(),
##   ..   bill_length_mm = col_double(),
##   ..   bill_depth_mm = col_double(),
##   ..   flipper_length_mm = col_double(),
##   ..   body_mass_g = col_double(),
##   ..   sex = col_character(),
##   ..   year = col_double()
##   .. )
##  - attr(*, "groups")= tibble [3 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ year : num [1:3] 2007 2008 2009
##   ..$ .rows: list<int> [1:3] 
##   .. ..$ : int [1:110] 1 2 3 4 5 6 7 8 9 10 ...
##   .. ..$ : int [1:114] 51 52 53 54 55 56 57 58 59 60 ...
##   .. ..$ : int [1:120] 101 102 103 104 105 106 107 108 109 110 ...
##   .. ..@ ptype: int(0) 
##   ..- attr(*, ".drop")= logi TRUE

dplyr

\[dataset\text{%>%}f_1(var_1,var_2,\ldots)\text{%>%}f_2(var_k,\ldots){\leftrightarrow}f_2(f_1(dataset,var_1,var_2,\ldots),var_k,\ldots)\]

arrange

# Sort by species (ascending), then by body mass from heaviest to lightest.
penguins %>%
  arrange(species, desc(body_mass_g)) %>%
  head()
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Biscoe           43.2          19                197        4775 male 
## 2 Adelie  Biscoe           41            20                203        4725 male 
## 3 Adelie  Torge…           42.9          17.6              196        4700 male 
## 4 Adelie  Torge…           39.2          19.6              195        4675 male 
## 5 Adelie  Dream            39.8          19.1              184        4650 male 
## 6 Adelie  Dream            39.6          18.8              190        4600 male 
## # … with 1 more variable: year <dbl>
# Heaviest penguins first, regardless of species.
penguins %>%
  arrange(desc(body_mass_g)) %>%
  head()
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Gentoo  Biscoe           49.2          15.2              221        6300 male 
## 2 Gentoo  Biscoe           59.6          17                230        6050 male 
## 3 Gentoo  Biscoe           51.1          16.3              220        6000 male 
## 4 Gentoo  Biscoe           48.8          16.2              222        6000 male 
## 5 Gentoo  Biscoe           45.2          16.4              223        5950 male 
## 6 Gentoo  Biscoe           49.8          15.9              229        5950 male 
## # … with 1 more variable: year <dbl>
# Sort by island, breaking ties with the shortest bill first.
penguins %>%
  arrange(island, bill_length_mm) %>%
  head()
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Biscoe           34.5          18.1              187        2900 fema…
## 2 Adelie  Biscoe           35            17.9              190        3450 fema…
## 3 Adelie  Biscoe           35            17.9              192        3725 fema…
## 4 Adelie  Biscoe           35.3          18.9              187        3800 fema…
## 5 Adelie  Biscoe           35.5          16.2              195        3350 fema…
## 6 Adelie  Biscoe           35.7          16.9              185        3150 fema…
## # … with 1 more variable: year <dbl>
# Sort by sex, then year, then bill depth from deepest to shallowest.
penguins %>%
  arrange(sex, year, desc(bill_depth_mm)) %>%
  head()
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Torge…           36.7          19.3              193        3450 fema…
## 2 Adelie  Dream            37.6          19.3              181        3300 fema…
## 3 Adelie  Biscoe           35.9          19.2              189        3800 fema…
## 4 Adelie  Torge…           38.7          19                195        3450 fema…
## 5 Adelie  Biscoe           35.3          18.9              187        3800 fema…
## 6 Chinst… Dream            46            18.9              195        4150 fema…
## # … with 1 more variable: year <dbl>
# Function-call form: the data frame is passed explicitly as the first
# argument of arrange(); only the final head() is piped.
arrange(penguins, island , bill_length_mm) %>% head()
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Biscoe           34.5          18.1              187        2900 fema…
## 2 Adelie  Biscoe           35            17.9              190        3450 fema…
## 3 Adelie  Biscoe           35            17.9              192        3725 fema…
## 4 Adelie  Biscoe           35.3          18.9              187        3800 fema…
## 5 Adelie  Biscoe           35.5          16.2              195        3350 fema…
## 6 Adelie  Biscoe           35.7          16.9              185        3150 fema…
## # … with 1 more variable: year <dbl>
# Pipe form: %>% supplies penguins as the first argument of arrange(), so the
# result is the same as calling arrange(penguins, island, bill_length_mm).
penguins %>% arrange(island , bill_length_mm) %>% head()
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Biscoe           34.5          18.1              187        2900 fema…
## 2 Adelie  Biscoe           35            17.9              190        3450 fema…
## 3 Adelie  Biscoe           35            17.9              192        3725 fema…
## 4 Adelie  Biscoe           35.3          18.9              187        3800 fema…
## 5 Adelie  Biscoe           35.5          16.2              195        3350 fema…
## 6 Adelie  Biscoe           35.7          16.9              185        3150 fema…
## # … with 1 more variable: year <dbl>

count

# Total number of rows, returned as a one-row tibble with column `n`.
count(penguins)
## # A tibble: 1 x 1
##       n
##   <int>
## 1   344
# Rows per island, largest group first, with the count column named "cantidad".
count(penguins, island, sort = TRUE, name = "cantidad")
## # A tibble: 3 x 2
##   island    cantidad
##   <chr>        <int>
## 1 Biscoe         168
## 2 Dream          124
## 3 Torgersen       52

filter

Con dplyr

# Keep observations recorded in 2008 or later.
penguins %>%
  filter(year >= 2008) %>%
  head()
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Biscoe           39.6          17.7              186        3500 fema…
## 2 Adelie  Biscoe           40.1          18.9              188        4300 male 
## 3 Adelie  Biscoe           35            17.9              190        3450 fema…
## 4 Adelie  Biscoe           42            19.5              200        4050 male 
## 5 Adelie  Biscoe           34.5          18.1              187        2900 fema…
## 6 Adelie  Biscoe           41.4          18.6              191        3700 male 
## # … with 1 more variable: year <dbl>
# Adelie penguins observed in 2007; comma-separated conditions in filter()
# are combined with a logical AND.
penguins %>%
  filter(year == 2007, species == "Adelie") %>%
  head()
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Torge…           39.1          18.7              181        3750 male 
## 2 Adelie  Torge…           39.5          17.4              186        3800 fema…
## 3 Adelie  Torge…           40.3          18                195        3250 fema…
## 4 Adelie  Torge…           NA            NA                 NA          NA <NA> 
## 5 Adelie  Torge…           36.7          19.3              193        3450 fema…
## 6 Adelie  Torge…           39.3          20.6              190        3650 male 
## # … with 1 more variable: year <dbl>
# Adelie penguins living on Torgersen island.
penguins %>%
  filter(island == "Torgersen" & species == "Adelie") %>%
  head()
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Torge…           39.1          18.7              181        3750 male 
## 2 Adelie  Torge…           39.5          17.4              186        3800 fema…
## 3 Adelie  Torge…           40.3          18                195        3250 fema…
## 4 Adelie  Torge…           NA            NA                 NA          NA <NA> 
## 5 Adelie  Torge…           36.7          19.3              193        3450 fema…
## 6 Adelie  Torge…           39.3          20.6              190        3650 male 
## # … with 1 more variable: year <dbl>
# Males observed in 2008 or later.
penguins %>%
  filter(year >= 2008, sex == "male") %>%
  head()
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Biscoe           40.1          18.9              188        4300 male 
## 2 Adelie  Biscoe           42            19.5              200        4050 male 
## 3 Adelie  Biscoe           41.4          18.6              191        3700 male 
## 4 Adelie  Biscoe           40.6          18.8              193        3800 male 
## 5 Adelie  Biscoe           37.6          19.1              194        3750 male 
## 6 Adelie  Biscoe           41.3          21.1              195        4400 male 
## # … with 1 more variable: year <dbl>
# Observations from 2007 or 2008, using set membership instead of chained ORs.
penguins %>%
  filter(year %in% c(2007, 2008)) %>%
  head()
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Torge…           39.1          18.7              181        3750 male 
## 2 Adelie  Torge…           39.5          17.4              186        3800 fema…
## 3 Adelie  Torge…           40.3          18                195        3250 fema…
## 4 Adelie  Torge…           NA            NA                 NA          NA <NA> 
## 5 Adelie  Torge…           36.7          19.3              193        3450 fema…
## 6 Adelie  Torge…           39.3          20.6              190        3650 male 
## # … with 1 more variable: year <dbl>
# 2007-2008 observations of every species except Adelie.
penguins %>%
  filter(year %in% c(2007, 2008), species != "Adelie") %>%
  head()
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Gentoo  Biscoe           46.1          13.2              211        4500 fema…
## 2 Gentoo  Biscoe           50            16.3              230        5700 male 
## 3 Gentoo  Biscoe           48.7          14.1              210        4450 fema…
## 4 Gentoo  Biscoe           50            15.2              218        5700 male 
## 5 Gentoo  Biscoe           47.6          14.5              215        5400 male 
## 6 Gentoo  Biscoe           46.5          13.5              210        4550 fema…
## # … with 1 more variable: year <dbl>
# Non-Adelie penguins recorded in 2007 or 2008, written as nested calls.
head(filter(penguins, year %in% c(2007, 2008) & species != "Adelie"))
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Gentoo  Biscoe           46.1          13.2              211        4500 fema…
## 2 Gentoo  Biscoe           50            16.3              230        5700 male 
## 3 Gentoo  Biscoe           48.7          14.1              210        4450 fema…
## 4 Gentoo  Biscoe           50            15.2              218        5700 male 
## 5 Gentoo  Biscoe           47.6          14.5              215        5400 male 
## 6 Gentoo  Biscoe           46.5          13.5              210        4550 fema…
## # … with 1 more variable: year <dbl>
# Adelie penguins recorded in 2007 or 2008.
penguins %>%
  filter(year %in% c(2007, 2008) & species == "Adelie") %>%
  head()
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Torge…           39.1          18.7              181        3750 male 
## 2 Adelie  Torge…           39.5          17.4              186        3800 fema…
## 3 Adelie  Torge…           40.3          18                195        3250 fema…
## 4 Adelie  Torge…           NA            NA                 NA          NA <NA> 
## 5 Adelie  Torge…           36.7          19.3              193        3450 fema…
## 6 Adelie  Torge…           39.3          20.6              190        3650 male 
## # … with 1 more variable: year <dbl>
# 2007-2008 observations from every island except Torgersen.
penguins %>%
  filter(year %in% c(2007, 2008), island != "Torgersen") %>%
  head()
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Biscoe           37.8          18.3              174        3400 fema…
## 2 Adelie  Biscoe           37.7          18.7              180        3600 male 
## 3 Adelie  Biscoe           35.9          19.2              189        3800 fema…
## 4 Adelie  Biscoe           38.2          18.1              185        3950 male 
## 5 Adelie  Biscoe           38.8          17.2              180        3800 male 
## 6 Adelie  Biscoe           35.3          18.9              187        3800 fema…
## # … with 1 more variable: year <dbl>
# Torgersen island observations from 2007.
penguins %>%
  filter(year == 2007 & island == "Torgersen") %>%
  head()
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Torge…           39.1          18.7              181        3750 male 
## 2 Adelie  Torge…           39.5          17.4              186        3800 fema…
## 3 Adelie  Torge…           40.3          18                195        3250 fema…
## 4 Adelie  Torge…           NA            NA                 NA          NA <NA> 
## 5 Adelie  Torge…           36.7          19.3              193        3450 fema…
## 6 Adelie  Torge…           39.3          20.6              190        3650 male 
## # … with 1 more variable: year <dbl>

Sin dplyr

# Base-R row subsetting with a logical mask: rows from 2007 or 2008, all
# columns kept (empty column index).
head(penguins[penguins$year == 2007 | penguins$year == 2008, ])
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Torge…           39.1          18.7              181        3750 male 
## 2 Adelie  Torge…           39.5          17.4              186        3800 fema…
## 3 Adelie  Torge…           40.3          18                195        3250 fema…
## 4 Adelie  Torge…           NA            NA                 NA          NA <NA> 
## 5 Adelie  Torge…           36.7          19.3              193        3450 fema…
## 6 Adelie  Torge…           39.3          20.6              190        3650 male 
## # … with 1 more variable: year <dbl>
# Base-R subsetting of rows and columns at once: 2007-2008 rows, species
# column only (the result is still a one-column tibble).
head(penguins[penguins$year %in% c(2007, 2008), "species"])
## # A tibble: 6 x 1
##   species
##   <chr>  
## 1 Adelie 
## 2 Adelie 
## 3 Adelie 
## 4 Adelie 
## 5 Adelie 
## 6 Adelie

distinct

# Unique island/species combinations present in the data.
distinct(penguins, island, species)
## # A tibble: 5 x 2
##   species   island   
##   <chr>     <chr>    
## 1 Adelie    Torgersen
## 2 Adelie    Biscoe   
## 3 Adelie    Dream    
## 4 Gentoo    Biscoe   
## 5 Chinstrap Dream

slice

# Pick rows by position: rows 100 through 109.
slice(penguins, 100:109)
## # A tibble: 10 x 8
##    species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g
##    <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl>
##  1 Adelie  Dream            43.2          18.5              192        4100
##  2 Adelie  Biscoe           35            17.9              192        3725
##  3 Adelie  Biscoe           41            20                203        4725
##  4 Adelie  Biscoe           37.7          16                183        3075
##  5 Adelie  Biscoe           37.8          20                190        4250
##  6 Adelie  Biscoe           37.9          18.6              193        2925
##  7 Adelie  Biscoe           39.7          18.9              184        3550
##  8 Adelie  Biscoe           38.6          17.2              199        3750
##  9 Adelie  Biscoe           38.2          20                190        3900
## 10 Adelie  Biscoe           38.1          17                181        3175
## # … with 2 more variables: sex <chr>, year <dbl>

filter & select

# Torgersen observations from 2007, reduced to four columns; show 10 rows.
penguins %>%
  filter(year == 2007 & island == "Torgersen") %>%
  dplyr::select(island, bill_length_mm, body_mass_g, sex) %>%
  head(10)
## # A tibble: 10 x 4
##    island    bill_length_mm body_mass_g sex   
##    <chr>              <dbl>       <dbl> <chr> 
##  1 Torgersen           39.1        3750 male  
##  2 Torgersen           39.5        3800 female
##  3 Torgersen           40.3        3250 female
##  4 Torgersen           NA            NA <NA>  
##  5 Torgersen           36.7        3450 female
##  6 Torgersen           39.3        3650 male  
##  7 Torgersen           38.9        3625 female
##  8 Torgersen           39.2        4675 male  
##  9 Torgersen           34.1        3475 <NA>  
## 10 Torgersen           42          4250 <NA>
# Non-Adelie penguins from 2007-2008, reduced to five columns; show 5 rows.
penguins %>%
  filter(year %in% c(2007, 2008), species != "Adelie") %>%
  select(species, island, bill_length_mm, body_mass_g, sex) %>%
  head(5)
## # A tibble: 5 x 5
##   species island bill_length_mm body_mass_g sex   
##   <chr>   <chr>           <dbl>       <dbl> <chr> 
## 1 Gentoo  Biscoe           46.1        4500 female
## 2 Gentoo  Biscoe           50          5700 male  
## 3 Gentoo  Biscoe           48.7        4450 female
## 4 Gentoo  Biscoe           50          5700 male  
## 5 Gentoo  Biscoe           47.6        5400 male
# Torgersen 2007 observations whose sex is not "male". Rows with a missing
# sex are dropped as well, because the comparison yields NA for them and
# filter() keeps only rows where the condition is TRUE.
penguins %>%
  filter(year == 2007 & island == "Torgersen" & sex != "male") %>%
  dplyr::select(island, bill_length_mm, body_mass_g, sex) %>%
  head(10)
## # A tibble: 8 x 4
##   island    bill_length_mm body_mass_g sex   
##   <chr>              <dbl>       <dbl> <chr> 
## 1 Torgersen           39.5        3800 female
## 2 Torgersen           40.3        3250 female
## 3 Torgersen           36.7        3450 female
## 4 Torgersen           38.9        3625 female
## 5 Torgersen           41.1        3200 female
## 6 Torgersen           36.6        3700 female
## 7 Torgersen           38.7        3450 female
## 8 Torgersen           34.4        3325 female
# Torgersen penguins heavier than 3500 g, with species, bill length, mass,
# sex and year; show 10 rows.
penguins %>%
  filter(body_mass_g > 3500 & island == "Torgersen") %>%
  dplyr::select(species, bill_length_mm, body_mass_g, sex, year) %>%
  head(n = 10)
## # A tibble: 10 x 5
##    species bill_length_mm body_mass_g sex     year
##    <chr>            <dbl>       <dbl> <chr>  <dbl>
##  1 Adelie            39.1        3750 male    2007
##  2 Adelie            39.5        3800 female  2007
##  3 Adelie            39.3        3650 male    2007
##  4 Adelie            38.9        3625 female  2007
##  5 Adelie            39.2        4675 male    2007
##  6 Adelie            42          4250 <NA>    2007
##  7 Adelie            37.8        3700 <NA>    2007
##  8 Adelie            38.6        3800 male    2007
##  9 Adelie            34.6        4400 male    2007
## 10 Adelie            36.6        3700 female  2007
# Adelie penguins observed after 2007: species, body mass and bill depth.
penguins %>%
  filter(year > 2007 & species == "Adelie") %>%
  dplyr::select(species, body_mass_g, bill_depth_mm) %>%
  head(10)
## # A tibble: 10 x 3
##    species body_mass_g bill_depth_mm
##    <chr>         <dbl>         <dbl>
##  1 Adelie         3500          17.7
##  2 Adelie         4300          18.9
##  3 Adelie         3450          17.9
##  4 Adelie         4050          19.5
##  5 Adelie         2900          18.1
##  6 Adelie         3700          18.6
##  7 Adelie         3550          17.5
##  8 Adelie         3800          18.8
##  9 Adelie         2850          16.6
## 10 Adelie         3750          19.1

group_by

# Body-mass summary per year: minimum, quartiles, median, mean and maximum,
# each computed with missing values removed.
penguins %>%
  group_by(year) %>%
  summarise(
    min = min(body_mass_g, na.rm = TRUE),
    Q1 = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
    `median (Q2)` = median(body_mass_g, na.rm = TRUE),
    mean = mean(body_mass_g, na.rm = TRUE),
    Q3 = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
    `max Q4` = max(body_mass_g, na.rm = TRUE)
  ) %>%
  head()
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 7
##    year   min    Q1 `median (Q2)`  mean    Q3 `max Q4`
##   <dbl> <dbl> <dbl>         <dbl> <dbl> <dbl>    <dbl>
## 1  2007  2900 3525           3900 4125. 4600      6300
## 2  2008  2700 3612.          4200 4267. 4838.     6000
## 3  2009  2900 3500           4000 4210. 4850      6000
# Body-mass summary per species: minimum, quartiles, median, mean and
# maximum, each computed with missing values removed.
penguins %>%
  group_by(species) %>%
  summarise(
    min = min(body_mass_g, na.rm = TRUE),
    Q1 = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
    `median (Q2)` = median(body_mass_g, na.rm = TRUE),
    mean = mean(body_mass_g, na.rm = TRUE),
    Q3 = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
    `max Q4` = max(body_mass_g, na.rm = TRUE)
  ) %>%
  head()
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 7
##   species     min    Q1 `median (Q2)`  mean    Q3 `max Q4`
##   <chr>     <dbl> <dbl>         <dbl> <dbl> <dbl>    <dbl>
## 1 Adelie     2850 3350           3700 3701.  4000     4775
## 2 Chinstrap  2700 3488.          3700 3733.  3950     4800
## 3 Gentoo     3950 4700           5000 5076.  5500     6300
# Body-mass summary for every species/island pair. With two grouping
# variables the result stays grouped by the first one (species).
penguins %>%
  group_by(species, island) %>%
  summarise(
    min = min(body_mass_g, na.rm = TRUE),
    Q1 = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
    `median (Q2)` = median(body_mass_g, na.rm = TRUE),
    mean = mean(body_mass_g, na.rm = TRUE),
    Q3 = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
    `max Q4` = max(body_mass_g, na.rm = TRUE)
  ) %>%
  head()
## `summarise()` regrouping output by 'species' (override with `.groups` argument)
## # A tibble: 5 x 8
## # Groups:   species [3]
##   species   island      min    Q1 `median (Q2)`  mean    Q3 `max Q4`
##   <chr>     <chr>     <dbl> <dbl>         <dbl> <dbl> <dbl>    <dbl>
## 1 Adelie    Biscoe     2850 3388.          3750 3710. 3975      4775
## 2 Adelie    Dream      2900 3388.          3575 3688. 3981.     4650
## 3 Adelie    Torgersen  2900 3338.          3700 3706. 4000      4700
## 4 Chinstrap Dream      2700 3488.          3700 3733. 3950      4800
## 5 Gentoo    Biscoe     3950 4700           5000 5076. 5500      6300
# Body-mass summary for every sex/island pair; head() limits the display to
# the first six groups.
penguins %>%
  group_by(sex, island) %>%
  summarise(
    min = min(body_mass_g, na.rm = TRUE),
    Q1 = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
    `median (Q2)` = median(body_mass_g, na.rm = TRUE),
    mean = mean(body_mass_g, na.rm = TRUE),
    Q3 = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
    `max Q4` = max(body_mass_g, na.rm = TRUE)
  ) %>%
  head()
## `summarise()` regrouping output by 'sex' (override with `.groups` argument)
## # A tibble: 6 x 8
## # Groups:   sex [2]
##   sex    island      min    Q1 `median (Q2)`  mean    Q3 `max Q4`
##   <chr>  <chr>     <dbl> <dbl>         <dbl> <dbl> <dbl>    <dbl>
## 1 female Biscoe     2850 3819.         4588. 4319. 4812.     5200
## 2 female Dream      2700 3300          3450  3446. 3650      4150
## 3 female Torgersen  2900 3200          3400  3396. 3606.     3800
## 4 male   Biscoe     3550 4738.         5350  5105. 5600      6300
## 5 male   Dream      3250 3756.         3950  3987. 4250      4800
## 6 male   Torgersen  3325 3788.         4000  4035. 4275      4700
# Minimum, quartiles, median, mean and maximum of body mass within each
# species/island combination (missing values ignored).
penguins %>%
  group_by(species, island) %>%
  summarise(
    min = min(body_mass_g, na.rm = TRUE),
    Q1 = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
    `median (Q2)` = median(body_mass_g, na.rm = TRUE),
    mean = mean(body_mass_g, na.rm = TRUE),
    Q3 = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
    `max Q4` = max(body_mass_g, na.rm = TRUE)
  ) %>%
  head()
## `summarise()` regrouping output by 'species' (override with `.groups` argument)
## # A tibble: 5 x 8
## # Groups:   species [3]
##   species   island      min    Q1 `median (Q2)`  mean    Q3 `max Q4`
##   <chr>     <chr>     <dbl> <dbl>         <dbl> <dbl> <dbl>    <dbl>
## 1 Adelie    Biscoe     2850 3388.          3750 3710. 3975      4775
## 2 Adelie    Dream      2900 3388.          3575 3688. 3981.     4650
## 3 Adelie    Torgersen  2900 3338.          3700 3706. 4000      4700
## 4 Chinstrap Dream      2700 3488.          3700 3733. 3950      4800
## 5 Gentoo    Biscoe     3950 4700           5000 5076. 5500      6300
# Minimum, quartiles, median, mean and maximum of body mass within each
# sex/island combination (missing values ignored); first six groups shown.
penguins %>%
  group_by(sex, island) %>%
  summarise(
    min = min(body_mass_g, na.rm = TRUE),
    Q1 = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
    `median (Q2)` = median(body_mass_g, na.rm = TRUE),
    mean = mean(body_mass_g, na.rm = TRUE),
    Q3 = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
    `max Q4` = max(body_mass_g, na.rm = TRUE)
  ) %>%
  head()
## `summarise()` regrouping output by 'sex' (override with `.groups` argument)
## # A tibble: 6 x 8
## # Groups:   sex [2]
##   sex    island      min    Q1 `median (Q2)`  mean    Q3 `max Q4`
##   <chr>  <chr>     <dbl> <dbl>         <dbl> <dbl> <dbl>    <dbl>
## 1 female Biscoe     2850 3819.         4588. 4319. 4812.     5200
## 2 female Dream      2700 3300          3450  3446. 3650      4150
## 3 female Torgersen  2900 3200          3400  3396. 3606.     3800
## 4 male   Biscoe     3550 4738.         5350  5105. 5600      6300
## 5 male   Dream      3250 3756.         3950  3987. 4250      4800
## 6 male   Torgersen  3325 3788.         4000  4035. 4275      4700
# Body-mass summary per sex. Penguins with a missing sex form their own NA
# group rather than being dropped.
penguins %>%
  group_by(sex) %>%
  summarise(
    min = min(body_mass_g, na.rm = TRUE),
    Q1 = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
    `median (Q2)` = median(body_mass_g, na.rm = TRUE),
    mean = mean(body_mass_g, na.rm = TRUE),
    Q3 = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
    `max Q4` = max(body_mass_g, na.rm = TRUE)
  ) %>%
  head()
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 7
##   sex      min    Q1 `median (Q2)`  mean    Q3 `max Q4`
##   <chr>  <dbl> <dbl>         <dbl> <dbl> <dbl>    <dbl>
## 1 female  2700  3350          3650 3862. 4550      5200
## 2 male    3250  3900          4300 4546. 5312.     6300
## 3 <NA>    2975  3475          4100 4006. 4650      4875
# Minimum, quartiles, median, mean and maximum of body mass for each year
# (missing values ignored).
penguins %>%
  group_by(year) %>%
  summarise(
    min = min(body_mass_g, na.rm = TRUE),
    Q1 = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
    `median (Q2)` = median(body_mass_g, na.rm = TRUE),
    mean = mean(body_mass_g, na.rm = TRUE),
    Q3 = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
    `max Q4` = max(body_mass_g, na.rm = TRUE)
  ) %>%
  head()
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 7
##    year   min    Q1 `median (Q2)`  mean    Q3 `max Q4`
##   <dbl> <dbl> <dbl>         <dbl> <dbl> <dbl>    <dbl>
## 1  2007  2900 3525           3900 4125. 4600      6300
## 2  2008  2700 3612.          4200 4267. 4838.     6000
## 3  2009  2900 3500           4000 4210. 4850      6000
# Body mass of the last row of each species. The summary column is left
# unnamed on purpose, so it takes the expression text as its name.
summarise(group_by(penguins, species), last(body_mass_g))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 2
##   species   `last(body_mass_g)`
##   <chr>                   <dbl>
## 1 Adelie                   4000
## 2 Chinstrap                3775
## 3 Gentoo                   5400
# Mean, median and coefficient of variation (sd / mean) of body mass for
# every island/species pair. na.rm = TRUE is required: body_mass_g has
# missing values, and without it every statistic of a group containing an NA
# (Biscoe/Gentoo and Torgersen/Adelie) is itself NA.
penguins %>%
  group_by(island, species) %>%
  summarise(
    "mean body mass" = mean(body_mass_g, na.rm = TRUE),
    "median body mass" = median(body_mass_g, na.rm = TRUE),
    "cv body mass" = sd(body_mass_g, na.rm = TRUE) /
      mean(body_mass_g, na.rm = TRUE)
  )
## `summarise()` regrouping output by 'island' (override with `.groups` argument)
## # A tibble: 5 x 5
## # Groups:   island [3]
##   island    species   `mean body mass` `median body mass` `cv body mass`
##   <chr>     <chr>                <dbl>              <dbl>          <dbl>
## 1 Biscoe    Adelie               3710.               3750         0.131 
## 2 Biscoe    Gentoo               5076.               5000         0.0993
## 3 Dream     Adelie               3688.               3575         0.123 
## 4 Dream     Chinstrap            3733.               3700         0.103 
## 5 Torgersen Adelie               3706.               3700         0.120

mutate

# Add body mass in kilograms (grams / 1000) as a new column.
head(mutate(penguins, body_mass_Kg = body_mass_g / 1000))
## # A tibble: 6 x 9
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Torge…           39.1          18.7              181        3750 male 
## 2 Adelie  Torge…           39.5          17.4              186        3800 fema…
## 3 Adelie  Torge…           40.3          18                195        3250 fema…
## 4 Adelie  Torge…           NA            NA                 NA          NA <NA> 
## 5 Adelie  Torge…           36.7          19.3              193        3450 fema…
## 6 Adelie  Torge…           39.3          20.6              190        3650 male 
## # … with 2 more variables: year <dbl>, body_mass_Kg <dbl>
# Append bill length in centimetres (millimetres / 10) and preview the first rows.
head(mutate(penguins, bill_length_cm = bill_length_mm / 10))
## # A tibble: 6 x 9
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Torge…           39.1          18.7              181        3750 male 
## 2 Adelie  Torge…           39.5          17.4              186        3800 fema…
## 3 Adelie  Torge…           40.3          18                195        3250 fema…
## 4 Adelie  Torge…           NA            NA                 NA          NA <NA> 
## 5 Adelie  Torge…           36.7          19.3              193        3450 fema…
## 6 Adelie  Torge…           39.3          20.6              190        3650 male 
## # … with 2 more variables: year <dbl>, bill_length_cm <dbl>
# Append the flipper-length to bill-length ratio and preview the first rows.
head(mutate(penguins, proportion = flipper_length_mm / bill_length_mm))
## # A tibble: 6 x 9
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Torge…           39.1          18.7              181        3750 male 
## 2 Adelie  Torge…           39.5          17.4              186        3800 fema…
## 3 Adelie  Torge…           40.3          18                195        3250 fema…
## 4 Adelie  Torge…           NA            NA                 NA          NA <NA> 
## 5 Adelie  Torge…           36.7          19.3              193        3450 fema…
## 6 Adelie  Torge…           39.3          20.6              190        3650 male 
## # … with 2 more variables: year <dbl>, proportion <dbl>
# Numeric encoding of sex: 1 for "male", 2 for any other non-missing value;
# a missing sex stays NA.
mutate(penguins, sexo = ifelse(sex != "male", 2, 1))
## # A tibble: 344 x 9
##    species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g
##    <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl>
##  1 Adelie  Torge…           39.1          18.7              181        3750
##  2 Adelie  Torge…           39.5          17.4              186        3800
##  3 Adelie  Torge…           40.3          18                195        3250
##  4 Adelie  Torge…           NA            NA                 NA          NA
##  5 Adelie  Torge…           36.7          19.3              193        3450
##  6 Adelie  Torge…           39.3          20.6              190        3650
##  7 Adelie  Torge…           38.9          17.8              181        3625
##  8 Adelie  Torge…           39.2          19.6              195        4675
##  9 Adelie  Torge…           34.1          18.1              193        3475
## 10 Adelie  Torge…           42            20.2              190        4250
## # … with 334 more rows, and 3 more variables: sex <chr>, year <dbl>, sexo <dbl>
# Conditional mutate: the divisor applied to bill_length_mm depends on sex.
# NOTE(review): rows whose sex is not "male" are divided by 100, which is not
# a mm -> cm conversion; confirm this is intentional for the example.
mutate(
  penguins,
  bill_length_cm = bill_length_mm / ifelse(sex == "male", 10, 100)
)
## # A tibble: 344 x 9
##    species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g
##    <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl>
##  1 Adelie  Torge…           39.1          18.7              181        3750
##  2 Adelie  Torge…           39.5          17.4              186        3800
##  3 Adelie  Torge…           40.3          18                195        3250
##  4 Adelie  Torge…           NA            NA                 NA          NA
##  5 Adelie  Torge…           36.7          19.3              193        3450
##  6 Adelie  Torge…           39.3          20.6              190        3650
##  7 Adelie  Torge…           38.9          17.8              181        3625
##  8 Adelie  Torge…           39.2          19.6              195        4675
##  9 Adelie  Torge…           34.1          18.1              193        3475
## 10 Adelie  Torge…           42            20.2              190        4250
## # … with 334 more rows, and 3 more variables: sex <chr>, year <dbl>,
## #   bill_length_cm <dbl>

mutate, filter & arrange

# Penguins recorded on Torgersen in 2008, heaviest first (mass in kg).
head(
  arrange(
    filter(
      mutate(penguins, body_mass_Kg = body_mass_g / 1000),
      year == 2008, island == "Torgersen"
    ),
    desc(body_mass_Kg)
  )
)
## # A tibble: 6 x 9
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Torge…           42.9          17.6              196        4700 male 
## 2 Adelie  Torge…           41.8          19.4              198        4450 male 
## 3 Adelie  Torge…           42.8          18.5              195        4250 male 
## 4 Adelie  Torge…           35.1          19.4              193        4200 male 
## 5 Adelie  Torge…           45.8          18.9              197        4150 male 
## 6 Adelie  Torge…           42.1          19.1              195        4000 male 
## # … with 2 more variables: year <dbl>, body_mass_Kg <dbl>
# Male Adelie penguins recorded on Biscoe in 2009 weighing at most 5000 g,
# ordered by flipper length and then by body mass, both descending.
penguins %>%
  mutate(body_mass_Kg = body_mass_g / 1000) %>% # grams -> kilograms
  filter(
    species == "Adelie", island == "Biscoe", year == 2009,
    sex == "male", body_mass_g <= 5000
  ) %>%
  # desc() wraps a single vector, so every sort key needs its own desc()
  arrange(desc(flipper_length_mm), desc(body_mass_Kg)) %>%
  head()
## # A tibble: 6 x 9
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Biscoe           41            20                203        4725 male 
## 2 Adelie  Biscoe           43.2          19                197        4775 male 
## 3 Adelie  Biscoe           42.2          19.5              197        4275 male 
## 4 Adelie  Biscoe           42.7          18.3              196        4075 male 
## 5 Adelie  Biscoe           45.6          20.3              191        4600 male 
## 6 Adelie  Biscoe           37.8          20                190        4250 male 
## # … with 2 more variables: year <dbl>, body_mass_Kg <dbl>

rename

# Give two measurement columns human-readable (non-syntactic) names.
penguins %>%
  rename(
    "flipper length (mm)" = flipper_length_mm,
    "body mass (g)" = body_mass_g
  ) %>%
  head()
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm `flipper length… `body mass (g)`
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>           <dbl>
## 1 Adelie  Torge…           39.1          18.7              181            3750
## 2 Adelie  Torge…           39.5          17.4              186            3800
## 3 Adelie  Torge…           40.3          18                195            3250
## 4 Adelie  Torge…           NA            NA                 NA              NA
## 5 Adelie  Torge…           36.7          19.3              193            3450
## 6 Adelie  Torge…           39.3          20.6              190            3650
## # … with 2 more variables: sex <chr>, year <dbl>
# Give three measurement columns Spanish display names.
# bill_depth_mm is measured in millimetres, so its label carries "(mm)".
penguins %>%
  rename(
    "Longitud Aleta (mm)" = flipper_length_mm,
    "Masa Corporal (g)" = body_mass_g,
    "profundidad pico (mm)" = bill_depth_mm
  ) %>%
  head()
## # A tibble: 6 x 8
##   species island bill_length_mm `profundidad pi… `Longitud Aleta…
##   <chr>   <chr>           <dbl>            <dbl>            <dbl>
## 1 Adelie  Torge…           39.1             18.7              181
## 2 Adelie  Torge…           39.5             17.4              186
## 3 Adelie  Torge…           40.3             18                195
## 4 Adelie  Torge…           NA               NA                 NA
## 5 Adelie  Torge…           36.7             19.3              193
## 6 Adelie  Torge…           39.3             20.6              190
## # … with 3 more variables: `Masa Corporal (g)` <dbl>, sex <chr>, year <dbl>

sample_n

# Draw 5 rows at random; no seed is set in this chunk, so the rows can
# differ between runs.
penguins %>% sample_n(5)
## # A tibble: 5 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Gentoo  Biscoe           48.4          14.4              203        4625 fema…
## 2 Adelie  Torge…           39            17.1              191        3050 fema…
## 3 Adelie  Biscoe           36.5          16.6              181        2850 fema…
## 4 Chinst… Dream            50.7          19.7              203        4050 male 
## 5 Gentoo  Biscoe           48.7          15.7              208        5350 male 
## # … with 1 more variable: year <dbl>

sample_frac

# Draw a random 20% of the rows and preview the first ones.
penguins %>%
  sample_frac(0.2) %>%
  head()
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Chinst… Dream            45.7          17.3              193        3600 fema…
## 2 Adelie  Dream            39.2          18.6              190        4250 male 
## 3 Adelie  Torge…           34.4          18.4              184        3325 fema…
## 4 Gentoo  Biscoe           50            15.3              220        5550 male 
## 5 Gentoo  Biscoe           43.5          15.2              213        4650 fema…
## 6 Adelie  Dream            37.8          18.1              193        3750 male 
## # … with 1 more variable: year <dbl>

select

# Keep only the listed columns.
penguins %>%
  select(species, flipper_length_mm, body_mass_g, sex) %>%
  head()
## # A tibble: 6 x 4
##   species flipper_length_mm body_mass_g sex   
##   <chr>               <dbl>       <dbl> <chr> 
## 1 Adelie                181        3750 male  
## 2 Adelie                186        3800 female
## 3 Adelie                195        3250 female
## 4 Adelie                 NA          NA <NA>  
## 5 Adelie                193        3450 female
## 6 Adelie                190        3650 male
# Keep every column except flipper_length_mm.
penguins %>%
  select(!flipper_length_mm) %>%
  head()
## # A tibble: 6 x 7
##   species island    bill_length_mm bill_depth_mm body_mass_g sex     year
##   <chr>   <chr>              <dbl>         <dbl>       <dbl> <chr>  <dbl>
## 1 Adelie  Torgersen           39.1          18.7        3750 male    2007
## 2 Adelie  Torgersen           39.5          17.4        3800 female  2007
## 3 Adelie  Torgersen           40.3          18          3250 female  2007
## 4 Adelie  Torgersen           NA            NA            NA <NA>    2007
## 5 Adelie  Torgersen           36.7          19.3        3450 female  2007
## 6 Adelie  Torgersen           39.3          20.6        3650 male    2007
# Drop the four listed columns and keep the rest.
penguins %>%
  select(-c(species, flipper_length_mm, body_mass_g, sex)) %>%
  head()
## # A tibble: 6 x 4
##   island    bill_length_mm bill_depth_mm  year
##   <chr>              <dbl>         <dbl> <dbl>
## 1 Torgersen           39.1          18.7  2007
## 2 Torgersen           39.5          17.4  2007
## 3 Torgersen           40.3          18    2007
## 4 Torgersen           NA            NA    2007
## 5 Torgersen           36.7          19.3  2007
## 6 Torgersen           39.3          20.6  2007
# Columns whose name starts with "bill".
head(select(penguins, starts_with("bill")))
## # A tibble: 6 x 2
##   bill_length_mm bill_depth_mm
##            <dbl>         <dbl>
## 1           39.1          18.7
## 2           39.5          17.4
## 3           40.3          18  
## 4           NA            NA  
## 5           36.7          19.3
## 6           39.3          20.6
# Columns whose name ends with "mm".
head(select(penguins, ends_with("mm")))
## # A tibble: 6 x 3
##   bill_length_mm bill_depth_mm flipper_length_mm
##            <dbl>         <dbl>             <dbl>
## 1           39.1          18.7               181
## 2           39.5          17.4               186
## 3           40.3          18                 195
## 4           NA            NA                  NA
## 5           36.7          19.3               193
## 6           39.3          20.6               190
# Columns whose name contains "length".
head(select(penguins, contains("length")))
## # A tibble: 6 x 2
##   bill_length_mm flipper_length_mm
##            <dbl>             <dbl>
## 1           39.1               181
## 2           39.5               186
## 3           40.3               195
## 4           NA                  NA
## 5           36.7               193
## 6           39.3               190

filter y select

# Rows from 2007, restricted to four columns.
penguins %>%
  filter(year == 2007) %>%
  dplyr::select(species, flipper_length_mm, body_mass_g, sex) %>%
  head()
## # A tibble: 6 x 4
##   species flipper_length_mm body_mass_g sex   
##   <chr>               <dbl>       <dbl> <chr> 
## 1 Adelie                181        3750 male  
## 2 Adelie                186        3800 female
## 3 Adelie                195        3250 female
## 4 Adelie                 NA          NA <NA>  
## 5 Adelie                193        3450 female
## 6 Adelie                190        3650 male
# Species and bill depth of the female penguins.
penguins %>%
  filter(sex == "female") %>%
  dplyr::select(species, bill_depth_mm) %>%
  head()
## # A tibble: 6 x 2
##   species bill_depth_mm
##   <chr>           <dbl>
## 1 Adelie           17.4
## 2 Adelie           18  
## 3 Adelie           19.3
## 4 Adelie           17.8
## 5 Adelie           17.6
## 6 Adelie           17.8
# Penguins that are neither male nor Adelie and weigh at least 5150 g.
penguins %>%
  filter(sex != "male", species != "Adelie", body_mass_g >= 5150) %>%
  dplyr::select(sex, species, body_mass_g) %>%
  head()
## # A tibble: 3 x 3
##   sex    species body_mass_g
##   <chr>  <chr>         <dbl>
## 1 female Gentoo         5150
## 2 female Gentoo         5200
## 3 female Gentoo         5200
# Rows from 2008, restricted to four columns.
penguins %>%
  filter(year == 2008) %>%
  dplyr::select(island, bill_length_mm, body_mass_g, sex) %>%
  head()
## # A tibble: 6 x 4
##   island bill_length_mm body_mass_g sex   
##   <chr>           <dbl>       <dbl> <chr> 
## 1 Biscoe           39.6        3500 female
## 2 Biscoe           40.1        4300 male  
## 3 Biscoe           35          3450 female
## 4 Biscoe           42          4050 male  
## 5 Biscoe           34.5        2900 female
## 6 Biscoe           41.4        3700 male
# Rows from Torgersen in 2007, restricted to four columns.
penguins %>%
  filter(year == 2007, island == "Torgersen") %>%
  dplyr::select(species, flipper_length_mm, body_mass_g, sex) %>%
  head()
## # A tibble: 6 x 4
##   species flipper_length_mm body_mass_g sex   
##   <chr>               <dbl>       <dbl> <chr> 
## 1 Adelie                181        3750 male  
## 2 Adelie                186        3800 female
## 3 Adelie                195        3250 female
## 4 Adelie                 NA          NA <NA>  
## 5 Adelie                193        3450 female
## 6 Adelie                190        3650 male

summarize

# Five-number summary plus mean of body mass for the 2007 records,
# ignoring missing values.
summarize(
  filter(penguins, year == 2007),
  "min" = min(body_mass_g, na.rm = TRUE),
  "Q1" = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
  "median (Q2)" = median(body_mass_g, na.rm = TRUE),
  "mean" = mean(body_mass_g, na.rm = TRUE),
  "Q3" = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
  "max Q4" = max(body_mass_g, na.rm = TRUE)
)
## # A tibble: 1 x 6
##     min    Q1 `median (Q2)`  mean    Q3 `max Q4`
##   <dbl> <dbl>         <dbl> <dbl> <dbl>    <dbl>
## 1  2900  3525          3900 4125.  4600     6300

transmute

# transmute() keeps only the columns it creates: here body mass in kg.
transmute(penguins, body_mass_kg = body_mass_g / 1000)
## # A tibble: 344 x 1
##    body_mass_kg
##           <dbl>
##  1         3.75
##  2         3.8 
##  3         3.25
##  4        NA   
##  5         3.45
##  6         3.65
##  7         3.62
##  8         4.68
##  9         3.48
## 10         4.25
## # … with 334 more rows
# Same as above, additionally carrying the species column through.
transmute(penguins, body_mass_kg = body_mass_g / 1000, species)
## # A tibble: 344 x 2
##    body_mass_kg species
##           <dbl> <chr>  
##  1         3.75 Adelie 
##  2         3.8  Adelie 
##  3         3.25 Adelie 
##  4        NA    Adelie 
##  5         3.45 Adelie 
##  6         3.65 Adelie 
##  7         3.62 Adelie 
##  8         4.68 Adelie 
##  9         3.48 Adelie 
## 10         4.25 Adelie 
## # … with 334 more rows

forcats

library(forcats)

fct_reorder

Reordena un factor por otra variable.

fct_infreq

Reordena un factor por la frecuencia de valores.

# Number of penguins per island, with island as a factor ordered by frequency.
# fct_infreq() must see the raw rows: after count() every island occurs exactly
# once, so there would be no frequency information left to order the levels by.
penguins %>%
  mutate(island = fct_infreq(island)) %>%
  count(island, sort = TRUE, name = "cantidad")
## # A tibble: 3 x 2
##   island    cantidad
##   <fct>        <int>
## 1 Biscoe         168
## 2 Dream          124
## 3 Torgersen       52
library(scales)
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
# Count and percentage share of penguins per island, with island as a factor
# ordered by frequency.
# fct_infreq() is applied before count(): once the rows are aggregated every
# island occurs exactly once and the frequency ordering would be lost.
penguins %>%
  mutate(island = fct_infreq(island)) %>%
  count(island, sort = TRUE, name = "ammount") %>%
  mutate("percent" = percent(ammount / sum(ammount), accuracy = 0.01))
## # A tibble: 3 x 3
##   island    ammount percent
##   <fct>       <int> <chr>  
## 1 Biscoe        168 48.84% 
## 2 Dream         124 36.05% 
## 3 Torgersen      52 15.12%
# Flag penguins whose body mass lies above the upper Tukey fence
# (Q3 + 1.5 * IQR) of their own species and keep only those rows.
# The flag marks unusually heavy birds, hence the name `overweight`.
penguins %>%
  group_by(species) %>%
  mutate(
    overweight = body_mass_g >
      quantile(body_mass_g, 0.75, na.rm = TRUE) + 1.5 * IQR(body_mass_g, na.rm = TRUE)
  ) %>%
  filter(overweight)
## # A tibble: 1 x 9
## # Groups:   species [1]
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Chinst… Dream              52          20.7              210        4800 male 
## # … with 2 more variables: year <dbl>, overweight <lgl>
# Append a running row number as `index`.
mutate(penguins, index = row_number())
## # A tibble: 344 x 9
##    species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g
##    <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl>
##  1 Adelie  Torge…           39.1          18.7              181        3750
##  2 Adelie  Torge…           39.5          17.4              186        3800
##  3 Adelie  Torge…           40.3          18                195        3250
##  4 Adelie  Torge…           NA            NA                 NA          NA
##  5 Adelie  Torge…           36.7          19.3              193        3450
##  6 Adelie  Torge…           39.3          20.6              190        3650
##  7 Adelie  Torge…           38.9          17.8              181        3625
##  8 Adelie  Torge…           39.2          19.6              195        4675
##  9 Adelie  Torge…           34.1          18.1              193        3475
## 10 Adelie  Torge…           42            20.2              190        4250
## # … with 334 more rows, and 3 more variables: sex <chr>, year <dbl>,
## #   index <int>

fct_relevel

Cambia el orden de un factor a mano.

fct_lump

Colapsa los valores menos / más frecuentes de un factor en “otro”.

Latitud y longitud de las islas

# Polygon data for Antarctica taken from the "world" map.
antarctica <- map_data(map = "world", region = "Antarctica")

# Latitude (lat_y) and longitude (long_x) of each island, one row per island.
df_penguinloc <- tribble(
  ~island,     ~lat_y,      ~long_x,
  "Dream",     -64.7333,    -64.2333,
  "Biscoe",    -65.4333,    -65.5000,
  "Torgersen", -64.7666636, -64.083333
)

df_penguinloc
## # A tibble: 3 x 3
##   island    lat_y long_x
##   <chr>     <dbl>  <dbl>
## 1 Dream     -64.7  -64.2
## 2 Biscoe    -65.4  -65.5
## 3 Torgersen -64.8  -64.1

ggplot

\[ggplot(dataset)\text{+}geom\left(aes(\cdot)\right)\text{+}options(\cdot)\text{+}facets(\cdot){\leftrightarrow}ggplot\left(dataset,aes(\cdot)\right)\text{+}geom(\cdot)\text{+}options(\cdot)\text{+}facets(\cdot)\]

library("ggplot2")

# Records from 2007 only.
penguins_2007 <- filter(penguins, year == 2007)

# Body mass against flipper length (log10 x axis); colour encodes species
# and point shape encodes island.
ggplot(
  data = penguins_2007,
  mapping = aes(
    x = flipper_length_mm,
    y = body_mass_g,
    colour = species,
    shape = island
  )
) +
  geom_point() +
  scale_x_log10() +
  labs(
    title = "Body mass (grs.) vs Flipper length (mms.) by Species",
    subtitle = "year 2007",
    caption = " Summary of data, ref 2007",
    x = "Longitud de la aleta (mms.)",
    y = "Masa corporal (grs.)"
  )
## Warning: Removed 1 rows containing missing values (geom_point).

library("ggplot2")

# Records from 2007 only.
penguins_2007 <- filter(penguins, year == 2007)

# Same scatter plot as before, split into one panel per species.
ggplot(
  data = penguins_2007,
  mapping = aes(
    x = flipper_length_mm,
    y = body_mass_g,
    colour = species,
    shape = island
  )
) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(vars(species)) +
  labs(
    title = "Body mass (grs.) vs Flipper length (mms.) by Species",
    subtitle = "year 2007",
    caption = " Summary of data, ref 2007",
    x = "Longitud de la aleta (mms.)",
    y = "Masa corporal (grs.)"
  )
## Warning: Removed 1 rows containing missing values (geom_point).

tibble

Un tibble es una versión moderna de un data frame que trabaja de manera perezosa (es decir, que realiza menos operaciones de forma automática), lo que evita problemas comunes y supuestos implícitos de un data frame. Entre otras diferencias, los tibbles:

  • No convierten automáticamente las cadenas de caracteres en factores

  • No crean nombres para las observaciones

  • No cambian los nombres de columnas que sean nombres no sintácticos

library(tibble)
# Coerce the data to a tibble and preview the first rows.
head(as_tibble(penguins))
## # A tibble: 6 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Torge…           39.1          18.7              181        3750 male 
## 2 Adelie  Torge…           39.5          17.4              186        3800 fema…
## 3 Adelie  Torge…           40.3          18                195        3250 fema…
## 4 Adelie  Torge…           NA            NA                 NA          NA <NA> 
## 5 Adelie  Torge…           36.7          19.3              193        3450 fema…
## 6 Adelie  Torge…           39.3          20.6              190        3650 male 
## # … with 1 more variable: year <dbl>
# Build a tibble from two vectors; z is computed from the x and y columns
# defined earlier in the same call.
tibble(
  x = penguins[["flipper_length_mm"]],
  y = penguins[["body_mass_g"]],
  z = x / y
)
## # A tibble: 344 x 3
##        x     y       z
##    <dbl> <dbl>   <dbl>
##  1   181  3750  0.0483
##  2   186  3800  0.0489
##  3   195  3250  0.06  
##  4    NA    NA NA     
##  5   193  3450  0.0559
##  6   190  3650  0.0521
##  7   181  3625  0.0499
##  8   195  4675  0.0417
##  9   193  3475  0.0555
## 10   190  4250  0.0447
## # … with 334 more rows
# Row-wise tibble definition: the first line names the columns (~x, ~y, ~z)
# and each following line supplies one row of values.
tribble(
  ~x, ~y,  ~z,
  "a", 2,  3.6,
  "b", 1,  8.5
)
## # A tibble: 2 x 3
##   x         y     z
##   <chr> <dbl> <dbl>
## 1 a         2   3.6
## 2 b         1   8.5
# Stack the first and second rows into a two-row tibble.
bind_rows(
  slice(penguins, 1),
  slice(penguins, 2)
)
## # A tibble: 2 x 8
##   species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie  Torge…           39.1          18.7              181        3750 male 
## 2 Adelie  Torge…           39.5          17.4              186        3800 fema…
## # … with 1 more variable: year <dbl>
# Place the first and second columns side by side and preview the result.
head(bind_cols(penguins[1], penguins[2]))
## # A tibble: 6 x 2
##   species island   
##   <chr>   <chr>    
## 1 Adelie  Torgersen
## 2 Adelie  Torgersen
## 3 Adelie  Torgersen
## 4 Adelie  Torgersen
## 5 Adelie  Torgersen
## 6 Adelie  Torgersen

tidyr

# Widen the data: one body-mass column per island.
penguins_spread <- penguins %>%
  spread(key = island, value = body_mass_g)

penguins_spread %>% head()
## # A tibble: 6 x 9
##   species bill_length_mm bill_depth_mm flipper_length_… sex    year Biscoe Dream
##   <chr>            <dbl>         <dbl>            <dbl> <chr> <dbl>  <dbl> <dbl>
## 1 Adelie            32.1          15.5              188 fema…  2009     NA  3050
## 2 Adelie            33.1          16.1              178 fema…  2008     NA  2900
## 3 Adelie            33.5          19                190 fema…  2008     NA    NA
## 4 Adelie            34            17.1              185 fema…  2008     NA  3400
## 5 Adelie            34.1          18.1              193 <NA>   2007     NA    NA
## 6 Adelie            34.4          18.4              184 fema…  2007     NA    NA
## # … with 1 more variable: Torgersen <dbl>
# Back to long format: collapse the three island columns into island /
# body_mass_g pairs and drop the combinations without a body mass.
# The island columns are selected by name instead of by position (7:9), so
# the call keeps working if the column order of penguins_spread changes.
penguins_spread %>%
  gather(key = "island", value = "body_mass_g", c(Biscoe, Dream, Torgersen)) %>%
  filter(!is.na(body_mass_g))
## # A tibble: 342 x 8
##    species bill_length_mm bill_depth_mm flipper_length_… sex    year island
##    <chr>            <dbl>         <dbl>            <dbl> <chr> <dbl> <chr> 
##  1 Adelie            34.5          18.1              187 fema…  2008 Biscoe
##  2 Adelie            35            17.9              190 fema…  2008 Biscoe
##  3 Adelie            35            17.9              192 fema…  2009 Biscoe
##  4 Adelie            35.3          18.9              187 fema…  2007 Biscoe
##  5 Adelie            35.5          16.2              195 fema…  2008 Biscoe
##  6 Adelie            35.7          16.9              185 fema…  2008 Biscoe
##  7 Adelie            35.9          19.2              189 fema…  2007 Biscoe
##  8 Adelie            36.4          17.1              184 fema…  2008 Biscoe
##  9 Adelie            36.5          16.6              181 fema…  2008 Biscoe
## 10 Adelie            37.6          17                185 fema…  2008 Biscoe
## # … with 332 more rows, and 1 more variable: body_mass_g <dbl>
# Merge species and island into a single "species/island" column.
penguins_union <- penguins %>%
  unite(col = species_island, species, island, sep = "/")

penguins_union %>% head()
## # A tibble: 6 x 7
##   species_island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex  
##   <chr>                   <dbl>         <dbl>            <dbl>       <dbl> <chr>
## 1 Adelie/Torger…           39.1          18.7              181        3750 male 
## 2 Adelie/Torger…           39.5          17.4              186        3800 fema…
## 3 Adelie/Torger…           40.3          18                195        3250 fema…
## 4 Adelie/Torger…           NA            NA                 NA          NA <NA> 
## 5 Adelie/Torger…           36.7          19.3              193        3450 fema…
## 6 Adelie/Torger…           39.3          20.6              190        3650 male 
## # … with 1 more variable: year <dbl>
# Split the combined column back into species and island at the "/".
penguins_union %>%
  separate(col = species_island, into = c("species", "island"), sep = "/")
## # A tibble: 344 x 8
##    species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g
##    <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl>
##  1 Adelie  Torge…           39.1          18.7              181        3750
##  2 Adelie  Torge…           39.5          17.4              186        3800
##  3 Adelie  Torge…           40.3          18                195        3250
##  4 Adelie  Torge…           NA            NA                 NA          NA
##  5 Adelie  Torge…           36.7          19.3              193        3450
##  6 Adelie  Torge…           39.3          20.6              190        3650
##  7 Adelie  Torge…           38.9          17.8              181        3625
##  8 Adelie  Torge…           39.2          19.6              195        4675
##  9 Adelie  Torge…           34.1          18.1              193        3475
## 10 Adelie  Torge…           42            20.2              190        4250
## # … with 334 more rows, and 2 more variables: sex <chr>, year <dbl>
# Pair every row of penguins with each value of z (1, 2, 3).
penguins %>% tidyr::expand_grid(z = 1:3)
## # A tibble: 1,032 x 9
##    species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g
##    <chr>   <chr>           <dbl>         <dbl>            <dbl>       <dbl>
##  1 Adelie  Torge…           39.1          18.7              181        3750
##  2 Adelie  Torge…           39.1          18.7              181        3750
##  3 Adelie  Torge…           39.1          18.7              181        3750
##  4 Adelie  Torge…           39.5          17.4              186        3800
##  5 Adelie  Torge…           39.5          17.4              186        3800
##  6 Adelie  Torge…           39.5          17.4              186        3800
##  7 Adelie  Torge…           40.3          18                195        3250
##  8 Adelie  Torge…           40.3          18                195        3250
##  9 Adelie  Torge…           40.3          18                195        3250
## 10 Adelie  Torge…           NA            NA                 NA          NA
## # … with 1,022 more rows, and 3 more variables: sex <chr>, year <dbl>, z <int>
# One-column lookup tables built with tibble(), which replaces the deprecated
# data_frame() constructor.
species <- tibble("specie" = c("Adelie", "Chinstrap", "Gentoo"))
sexes <- tibble("sex" = c("male", "female"))
# Every species/sex combination.
crossing(species, sexes)
## # A tibble: 6 x 2
##   specie    sex   
##   <chr>     <chr> 
## 1 Adelie    female
## 2 Adelie    male  
## 3 Chinstrap female
## 4 Chinstrap male  
## 5 Gentoo    female
## 6 Gentoo    male
# All combinations of the distinct islands and distinct species, whether or
# not they occur together in the data.
crossing(
  islands = penguins[["island"]],
  species = penguins[["species"]]
)
## # A tibble: 9 x 2
##   islands   species  
##   <chr>     <chr>    
## 1 Biscoe    Adelie   
## 2 Biscoe    Chinstrap
## 3 Biscoe    Gentoo   
## 4 Dream     Adelie   
## 5 Dream     Chinstrap
## 6 Dream     Gentoo   
## 7 Torgersen Adelie   
## 8 Torgersen Chinstrap
## 9 Torgersen Gentoo
# Only the island/species combinations that actually occur in the data.
tidyr::nesting(
  islands = penguins[["island"]],
  species = penguins[["species"]]
)
## # A tibble: 5 x 2
##   islands   species  
##   <chr>     <chr>    
## 1 Biscoe    Adelie   
## 2 Biscoe    Gentoo   
## 3 Dream     Adelie   
## 4 Dream     Chinstrap
## 5 Torgersen Adelie
# Distinct values of body_mass_g, one per row.
tidyr::expand(penguins, body_mass_g)
## # A tibble: 95 x 1
##    body_mass_g
##          <dbl>
##  1        2700
##  2        2850
##  3        2900
##  4        2925
##  5        2975
##  6        3000
##  7        3050
##  8        3075
##  9        3100
## 10        3150
## # … with 85 more rows

Conteo de pingüinos según isla

# Number of penguins per island, joined with the island coordinates.
# The result overwrites df_penguinloc.
island_counts <- count(penguins, island, name = "amount")
df_penguinloc <- left_join(island_counts, df_penguinloc, by = "island")
rm(island_counts)

df_penguinloc
## # A tibble: 3 x 4
##   island    amount lat_y long_x
##   <chr>      <int> <dbl>  <dbl>
## 1 Biscoe       168 -65.4  -65.5
## 2 Dream        124 -64.7  -64.2
## 3 Torgersen     52 -64.8  -64.1

Gráfica de frecuencias según especie de pingüinos

# Bar chart of the penguins recorded in 2009: one bar per island, filled by
# species. The plot is stored in `d` and then printed.
d <- penguins %>% filter(year==2009) %>% 
  # turn island into a factor, then reverse the order of its levels
  mutate(island = factor(island), 
         island = factor(island, levels = rev(levels(island)))) %>%  
  ggplot() +
  # count rows per island; the fill colour distinguishes species
  stat_count(aes(island, fill = species), alpha = 0.8) +
  # island names drawn as text at y = 3 (the x axis text is blanked below)
  annotate("text", y=3,  x= "Torgersen", label= "Torgersen", color = "#1874CD")+
  annotate("text", y=3,  x= "Dream", label= "Dream", color = "#c02728")+
  annotate("text", y=3,  x= "Biscoe", label= "Biscoe", color = "#53868B")+
  # one fill colour per species
  scale_fill_manual(values = c("#66c2a5","#fc8d62","#8da0cb")) +
  # reverse the count axis
  scale_y_reverse()+
  labs(caption = "Source: Gorman, Williams and Fraser, 2014") +
  theme_minimal() +
  # legend placed inside the panel; axis titles, x axis text and grid removed
  theme(legend.position = c(0.2,0.3),
        axis.title = element_blank(),
        axis.text.x = element_blank(),
        panel.grid = element_blank(),
        plot.background = element_rect(fill="#f9f9f9", color = "#f9f9f9"))
d

Mapa de las localidades de las especies de pingüinos

# Main map: the Antarctica polygons in an orthographic projection restricted
# to the given longitude/latitude limits, with one labelled point per island
# taken from df_penguinloc. The plot is stored in `p` and then printed.
p <- ggplot(antarctica, aes(long, lat, group = group)) +
  geom_polygon(fill = "#506B8E", alpha = .8) +
  coord_map("ortho", orientation = c(-90, 0, 0),
            xlim = c(-62, -55),
            ylim = c(-75, -60)) +
  # island name labels; the colours are given as a fixed vector, so they are
  # matched to the rows of df_penguinloc by position
  geom_text_repel(df_penguinloc, mapping=aes(long_x, lat_y, label = island), 
                  group=1, color = c("#53868B", "#c02728", "#1874CD"), 
                   box.padding = 0.5,
                   nudge_y = 1,  nudge_x = -2, min.segment.length = 0) +
  # one point per island, coloured through the manual scale below
  geom_point(df_penguinloc, mapping=aes(long_x, lat_y,  
                                        group = 1, 
                                        colour = island), 
             alpha =.7)+
  scale_color_manual(values = c("#53868B", "#c02728", "#1874CD"))+
  labs(title = "Penguins in Palmer Archipelago",
       subtitle = "Recorded penguins in 2009 and their nesting Islands") +
  theme_map() +
  # no legend; centred title and subtitle; light grey plot background
  theme(legend.position = "none",
        plot.title = element_text(hjust = 0.5, size = 20),
        plot.subtitle =  element_text(hjust = 0.5),
        plot.background = element_rect(fill="#f9f9f9", color = "#f9f9f9"))
p

Mapa insertado

# Inset map: the Antarctica polygons in the same orthographic orientation but
# without axis limits, with the island points and a black rectangle drawn
# between longitudes -68/-54 and latitudes -75/-60.
# The plot is stored in `inset` and then printed.
inset <- ggplot(antarctica, aes(long, lat, group = group)) +
  geom_polygon(fill = "#506B8E", alpha = .5) +
  coord_map("ortho", orientation = c(-90, 0, 0)) +
  # island locations as small, semi-transparent points
  geom_point(df_penguinloc, mapping=aes(long_x, lat_y,  
                                        group = island, 
                                        colour = island), 
             alpha =.5, size = 1)+
  # unfilled rectangle with a black outline
  annotate("rect", color="black", fill = "transparent",
           xmin = -68, xmax = -54,
           ymin = -75, ymax = -60)+
  labs(title = "Antarctica") +
  theme_map() +
  # no legend; grey major grid lines for y; centred title; light background
  theme(legend.position = "none",
        panel.grid.major.y = element_line(colour="grey"),
        plot.title = element_text(hjust = 0.5),
        plot.background = element_rect(fill="#f9f9f9", color = "#f9f9f9"))
inset

Dibujar juntos el recuadro y el mapa principal

# Draw the inset map on top of the main map; position and size are given in
# relative (0-1) canvas coordinates.
a <- ggdraw(plot = p) +
  draw_plot(
    plot = inset,
    x = 0.47,
    y = 0.38,
    width = 0.5,
    height = 0.4
  )
a

Mapa conjunto

# Stack the map (with inset) above the bar chart in a single column; the map
# row gets twice the height of the bar-chart row.
p1 <- plot_grid(a, d, ncol = 1, rel_widths = c(4, 2), rel_heights = c(2, 1)) +
  theme(plot.background = element_rect(fill = "#f9f9f9")) +
  labs(title = "Penguins in Palmer Archipelago")

# Path of the penguin picture relative to the project root. The path is given
# without a leading slash: a leading "/" can make it resolve as an absolute
# path at the filesystem root instead of inside the project.
penguin <- here("penguin.jfif")

# Final figure: the combined plot with the penguin image drawn on top.
p2 <- ggdraw() +
  draw_plot(p1) +
  draw_image(penguin, x = 0.24, y = 0.38, hjust = 1, width = 0.20, height = 0.25)
p2