library(tidyverse)
library(visdat)
library(cowplot)
library(ggrepel)
library(mapproj)
library(ggthemes)
library(here)
library(extrafont)
library(extrafont)
library(knitr)
library(magick)
# Get the data
penguins <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-28/penguins.csv')
## Parsed with column specification:
## cols(
## species = col_character(),
## island = col_character(),
## bill_length_mm = col_double(),
## bill_depth_mm = col_double(),
## flipper_length_mm = col_double(),
## body_mass_g = col_double(),
## sex = col_character(),
## year = col_double()
## )
kable(head(penguins, n=15), caption = "Penguins data. Source: Gorman, Williams and Fraser, 2014 ")
species | island | bill_length_mm | bill_depth_mm | flipper_length_mm | body_mass_g | sex | year |
---|---|---|---|---|---|---|---|
Adelie | Torgersen | 39.1 | 18.7 | 181 | 3750 | male | 2007 |
Adelie | Torgersen | 39.5 | 17.4 | 186 | 3800 | female | 2007 |
Adelie | Torgersen | 40.3 | 18.0 | 195 | 3250 | female | 2007 |
Adelie | Torgersen | NA | NA | NA | NA | NA | 2007 |
Adelie | Torgersen | 36.7 | 19.3 | 193 | 3450 | female | 2007 |
Adelie | Torgersen | 39.3 | 20.6 | 190 | 3650 | male | 2007 |
Adelie | Torgersen | 38.9 | 17.8 | 181 | 3625 | female | 2007 |
Adelie | Torgersen | 39.2 | 19.6 | 195 | 4675 | male | 2007 |
Adelie | Torgersen | 34.1 | 18.1 | 193 | 3475 | NA | 2007 |
Adelie | Torgersen | 42.0 | 20.2 | 190 | 4250 | NA | 2007 |
Adelie | Torgersen | 37.8 | 17.1 | 186 | 3300 | NA | 2007 |
Adelie | Torgersen | 37.8 | 17.3 | 180 | 3700 | NA | 2007 |
Adelie | Torgersen | 41.1 | 17.6 | 182 | 3200 | female | 2007 |
Adelie | Torgersen | 38.6 | 21.2 | 191 | 3800 | male | 2007 |
Adelie | Torgersen | 34.6 | 21.1 | 198 | 4400 | male | 2007 |
str(penguins)
## tibble [344 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ species : chr [1:344] "Adelie" "Adelie" "Adelie" "Adelie" ...
## $ island : chr [1:344] "Torgersen" "Torgersen" "Torgersen" "Torgersen" ...
## $ bill_length_mm : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
## $ bill_depth_mm : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
## $ flipper_length_mm: num [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
## $ body_mass_g : num [1:344] 3750 3800 3250 NA 3450 ...
## $ sex : chr [1:344] "male" "female" "female" NA ...
## $ year : num [1:344] 2007 2007 2007 2007 2007 ...
## - attr(*, "spec")=
## .. cols(
## .. species = col_character(),
## .. island = col_character(),
## .. bill_length_mm = col_double(),
## .. bill_depth_mm = col_double(),
## .. flipper_length_mm = col_double(),
## .. body_mass_g = col_double(),
## .. sex = col_character(),
## .. year = col_double()
## .. )
str(penguins %>% group_by(year))
## tibble [344 × 8] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ species : chr [1:344] "Adelie" "Adelie" "Adelie" "Adelie" ...
## $ island : chr [1:344] "Torgersen" "Torgersen" "Torgersen" "Torgersen" ...
## $ bill_length_mm : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
## $ bill_depth_mm : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
## $ flipper_length_mm: num [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
## $ body_mass_g : num [1:344] 3750 3800 3250 NA 3450 ...
## $ sex : chr [1:344] "male" "female" "female" NA ...
## $ year : num [1:344] 2007 2007 2007 2007 2007 ...
## - attr(*, "spec")=
## .. cols(
## .. species = col_character(),
## .. island = col_character(),
## .. bill_length_mm = col_double(),
## .. bill_depth_mm = col_double(),
## .. flipper_length_mm = col_double(),
## .. body_mass_g = col_double(),
## .. sex = col_character(),
## .. year = col_double()
## .. )
## - attr(*, "groups")= tibble [3 × 2] (S3: tbl_df/tbl/data.frame)
## ..$ year : num [1:3] 2007 2008 2009
## ..$ .rows: list<int> [1:3]
## .. ..$ : int [1:110] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..$ : int [1:114] 51 52 53 54 55 56 57 58 59 60 ...
## .. ..$ : int [1:120] 101 102 103 104 105 106 107 108 109 110 ...
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
\[dataset \mathbin{\%{>}\%} f_1(var_1, var_2, \ldots) \mathbin{\%{>}\%} f_2(var_k, \ldots) \;\leftrightarrow\; f_2(f_1(dataset, var_1, var_2, \ldots), var_k, \ldots)\]
arrange(penguins, species, desc(body_mass_g)) %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Biscoe 43.2 19 197 4775 male
## 2 Adelie Biscoe 41 20 203 4725 male
## 3 Adelie Torge… 42.9 17.6 196 4700 male
## 4 Adelie Torge… 39.2 19.6 195 4675 male
## 5 Adelie Dream 39.8 19.1 184 4650 male
## 6 Adelie Dream 39.6 18.8 190 4600 male
## # … with 1 more variable: year <dbl>
arrange(penguins, desc(body_mass_g)) %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Gentoo Biscoe 49.2 15.2 221 6300 male
## 2 Gentoo Biscoe 59.6 17 230 6050 male
## 3 Gentoo Biscoe 51.1 16.3 220 6000 male
## 4 Gentoo Biscoe 48.8 16.2 222 6000 male
## 5 Gentoo Biscoe 45.2 16.4 223 5950 male
## 6 Gentoo Biscoe 49.8 15.9 229 5950 male
## # … with 1 more variable: year <dbl>
arrange(penguins, island , bill_length_mm) %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Biscoe 34.5 18.1 187 2900 fema…
## 2 Adelie Biscoe 35 17.9 190 3450 fema…
## 3 Adelie Biscoe 35 17.9 192 3725 fema…
## 4 Adelie Biscoe 35.3 18.9 187 3800 fema…
## 5 Adelie Biscoe 35.5 16.2 195 3350 fema…
## 6 Adelie Biscoe 35.7 16.9 185 3150 fema…
## # … with 1 more variable: year <dbl>
arrange(penguins, sex, year, desc(bill_depth_mm)) %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Torge… 36.7 19.3 193 3450 fema…
## 2 Adelie Dream 37.6 19.3 181 3300 fema…
## 3 Adelie Biscoe 35.9 19.2 189 3800 fema…
## 4 Adelie Torge… 38.7 19 195 3450 fema…
## 5 Adelie Biscoe 35.3 18.9 187 3800 fema…
## 6 Chinst… Dream 46 18.9 195 4150 fema…
## # … with 1 more variable: year <dbl>
arrange(penguins, island , bill_length_mm) %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Biscoe 34.5 18.1 187 2900 fema…
## 2 Adelie Biscoe 35 17.9 190 3450 fema…
## 3 Adelie Biscoe 35 17.9 192 3725 fema…
## 4 Adelie Biscoe 35.3 18.9 187 3800 fema…
## 5 Adelie Biscoe 35.5 16.2 195 3350 fema…
## 6 Adelie Biscoe 35.7 16.9 185 3150 fema…
## # … with 1 more variable: year <dbl>
penguins %>% arrange(island , bill_length_mm) %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Biscoe 34.5 18.1 187 2900 fema…
## 2 Adelie Biscoe 35 17.9 190 3450 fema…
## 3 Adelie Biscoe 35 17.9 192 3725 fema…
## 4 Adelie Biscoe 35.3 18.9 187 3800 fema…
## 5 Adelie Biscoe 35.5 16.2 195 3350 fema…
## 6 Adelie Biscoe 35.7 16.9 185 3150 fema…
## # … with 1 more variable: year <dbl>
penguins %>% count()
## # A tibble: 1 x 1
## n
## <int>
## 1 344
penguins %>%
count(island, sort = TRUE, name="cantidad")
## # A tibble: 3 x 2
## island cantidad
## <chr> <int>
## 1 Biscoe 168
## 2 Dream 124
## 3 Torgersen 52
Con dplyr
penguins %>% filter(year >= 2008) %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Biscoe 39.6 17.7 186 3500 fema…
## 2 Adelie Biscoe 40.1 18.9 188 4300 male
## 3 Adelie Biscoe 35 17.9 190 3450 fema…
## 4 Adelie Biscoe 42 19.5 200 4050 male
## 5 Adelie Biscoe 34.5 18.1 187 2900 fema…
## 6 Adelie Biscoe 41.4 18.6 191 3700 male
## # … with 1 more variable: year <dbl>
penguins %>% filter(year == 2007 & species== "Adelie") %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Torge… 39.1 18.7 181 3750 male
## 2 Adelie Torge… 39.5 17.4 186 3800 fema…
## 3 Adelie Torge… 40.3 18 195 3250 fema…
## 4 Adelie Torge… NA NA NA NA <NA>
## 5 Adelie Torge… 36.7 19.3 193 3450 fema…
## 6 Adelie Torge… 39.3 20.6 190 3650 male
## # … with 1 more variable: year <dbl>
penguins %>% filter(island == "Torgersen", species == "Adelie") %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Torge… 39.1 18.7 181 3750 male
## 2 Adelie Torge… 39.5 17.4 186 3800 fema…
## 3 Adelie Torge… 40.3 18 195 3250 fema…
## 4 Adelie Torge… NA NA NA NA <NA>
## 5 Adelie Torge… 36.7 19.3 193 3450 fema…
## 6 Adelie Torge… 39.3 20.6 190 3650 male
## # … with 1 more variable: year <dbl>
penguins %>% filter(year >= 2008 & sex == "male") %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Biscoe 40.1 18.9 188 4300 male
## 2 Adelie Biscoe 42 19.5 200 4050 male
## 3 Adelie Biscoe 41.4 18.6 191 3700 male
## 4 Adelie Biscoe 40.6 18.8 193 3800 male
## 5 Adelie Biscoe 37.6 19.1 194 3750 male
## 6 Adelie Biscoe 41.3 21.1 195 4400 male
## # … with 1 more variable: year <dbl>
penguins %>% filter(year %in% c(2007,2008)) %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Torge… 39.1 18.7 181 3750 male
## 2 Adelie Torge… 39.5 17.4 186 3800 fema…
## 3 Adelie Torge… 40.3 18 195 3250 fema…
## 4 Adelie Torge… NA NA NA NA <NA>
## 5 Adelie Torge… 36.7 19.3 193 3450 fema…
## 6 Adelie Torge… 39.3 20.6 190 3650 male
## # … with 1 more variable: year <dbl>
penguins %>% filter(year %in% c(2007,2008) & species != "Adelie" ) %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Gentoo Biscoe 46.1 13.2 211 4500 fema…
## 2 Gentoo Biscoe 50 16.3 230 5700 male
## 3 Gentoo Biscoe 48.7 14.1 210 4450 fema…
## 4 Gentoo Biscoe 50 15.2 218 5700 male
## 5 Gentoo Biscoe 47.6 14.5 215 5400 male
## 6 Gentoo Biscoe 46.5 13.5 210 4550 fema…
## # … with 1 more variable: year <dbl>
penguins %>% filter(year %in% c(2007,2008) & species != "Adelie" ) %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Gentoo Biscoe 46.1 13.2 211 4500 fema…
## 2 Gentoo Biscoe 50 16.3 230 5700 male
## 3 Gentoo Biscoe 48.7 14.1 210 4450 fema…
## 4 Gentoo Biscoe 50 15.2 218 5700 male
## 5 Gentoo Biscoe 47.6 14.5 215 5400 male
## 6 Gentoo Biscoe 46.5 13.5 210 4550 fema…
## # … with 1 more variable: year <dbl>
penguins %>% filter(year %in% c(2007,2008), species == 'Adelie') %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Torge… 39.1 18.7 181 3750 male
## 2 Adelie Torge… 39.5 17.4 186 3800 fema…
## 3 Adelie Torge… 40.3 18 195 3250 fema…
## 4 Adelie Torge… NA NA NA NA <NA>
## 5 Adelie Torge… 36.7 19.3 193 3450 fema…
## 6 Adelie Torge… 39.3 20.6 190 3650 male
## # … with 1 more variable: year <dbl>
penguins %>% filter(year %in% c(2007,2008) & island !="Torgersen") %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Biscoe 37.8 18.3 174 3400 fema…
## 2 Adelie Biscoe 37.7 18.7 180 3600 male
## 3 Adelie Biscoe 35.9 19.2 189 3800 fema…
## 4 Adelie Biscoe 38.2 18.1 185 3950 male
## 5 Adelie Biscoe 38.8 17.2 180 3800 male
## 6 Adelie Biscoe 35.3 18.9 187 3800 fema…
## # … with 1 more variable: year <dbl>
penguins %>% filter(year == 2007, island=="Torgersen") %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Torge… 39.1 18.7 181 3750 male
## 2 Adelie Torge… 39.5 17.4 186 3800 fema…
## 3 Adelie Torge… 40.3 18 195 3250 fema…
## 4 Adelie Torge… NA NA NA NA <NA>
## 5 Adelie Torge… 36.7 19.3 193 3450 fema…
## 6 Adelie Torge… 39.3 20.6 190 3650 male
## # … with 1 more variable: year <dbl>
Sin dplyr
penguins[penguins$year==2007 | penguins$year==2008,] %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Torge… 39.1 18.7 181 3750 male
## 2 Adelie Torge… 39.5 17.4 186 3800 fema…
## 3 Adelie Torge… 40.3 18 195 3250 fema…
## 4 Adelie Torge… NA NA NA NA <NA>
## 5 Adelie Torge… 36.7 19.3 193 3450 fema…
## 6 Adelie Torge… 39.3 20.6 190 3650 male
## # … with 1 more variable: year <dbl>
penguins[penguins$year %in% c(2007,2008),c("species")] %>% head()
## # A tibble: 6 x 1
## species
## <chr>
## 1 Adelie
## 2 Adelie
## 3 Adelie
## 4 Adelie
## 5 Adelie
## 6 Adelie
penguins %>%
distinct(island, species)
## # A tibble: 5 x 2
## species island
## <chr> <chr>
## 1 Adelie Torgersen
## 2 Adelie Biscoe
## 3 Adelie Dream
## 4 Gentoo Biscoe
## 5 Chinstrap Dream
penguins %>% slice(100:109)
## # A tibble: 10 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Adelie Dream 43.2 18.5 192 4100
## 2 Adelie Biscoe 35 17.9 192 3725
## 3 Adelie Biscoe 41 20 203 4725
## 4 Adelie Biscoe 37.7 16 183 3075
## 5 Adelie Biscoe 37.8 20 190 4250
## 6 Adelie Biscoe 37.9 18.6 193 2925
## 7 Adelie Biscoe 39.7 18.9 184 3550
## 8 Adelie Biscoe 38.6 17.2 199 3750
## 9 Adelie Biscoe 38.2 20 190 3900
## 10 Adelie Biscoe 38.1 17 181 3175
## # … with 2 more variables: sex <chr>, year <dbl>
penguins %>%
filter(year == 2007, island=="Torgersen") %>%
dplyr::select(island, bill_length_mm, body_mass_g, sex) %>%
head(n=10)
## # A tibble: 10 x 4
## island bill_length_mm body_mass_g sex
## <chr> <dbl> <dbl> <chr>
## 1 Torgersen 39.1 3750 male
## 2 Torgersen 39.5 3800 female
## 3 Torgersen 40.3 3250 female
## 4 Torgersen NA NA <NA>
## 5 Torgersen 36.7 3450 female
## 6 Torgersen 39.3 3650 male
## 7 Torgersen 38.9 3625 female
## 8 Torgersen 39.2 4675 male
## 9 Torgersen 34.1 3475 <NA>
## 10 Torgersen 42 4250 <NA>
penguins %>%
filter(year %in% c(2007,2008) & species != "Adelie") %>%
select(species, island, bill_length_mm, body_mass_g, sex) %>%
head(n=5)
## # A tibble: 5 x 5
## species island bill_length_mm body_mass_g sex
## <chr> <chr> <dbl> <dbl> <chr>
## 1 Gentoo Biscoe 46.1 4500 female
## 2 Gentoo Biscoe 50 5700 male
## 3 Gentoo Biscoe 48.7 4450 female
## 4 Gentoo Biscoe 50 5700 male
## 5 Gentoo Biscoe 47.6 5400 male
penguins %>%
filter(year == 2007, island == "Torgersen" , sex != 'male') %>%
dplyr::select(island, bill_length_mm, body_mass_g, sex) %>%
head(n=10)
## # A tibble: 8 x 4
## island bill_length_mm body_mass_g sex
## <chr> <dbl> <dbl> <chr>
## 1 Torgersen 39.5 3800 female
## 2 Torgersen 40.3 3250 female
## 3 Torgersen 36.7 3450 female
## 4 Torgersen 38.9 3625 female
## 5 Torgersen 41.1 3200 female
## 6 Torgersen 36.6 3700 female
## 7 Torgersen 38.7 3450 female
## 8 Torgersen 34.4 3325 female
penguins %>%
filter(body_mass_g > 3500, island=="Torgersen") %>%
dplyr::select(species,bill_length_mm, body_mass_g, sex, year) %>%
head(10)
## # A tibble: 10 x 5
## species bill_length_mm body_mass_g sex year
## <chr> <dbl> <dbl> <chr> <dbl>
## 1 Adelie 39.1 3750 male 2007
## 2 Adelie 39.5 3800 female 2007
## 3 Adelie 39.3 3650 male 2007
## 4 Adelie 38.9 3625 female 2007
## 5 Adelie 39.2 4675 male 2007
## 6 Adelie 42 4250 <NA> 2007
## 7 Adelie 37.8 3700 <NA> 2007
## 8 Adelie 38.6 3800 male 2007
## 9 Adelie 34.6 4400 male 2007
## 10 Adelie 36.6 3700 female 2007
penguins %>%
filter(year > 2007, species=='Adelie') %>%
dplyr::select(species, body_mass_g , bill_depth_mm)%>%
head(n=10)
## # A tibble: 10 x 3
## species body_mass_g bill_depth_mm
## <chr> <dbl> <dbl>
## 1 Adelie 3500 17.7
## 2 Adelie 4300 18.9
## 3 Adelie 3450 17.9
## 4 Adelie 4050 19.5
## 5 Adelie 2900 18.1
## 6 Adelie 3700 18.6
## 7 Adelie 3550 17.5
## 8 Adelie 3800 18.8
## 9 Adelie 2850 16.6
## 10 Adelie 3750 19.1
penguins %>%
group_by(year) %>%
summarize("min" = min(body_mass_g, na.rm = TRUE),
"Q1" = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
"median (Q2)" = median(body_mass_g, na.rm = TRUE),
"mean" = mean(body_mass_g, na.rm = TRUE),
Q3 = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
"max Q4" = max(body_mass_g, na.rm = TRUE)
) %>% head()
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 7
## year min Q1 `median (Q2)` mean Q3 `max Q4`
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2007 2900 3525 3900 4125. 4600 6300
## 2 2008 2700 3612. 4200 4267. 4838. 6000
## 3 2009 2900 3500 4000 4210. 4850 6000
penguins %>%
group_by(species) %>%
summarize("min" = min(body_mass_g, na.rm = TRUE),
"Q1" = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
"median (Q2)" = median(body_mass_g, na.rm = TRUE),
"mean" = mean(body_mass_g, na.rm = TRUE),
Q3 = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
"max Q4" = max(body_mass_g, na.rm = TRUE)
) %>% head()
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 7
## species min Q1 `median (Q2)` mean Q3 `max Q4`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Adelie 2850 3350 3700 3701. 4000 4775
## 2 Chinstrap 2700 3488. 3700 3733. 3950 4800
## 3 Gentoo 3950 4700 5000 5076. 5500 6300
penguins %>%
group_by( species , island) %>%
summarize("min" = min(body_mass_g, na.rm = TRUE),
"Q1" = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
"median (Q2)" = median(body_mass_g, na.rm = TRUE),
"mean" = mean(body_mass_g, na.rm = TRUE),
Q3 = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
"max Q4" = max(body_mass_g, na.rm = TRUE)
) %>% head()
## `summarise()` regrouping output by 'species' (override with `.groups` argument)
## # A tibble: 5 x 8
## # Groups: species [3]
## species island min Q1 `median (Q2)` mean Q3 `max Q4`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Adelie Biscoe 2850 3388. 3750 3710. 3975 4775
## 2 Adelie Dream 2900 3388. 3575 3688. 3981. 4650
## 3 Adelie Torgersen 2900 3338. 3700 3706. 4000 4700
## 4 Chinstrap Dream 2700 3488. 3700 3733. 3950 4800
## 5 Gentoo Biscoe 3950 4700 5000 5076. 5500 6300
penguins %>%
group_by(sex, island) %>%
summarize("min" = min(body_mass_g, na.rm = TRUE),
"Q1" = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
"median (Q2)" = median(body_mass_g, na.rm = TRUE),
"mean" = mean(body_mass_g, na.rm = TRUE),
Q3 = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
"max Q4" = max(body_mass_g, na.rm = TRUE)
) %>% head()
## `summarise()` regrouping output by 'sex' (override with `.groups` argument)
## # A tibble: 6 x 8
## # Groups: sex [2]
## sex island min Q1 `median (Q2)` mean Q3 `max Q4`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 female Biscoe 2850 3819. 4588. 4319. 4812. 5200
## 2 female Dream 2700 3300 3450 3446. 3650 4150
## 3 female Torgersen 2900 3200 3400 3396. 3606. 3800
## 4 male Biscoe 3550 4738. 5350 5105. 5600 6300
## 5 male Dream 3250 3756. 3950 3987. 4250 4800
## 6 male Torgersen 3325 3788. 4000 4035. 4275 4700
penguins %>%
group_by( species , island) %>%
summarize("min" = min(body_mass_g, na.rm = TRUE),
"Q1" = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
"median (Q2)" = median(body_mass_g, na.rm = TRUE),
"mean" = mean(body_mass_g, na.rm = TRUE),
Q3 = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
"max Q4" = max(body_mass_g, na.rm = TRUE)
) %>% head()
## `summarise()` regrouping output by 'species' (override with `.groups` argument)
## # A tibble: 5 x 8
## # Groups: species [3]
## species island min Q1 `median (Q2)` mean Q3 `max Q4`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Adelie Biscoe 2850 3388. 3750 3710. 3975 4775
## 2 Adelie Dream 2900 3388. 3575 3688. 3981. 4650
## 3 Adelie Torgersen 2900 3338. 3700 3706. 4000 4700
## 4 Chinstrap Dream 2700 3488. 3700 3733. 3950 4800
## 5 Gentoo Biscoe 3950 4700 5000 5076. 5500 6300
penguins %>%
group_by(sex, island) %>%
summarize("min" = min(body_mass_g, na.rm = TRUE),
"Q1" = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
"median (Q2)" = median(body_mass_g, na.rm = TRUE),
"mean" = mean(body_mass_g, na.rm = TRUE),
Q3 = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
"max Q4" = max(body_mass_g, na.rm = TRUE)
) %>% head()
## `summarise()` regrouping output by 'sex' (override with `.groups` argument)
## # A tibble: 6 x 8
## # Groups: sex [2]
## sex island min Q1 `median (Q2)` mean Q3 `max Q4`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 female Biscoe 2850 3819. 4588. 4319. 4812. 5200
## 2 female Dream 2700 3300 3450 3446. 3650 4150
## 3 female Torgersen 2900 3200 3400 3396. 3606. 3800
## 4 male Biscoe 3550 4738. 5350 5105. 5600 6300
## 5 male Dream 3250 3756. 3950 3987. 4250 4800
## 6 male Torgersen 3325 3788. 4000 4035. 4275 4700
penguins %>%
group_by(sex) %>%
summarize("min" = min(body_mass_g, na.rm = TRUE),
"Q1" = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
"median (Q2)" = median(body_mass_g, na.rm = TRUE),
"mean" = mean(body_mass_g, na.rm = TRUE),
Q3 = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
"max Q4" = max(body_mass_g, na.rm = TRUE)
) %>% head()
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 7
## sex min Q1 `median (Q2)` mean Q3 `max Q4`
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 female 2700 3350 3650 3862. 4550 5200
## 2 male 3250 3900 4300 4546. 5312. 6300
## 3 <NA> 2975 3475 4100 4006. 4650 4875
penguins %>%
group_by(year) %>%
summarize("min" = min(body_mass_g, na.rm = TRUE),
"Q1" = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
"median (Q2)" = median(body_mass_g, na.rm = TRUE),
"mean" = mean(body_mass_g, na.rm = TRUE),
"Q3" = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
"max Q4" = max(body_mass_g, na.rm = TRUE)
) %>% head()
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 7
## year min Q1 `median (Q2)` mean Q3 `max Q4`
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2007 2900 3525 3900 4125. 4600 6300
## 2 2008 2700 3612. 4200 4267. 4838. 6000
## 3 2009 2900 3500 4000 4210. 4850 6000
penguins %>%
group_by(species) %>%
summarise(last(body_mass_g))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 3 x 2
## species `last(body_mass_g)`
## <chr> <dbl>
## 1 Adelie 4000
## 2 Chinstrap 3775
## 3 Gentoo 5400
# Mean, median and coefficient of variation (sd / mean) of body mass for each
# island/species combination. body_mass_g contains missing values, so every
# aggregate is given na.rm = TRUE; without it, any group that holds an NA
# reports NA for all three statistics.
penguins %>%
  group_by(island, species) %>%
  summarise(
    "mean body mass" = mean(body_mass_g, na.rm = TRUE),
    "median body mass" = median(body_mass_g, na.rm = TRUE),
    "cv body mass" = sd(body_mass_g, na.rm = TRUE) / mean(body_mass_g, na.rm = TRUE)
  )
## `summarise()` regrouping output by 'island' (override with `.groups` argument)
## # A tibble: 5 x 5
## # Groups: island [3]
## island species `mean body mass` `median body mass` `cv body mass`
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Biscoe Adelie 3710. 3750 0.131
## 2 Biscoe Gentoo 5076. 5000 0.0993
## 3 Dream Adelie 3688. 3575 0.123
## 4 Dream Chinstrap 3733. 3700 0.103
## 5 Torgersen Adelie 3706. 3700 0.120
penguins %>%
mutate(body_mass_Kg = body_mass_g / 1000) %>%
head()
## # A tibble: 6 x 9
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Torge… 39.1 18.7 181 3750 male
## 2 Adelie Torge… 39.5 17.4 186 3800 fema…
## 3 Adelie Torge… 40.3 18 195 3250 fema…
## 4 Adelie Torge… NA NA NA NA <NA>
## 5 Adelie Torge… 36.7 19.3 193 3450 fema…
## 6 Adelie Torge… 39.3 20.6 190 3650 male
## # … with 2 more variables: year <dbl>, body_mass_Kg <dbl>
penguins %>%
mutate(bill_length_cm = bill_length_mm / 10) %>%
head()
## # A tibble: 6 x 9
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Torge… 39.1 18.7 181 3750 male
## 2 Adelie Torge… 39.5 17.4 186 3800 fema…
## 3 Adelie Torge… 40.3 18 195 3250 fema…
## 4 Adelie Torge… NA NA NA NA <NA>
## 5 Adelie Torge… 36.7 19.3 193 3450 fema…
## 6 Adelie Torge… 39.3 20.6 190 3650 male
## # … with 2 more variables: year <dbl>, bill_length_cm <dbl>
penguins %>%
mutate(proportion = flipper_length_mm / bill_length_mm ) %>%
head()
## # A tibble: 6 x 9
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Torge… 39.1 18.7 181 3750 male
## 2 Adelie Torge… 39.5 17.4 186 3800 fema…
## 3 Adelie Torge… 40.3 18 195 3250 fema…
## 4 Adelie Torge… NA NA NA NA <NA>
## 5 Adelie Torge… 36.7 19.3 193 3450 fema…
## 6 Adelie Torge… 39.3 20.6 190 3650 male
## # … with 2 more variables: year <dbl>, proportion <dbl>
# Add a numeric column `sexo` that recodes sex: 1 where sex is "male", 2 for
# any other non-missing value. Rows whose sex is NA get NA, because ifelse()
# returns NA wherever its test evaluates to NA.
penguins %>%
mutate(sexo=ifelse(sex=="male",1,2))
## # A tibble: 344 x 9
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Adelie Torge… 39.1 18.7 181 3750
## 2 Adelie Torge… 39.5 17.4 186 3800
## 3 Adelie Torge… 40.3 18 195 3250
## 4 Adelie Torge… NA NA NA NA
## 5 Adelie Torge… 36.7 19.3 193 3450
## 6 Adelie Torge… 39.3 20.6 190 3650
## 7 Adelie Torge… 38.9 17.8 181 3625
## 8 Adelie Torge… 39.2 19.6 195 4675
## 9 Adelie Torge… 34.1 18.1 193 3475
## 10 Adelie Torge… 42 20.2 190 4250
## # … with 334 more rows, and 3 more variables: sex <chr>, year <dbl>, sexo <dbl>
# Add a column derived from bill_length_mm whose scaling depends on sex:
# rows with sex "male" are divided by 10, other non-missing rows by 100, and
# rows with NA sex yield NA.
# NOTE(review): the column is named bill_length_cm, but only the / 10 branch
# converts millimetres to centimetres - confirm the / 100 branch is intended.
penguins %>%
mutate(bill_length_cm = ifelse(sex == "male", bill_length_mm / 10, bill_length_mm / 100))
## # A tibble: 344 x 9
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Adelie Torge… 39.1 18.7 181 3750
## 2 Adelie Torge… 39.5 17.4 186 3800
## 3 Adelie Torge… 40.3 18 195 3250
## 4 Adelie Torge… NA NA NA NA
## 5 Adelie Torge… 36.7 19.3 193 3450
## 6 Adelie Torge… 39.3 20.6 190 3650
## 7 Adelie Torge… 38.9 17.8 181 3625
## 8 Adelie Torge… 39.2 19.6 195 4675
## 9 Adelie Torge… 34.1 18.1 193 3475
## 10 Adelie Torge… 42 20.2 190 4250
## # … with 334 more rows, and 3 more variables: sex <chr>, year <dbl>,
## # bill_length_cm <dbl>
penguins %>%
mutate(body_mass_Kg = body_mass_g / 1000) %>%
filter(year==2008, island=="Torgersen") %>%
arrange(desc(body_mass_Kg)) %>%
head()
## # A tibble: 6 x 9
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Torge… 42.9 17.6 196 4700 male
## 2 Adelie Torge… 41.8 19.4 198 4450 male
## 3 Adelie Torge… 42.8 18.5 195 4250 male
## 4 Adelie Torge… 35.1 19.4 193 4200 male
## 5 Adelie Torge… 45.8 18.9 197 4150 male
## 6 Adelie Torge… 42.1 19.1 195 4000 male
## # … with 2 more variables: year <dbl>, body_mass_Kg <dbl>
# Male Adelie penguins recorded on Biscoe in 2009 weighing at most 5000 g,
# ordered from longest to shortest flipper, with ties broken by body mass
# (heaviest first); only the first six rows are shown.
penguins %>%
  # Grams to kilograms is a division by 1000 (not 100).
  mutate(body_mass_Kg = body_mass_g / 1000) %>%
  filter(
    species == "Adelie",
    island == "Biscoe",
    year == 2009,
    sex == "male",
    body_mass_g <= 5000
  ) %>%
  # desc() accepts a single vector, so each descending sort key needs its own
  # desc() call; a second argument inside one desc() is not a valid sort key.
  arrange(desc(flipper_length_mm), desc(body_mass_Kg)) %>%
  head()
## # A tibble: 6 x 9
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Biscoe 41 20 203 4725 male
## 2 Adelie Biscoe 43.2 19 197 4775 male
## 3 Adelie Biscoe 42.2 19.5 197 4275 male
## 4 Adelie Biscoe 42.7 18.3 196 4075 male
## 5 Adelie Biscoe 45.6 20.3 191 4600 male
## 6 Adelie Biscoe 37.8 20 190 4250 male
## # … with 2 more variables: year <dbl>, body_mass_Kg <dbl>
rename(penguins, "flipper length (mm)"=flipper_length_mm, "body mass (g)"=body_mass_g) %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm `flipper length… `body mass (g)`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Adelie Torge… 39.1 18.7 181 3750
## 2 Adelie Torge… 39.5 17.4 186 3800
## 3 Adelie Torge… 40.3 18 195 3250
## 4 Adelie Torge… NA NA NA NA
## 5 Adelie Torge… 36.7 19.3 193 3450
## 6 Adelie Torge… 39.3 20.6 190 3650
## # … with 2 more variables: sex <chr>, year <dbl>
rename(penguins, "Longitud Aleta (mm)"=flipper_length_mm, "Masa Corporal (g)"=body_mass_g, "profundidad pico m" = bill_depth_mm) %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm `profundidad pi… `Longitud Aleta…
## <chr> <chr> <dbl> <dbl> <dbl>
## 1 Adelie Torge… 39.1 18.7 181
## 2 Adelie Torge… 39.5 17.4 186
## 3 Adelie Torge… 40.3 18 195
## 4 Adelie Torge… NA NA NA
## 5 Adelie Torge… 36.7 19.3 193
## 6 Adelie Torge… 39.3 20.6 190
## # … with 3 more variables: `Masa Corporal (g)` <dbl>, sex <chr>, year <dbl>
# Draw 5 rows at random. slice_sample() is the current replacement for the
# superseded sample_n(). No seed is set, so the rows differ on every run.
slice_sample(penguins, n = 5)
## # A tibble: 5 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Gentoo Biscoe 48.4 14.4 203 4625 fema…
## 2 Adelie Torge… 39 17.1 191 3050 fema…
## 3 Adelie Biscoe 36.5 16.6 181 2850 fema…
## 4 Chinst… Dream 50.7 19.7 203 4050 male
## 5 Gentoo Biscoe 48.7 15.7 208 5350 male
## # … with 1 more variable: year <dbl>
# Draw a random 20% of the rows and show the first six. slice_sample(prop = )
# is the current replacement for the superseded sample_frac(). No seed is set,
# so the rows differ on every run.
slice_sample(penguins, prop = 0.2) %>% head()
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Chinst… Dream 45.7 17.3 193 3600 fema…
## 2 Adelie Dream 39.2 18.6 190 4250 male
## 3 Adelie Torge… 34.4 18.4 184 3325 fema…
## 4 Gentoo Biscoe 50 15.3 220 5550 male
## 5 Gentoo Biscoe 43.5 15.2 213 4650 fema…
## 6 Adelie Dream 37.8 18.1 193 3750 male
## # … with 1 more variable: year <dbl>
select(penguins, species, flipper_length_mm, body_mass_g, sex) %>% head()
## # A tibble: 6 x 4
## species flipper_length_mm body_mass_g sex
## <chr> <dbl> <dbl> <chr>
## 1 Adelie 181 3750 male
## 2 Adelie 186 3800 female
## 3 Adelie 195 3250 female
## 4 Adelie NA NA <NA>
## 5 Adelie 193 3450 female
## 6 Adelie 190 3650 male
select(penguins, !flipper_length_mm) %>% head()
## # A tibble: 6 x 7
## species island bill_length_mm bill_depth_mm body_mass_g sex year
## <chr> <chr> <dbl> <dbl> <dbl> <chr> <dbl>
## 1 Adelie Torgersen 39.1 18.7 3750 male 2007
## 2 Adelie Torgersen 39.5 17.4 3800 female 2007
## 3 Adelie Torgersen 40.3 18 3250 female 2007
## 4 Adelie Torgersen NA NA NA <NA> 2007
## 5 Adelie Torgersen 36.7 19.3 3450 female 2007
## 6 Adelie Torgersen 39.3 20.6 3650 male 2007
select(penguins, -species, -flipper_length_mm, -body_mass_g, -sex) %>% head()
## # A tibble: 6 x 4
## island bill_length_mm bill_depth_mm year
## <chr> <dbl> <dbl> <dbl>
## 1 Torgersen 39.1 18.7 2007
## 2 Torgersen 39.5 17.4 2007
## 3 Torgersen 40.3 18 2007
## 4 Torgersen NA NA 2007
## 5 Torgersen 36.7 19.3 2007
## 6 Torgersen 39.3 20.6 2007
penguins %>% select(starts_with("bill")) %>% head()
## # A tibble: 6 x 2
## bill_length_mm bill_depth_mm
## <dbl> <dbl>
## 1 39.1 18.7
## 2 39.5 17.4
## 3 40.3 18
## 4 NA NA
## 5 36.7 19.3
## 6 39.3 20.6
penguins %>% select(ends_with("mm")) %>% head()
## # A tibble: 6 x 3
## bill_length_mm bill_depth_mm flipper_length_mm
## <dbl> <dbl> <dbl>
## 1 39.1 18.7 181
## 2 39.5 17.4 186
## 3 40.3 18 195
## 4 NA NA NA
## 5 36.7 19.3 193
## 6 39.3 20.6 190
penguins %>% select(contains("length")) %>% head()
## # A tibble: 6 x 2
## bill_length_mm flipper_length_mm
## <dbl> <dbl>
## 1 39.1 181
## 2 39.5 186
## 3 40.3 195
## 4 NA NA
## 5 36.7 193
## 6 39.3 190
dplyr::select(penguins %>% filter(year == 2007), species, flipper_length_mm, body_mass_g, sex) %>% head()
## # A tibble: 6 x 4
## species flipper_length_mm body_mass_g sex
## <chr> <dbl> <dbl> <chr>
## 1 Adelie 181 3750 male
## 2 Adelie 186 3800 female
## 3 Adelie 195 3250 female
## 4 Adelie NA NA <NA>
## 5 Adelie 193 3450 female
## 6 Adelie 190 3650 male
dplyr::select(penguins %>% filter(sex == "female"), species, bill_depth_mm ) %>% head()
## # A tibble: 6 x 2
## species bill_depth_mm
## <chr> <dbl>
## 1 Adelie 17.4
## 2 Adelie 18
## 3 Adelie 19.3
## 4 Adelie 17.8
## 5 Adelie 17.6
## 6 Adelie 17.8
dplyr::select(penguins %>% filter(sex != 'male', species != 'Adelie' , body_mass_g >= 5150), sex, species, body_mass_g) %>% head()
## # A tibble: 3 x 3
## sex species body_mass_g
## <chr> <chr> <dbl>
## 1 female Gentoo 5150
## 2 female Gentoo 5200
## 3 female Gentoo 5200
# Rows recorded in 2008, reduced to four columns.
penguins %>%
  filter(year == 2008) %>%
  dplyr::select(island, bill_length_mm, body_mass_g, sex) %>%
  head()
## # A tibble: 6 x 4
## island bill_length_mm body_mass_g sex
## <chr> <dbl> <dbl> <chr>
## 1 Biscoe 39.6 3500 female
## 2 Biscoe 40.1 4300 male
## 3 Biscoe 35 3450 female
## 4 Biscoe 42 4050 male
## 5 Biscoe 34.5 2900 female
## 6 Biscoe 41.4 3700 male
# Rows recorded in 2007 on Torgersen island, reduced to four columns.
penguins %>%
  filter(year == 2007, island == "Torgersen") %>%
  dplyr::select(species, flipper_length_mm, body_mass_g, sex) %>%
  head()
## # A tibble: 6 x 4
## species flipper_length_mm body_mass_g sex
## <chr> <dbl> <dbl> <chr>
## 1 Adelie 181 3750 male
## 2 Adelie 186 3800 female
## 3 Adelie 195 3250 female
## 4 Adelie NA NA <NA>
## 5 Adelie 193 3450 female
## 6 Adelie 190 3650 male
# Summary statistics of body mass for 2007; missing values are ignored
# in every statistic via na.rm = TRUE.
penguins %>%
  filter(year == 2007) %>%
  summarise(
    min = min(body_mass_g, na.rm = TRUE),
    Q1 = quantile(body_mass_g, probs = 0.25, na.rm = TRUE),
    # Backticks are needed because these column names are not syntactic.
    `median (Q2)` = median(body_mass_g, na.rm = TRUE),
    mean = mean(body_mass_g, na.rm = TRUE),
    Q3 = quantile(body_mass_g, probs = 0.75, na.rm = TRUE),
    `max Q4` = max(body_mass_g, na.rm = TRUE)
  )
## # A tibble: 1 x 6
## min Q1 `median (Q2)` mean Q3 `max Q4`
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2900 3525 3900 4125. 4600 6300
# transmute() keeps only the columns it creates: body mass divided by 1000.
transmute(penguins, body_mass_kg = body_mass_g / 1000)
## # A tibble: 344 x 1
## body_mass_kg
## <dbl>
## 1 3.75
## 2 3.8
## 3 3.25
## 4 NA
## 5 3.45
## 6 3.65
## 7 3.62
## 8 4.68
## 9 3.48
## 10 4.25
## # … with 334 more rows
# Same conversion, this time also carrying the species column along.
transmute(
  penguins,
  body_mass_kg = body_mass_g / 1000,
  species
)
## # A tibble: 344 x 2
## body_mass_kg species
## <dbl> <chr>
## 1 3.75 Adelie
## 2 3.8 Adelie
## 3 3.25 Adelie
## 4 NA Adelie
## 5 3.45 Adelie
## 6 3.65 Adelie
## 7 3.62 Adelie
## 8 4.68 Adelie
## 9 3.48 Adelie
## 10 4.25 Adelie
## # … with 334 more rows
library(forcats)
`fct_reorder()`: reordena los niveles de un factor según otra variable.
`fct_infreq()`: reordena los niveles de un factor según la frecuencia de sus valores.
# Number of rows per island, largest first, stored in a column called
# "cantidad"; island is then turned into a factor with fct_infreq().
count(penguins, island, sort = TRUE, name = "cantidad") %>%
  mutate(island = fct_infreq(island))
## # A tibble: 3 x 2
## island cantidad
## <fct> <int>
## 1 Biscoe 168
## 2 Dream 124
## 3 Torgersen 52
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# Number of rows per island (largest first) plus each island's share of the
# total, formatted by scales::percent() as a string with two decimals.
# The count column is spelled "amount", matching the name used for the same
# quantity where the island counts are joined to their coordinates.
penguins %>%
  count(island, sort = TRUE, name = "amount") %>%
  mutate(
    island = fct_infreq(island),
    percent = percent(amount / sum(amount), accuracy = 0.01)
  )
## # A tibble: 3 x 3
## island amount percent
## <fct> <int> <chr>
## 1 Biscoe 168 48.84%
## 2 Dream 124 36.05%
## 3 Torgersen 52 15.12%
# Flag birds whose body mass lies above the upper Tukey fence
# (Q3 + 1.5 * IQR) computed within their own species, and keep only those.
# The flag is named `overweight` because the comparison is against the UPPER
# fence; the comparison already yields a logical, so no if_else() is needed.
# ungroup() drops the per-species grouping once it is no longer required.
penguins %>%
  group_by(species) %>%
  mutate(
    overweight = body_mass_g >
      quantile(body_mass_g, 0.75, na.rm = TRUE) +
        1.5 * IQR(body_mass_g, na.rm = TRUE)
  ) %>%
  filter(overweight) %>%
  ungroup()
## # A tibble: 1 x 9
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Chinst… Dream 52 20.7 210 4800 male
## # … with 2 more variables: year <dbl>, overweight <lgl>
# Append a running row number as an integer column called index.
mutate(penguins, index = row_number())
## # A tibble: 344 x 9
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Adelie Torge… 39.1 18.7 181 3750
## 2 Adelie Torge… 39.5 17.4 186 3800
## 3 Adelie Torge… 40.3 18 195 3250
## 4 Adelie Torge… NA NA NA NA
## 5 Adelie Torge… 36.7 19.3 193 3450
## 6 Adelie Torge… 39.3 20.6 190 3650
## 7 Adelie Torge… 38.9 17.8 181 3625
## 8 Adelie Torge… 39.2 19.6 195 4675
## 9 Adelie Torge… 34.1 18.1 193 3475
## 10 Adelie Torge… 42 20.2 190 4250
## # … with 334 more rows, and 3 more variables: sex <chr>, year <dbl>,
## # index <int>
`fct_relevel()`: cambia manualmente el orden de los niveles de un factor.
`fct_lump()`: colapsa los valores menos (o más) frecuentes de un factor en «otro».
# Polygon outline of Antarctica taken from the "world" map database.
antarctica <- map_data("world", region = "Antarctica")

# One row per island with the latitude/longitude used to place it on the map.
df_penguinloc <- tribble(
  ~island,     ~lat_y,      ~long_x,
  "Dream",     -64.7333,    -64.2333,
  "Biscoe",    -65.4333,    -65.5000,
  "Torgersen", -64.7666636, -64.083333
)
df_penguinloc
## # A tibble: 3 x 3
## island lat_y long_x
## <chr> <dbl> <dbl>
## 1 Dream -64.7 -64.2
## 2 Biscoe -65.4 -65.5
## 3 Torgersen -64.8 -64.1
\[ggplot(dataset)\text{+}geom\left(aes(\cdot)\right)\text{+}options(\cdot)\text{+}facets(\cdot){\leftrightarrow}ggplot\left(dataset,aes(\cdot)\right)\text{+}geom(\cdot)\text{+}options(\cdot)\text{+}facets(\cdot)\]
library(ggplot2)

# Scatter plot of body mass against flipper length for the 2007 records.
# Species is mapped to colour and island to point shape; the x axis is
# drawn on a log10 scale.
penguins_2007 <- filter(penguins, year == 2007)
ggplot(
  data = penguins_2007,
  mapping = aes(
    x = flipper_length_mm,
    y = body_mass_g,
    colour = species,
    shape = island
  )
) +
  geom_point() +
  scale_x_log10() +
  labs(
    x = "Longitud de la aleta (mms.)",
    y = "Masa corporal (grs.)",
    title = "Body mass (grs.) vs Flipper length (mms.) by Species",
    subtitle = "year 2007",
    caption = " Summary of data, ref 2007"
  )
## Warning: Removed 1 rows containing missing values (geom_point).
library(ggplot2)

# Same scatter plot as before, split into one panel per species.
penguins_2007 <- filter(penguins, year == 2007)
ggplot(
  data = penguins_2007,
  mapping = aes(
    x = flipper_length_mm,
    y = body_mass_g,
    colour = species,
    shape = island
  )
) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(vars(species)) +
  labs(
    x = "Longitud de la aleta (mms.)",
    y = "Masa corporal (grs.)",
    title = "Body mass (grs.) vs Flipper length (mms.) by Species",
    subtitle = "year 2007",
    caption = " Summary of data, ref 2007"
  )
## Warning: Removed 1 rows containing missing values (geom_point).
Un tibble es una versión moderna del data frame que trabaja de manera «perezosa» (es decir, hace menos cosas de forma automática), lo que evita problemas comunes y supuestos implícitos de los data frames clásicos. Entre otras diferencias, los tibbles:
No convierten automáticamente las cadenas de caracteres en factores.
No crean nombres de fila para las observaciones.
No modifican los nombres de columna que no son sintácticamente válidos.
library(tibble)
# Coerce to a tibble and show the first rows.
head(as_tibble(penguins))
## # A tibble: 6 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Torge… 39.1 18.7 181 3750 male
## 2 Adelie Torge… 39.5 17.4 186 3800 fema…
## 3 Adelie Torge… 40.3 18 195 3250 fema…
## 4 Adelie Torge… NA NA NA NA <NA>
## 5 Adelie Torge… 36.7 19.3 193 3450 fema…
## 6 Adelie Torge… 39.3 20.6 190 3650 male
## # … with 1 more variable: year <dbl>
# Build a tibble column by column; z is computed from the x and y columns
# defined just before it in the same call.
tibble(
  x = penguins[["flipper_length_mm"]],
  y = penguins[["body_mass_g"]],
  z = x / y
)
## # A tibble: 344 x 3
## x y z
## <dbl> <dbl> <dbl>
## 1 181 3750 0.0483
## 2 186 3800 0.0489
## 3 195 3250 0.06
## 4 NA NA NA
## 5 193 3450 0.0559
## 6 190 3650 0.0521
## 7 181 3625 0.0499
## 8 195 4675 0.0417
## 9 193 3475 0.0555
## 10 190 4250 0.0447
## # … with 334 more rows
# Row-wise tibble definition: the ~name header row declares the columns,
# and each following line supplies one row.
tribble(
  ~x,  ~y, ~z,
  "a", 2,  3.6,
  "b", 1,  8.5
)
## # A tibble: 2 x 3
## x y z
## <chr> <dbl> <dbl>
## 1 a 2 3.6
## 2 b 1 8.5
# Stack the first and the second row of penguins into a two-row tibble.
bind_rows(
  penguins[1, ],
  penguins[2, ]
)
## # A tibble: 2 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie Torge… 39.1 18.7 181 3750 male
## 2 Adelie Torge… 39.5 17.4 186 3800 fema…
## # … with 1 more variable: year <dbl>
# Glue the first and the second column of penguins side by side.
head(bind_cols(penguins[, 1], penguins[, 2]))
## # A tibble: 6 x 2
## species island
## <chr> <chr>
## 1 Adelie Torgersen
## 2 Adelie Torgersen
## 3 Adelie Torgersen
## 4 Adelie Torgersen
## 5 Adelie Torgersen
## 6 Adelie Torgersen
# Long -> wide: each island becomes its own column holding body_mass_g.
# NOTE(review): spread() is superseded by tidyr::pivot_wider(); it is kept
# here because switching would change the row/column order shown below.
penguins_spread <- penguins %>%
  spread(key = island, value = body_mass_g)
head(penguins_spread)
## # A tibble: 6 x 9
## species bill_length_mm bill_depth_mm flipper_length_… sex year Biscoe Dream
## <chr> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 Adelie 32.1 15.5 188 fema… 2009 NA 3050
## 2 Adelie 33.1 16.1 178 fema… 2008 NA 2900
## 3 Adelie 33.5 19 190 fema… 2008 NA NA
## 4 Adelie 34 17.1 185 fema… 2008 NA 3400
## 5 Adelie 34.1 18.1 193 <NA> 2007 NA NA
## 6 Adelie 34.4 18.4 184 fema… 2007 NA NA
## # … with 1 more variable: Torgersen <dbl>
# Wide -> long: fold columns 7 to 9 (the island columns) back into an
# island / body_mass_g pair, then discard the rows without a body mass.
# NOTE(review): gather() is superseded by tidyr::pivot_longer().
penguins_spread %>%
  gather(key = "island", value = "body_mass_g", 7:9) %>%
  filter(!is.na(body_mass_g))
## # A tibble: 342 x 8
## species bill_length_mm bill_depth_mm flipper_length_… sex year island
## <chr> <dbl> <dbl> <dbl> <chr> <dbl> <chr>
## 1 Adelie 34.5 18.1 187 fema… 2008 Biscoe
## 2 Adelie 35 17.9 190 fema… 2008 Biscoe
## 3 Adelie 35 17.9 192 fema… 2009 Biscoe
## 4 Adelie 35.3 18.9 187 fema… 2007 Biscoe
## 5 Adelie 35.5 16.2 195 fema… 2008 Biscoe
## 6 Adelie 35.7 16.9 185 fema… 2008 Biscoe
## 7 Adelie 35.9 19.2 189 fema… 2007 Biscoe
## 8 Adelie 36.4 17.1 184 fema… 2008 Biscoe
## 9 Adelie 36.5 16.6 181 fema… 2008 Biscoe
## 10 Adelie 37.6 17 185 fema… 2008 Biscoe
## # … with 332 more rows, and 1 more variable: body_mass_g <dbl>
# Merge species and island into one "species/island" text column.
penguins_union <- penguins %>%
  unite(col = species_island, species, island, sep = "/")
head(penguins_union)
## # A tibble: 6 x 7
## species_island bill_length_mm bill_depth_mm flipper_length_… body_mass_g sex
## <chr> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Adelie/Torger… 39.1 18.7 181 3750 male
## 2 Adelie/Torger… 39.5 17.4 186 3800 fema…
## 3 Adelie/Torger… 40.3 18 195 3250 fema…
## 4 Adelie/Torger… NA NA NA NA <NA>
## 5 Adelie/Torger… 36.7 19.3 193 3450 fema…
## 6 Adelie/Torger… 39.3 20.6 190 3650 male
## # … with 1 more variable: year <dbl>
# Inverse of the unite() step: split species_island at "/" back into
# separate species and island columns.
penguins_union %>%
  separate(
    col = species_island,
    into = c("species", "island"),
    sep = "/"
  )
## # A tibble: 344 x 8
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Adelie Torge… 39.1 18.7 181 3750
## 2 Adelie Torge… 39.5 17.4 186 3800
## 3 Adelie Torge… 40.3 18 195 3250
## 4 Adelie Torge… NA NA NA NA
## 5 Adelie Torge… 36.7 19.3 193 3450
## 6 Adelie Torge… 39.3 20.6 190 3650
## 7 Adelie Torge… 38.9 17.8 181 3625
## 8 Adelie Torge… 39.2 19.6 195 4675
## 9 Adelie Torge… 34.1 18.1 193 3475
## 10 Adelie Torge… 42 20.2 190 4250
## # … with 334 more rows, and 2 more variables: sex <chr>, year <dbl>
# Pair every row of penguins with each value of z (1, 2, 3).
tidyr::expand_grid(
  penguins,
  z = 1:3
)
## # A tibble: 1,032 x 9
## species island bill_length_mm bill_depth_mm flipper_length_… body_mass_g
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Adelie Torge… 39.1 18.7 181 3750
## 2 Adelie Torge… 39.1 18.7 181 3750
## 3 Adelie Torge… 39.1 18.7 181 3750
## 4 Adelie Torge… 39.5 17.4 186 3800
## 5 Adelie Torge… 39.5 17.4 186 3800
## 6 Adelie Torge… 39.5 17.4 186 3800
## 7 Adelie Torge… 40.3 18 195 3250
## 8 Adelie Torge… 40.3 18 195 3250
## 9 Adelie Torge… 40.3 18 195 3250
## 10 Adelie Torge… NA NA NA NA
## # … with 1,022 more rows, and 3 more variables: sex <chr>, year <dbl>, z <int>
# Two one-column lookup tables. tibble() is used because data_frame() is
# deprecated (tibble >= 1.1.0) and emitted a deprecation warning here.
species <- tibble(specie = c("Adelie", "Chinstrap", "Gentoo"))
sexes <- tibble(sex = c("male", "female"))

# Every combination of the rows of both tables.
crossing(species, sexes)
## # A tibble: 6 x 2
## specie sex
## <chr> <chr>
## 1 Adelie female
## 2 Adelie male
## 3 Chinstrap female
## 4 Chinstrap male
## 5 Gentoo female
## 6 Gentoo male
# All combinations of the distinct island and species values, whether or
# not a given pair actually occurs in the data.
crossing(
  islands = penguins[["island"]],
  species = penguins[["species"]]
)
## # A tibble: 9 x 2
## islands species
## <chr> <chr>
## 1 Biscoe Adelie
## 2 Biscoe Chinstrap
## 3 Biscoe Gentoo
## 4 Dream Adelie
## 5 Dream Chinstrap
## 6 Dream Gentoo
## 7 Torgersen Adelie
## 8 Torgersen Chinstrap
## 9 Torgersen Gentoo
# Only the island/species pairs that are present in the data.
tidyr::nesting(
  islands = penguins[["island"]],
  species = penguins[["species"]]
)
## # A tibble: 5 x 2
## islands species
## <chr> <chr>
## 1 Biscoe Adelie
## 2 Biscoe Gentoo
## 3 Dream Adelie
## 4 Dream Chinstrap
## 5 Torgersen Adelie
# Distinct values of body_mass_g, one per row.
tidyr::expand(penguins, body_mass_g)
## # A tibble: 95 x 1
## body_mass_g
## <dbl>
## 1 2700
## 2 2850
## 3 2900
## 4 2925
## 5 2975
## 6 3000
## 7 3050
## 8 3075
## 9 3100
## 10 3150
## # … with 85 more rows
# Count the rows per island and attach the island coordinates defined
# earlier; the result overwrites df_penguinloc.
df_penguinloc <- penguins %>%
  count(island, name = "amount") %>%
  left_join(df_penguinloc, by = "island")
df_penguinloc
## # A tibble: 3 x 4
## island amount lat_y long_x
## <chr> <int> <dbl> <dbl>
## 1 Biscoe 168 -65.4 -65.5
## 2 Dream 124 -64.7 -64.2
## 3 Torgersen 52 -64.8 -64.1
# Stacked bar chart of the 2009 records per island, filled by species.
# The island factor levels are reversed relative to factor()'s default
# order, and scale_y_reverse() flips the count axis.
d <- penguins %>%
  filter(year == 2009) %>%
  mutate(island = factor(island, levels = rev(levels(factor(island))))) %>%
  ggplot() +
  stat_count(mapping = aes(x = island, fill = species), alpha = 0.8) +
  # Island names are drawn as annotations because the x axis text is blanked
  # in theme() below.
  annotate("text", x = "Torgersen", y = 3, label = "Torgersen", color = "#1874CD") +
  annotate("text", x = "Dream", y = 3, label = "Dream", color = "#c02728") +
  annotate("text", x = "Biscoe", y = 3, label = "Biscoe", color = "#53868B") +
  scale_fill_manual(values = c("#66c2a5", "#fc8d62", "#8da0cb")) +
  scale_y_reverse() +
  labs(caption = "Source: Gorman, Williams and Fraser, 2014") +
  theme_minimal() +
  theme(
    legend.position = c(0.2, 0.3),
    axis.title = element_blank(),
    axis.text.x = element_blank(),
    panel.grid = element_blank(),
    plot.background = element_rect(fill = "#f9f9f9", color = "#f9f9f9")
  )
d
# Zoomed map: Antarctica polygon in an orthographic projection, limited to
# the given longitude/latitude window, with a labelled point per island.
p <- ggplot(data = antarctica, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "#506B8E", alpha = .8) +
  coord_map(
    "ortho",
    orientation = c(-90, 0, 0),
    xlim = c(-62, -55),
    ylim = c(-75, -60)
  ) +
  # Label colours are given positionally, one per row of df_penguinloc
  # (presumably in the same order as the manual colour scale below -
  # verify if the row order of df_penguinloc ever changes).
  geom_text_repel(
    data = df_penguinloc,
    mapping = aes(x = long_x, y = lat_y, label = island),
    group = 1,
    color = c("#53868B", "#c02728", "#1874CD"),
    box.padding = 0.5,
    nudge_x = -2,
    nudge_y = 1,
    min.segment.length = 0
  ) +
  geom_point(
    data = df_penguinloc,
    mapping = aes(x = long_x, y = lat_y, group = 1, colour = island),
    alpha = .7
  ) +
  scale_color_manual(values = c("#53868B", "#c02728", "#1874CD")) +
  labs(
    title = "Penguins in Palmer Archipelago",
    subtitle = "Recorded penguins in 2009 and their nesting Islands"
  ) +
  theme_map() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5, size = 20),
    plot.subtitle = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "#f9f9f9", color = "#f9f9f9")
  )
p
# Overview map of the whole of Antarctica: island points plus a black
# rectangle drawn over the longitude/latitude box given below.
inset <- ggplot(data = antarctica, mapping = aes(x = long, y = lat, group = group)) +
  geom_polygon(fill = "#506B8E", alpha = .5) +
  coord_map("ortho", orientation = c(-90, 0, 0)) +
  geom_point(
    data = df_penguinloc,
    mapping = aes(x = long_x, y = lat_y, group = island, colour = island),
    alpha = .5,
    size = 1
  ) +
  annotate(
    "rect",
    xmin = -68, xmax = -54,
    ymin = -75, ymax = -60,
    color = "black",
    fill = "transparent"
  ) +
  labs(title = "Antarctica") +
  theme_map() +
  theme(
    legend.position = "none",
    panel.grid.major.y = element_line(colour = "grey"),
    plot.title = element_text(hjust = 0.5),
    plot.background = element_rect(fill = "#f9f9f9", color = "#f9f9f9")
  )
inset
# Overlay the overview map on the zoomed map; x, y, width and height are
# fractions of the drawing canvas.
a <- ggdraw(p) +
  draw_plot(
    inset,
    x = .47,
    y = .38,
    width = .5,
    height = .4
  )
a
# Stack the map composite (a) above the bar chart (d) in a single column,
# giving the map twice the height of the chart. rel_widths is omitted
# because it has no effect when there is only one column.
p1 <- plot_grid(a, d, ncol = 1, rel_heights = c(2, 1)) +
  theme(plot.background = element_rect(fill = "#f9f9f9")) +
  labs(title = "Penguins in Palmer Archipelago")

# Path to the picture inside the project root. The path passed to here()
# must be relative: a leading "/" makes it absolute, so it would no longer
# be resolved against the project directory.
penguin <- here("penguin.jfif")

# Final figure: the stacked plots with the penguin picture drawn on top.
p2 <- ggdraw() +
  draw_plot(p1) +
  draw_image(
    penguin,
    x = 0.24,
    y = 0.38,
    hjust = 1,
    width = 0.20,
    height = 0.25
  )
p2