Carga de paquetes

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(visdat)
library(cowplot)
## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp
library(ggrepel)
library(mapproj)
## Loading required package: maps
## 
## Attaching package: 'maps'
## 
## The following object is masked from 'package:purrr':
## 
##     map
library(ggthemes) 
## 
## Attaching package: 'ggthemes'
## 
## The following object is masked from 'package:cowplot':
## 
##     theme_map
library(here)
## here() starts at /cloud/project
library(extrafont)
## Registering fonts with R
library(knitr)
library(magick)
## Linking to ImageMagick 6.9.10.23
## Enabled features: fontconfig, freetype, fftw, lcms, pango, webp, x11
## Disabled features: cairo, ghostscript, heic, raw, rsvg
## Using 16 threads

Importar la base de datos

# Three alternative ways of loading the penguins data are shown below. Each
# assignment overwrites `penguins`, so the object used afterwards is the one
# produced by the last call (the CSV download).

# Option 1: local delimited text file. read.delim2() uses "," as the decimal
# mark by default. Local paths are built with here::here() so they resolve
# from the project root instead of a hard-coded absolute location.
penguins <- read.delim2(here::here("penguins.txt"))

# Option 2: local Excel workbook, declaring one type per column and treating
# the literal string "NA" as missing.
library(readxl)
penguins <- read_excel(
  path = here::here("penguins.xlsx"),
  col_types = c(
    "text",
    "text",
    "numeric",
    "numeric",
    "numeric",
    "numeric",
    "text",
    "text"
  ),
  na = "NA"
)

# Option 3: CSV published in the TidyTuesday repository. Empty strings and
# "NA" are read as missing; the column-specification message is suppressed.
penguins <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-28/penguins.csv",
  na = c("", "NA"),
  show_col_types = FALSE
)

Visualizar los datos

# Show the last seven rows as a formatted table.
# `year` is converted to character first: kable() applies `format.args` to
# every numeric column, so a numeric year would be printed with a thousands
# separator ("2,009").
penguins %>%
  tail(n = 7) %>%
  mutate(year = as.character(year)) %>%
  kable(
    caption = "Penguins data. Source: Gorman, Williams and Fraser, 2014",
    digits = 3,
    format.args = list(big.mark = ",", scientific = FALSE)
  )
Penguins data. Source: Gorman, Williams and Fraser, 2014
species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex year
Chinstrap Dream 46.8 16.5 189 3,650 female 2009
Chinstrap Dream 45.7 17.0 195 3,650 female 2009
Chinstrap Dream 55.8 19.8 207 4,000 male 2009
Chinstrap Dream 43.5 18.1 202 3,400 female 2009
Chinstrap Dream 49.6 18.2 193 3,775 male 2009
Chinstrap Dream 50.8 19.0 210 4,100 male 2009
Chinstrap Dream 50.2 18.7 198 3,775 female 2009
# Compact overview of the data: dimensions, column types and first values.
str(penguins)
## spc_tbl_ [344 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ species          : chr [1:344] "Adelie" "Adelie" "Adelie" "Adelie" ...
##  $ island           : chr [1:344] "Torgersen" "Torgersen" "Torgersen" "Torgersen" ...
##  $ bill_length_mm   : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
##  $ bill_depth_mm    : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
##  $ flipper_length_mm: num [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
##  $ body_mass_g      : num [1:344] 3750 3800 3250 NA 3450 ...
##  $ sex              : chr [1:344] "male" "female" "female" NA ...
##  $ year             : num [1:344] 2007 2007 2007 2007 2007 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   species = col_character(),
##   ..   island = col_character(),
##   ..   bill_length_mm = col_double(),
##   ..   bill_depth_mm = col_double(),
##   ..   flipper_length_mm = col_double(),
##   ..   body_mass_g = col_double(),
##   ..   sex = col_character(),
##   ..   year = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# Same overview after grouping by year; the grouping is stored as an extra
# "groups" attribute on the tibble.
str(dplyr::group_by(penguins, year))
## gropd_df [344 × 8] (S3: grouped_df/tbl_df/tbl/data.frame)
##  $ species          : chr [1:344] "Adelie" "Adelie" "Adelie" "Adelie" ...
##  $ island           : chr [1:344] "Torgersen" "Torgersen" "Torgersen" "Torgersen" ...
##  $ bill_length_mm   : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
##  $ bill_depth_mm    : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
##  $ flipper_length_mm: num [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
##  $ body_mass_g      : num [1:344] 3750 3800 3250 NA 3450 ...
##  $ sex              : chr [1:344] "male" "female" "female" NA ...
##  $ year             : num [1:344] 2007 2007 2007 2007 2007 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   species = col_character(),
##   ..   island = col_character(),
##   ..   bill_length_mm = col_double(),
##   ..   bill_depth_mm = col_double(),
##   ..   flipper_length_mm = col_double(),
##   ..   body_mass_g = col_double(),
##   ..   sex = col_character(),
##   ..   year = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr> 
##  - attr(*, "groups")= tibble [3 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ year : num [1:3] 2007 2008 2009
##   ..$ .rows: list<int> [1:3] 
##   .. ..$ : int [1:110] 1 2 3 4 5 6 7 8 9 10 ...
##   .. ..$ : int [1:114] 51 52 53 54 55 56 57 58 59 60 ...
##   .. ..$ : int [1:120] 101 102 103 104 105 106 107 108 109 110 ...
##   .. ..@ ptype: int(0) 
##   ..- attr(*, ".drop")= logi TRUE
# First rows after sorting by bill length in ascending order.
penguins %>%
  arrange(bill_length_mm) %>%
  head()

# Last rows after sorting by bill length in descending order.
penguins %>%
  arrange(desc(bill_length_mm)) %>%
  tail()

# Row counts: overall, per island, and per island/species combination.
penguins %>% count()
penguins %>% count(island)
penguins %>% count(island, species)

# Rows with a bill strictly longer than 55.8 mm, then 55.8 mm or longer.
penguins %>% filter(bill_length_mm > 55.8)
penguins %>% filter(bill_length_mm >= 55.8)

# First rows of the Gentoo penguins only.
penguins %>%
  filter(species == "Gentoo") %>%
  head()

# First rows of every species except Gentoo.
penguins %>%
  filter(species != "Gentoo") %>%
  head()

# Unique values / combinations present in the data.
penguins %>% distinct(island)
penguins %>% distinct(island, species)
penguins %>% distinct(island, species, year)

# Select rows 200 to 205 by position.
penguins %>% slice(200:205)
# Descriptive statistics of bill length (mm) for each species: minimum,
# quartiles, mean and maximum. Every statistic, including the median, is
# computed on bill_length_mm so the columns describe the same variable.
# na.rm = TRUE drops the missing measurements before each calculation.
penguins %>%
  group_by(species) %>%
  summarize(
    "min" = min(bill_length_mm, na.rm = TRUE),
    "Q1" = quantile(bill_length_mm, probs = 0.25, na.rm = TRUE),
    "median (Q2)" = median(bill_length_mm, na.rm = TRUE),
    "mean" = mean(bill_length_mm, na.rm = TRUE),
    "Q3" = quantile(bill_length_mm, probs = 0.75, na.rm = TRUE),
    "max" = max(bill_length_mm, na.rm = TRUE)
  )
# Descriptive statistics of bill length (mm) for each island/species
# combination: minimum, quartiles, mean and maximum. Every statistic,
# including the median, is computed on bill_length_mm so the columns describe
# the same variable. na.rm = TRUE drops the missing measurements.
# With two grouping variables, summarize() keeps the result grouped by
# `island` and reports that with a message.
penguins %>%
  group_by(island, species) %>%
  summarize(
    "min" = min(bill_length_mm, na.rm = TRUE),
    "Q1" = quantile(bill_length_mm, probs = 0.25, na.rm = TRUE),
    "median (Q2)" = median(bill_length_mm, na.rm = TRUE),
    "mean" = mean(bill_length_mm, na.rm = TRUE),
    "Q3" = quantile(bill_length_mm, probs = 0.75, na.rm = TRUE),
    "max" = max(bill_length_mm, na.rm = TRUE)
  )
## `summarise()` has grouped output by 'island'. You can override using the
## `.groups` argument.
# Derive bill length in centimetres (mm / 10) and show the last rows.
penguins %>%
  mutate(bill_length_cm = bill_length_mm / 10) %>%
  tail()

# Rename the flipper column to a display-style name and show the first rows.
penguins %>%
  rename("flipper length (mm)" = flipper_length_mm) %>%
  head()

Tarea