Librería tidyverse

Carga de paquetes

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(visdat)
library(cowplot)

## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp

library(ggrepel)
library(mapproj)

## Loading required package: maps
## 
## Attaching package: 'maps'
## 
## The following object is masked from 'package:purrr':
## 
##     map

library(ggthemes)

## 
## Attaching package: 'ggthemes'
## 
## The following object is masked from 'package:cowplot':
## 
##     theme_map

library(here)

## here() starts at /cloud/project

library(extrafont)

## Registering fonts with R

library(knitr)
library(magick)

## Linking to ImageMagick 6.9.10.23
## Enabled features: fontconfig, freetype, fftw, lcms, pango, webp, x11
## Disabled features: cairo, ghostscript, heic, raw, rsvg
## Using 16 threads

Importar la base de datos

Lectura de un txt

# Read the tab-delimited copy of the penguins data from the project root.
# The path is built with here::here() rather than a hard-coded absolute
# path, so the script does not depend on the project living in
# /cloud/project.
# NOTE(review): read.delim2() defaults to dec = ","; confirm the .txt file
# really uses decimal commas, otherwise read.delim() is the right reader.
penguins <- read.delim2(here::here("penguins.txt"))

Lectura de un xlsx

library(readxl)

# Read the Excel copy of the penguins data from the project root, declaring
# the type of each of the eight columns explicitly. Cells holding the text
# "NA" are read as missing values.
# The path is built with here::here() rather than a hard-coded absolute
# path, so the script does not depend on the project living in
# /cloud/project.
penguins <- read_excel(
  path = here::here("penguins.xlsx"),
  col_types = c(
    "text", "text",
    "numeric", "numeric", "numeric", "numeric",
    "text", "text"
  ),
  na = "NA"
)

Lectura desde la web

# Download the penguins CSV from the TidyTuesday repository. Empty fields
# and the literal text "NA" are both read as missing values, and the
# column-type message is suppressed.
penguins <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-28/penguins.csv",
  na = c("", "NA"),
  show_col_types = FALSE
)

Visualizar los datos

Formato de tabla

# Show the last seven rows as a formatted table.
# `year` is converted to text first: format.args applies big.mark to every
# numeric column, which would otherwise print the year as "2,009".
# `align` is set explicitly so the year column stays right-aligned like the
# other numeric columns even though it is now text.
penguins %>%
  tail(n = 7) %>%
  mutate(year = as.character(year)) %>%
  kable(
    caption = "Penguins data. Source: Gorman, Williams and Fraser, 2014",
    digits = 3,
    align = c("l", "l", "r", "r", "r", "r", "l", "r"),
    format.args = list(big.mark = ",", scientific = FALSE)
  )

Penguins data. Source: Gorman, Williams and Fraser, 2014
species	island	bill_length_mm	bill_depth_mm	flipper_length_mm	body_mass_g	sex	year
Chinstrap	Dream	46.8	16.5	189	3,650	female	2,009
Chinstrap	Dream	45.7	17.0	195	3,650	female	2,009
Chinstrap	Dream	55.8	19.8	207	4,000	male	2,009
Chinstrap	Dream	43.5	18.1	202	3,400	female	2,009
Chinstrap	Dream	49.6	18.2	193	3,775	male	2,009
Chinstrap	Dream	50.8	19.0	210	4,100	male	2,009
Chinstrap	Dream	50.2	18.7	198	3,775	female	2,009

Estructura

# Print the structure of the data: dimensions, column types and the first
# values of each column.
str(penguins)

## spc_tbl_ [344 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ species          : chr [1:344] "Adelie" "Adelie" "Adelie" "Adelie" ...
##  $ island           : chr [1:344] "Torgersen" "Torgersen" "Torgersen" "Torgersen" ...
##  $ bill_length_mm   : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
##  $ bill_depth_mm    : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
##  $ flipper_length_mm: num [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
##  $ body_mass_g      : num [1:344] 3750 3800 3250 NA 3450 ...
##  $ sex              : chr [1:344] "male" "female" "female" NA ...
##  $ year             : num [1:344] 2007 2007 2007 2007 2007 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   species = col_character(),
##   ..   island = col_character(),
##   ..   bill_length_mm = col_double(),
##   ..   bill_depth_mm = col_double(),
##   ..   flipper_length_mm = col_double(),
##   ..   body_mass_g = col_double(),
##   ..   sex = col_character(),
##   ..   year = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

Agrupamiento

# Print the structure of the data after grouping it by year.
str(dplyr::group_by(penguins, year))

## gropd_df [344 × 8] (S3: grouped_df/tbl_df/tbl/data.frame)
##  $ species          : chr [1:344] "Adelie" "Adelie" "Adelie" "Adelie" ...
##  $ island           : chr [1:344] "Torgersen" "Torgersen" "Torgersen" "Torgersen" ...
##  $ bill_length_mm   : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
##  $ bill_depth_mm    : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
##  $ flipper_length_mm: num [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
##  $ body_mass_g      : num [1:344] 3750 3800 3250 NA 3450 ...
##  $ sex              : chr [1:344] "male" "female" "female" NA ...
##  $ year             : num [1:344] 2007 2007 2007 2007 2007 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   species = col_character(),
##   ..   island = col_character(),
##   ..   bill_length_mm = col_double(),
##   ..   bill_depth_mm = col_double(),
##   ..   flipper_length_mm = col_double(),
##   ..   body_mass_g = col_double(),
##   ..   sex = col_character(),
##   ..   year = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr> 
##  - attr(*, "groups")= tibble [3 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ year : num [1:3] 2007 2008 2009
##   ..$ .rows: list<int> [1:3] 
##   .. ..$ : int [1:110] 1 2 3 4 5 6 7 8 9 10 ...
##   .. ..$ : int [1:114] 51 52 53 54 55 56 57 58 59 60 ...
##   .. ..$ : int [1:120] 101 102 103 104 105 106 107 108 109 110 ...
##   .. ..@ ptype: int(0) 
##   ..- attr(*, ".drop")= logi TRUE

Ordenar

# First rows of the data sorted by bill length, ascending.
head(arrange(penguins, bill_length_mm))

# Last rows of the data sorted by bill length, descending.
tail(arrange(penguins, desc(bill_length_mm)))

Conteo

# Total number of rows.
count(penguins)

# Number of rows per island.
count(penguins, island)

# Number of rows per island/species combination.
count(penguins, island, species)

Filtrar

# Rows whose bill length is strictly greater than 55.8 mm.
filter(penguins, bill_length_mm > 55.8)

# Same threshold, but rows equal to 55.8 mm are kept as well.
filter(penguins, bill_length_mm >= 55.8)

# First rows of the Gentoo species only.
head(filter(penguins, species == "Gentoo"))

# First rows of every species except Gentoo.
# head() is written as an explicit call, like every other pipeline step in
# this file, instead of relying on magrittr accepting a bare function name.
penguins %>%
  filter(species != "Gentoo") %>%
  head()

Categorías

# Unique islands.
distinct(penguins, island)

# Unique island/species combinations.
distinct(penguins, island, species)

# Unique island/species/year combinations.
distinct(penguins, island, species, year)

Seleccionar filas por posición (slice)

# Pick rows 200 through 205 by position.
slice(penguins, 200:205)

Estadísticas o resumen de datos

# Summary statistics of bill length per species: minimum, quartiles,
# mean and maximum. Missing values are ignored in every statistic.
# The median is computed on bill_length_mm like the other statistics; it
# previously used bill_depth_mm, mixing two variables in one summary.
penguins %>%
  group_by(species) %>%
  summarize(
    "min" = min(bill_length_mm, na.rm = TRUE),
    "Q1" = quantile(bill_length_mm, probs = 0.25, na.rm = TRUE),
    "median (Q2)" = median(bill_length_mm, na.rm = TRUE),
    "mean" = mean(bill_length_mm, na.rm = TRUE),
    "Q3" = quantile(bill_length_mm, probs = 0.75, na.rm = TRUE),
    "max" = max(bill_length_mm, na.rm = TRUE)
  )

# Summary statistics of bill length per island/species combination:
# minimum, quartiles, mean and maximum. Missing values are ignored in
# every statistic.
# The median is computed on bill_length_mm like the other statistics; it
# previously used bill_depth_mm, mixing two variables in one summary.
# summarise() leaves the result grouped by island and reports this in a
# message.
penguins %>%
  group_by(island, species) %>%
  summarize(
    "min" = min(bill_length_mm, na.rm = TRUE),
    "Q1" = quantile(bill_length_mm, probs = 0.25, na.rm = TRUE),
    "median (Q2)" = median(bill_length_mm, na.rm = TRUE),
    "mean" = mean(bill_length_mm, na.rm = TRUE),
    "Q3" = quantile(bill_length_mm, probs = 0.75, na.rm = TRUE),
    "max" = max(bill_length_mm, na.rm = TRUE)
  )

## `summarise()` has grouped output by 'island'. You can override using the
## `.groups` argument.

Modificar

# Add bill length in centimetres (millimetres divided by 10) and show the
# last rows of the result.
tail(mutate(penguins, bill_length_cm = bill_length_mm / 10))

Renombrar

# Give the flipper length column a human-readable name and show the first
# rows of the result.
head(rename(penguins, "flipper length (mm)" = flipper_length_mm))

Tarea

Seleccionar (select)

Librería tidyverse

M Sc. Mario Gregorio Saavedra Rodríguez

2024-06-21

Carga de paquetes

Importar la base de datos

Visualizar los datos

Tarea