Carga de paquetes
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(visdat)
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:lubridate':
##
## stamp
library(ggrepel)
library(mapproj)
## Loading required package: maps
##
## Attaching package: 'maps'
##
## The following object is masked from 'package:purrr':
##
## map
library(ggthemes)
##
## Attaching package: 'ggthemes'
##
## The following object is masked from 'package:cowplot':
##
## theme_map
library(here)
## here() starts at /cloud/project
library(extrafont)
## Registering fonts with R
library(knitr)
library(magick)
## Linking to ImageMagick 6.9.10.23
## Enabled features: fontconfig, freetype, fftw, lcms, pango, webp, x11
## Disabled features: cairo, ghostscript, heic, raw, rsvg
## Using 16 threads
Importar la base de datos
# Three alternative ways of loading the same data set are shown below.
# Each call assigns to `penguins`, so the object created by the last call
# (the CSV download) is the one left in the session afterwards.

# Option 1: delimited text file. read.delim2() uses "," as the decimal mark.
# The path is resolved from the project root with here::here() instead of a
# hard-coded absolute location, so the script also runs outside this machine.
penguins <- read.delim2(here::here("penguins.txt"))

# Option 2: Excel workbook, with one explicit type per column and the literal
# string "NA" treated as a missing value.
library(readxl)
penguins <- read_excel(
  path = here::here("penguins.xlsx"),
  col_types = c(
    "text",
    "text",
    "numeric",
    "numeric",
    "numeric",
    "numeric",
    "text",
    "text"
  ),
  na = "NA"
)

# Option 3: CSV read straight from the TidyTuesday repository. Empty strings
# and "NA" are both read as missing; the column-type message is silenced.
penguins <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-28/penguins.csv",
  na = c("", "NA"),
  show_col_types = FALSE
)
Visualizar los datos
# Show the last seven rows as a formatted table.
# `year` is numeric, so kable() would apply `big.mark` to it as well and print
# years as "2,009". Converting it to character first keeps the thousands
# separator for the measurement columns only.
penguins %>%
  tail(n = 7) %>%
  mutate(year = as.character(year)) %>%
  kable(
    caption = "Penguins data. Source: Gorman, Williams and Fraser, 2014",
    digits = 3,
    format.args = list(big.mark = ",", scientific = FALSE)
  )
Penguins data. Source: Gorman, Williams and Fraser,
2014
Chinstrap |
Dream |
46.8 |
16.5 |
189 |
3,650 |
female |
2,009 |
Chinstrap |
Dream |
45.7 |
17.0 |
195 |
3,650 |
female |
2,009 |
Chinstrap |
Dream |
55.8 |
19.8 |
207 |
4,000 |
male |
2,009 |
Chinstrap |
Dream |
43.5 |
18.1 |
202 |
3,400 |
female |
2,009 |
Chinstrap |
Dream |
49.6 |
18.2 |
193 |
3,775 |
male |
2,009 |
Chinstrap |
Dream |
50.8 |
19.0 |
210 |
4,100 |
male |
2,009 |
Chinstrap |
Dream |
50.2 |
18.7 |
198 |
3,775 |
female |
2,009 |
# Compact overview of the object's structure: class, dimensions, column types.
str(penguins)
## spc_tbl_ [344 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ species : chr [1:344] "Adelie" "Adelie" "Adelie" "Adelie" ...
## $ island : chr [1:344] "Torgersen" "Torgersen" "Torgersen" "Torgersen" ...
## $ bill_length_mm : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
## $ bill_depth_mm : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
## $ flipper_length_mm: num [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
## $ body_mass_g : num [1:344] 3750 3800 3250 NA 3450 ...
## $ sex : chr [1:344] "male" "female" "female" NA ...
## $ year : num [1:344] 2007 2007 2007 2007 2007 ...
## - attr(*, "spec")=
## .. cols(
## .. species = col_character(),
## .. island = col_character(),
## .. bill_length_mm = col_double(),
## .. bill_depth_mm = col_double(),
## .. flipper_length_mm = col_double(),
## .. body_mass_g = col_double(),
## .. sex = col_character(),
## .. year = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
# Structure of the same data after grouping it by year.
str(dplyr::group_by(penguins, year))
## gropd_df [344 × 8] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ species : chr [1:344] "Adelie" "Adelie" "Adelie" "Adelie" ...
## $ island : chr [1:344] "Torgersen" "Torgersen" "Torgersen" "Torgersen" ...
## $ bill_length_mm : num [1:344] 39.1 39.5 40.3 NA 36.7 39.3 38.9 39.2 34.1 42 ...
## $ bill_depth_mm : num [1:344] 18.7 17.4 18 NA 19.3 20.6 17.8 19.6 18.1 20.2 ...
## $ flipper_length_mm: num [1:344] 181 186 195 NA 193 190 181 195 193 190 ...
## $ body_mass_g : num [1:344] 3750 3800 3250 NA 3450 ...
## $ sex : chr [1:344] "male" "female" "female" NA ...
## $ year : num [1:344] 2007 2007 2007 2007 2007 ...
## - attr(*, "spec")=
## .. cols(
## .. species = col_character(),
## .. island = col_character(),
## .. bill_length_mm = col_double(),
## .. bill_depth_mm = col_double(),
## .. flipper_length_mm = col_double(),
## .. body_mass_g = col_double(),
## .. sex = col_character(),
## .. year = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
## - attr(*, "groups")= tibble [3 × 2] (S3: tbl_df/tbl/data.frame)
## ..$ year : num [1:3] 2007 2008 2009
## ..$ .rows: list<int> [1:3]
## .. ..$ : int [1:110] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..$ : int [1:114] 51 52 53 54 55 56 57 58 59 60 ...
## .. ..$ : int [1:120] 101 102 103 104 105 106 107 108 109 110 ...
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
# First six rows after sorting by bill length in ascending order.
head(arrange(penguins, bill_length_mm))

# Last six rows after sorting by bill length in descending order.
tail(arrange(penguins, desc(x = bill_length_mm)))
# Row counts: overall, per island, and per island/species combination.
count(penguins)
count(penguins, island)
count(penguins, island, species)
# Rows whose bill length is strictly greater than 55.8.
penguins %>%
  filter(bill_length_mm > 55.8)

# Same threshold, but inclusive.
penguins %>%
  filter(bill_length_mm >= 55.8)

# First rows of the Gentoo species.
penguins %>%
  filter(species == "Gentoo") %>%
  head()

# First rows of every species other than Gentoo.
# head() is written with parentheses, matching the call above; a bare
# function name on the right-hand side only works with the magrittr pipe.
penguins %>%
  filter(species != "Gentoo") %>%
  head()
# Unique values / combinations present in the data.
distinct(penguins, island)
distinct(penguins, island, species)
distinct(penguins, island, species, year)

# Rows 200 through 205, selected by position.
slice(penguins, 200:205)
- Estadísticas o resumen de datos
# Minimum, quartiles, mean and maximum of bill length for each species.
# Every statistic is computed on bill_length_mm. The median was previously
# taken from bill_depth_mm, which placed a value from a different variable
# between Q1 and Q3 of bill length.
# Missing values are dropped in each statistic via na.rm = TRUE.
penguins %>%
  group_by(species) %>%
  summarize(
    "min" = min(bill_length_mm, na.rm = TRUE),
    "Q1" = quantile(bill_length_mm, probs = 0.25, na.rm = TRUE),
    "median (Q2)" = median(bill_length_mm, na.rm = TRUE),
    "mean" = mean(bill_length_mm, na.rm = TRUE),
    "Q3" = quantile(bill_length_mm, probs = 0.75, na.rm = TRUE),
    "max" = max(bill_length_mm, na.rm = TRUE)
  )
# Minimum, quartiles, mean and maximum of bill length for each
# island/species combination.
# Every statistic is computed on bill_length_mm. The median was previously
# taken from bill_depth_mm, a different variable from the rest of the row.
# `.groups` is left unset, so the result remains grouped by island and
# summarize() reports that in a message.
penguins %>%
  group_by(island, species) %>%
  summarize(
    "min" = min(bill_length_mm, na.rm = TRUE),
    "Q1" = quantile(bill_length_mm, probs = 0.25, na.rm = TRUE),
    "median (Q2)" = median(bill_length_mm, na.rm = TRUE),
    "mean" = mean(bill_length_mm, na.rm = TRUE),
    "Q3" = quantile(bill_length_mm, probs = 0.75, na.rm = TRUE),
    "max" = max(bill_length_mm, na.rm = TRUE)
  )
## `summarise()` has grouped output by 'island'. You can override using the
## `.groups` argument.
# Add bill length expressed in centimetres and show the last rows.
tail(mutate(penguins, bill_length_cm = bill_length_mm / 10))
# Give the flipper-length column a display-friendly name and show the first rows.
head(rename(penguins, "flipper length (mm)" = flipper_length_mm))