library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.4
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.1.1 v stringr 1.4.0
## v tidyr 1.1.3 v forcats 0.5.1
## v readr 1.4.0
## Warning: package 'ggplot2' was built under R version 4.0.5
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.4
## Warning: package 'readr' was built under R version 4.0.4
## Warning: package 'purrr' was built under R version 4.0.4
## Warning: package 'stringr' was built under R version 4.0.4
## Warning: package 'forcats' was built under R version 4.0.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
## Warning: package 'readxl' was built under R version 4.0.4
# Read the gapminder workbook into a tibble and display it.
gapminder <- readxl::read_xlsx(path = "data/gapminder.xlsx")
gapminder
## # A tibble: 1,704 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
## 7 Afghanistan Asia 1982 39.9 12881816 978.
## 8 Afghanistan Asia 1987 40.8 13867957 852.
## 9 Afghanistan Asia 1992 41.7 16317921 649.
## 10 Afghanistan Asia 1997 41.8 22227415 635.
## # ... with 1,694 more rows
#función filter()
# Keep only the rows whose country is Peru.
gapminder %>% filter(country == "Peru")
## # A tibble: 12 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Peru Americas 1952 43.9 8025700 3759.
## 2 Peru Americas 1957 46.3 9146100 4245.
## 3 Peru Americas 1962 49.1 10516500 4957.
## 4 Peru Americas 1967 51.4 12132200 5788.
## 5 Peru Americas 1972 55.4 13954700 5938.
## 6 Peru Americas 1977 58.4 15990099 6281.
## 7 Peru Americas 1982 61.4 18125129 6435.
## 8 Peru Americas 1987 64.1 20195924 6361.
## 9 Peru Americas 1992 66.5 22430449 4446.
## 10 Peru Americas 1997 68.4 24748122 5838.
## 11 Peru Americas 2002 69.9 26769436 5909.
## 12 Peru Americas 2007 71.4 28674757 7409.
# Rows for Peru from 1992 onwards (both conditions must hold).
gapminder %>% filter(country == "Peru" & year >= 1992)
## # A tibble: 4 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Peru Americas 1992 66.5 22430449 4446.
## 2 Peru Americas 1997 68.4 24748122 5838.
## 3 Peru Americas 2002 69.9 26769436 5909.
## 4 Peru Americas 2007 71.4 28674757 7409.
#obtener la data a partir de 1992 para Perú y Chile
# Rows for Peru or Chile from 1992 onwards.
gapminder %>%
  filter(
    country %in% c("Peru", "Chile"),
    year >= 1992
  )
## # A tibble: 8 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Chile Americas 1992 74.1 13572994 7596.
## 2 Chile Americas 1997 75.8 14599929 10118.
## 3 Chile Americas 2002 77.9 15497046 10779.
## 4 Chile Americas 2007 78.6 16284741 13172.
## 5 Peru Americas 1992 66.5 22430449 4446.
## 6 Peru Americas 1997 68.4 24748122 5838.
## 7 Peru Americas 2002 69.9 26769436 5909.
## 8 Peru Americas 2007 71.4 28674757 7409.
# Rows from 2002 onwards for a fixed set of four countries.
gapminder %>%
  filter(
    year >= 2002,
    country %in% c("Peru", "Chile", "Colombia", "Mexico")
  )
## # A tibble: 8 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Chile Americas 2002 77.9 15497046 10779.
## 2 Chile Americas 2007 78.6 16284741 13172.
## 3 Colombia Americas 2002 71.7 41008227 5755.
## 4 Colombia Americas 2007 72.9 44227550 7007.
## 5 Mexico Americas 2002 74.9 102479927 10742.
## 6 Mexico Americas 2007 76.2 108700891 11978.
## 7 Peru Americas 2002 69.9 26769436 5909.
## 8 Peru Americas 2007 71.4 28674757 7409.
Usando gapminder, filtra los datos de los países miembros permanentes del Consejo de Seguridad de la ONU entre 1957 y 2007
# Permanent UN Security Council members, restricted to 1957-2007.
# Country names must match the dataset spelling exactly: the earlier
# "United Startes" typo silently dropped every United States row
# (33 rows instead of 4 countries x 11 survey years = 44).
# NOTE(review): Russia is not listed here -- presumably it is absent from
# this dataset; confirm.
filter(gapminder,
  country %in% c("United States", "United Kingdom", "France", "China"),
  between(year, 1957, 2007))
## # A tibble: 44 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 China Asia 1957 50.5 637408000 576.
## 2 China Asia 1962 44.5 665770000 488.
## 3 China Asia 1967 58.4 754550000 613.
## 4 China Asia 1972 63.1 862030000 677.
## 5 China Asia 1977 64.0 943455000 741.
## 6 China Asia 1982 65.5 1000281000 962.
## 7 China Asia 1987 67.3 1084035000 1379.
## 8 China Asia 1992 68.7 1164970000 1656.
## 9 China Asia 1997 70.4 1230075000 2289.
## 10 China Asia 2002 72.0 1280400000 3119.
## # ... with 34 more rows
Usando gapminder, filtra los datos de paises de Africa y Asia para el 1997
# Rows for African and Asian countries in 1997.
gapminder %>%
  filter(year == 1997, continent %in% c("Africa", "Asia"))
## # A tibble: 85 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan Asia 1997 41.8 22227415 635.
## 2 Algeria Africa 1997 69.2 29072015 4797.
## 3 Angola Africa 1997 41.0 9875024 2277.
## 4 Bahrain Asia 1997 73.9 598561 20292.
## 5 Bangladesh Asia 1997 59.4 123315288 973.
## 6 Benin Africa 1997 54.8 6066080 1233.
## 7 Botswana Africa 1997 52.6 1536536 8647.
## 8 Burkina Faso Africa 1997 50.3 10352843 946.
## 9 Burundi Africa 1997 45.3 6121610 463.
## 10 Cambodia Asia 1997 56.5 11782962 734.
## # ... with 75 more rows
usando gapminder_subset
# Small working subset: four South American countries, 2002 onwards.
gapminder_subset <- gapminder %>%
  filter(
    year >= 2002,
    country %in% c("Peru", "Chile", "Colombia", "Ecuador")
  )
gapminder_subset
## # A tibble: 8 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Chile Americas 2002 77.9 15497046 10779.
## 2 Chile Americas 2007 78.6 16284741 13172.
## 3 Colombia Americas 2002 71.7 41008227 5755.
## 4 Colombia Americas 2007 72.9 44227550 7007.
## 5 Ecuador Americas 2002 74.2 12921234 5773.
## 6 Ecuador Americas 2007 75.0 13755680 6873.
## 7 Peru Americas 2002 69.9 26769436 5909.
## 8 Peru Americas 2007 71.4 28674757 7409.
# Keep only the country, year and pop columns.
gapminder_subset %>% select(country, year, pop)
## # A tibble: 8 x 3
## country year pop
## <chr> <dbl> <dbl>
## 1 Chile 2002 15497046
## 2 Chile 2007 16284741
## 3 Colombia 2002 41008227
## 4 Colombia 2007 44227550
## 5 Ecuador 2002 12921234
## 6 Ecuador 2007 13755680
## 7 Peru 2002 26769436
## 8 Peru 2007 28674757
# Select three columns and rename them in the same call (new = old).
gapminder_subset %>%
  select(pais = country, año = year, poblacion = pop)
## # A tibble: 8 x 3
## pais año poblacion
## <chr> <dbl> <dbl>
## 1 Chile 2002 15497046
## 2 Chile 2007 16284741
## 3 Colombia 2002 41008227
## 4 Colombia 2007 44227550
## 5 Ecuador 2002 12921234
## 6 Ecuador 2007 13755680
## 7 Peru 2002 26769436
## 8 Peru 2007 28674757
# Drop the continent column, keep everything else.
gapminder_subset %>% select(-continent)
## # A tibble: 8 x 5
## country year lifeExp pop gdpPercap
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Chile 2002 77.9 15497046 10779.
## 2 Chile 2007 78.6 16284741 13172.
## 3 Colombia 2002 71.7 41008227 5755.
## 4 Colombia 2007 72.9 44227550 7007.
## 5 Ecuador 2002 74.2 12921234 5773.
## 6 Ecuador 2007 75.0 13755680 6873.
## 7 Peru 2002 69.9 26769436 5909.
## 8 Peru 2007 71.4 28674757 7409.
# Drop several columns at once.
gapminder_subset %>% select(-continent, -pop, -gdpPercap)
## # A tibble: 8 x 3
## country year lifeExp
## <chr> <dbl> <dbl>
## 1 Chile 2002 77.9
## 2 Chile 2007 78.6
## 3 Colombia 2002 71.7
## 4 Colombia 2007 72.9
## 5 Ecuador 2002 74.2
## 6 Ecuador 2007 75.0
## 7 Peru 2002 69.9
## 8 Peru 2007 71.4
#Ejercicio 1
Usando gapminder, selecciona las variables country, year y lifeExp
# Exercise: keep country, year and lifeExp from the full dataset.
gapminder %>% select(country, year, lifeExp)
## # A tibble: 1,704 x 3
## country year lifeExp
## <chr> <dbl> <dbl>
## 1 Afghanistan 1952 28.8
## 2 Afghanistan 1957 30.3
## 3 Afghanistan 1962 32.0
## 4 Afghanistan 1967 34.0
## 5 Afghanistan 1972 36.1
## 6 Afghanistan 1977 38.4
## 7 Afghanistan 1982 39.9
## 8 Afghanistan 1987 40.8
## 9 Afghanistan 1992 41.7
## 10 Afghanistan 1997 41.8
## # ... with 1,694 more rows
Usando gapminder, selecciona las variables country, year y pop. Esta vez traduce sus nombres
# Exercise: select three columns from the full dataset with translated names.
gapminder %>%
  select(pais = country, año = year, poblacion = pop)
## # A tibble: 1,704 x 3
## pais año poblacion
## <chr> <dbl> <dbl>
## 1 Afghanistan 1952 8425333
## 2 Afghanistan 1957 9240934
## 3 Afghanistan 1962 10267083
## 4 Afghanistan 1967 11537966
## 5 Afghanistan 1972 13079460
## 6 Afghanistan 1977 14880372
## 7 Afghanistan 1982 12881816
## 8 Afghanistan 1987 13867957
## 9 Afghanistan 1992 16317921
## 10 Afghanistan 1997 22227415
## # ... with 1,694 more rows
Usando gapminder, selecciona todas las variables con excepción de lifeExp y gdpPercap.
# Exercise: every column except lifeExp and gdpPercap.
gapminder %>% select(-lifeExp, -gdpPercap)
## # A tibble: 1,704 x 4
## country continent year pop
## <chr> <chr> <dbl> <dbl>
## 1 Afghanistan Asia 1952 8425333
## 2 Afghanistan Asia 1957 9240934
## 3 Afghanistan Asia 1962 10267083
## 4 Afghanistan Asia 1967 11537966
## 5 Afghanistan Asia 1972 13079460
## 6 Afghanistan Asia 1977 14880372
## 7 Afghanistan Asia 1982 12881816
## 8 Afghanistan Asia 1987 13867957
## 9 Afghanistan Asia 1992 16317921
## 10 Afghanistan Asia 1997 22227415
## # ... with 1,694 more rows
subset de gapminder está ordenado por el orden alfabético de la columna country
# Print the subset as-is, before any reordering, for comparison with the
# arrange() calls that follow.
gapminder_subset
## # A tibble: 8 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Chile Americas 2002 77.9 15497046 10779.
## 2 Chile Americas 2007 78.6 16284741 13172.
## 3 Colombia Americas 2002 71.7 41008227 5755.
## 4 Colombia Americas 2007 72.9 44227550 7007.
## 5 Ecuador Americas 2002 74.2 12921234 5773.
## 6 Ecuador Americas 2007 75.0 13755680 6873.
## 7 Peru Americas 2002 69.9 26769436 5909.
## 8 Peru Americas 2007 71.4 28674757 7409.
# Sort rows by year, ascending.
gapminder_subset %>% arrange(year)
## # A tibble: 8 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Chile Americas 2002 77.9 15497046 10779.
## 2 Colombia Americas 2002 71.7 41008227 5755.
## 3 Ecuador Americas 2002 74.2 12921234 5773.
## 4 Peru Americas 2002 69.9 26769436 5909.
## 5 Chile Americas 2007 78.6 16284741 13172.
## 6 Colombia Americas 2007 72.9 44227550 7007.
## 7 Ecuador Americas 2007 75.0 13755680 6873.
## 8 Peru Americas 2007 71.4 28674757 7409.
# Sort rows by year, descending.
gapminder_subset %>% arrange(desc(year))
## # A tibble: 8 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Chile Americas 2007 78.6 16284741 13172.
## 2 Colombia Americas 2007 72.9 44227550 7007.
## 3 Ecuador Americas 2007 75.0 13755680 6873.
## 4 Peru Americas 2007 71.4 28674757 7409.
## 5 Chile Americas 2002 77.9 15497046 10779.
## 6 Colombia Americas 2002 71.7 41008227 5755.
## 7 Ecuador Americas 2002 74.2 12921234 5773.
## 8 Peru Americas 2002 69.9 26769436 5909.
# Sort by year, breaking ties by life expectancy (both ascending).
gapminder_subset %>% arrange(year, lifeExp)
## # A tibble: 8 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Peru Americas 2002 69.9 26769436 5909.
## 2 Colombia Americas 2002 71.7 41008227 5755.
## 3 Ecuador Americas 2002 74.2 12921234 5773.
## 4 Chile Americas 2002 77.9 15497046 10779.
## 5 Peru Americas 2007 71.4 28674757 7409.
## 6 Colombia Americas 2007 72.9 44227550 7007.
## 7 Ecuador Americas 2007 75.0 13755680 6873.
## 8 Chile Americas 2007 78.6 16284741 13172.
Usando gapminder y la función arrange, determina cuál es el año más bajo
# Exercise: ascending sort by year puts the earliest year in the first row.
gapminder %>% arrange(year)
## # A tibble: 1,704 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Albania Europe 1952 55.2 1282697 1601.
## 3 Algeria Africa 1952 43.1 9279525 2449.
## 4 Angola Africa 1952 30.0 4232095 3521.
## 5 Argentina Americas 1952 62.5 17876956 5911.
## 6 Australia Oceania 1952 69.1 8691212 10040.
## 7 Austria Europe 1952 66.8 6927772 6137.
## 8 Bahrain Asia 1952 50.9 120447 9867.
## 9 Bangladesh Asia 1952 37.5 46886859 684.
## 10 Belgium Europe 1952 68 8730405 8343.
## # ... with 1,694 more rows
Rpta 1952
Usando gapminder y la función arrange, determina cuál fue la expectativa de vida más alta en el año más bajo
# Exercise: earliest year first and, within each year, highest life
# expectancy first.
gapminder %>% arrange(year, desc(lifeExp))
## # A tibble: 1,704 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Norway Europe 1952 72.7 3327728 10095.
## 2 Iceland Europe 1952 72.5 147962 7268.
## 3 Netherlands Europe 1952 72.1 10381988 8942.
## 4 Sweden Europe 1952 71.9 7124673 8528.
## 5 Denmark Europe 1952 70.8 4334000 9692.
## 6 Switzerland Europe 1952 69.6 4815000 14734.
## 7 New Zealand Oceania 1952 69.4 1994794 10557.
## 8 United Kingdom Europe 1952 69.2 50430000 9980.
## 9 Australia Oceania 1952 69.1 8691212 10040.
## 10 Canada Americas 1952 68.8 14785584 11367.
## # ... with 1,694 more rows
Rpta. En el año mas bajo, la exp de vida mas alta fue Noruega: 1952 y 72.67
Usando gapminder y la función arrange, determina cuál fue la expectativa de vida más alta en el año más alto
# Exercise: latest year first and, within each year, highest life expectancy
# first, so row 1 answers the question.
# desc() takes a single vector: the previous desc(lifeExp, year) call had
# `year` ignored (the result was sorted by lifeExp only, mixing 2002 and 2007
# rows) and is rejected outright by newer dplyr releases. Each sort key gets
# its own desc().
arrange(gapminder, desc(year), desc(lifeExp))
## # A tibble: 1,704 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Japan Asia 2007 82.6 127467972 31656.
## 2 Hong Kong, China Asia 2007 82.2 6980412 39725.
## 3 Iceland Europe 2007 81.8 301931 36181.
## 4 Switzerland Europe 2007 81.7 7554661 37506.
## 5 Australia Oceania 2007 81.2 20434176 34435.
## 6 Spain Europe 2007 80.9 40448191 28821.
## 7 Sweden Europe 2007 80.9 9031088 33860.
## 8 Israel Asia 2007 80.7 6426679 25523.
## 9 France Europe 2007 80.7 61083916 30470.
## 10 Canada Americas 2007 80.7 33390141 36319.
## # ... with 1,694 more rows
Rpta. Japón en 2007, con una expectativa de vida de 82.6 años
Cuando necesito crear nuevas variables
# Add a total-GDP column: population times GDP per capita.
gapminder_subset %>%
  mutate(PBI_total = pop * gdpPercap)
## # A tibble: 8 x 7
## country continent year lifeExp pop gdpPercap PBI_total
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Chile Americas 2002 77.9 15497046 10779. 167039309148.
## 2 Chile Americas 2007 78.6 16284741 13172. 214496727218.
## 3 Colombia Americas 2002 71.7 41008227 5755. 236013006966.
## 4 Colombia Americas 2007 72.9 44227550 7007. 309883885810.
## 5 Ecuador Americas 2002 74.2 12921234 5773. 74594859032.
## 6 Ecuador Americas 2007 75.0 13755680 6873. 94546397113.
## 7 Peru Americas 2002 69.9 26769436 5909. 158181134667.
## 8 Peru Americas 2007 71.4 28674757 7409. 212448566598.
#mutate() además permite usar múltiples transformaciones en una sola llamada, incluso utilizando variables recién creadas.
# Two new columns in one call; the second one uses the first, which was
# created just before it.
gapminder_subset %>%
  mutate(
    PBI_total = pop * gdpPercap,
    PBI_total_texto = paste(
      "PBI de", country, "en", year, ":", PBI_total, "dólares"
    )
  )
## # A tibble: 8 x 8
## country continent year lifeExp pop gdpPercap PBI_total PBI_total_texto
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Chile Americas 2002 77.9 1.55e7 10779. 1.67e11 PBI de Chile en 2~
## 2 Chile Americas 2007 78.6 1.63e7 13172. 2.14e11 PBI de Chile en 2~
## 3 Colombia Americas 2002 71.7 4.10e7 5755. 2.36e11 PBI de Colombia e~
## 4 Colombia Americas 2007 72.9 4.42e7 7007. 3.10e11 PBI de Colombia e~
## 5 Ecuador Americas 2002 74.2 1.29e7 5773. 7.46e10 PBI de Ecuador en~
## 6 Ecuador Americas 2007 75.0 1.38e7 6873. 9.45e10 PBI de Ecuador en~
## 7 Peru Americas 2002 69.9 2.68e7 5909. 1.58e11 PBI de Peru en 20~
## 8 Peru Americas 2007 71.4 2.87e7 7409. 2.12e11 PBI de Peru en 20~
# Overwrite an existing column: round gdpPercap to one decimal place.
gapminder_subset %>%
  mutate(gdpPercap = round(gdpPercap, 1))
## # A tibble: 8 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Chile Americas 2002 77.9 15497046 10779.
## 2 Chile Americas 2007 78.6 16284741 13172.
## 3 Colombia Americas 2002 71.7 41008227 5755.
## 4 Colombia Americas 2007 72.9 44227550 7007.
## 5 Ecuador Americas 2002 74.2 12921234 5773
## 6 Ecuador Americas 2007 75.0 13755680 6873.
## 7 Peru Americas 2002 69.9 26769436 5909
## 8 Peru Americas 2007 71.4 28674757 7409.
#Ejercicio 1
Usando gapminder y las funciones filter() y mutate(), determina el PBI de China y United States en los años 1952 y 2007.
# Exercise: national GDP of China and the United States in 1952 and 2007.
# First narrow the data, then derive the new column.
mis_datos_filtrados <- gapminder %>%
  filter(
    year %in% c(1952, 2007),
    country %in% c("China", "United States")
  )
mis_datos_filtrados %>%
  mutate(PBI_nacional = pop * gdpPercap)
## # A tibble: 4 x 7
## country continent year lifeExp pop gdpPercap PBI_nacional
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 China Asia 1952 44 556263527 400. 2.23e11
## 2 China Asia 2007 73.0 1318683096 4959. 6.54e12
## 3 United States Americas 1952 68.4 157553000 13990. 2.20e12
## 4 United States Americas 2007 78.2 301139947 42952. 1.29e13
#summarise() permite obtener datos resumen de nuestras variables
# Collapse the subset to a single row holding its number of observations.
gapminder_subset %>% summarise(recuento = n())
## # A tibble: 1 x 1
## recuento
## <int>
## 1 8
Podemos obtener cualquier otro tipo de resumen, por ejemplo, el promedio de expectativa de vida para el periodo escogido. Llamaremos promedio a esta columna.
# Mean life expectancy across the whole subset.
gapminder_subset %>% summarise(promedio = mean(lifeExp))
## # A tibble: 1 x 1
## promedio
## <dbl>
## 1 73.9
es posible crear más de una variable resumen
# Several summary columns in a single call.
gapminder_subset %>%
  summarise(recuento = n(), promedio = mean(lifeExp))
## # A tibble: 1 x 2
## recuento promedio
## <int> <dbl>
## 1 8 73.9
# Summary columns can feed later ones: `resumen` pastes together the two
# values computed before it.
gapminder_subset %>%
  summarise(
    recuento = n(),
    promedio = mean(lifeExp),
    resumen = paste(
      "La expectativa de vida de las", recuento,
      "observaciones es", promedio
    )
  )
## # A tibble: 1 x 3
## recuento promedio resumen
## <int> <dbl> <chr>
## 1 8 73.9 La expectativa de vida de las 8 observaciones es 73.93475
#Tener en cuenta la diferencia entre summarise y mutate
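Con mutate() se crean nuevas columnas con el valor de cada operación (promedio, mediana, mínimo) repetido en cada una de las n filas, en lugar de colapsar los datos a una sola fila como hace summarise().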
# With mutate() every row is kept, so each statistic is repeated on all rows.
gapminder_subset %>%
  mutate(
    promedio = mean(lifeExp),
    mediana = median(lifeExp),
    minimo = min(lifeExp)
  )
## # A tibble: 8 x 9
## country continent year lifeExp pop gdpPercap promedio mediana minimo
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Chile Americas 2002 77.9 15497046 10779. 73.9 73.5 69.9
## 2 Chile Americas 2007 78.6 16284741 13172. 73.9 73.5 69.9
## 3 Colombia Americas 2002 71.7 41008227 5755. 73.9 73.5 69.9
## 4 Colombia Americas 2007 72.9 44227550 7007. 73.9 73.5 69.9
## 5 Ecuador Americas 2002 74.2 12921234 5773. 73.9 73.5 69.9
## 6 Ecuador Americas 2007 75.0 13755680 6873. 73.9 73.5 69.9
## 7 Peru Americas 2002 69.9 26769436 5909. 73.9 73.5 69.9
## 8 Peru Americas 2007 71.4 28674757 7409. 73.9 73.5 69.9
# Per-row deviation of life expectancy from the subset mean.
gapminder_subset %>%
  mutate(
    promedio = mean(lifeExp),
    diferencia = lifeExp - promedio
  )
## # A tibble: 8 x 8
## country continent year lifeExp pop gdpPercap promedio diferencia
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Chile Americas 2002 77.9 15497046 10779. 73.9 3.93
## 2 Chile Americas 2007 78.6 16284741 13172. 73.9 4.62
## 3 Colombia Americas 2002 71.7 41008227 5755. 73.9 -2.25
## 4 Colombia Americas 2007 72.9 44227550 7007. 73.9 -1.05
## 5 Ecuador Americas 2002 74.2 12921234 5773. 73.9 0.238
## 6 Ecuador Americas 2007 75.0 13755680 6873. 73.9 1.06
## 7 Peru Americas 2002 69.9 26769436 5909. 73.9 -4.03
## 8 Peru Americas 2007 71.4 28674757 7409. 73.9 -2.51
Usando gapminder, la función filter() y summarise() determina el promedio de la expectativa de vida para Asia, Africa y Oceania en 1952.
# Exercise: one 1952 slice per continent, then the mean life expectancy of
# the Asian slice.
datos_asia <- gapminder %>% filter(year == 1952, continent == "Asia")
datos_africa <- gapminder %>% filter(year == 1952, continent == "Africa")
datos_oceania <- gapminder %>% filter(year == 1952, continent == "Oceania")
datos_asia %>% summarise(promedio = mean(lifeExp))
## # A tibble: 1 x 1
## promedio
## <dbl>
## 1 46.3
# Mean life expectancy of the African 1952 slice.
datos_africa %>% summarise(promedio = mean(lifeExp))
## # A tibble: 1 x 1
## promedio
## <dbl>
## 1 39.1
# Mean life expectancy of the Oceanian 1952 slice.
datos_oceania %>% summarise(promedio = mean(lifeExp))
## # A tibble: 1 x 1
## promedio
## <dbl>
## 1 69.3
group_by() permite agrupar nuestros datos para realizar operaciones al interior de esos grupos. Es especialmente potente en combinación con summarise().
No se están sobrescribiendo los datos originales de gapminder: la versión agrupada se guarda en gapminder_subset.
# Mark the subset as grouped by country; the rows themselves do not change.
gapminder_subset <- gapminder_subset %>% group_by(country)
Agrupa gapminder_subset en 4 países
# Print the grouped subset; the header now includes a "Groups:" line.
gapminder_subset
## # A tibble: 8 x 6
## # Groups: country [4]
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Chile Americas 2002 77.9 15497046 10779.
## 2 Chile Americas 2007 78.6 16284741 13172.
## 3 Colombia Americas 2002 71.7 41008227 5755.
## 4 Colombia Americas 2007 72.9 44227550 7007.
## 5 Ecuador Americas 2002 74.2 12921234 5773.
## 6 Ecuador Americas 2007 75.0 13755680 6873.
## 7 Peru Americas 2002 69.9 26769436 5909.
## 8 Peru Americas 2007 71.4 28674757 7409.
# On grouped data summarise() returns one row per group.
gapminder_subset %>% summarise(recuento = n())
## # A tibble: 4 x 2
## country recuento
## <chr> <int>
## 1 Chile 2
## 2 Colombia 2
## 3 Ecuador 2
## 4 Peru 2
Es el mismo codigo que se uso anteriormente,
summarise(gapminder_subset, recuento = n())
pero ahora esta agrupado. ya no me da el recuento para todo el set de datos con un solo valor, sino un recuento u observaciones para cada grupo.
group by hace las operaciones para cada subconjunto de la variable que le indicamos
# Remove the grouping so later verbs act on the whole subset again.
gapminder_subset <- gapminder_subset %>% ungroup()
gapminder_subset
## # A tibble: 8 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Chile Americas 2002 77.9 15497046 10779.
## 2 Chile Americas 2007 78.6 16284741 13172.
## 3 Colombia Americas 2002 71.7 41008227 5755.
## 4 Colombia Americas 2007 72.9 44227550 7007.
## 5 Ecuador Americas 2002 74.2 12921234 5773.
## 6 Ecuador Americas 2007 75.0 13755680 6873.
## 7 Peru Americas 2002 69.9 26769436 5909.
## 8 Peru Americas 2007 71.4 28674757 7409.
Volvemos a agrupar
# Group by country again and show the grouped tibble.
gapminder_subset <- gapminder_subset %>% group_by(country)
gapminder_subset
## # A tibble: 8 x 6
## # Groups: country [4]
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Chile Americas 2002 77.9 15497046 10779.
## 2 Chile Americas 2007 78.6 16284741 13172.
## 3 Colombia Americas 2002 71.7 41008227 5755.
## 4 Colombia Americas 2007 72.9 44227550 7007.
## 5 Ecuador Americas 2002 74.2 12921234 5773.
## 6 Ecuador Americas 2007 75.0 13755680 6873.
## 7 Peru Americas 2002 69.9 26769436 5909.
## 8 Peru Americas 2007 71.4 28674757 7409.
# Mean life expectancy computed separately for each group.
gapminder_subset %>% summarise(promedio = mean(lifeExp))
## # A tibble: 4 x 2
## country promedio
## <chr> <dbl>
## 1 Chile 78.2
## 2 Colombia 72.3
## 3 Ecuador 74.6
## 4 Peru 70.7
Al terminar, desagrupamos
# Always ungroup once the grouped work is done.
gapminder_subset <- gapminder_subset %>% ungroup()
gapminder_subset
## # A tibble: 8 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Chile Americas 2002 77.9 15497046 10779.
## 2 Chile Americas 2007 78.6 16284741 13172.
## 3 Colombia Americas 2002 71.7 41008227 5755.
## 4 Colombia Americas 2007 72.9 44227550 7007.
## 5 Ecuador Americas 2002 74.2 12921234 5773.
## 6 Ecuador Americas 2007 75.0 13755680 6873.
## 7 Peru Americas 2002 69.9 26769436 5909.
## 8 Peru Americas 2007 71.4 28674757 7409.
Usando gapminder, la función group_by() y summarise() determina el promedio de la expectativa de vida para todos los continentes en 1952.
# Show the 1952 rows of the full dataset (the result is only printed, not
# stored).
gapminder %>% filter(year == 1952)
## # A tibble: 142 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Albania Europe 1952 55.2 1282697 1601.
## 3 Algeria Africa 1952 43.1 9279525 2449.
## 4 Angola Africa 1952 30.0 4232095 3521.
## 5 Argentina Americas 1952 62.5 17876956 5911.
## 6 Australia Oceania 1952 69.1 8691212 10040.
## 7 Austria Europe 1952 66.8 6927772 6137.
## 8 Bahrain Asia 1952 50.9 120447 9867.
## 9 Bangladesh Asia 1952 37.5 46886859 684.
## 10 Belgium Europe 1952 68 8730405 8343.
## # ... with 132 more rows
# Exercise: mean life expectancy per continent in 1952.
# The 1952 filter has to feed the grouping. Previously the full dataset was
# grouped, so the averages covered every year instead of 1952 only.
# The grouped data goes into its own object so that `gapminder` itself is
# never replaced by a grouped copy.
gapminder_1952 <- group_by(filter(gapminder, year == 1952), continent)
gapminder_1952
## # A tibble: 142 x 6
## # Groups: continent [5]
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Albania Europe 1952 55.2 1282697 1601.
## 3 Algeria Africa 1952 43.1 9279525 2449.
## 4 Angola Africa 1952 30.0 4232095 3521.
## 5 Argentina Americas 1952 62.5 17876956 5911.
## 6 Australia Oceania 1952 69.1 8691212 10040.
## 7 Austria Europe 1952 66.8 6927772 6137.
## 8 Bahrain Asia 1952 50.9 120447 9867.
## 9 Bangladesh Asia 1952 37.5 46886859 684.
## 10 Belgium Europe 1952 68 8730405 8343.
## # ... with 132 more rows
summarise(gapminder_1952, promedio = mean(lifeExp))
## # A tibble: 5 x 2
## continent promedio
## <chr> <dbl>
## 1 Africa 39.1
## 2 Americas 53.3
## 3 Asia 46.3
## 4 Europe 64.4
## 5 Oceania 69.3
# Make sure the full dataset carries no grouping, then display it.
gapminder <- gapminder %>% ungroup()
gapminder
## # A tibble: 1,704 x 6
## country continent year lifeExp pop gdpPercap
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
## 7 Afghanistan Asia 1982 39.9 12881816 978.
## 8 Afghanistan Asia 1987 40.8 13867957 852.
## 9 Afghanistan Asia 1992 41.7 16317921 649.
## 10 Afghanistan Asia 1997 41.8 22227415 635.
## # ... with 1,694 more rows
# Without groups, summarise() yields a single overall mean.
gapminder %>% summarise(promedio = mean(lifeExp))
## # A tibble: 1 x 1
## promedio
## <dbl>
## 1 59.5
library(magrittr)
##
## Attaching package: 'magrittr'
## The following object is masked from 'package:purrr':
##
## set_names
## The following object is masked from 'package:tidyr':
##
## extract
Representa esta operación usando las funciones de magrittr.
(((12 * 5) - 11)/ 7) + 3
# (((12 * 5) - 11) / 7) + 3 written with magrittr's arithmetic aliases,
# chained left to right in the order the operations are applied.
12 %>%
  multiply_by(5) %>%
  subtract(11) %>%
  divide_by(7) %>%
  add(3)
## [1] 10
# Same calculation, one named intermediate result per operation.
numero_inicial <- 12
resultado1 <- numero_inicial %>% multiply_by(5)
resultado2 <- resultado1 %>% subtract(11)
resultado3 <- resultado2 %>% divide_by(7)
resultado4 <- resultado3 %>% add(3)
resultado4
## [1] 10