filter: selecionando apenas Brazil e salvando em gapbrazil
# Keep only the rows whose country is Brazil and print them.
gap %>%
  filter(country == "Brazil")
## # A tibble: 12 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Brazil Americas 1952 50.9 56602560 2109.
## 2 Brazil Americas 1957 53.3 65551171 2487.
## 3 Brazil Americas 1962 55.7 76039390 3337.
## 4 Brazil Americas 1967 57.6 88049823 3430.
## 5 Brazil Americas 1972 59.5 100840058 4986.
## 6 Brazil Americas 1977 61.5 114313951 6660.
## 7 Brazil Americas 1982 63.3 128962939 7031.
## 8 Brazil Americas 1987 65.2 142938076 7807.
## 9 Brazil Americas 1992 67.1 155975974 6950.
## 10 Brazil Americas 1997 69.4 168546719 7958.
## 11 Brazil Americas 2002 71.0 179914212 8131.
## 12 Brazil Americas 2007 72.4 190010647 9066.
# Store the Brazil-only rows in `gapbrazil` for later use.
gapbrazil <- gap %>%
  filter(country == "Brazil")
salvando arquivo como .Rdata no diretorio de trabalho
# Write each object to its own .Rdata file; the relative paths resolve
# against the current working directory.
save(gap, file = "gap.Rdata")
save(gapbrazil, file = "gapbrazil.Rdata")
carregando os arquivos gap.Rdata e gapbrazil.Rdata do diretório de trabalho
# Restore the objects saved above from the working directory.
# The file names must match the saved names exactly, including case:
# "gapbrazil.rdata" would not be found on a case-sensitive filesystem.
load("gap.Rdata")
load("gapbrazil.Rdata")
filter & arrange
# Sort the whole table by year, then by country within each year.
gap %>%
  arrange(year, country)
## # A tibble: 1,704 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Albania Europe 1952 55.2 1282697 1601.
## 3 Algeria Africa 1952 43.1 9279525 2449.
## 4 Angola Africa 1952 30.0 4232095 3521.
## 5 Argentina Americas 1952 62.5 17876956 5911.
## 6 Australia Oceania 1952 69.1 8691212 10040.
## 7 Austria Europe 1952 66.8 6927772 6137.
## 8 Bahrain Asia 1952 50.9 120447 9867.
## 9 Bangladesh Asia 1952 37.5 46886859 684.
## 10 Belgium Europe 1952 68 8730405 8343.
## # … with 1,694 more rows
# 2007 rows only, ordered from lowest to highest life expectancy.
gap %>%
  filter(year == 2007) %>%
  arrange(lifeExp)
## # A tibble: 142 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Swaziland Africa 2007 39.6 1.13e6 4513.
## 2 Mozambique Africa 2007 42.1 2.00e7 824.
## 3 Zambia Africa 2007 42.4 1.17e7 1271.
## 4 Sierra Leone Africa 2007 42.6 6.14e6 863.
## 5 Lesotho Africa 2007 42.6 2.01e6 1569.
## 6 Angola Africa 2007 42.7 1.24e7 4797.
## 7 Zimbabwe Africa 2007 43.5 1.23e7 470.
## 8 Afghanistan Asia 2007 43.8 3.19e7 975.
## 9 Central African… Africa 2007 44.7 4.37e6 706.
## 10 Liberia Africa 2007 45.7 3.19e6 415.
## # … with 132 more rows
# 2007 rows only, ordered from highest to lowest life expectancy.
gap %>%
  filter(year == 2007) %>%
  arrange(desc(lifeExp))
## # A tibble: 142 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Japan Asia 2007 82.6 1.27e8 31656.
## 2 Hong Kong, Ch… Asia 2007 82.2 6.98e6 39725.
## 3 Iceland Europe 2007 81.8 3.02e5 36181.
## 4 Switzerland Europe 2007 81.7 7.55e6 37506.
## 5 Australia Oceania 2007 81.2 2.04e7 34435.
## 6 Spain Europe 2007 80.9 4.04e7 28821.
## 7 Sweden Europe 2007 80.9 9.03e6 33860.
## 8 Israel Asia 2007 80.7 6.43e6 25523.
## 9 France Europe 2007 80.7 6.11e7 30470.
## 10 Canada Americas 2007 80.7 3.34e7 36319.
## # … with 132 more rows
# Countries of the Americas in 2007, richest (GDP per capita) first.
# Comma-separated conditions in filter() are combined with AND.
gap %>%
  filter(year == 2007, continent == "Americas") %>%
  arrange(desc(gdpPercap))
## # A tibble: 25 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 United States Americas 2007 78.2 3.01e8 42952.
## 2 Canada Americas 2007 80.7 3.34e7 36319.
## 3 Puerto Rico Americas 2007 78.7 3.94e6 19329.
## 4 Trinidad and T… Americas 2007 69.8 1.06e6 18009.
## 5 Chile Americas 2007 78.6 1.63e7 13172.
## 6 Argentina Americas 2007 75.3 4.03e7 12779.
## 7 Mexico Americas 2007 76.2 1.09e8 11978.
## 8 Venezuela Americas 2007 73.7 2.61e7 11416.
## 9 Uruguay Americas 2007 76.4 3.45e6 10611.
## 10 Panama Americas 2007 75.5 3.24e6 9809.
## # … with 15 more rows
mutate
# Add a `gdp` column (population times GDP per capita) and overwrite `gap`
# with the extended table.
gap <- gap %>%
  mutate(gdp = pop * gdpPercap)
rename
# Print the table with two columns renamed to snake_case (new = old).
# The result is not assigned, so `gap` itself keeps its original names.
gap %>%
  rename(
    life_exp = lifeExp,
    gdp_percap = gdpPercap
  )
## # A tibble: 1,704 x 7
## country continent year life_exp pop gdp_percap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghan… Asia 1952 28.8 8.43e6 779.
## 2 Afghan… Asia 1957 30.3 9.24e6 821.
## 3 Afghan… Asia 1962 32.0 1.03e7 853.
## 4 Afghan… Asia 1967 34.0 1.15e7 836.
## 5 Afghan… Asia 1972 36.1 1.31e7 740.
## 6 Afghan… Asia 1977 38.4 1.49e7 786.
## 7 Afghan… Asia 1982 39.9 1.29e7 978.
## 8 Afghan… Asia 1987 40.8 1.39e7 852.
## 9 Afghan… Asia 1992 41.7 1.63e7 649.
## 10 Afghan… Asia 1997 41.8 2.22e7 635.
## # … with 1,694 more rows, and 1 more variable: gdp <dbl>
filter, group_by & summarise -> filtra apenas o ano 2007 e mostra o número de países em cada continente
# Number of rows (countries) per continent in 2007.
gap %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarise(numero = n())
## # A tibble: 5 x 2
## continent numero
## <fct> <int>
## 1 Africa 52
## 2 Americas 25
## 3 Asia 33
## 4 Europe 30
## 5 Oceania 2
comandos combinados
# Per continent in 2007: country count and mean population.
gap %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarise(
    n = n(),
    media = mean(pop)
  )
## # A tibble: 5 x 3
## continent n media
## <fct> <int> <dbl>
## 1 Africa 52 17875763.
## 2 Americas 25 35954847.
## 3 Asia 33 115513752.
## 4 Europe 30 19536618.
## 5 Oceania 2 12274974.
# Per continent in 2007: country count and median population.
gap %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarise(
    n = n(),
    median = median(pop)
  )
## # A tibble: 5 x 3
## continent n median
## <fct> <int> <dbl>
## 1 Africa 52 10093310.
## 2 Americas 25 9319622
## 3 Asia 33 24821286
## 4 Europe 30 9493598
## 5 Oceania 2 12274974.
# Per continent in 2007: country count plus mean, minimum and maximum
# population.
gap %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarise(
    n = n(),
    media = mean(pop),
    minimo = min(pop),
    maximo = max(pop)
  )
## # A tibble: 5 x 5
## continent n media minimo maximo
## <fct> <int> <dbl> <int> <int>
## 1 Africa 52 17875763. 199579 135031164
## 2 Americas 25 35954847. 1056608 301139947
## 3 Asia 33 115513752. 708573 1318683096
## 4 Europe 30 19536618. 301931 82400996
## 5 Oceania 2 12274974. 4115771 20434176
# Same population summary per continent, without the country count.
gap %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarise(
    media = mean(pop),
    minimo = min(pop),
    maximo = max(pop)
  )
## # A tibble: 5 x 4
## continent media minimo maximo
## <fct> <dbl> <int> <int>
## 1 Africa 17875763. 199579 135031164
## 2 Americas 35954847. 1056608 301139947
## 3 Asia 115513752. 708573 1318683096
## 4 Europe 19536618. 301931 82400996
## 5 Oceania 12274974. 4115771 20434176
# Countries with more than 100 million inhabitants in 2007, largest first.
# group_by(country) only marks the result as grouped (see the "# Groups:"
# header in the printout); arrange() sorts across groups by default.
gap %>%
  filter(year == 2007, pop > 100000000) %>%
  group_by(country) %>%
  arrange(desc(pop))
## # A tibble: 10 x 7
## # Groups: country [10]
## country continent year lifeExp pop gdpPercap gdp
## <fct> <fct> <int> <dbl> <int> <dbl> <dbl>
## 1 China Asia 2007 73.0 1.32e9 4959. 6.54e12
## 2 India Asia 2007 64.7 1.11e9 2452. 2.72e12
## 3 United … Americas 2007 78.2 3.01e8 42952. 1.29e13
## 4 Indones… Asia 2007 70.6 2.24e8 3541. 7.92e11
## 5 Brazil Americas 2007 72.4 1.90e8 9066. 1.72e12
## 6 Pakistan Asia 2007 65.5 1.69e8 2606. 4.41e11
## 7 Banglad… Asia 2007 64.1 1.50e8 1391. 2.09e11
## 8 Nigeria Africa 2007 46.9 1.35e8 2014. 2.72e11
## 9 Japan Asia 2007 82.6 1.27e8 31656. 4.04e12
## 10 Mexico Americas 2007 76.2 1.09e8 11978. 1.30e12
# 2007 rows where GDP per capita is below 1000 OR life expectancy is
# below 50, ordered by population (largest first).
# The result stays grouped by country; the grouping does not affect the sort.
gap %>%
  filter(year == 2007, gdpPercap < 1000 | lifeExp < 50) %>%
  group_by(country) %>%
  arrange(desc(pop))
## # A tibble: 27 x 7
## # Groups: country [27]
## country continent year lifeExp pop gdpPercap gdp
## <fct> <fct> <int> <dbl> <int> <dbl> <dbl>
## 1 Nigeria Africa 2007 46.9 1.35e8 2014. 2.72e11
## 2 Ethiopia Africa 2007 52.9 7.65e7 691. 5.29e10
## 3 Congo, … Africa 2007 46.5 6.46e7 278. 1.79e10
## 4 Myanmar Asia 2007 62.1 4.78e7 944 4.51e10
## 5 South A… Africa 2007 49.3 4.40e7 9270. 4.08e11
## 6 Afghani… Asia 2007 43.8 3.19e7 975. 3.11e10
## 7 Mozambi… Africa 2007 42.1 2.00e7 824. 1.64e10
## 8 Cote d'… Africa 2007 48.3 1.80e7 1545. 2.78e10
## 9 Malawi Africa 2007 48.3 1.33e7 759. 1.01e10
## 10 Niger Africa 2007 56.9 1.29e7 620. 7.99e 9
## # … with 17 more rows
# Same 2007 selection (GDP per capita below 1000 OR life expectancy below
# 50), restricted to countries outside Africa.
# The result is returned grouped by country.
gap %>%
  filter(
    year == 2007,
    gdpPercap < 1000 | lifeExp < 50,
    continent != "Africa"
  ) %>%
  group_by(country)
## # A tibble: 2 x 7
## # Groups: country [2]
## country continent year lifeExp pop gdpPercap gdp
## <fct> <fct> <int> <dbl> <int> <dbl> <dbl>
## 1 Afghani… Asia 2007 43.8 3.19e7 975. 3.11e10
## 2 Myanmar Asia 2007 62.1 4.78e7 944 4.51e10
salvando em um tibble
# Store the per-continent population summary for 2007 in `t1`, then print it.
t1 <- gap %>%
  filter(year == 2007) %>%
  group_by(continent) %>%
  summarise(
    n = n(),
    media_pop = mean(pop),
    minimo_pop = min(pop),
    maximo_pop = max(pop)
  )
t1
## # A tibble: 5 x 5
## continent n media_pop minimo_pop maximo_pop
## <fct> <int> <dbl> <int> <int>
## 1 Africa 52 17875763. 199579 135031164
## 2 Americas 25 35954847. 1056608 301139947
## 3 Asia 33 115513752. 708573 1318683096
## 4 Europe 30 19536618. 301931 82400996
## 5 Oceania 2 12274974. 4115771 20434176
Comandos filter + select (seleciona variáveis)
# 2007 rows, keeping only the `continent` column.
gap %>%
  filter(year == 2007) %>%
  select(continent)
## # A tibble: 142 x 1
## continent
## <fct>
## 1 Asia
## 2 Europe
## 3 Africa
## 4 Africa
## 5 Americas
## 6 Oceania
## 7 Europe
## 8 Asia
## 9 Asia
## 10 Europe
## # … with 132 more rows
# 2007 rows, keeping `year` and `continent` in that order.
gap %>%
  filter(year == 2007) %>%
  select(year, continent)
## # A tibble: 142 x 2
## year continent
## <int> <fct>
## 1 2007 Asia
## 2 2007 Europe
## 3 2007 Africa
## 4 2007 Africa
## 5 2007 Americas
## 6 2007 Oceania
## 7 2007 Europe
## 8 2007 Asia
## 9 2007 Asia
## 10 2007 Europe
## # … with 132 more rows
# 2007 rows, keeping `year`, `continent` and `country` in that order.
gap %>%
  filter(year == 2007) %>%
  select(year, continent, country)
## # A tibble: 142 x 3
## year continent country
## <int> <fct> <fct>
## 1 2007 Asia Afghanistan
## 2 2007 Europe Albania
## 3 2007 Africa Algeria
## 4 2007 Africa Angola
## 5 2007 Americas Argentina
## 6 2007 Oceania Australia
## 7 2007 Europe Austria
## 8 2007 Asia Bahrain
## 9 2007 Asia Bangladesh
## 10 2007 Europe Belgium
## # … with 132 more rows
# 2007 rows: continent and country, followed by every column whose name
# ends in "p" (lifeExp, pop, gdpPercap and gdp in the printout below).
gap %>%
  filter(year == 2007) %>%
  select(
    continent,
    country,
    ends_with("p")
  )
## # A tibble: 142 x 6
## continent country lifeExp pop gdpPercap gdp
## <fct> <fct> <dbl> <int> <dbl> <dbl>
## 1 Asia Afghanist… 43.8 3.19e7 975. 3.11e10
## 2 Europe Albania 76.4 3.60e6 5937. 2.14e10
## 3 Africa Algeria 72.3 3.33e7 6223. 2.07e11
## 4 Africa Angola 42.7 1.24e7 4797. 5.96e10
## 5 Americas Argentina 75.3 4.03e7 12779. 5.15e11
## 6 Oceania Australia 81.2 2.04e7 34435. 7.04e11
## 7 Europe Austria 79.8 8.20e6 36126. 2.96e11
## 8 Asia Bahrain 75.6 7.09e5 29796. 2.11e10
## 9 Asia Bangladesh 64.1 1.50e8 1391. 2.09e11
## 10 Europe Belgium 79.4 1.04e7 33693. 3.50e11
## # … with 132 more rows
# 2007 rows, keeping only the columns whose name ends in "p".
gap %>%
  filter(year == 2007) %>%
  select(ends_with("p"))
## # A tibble: 142 x 4
## lifeExp pop gdpPercap gdp
## <dbl> <int> <dbl> <dbl>
## 1 43.8 31889923 975. 31079291949.
## 2 76.4 3600523 5937. 21376411360.
## 3 72.3 33333216 6223. 207444851958.
## 4 42.7 12420476 4797. 59583895818.
## 5 75.3 40301927 12779. 515033625357.
## 6 81.2 20434176 34435. 703658358894.
## 7 79.8 8199783 36126. 296229400691.
## 8 75.6 708573 29796. 21112675360.
## 9 64.1 150448339 1391. 209311822134.
## 10 79.4 10392226 33693. 350141166520.
## # … with 132 more rows
# 2007 rows, keeping only the columns whose name starts with "c".
gap %>%
  filter(year == 2007) %>%
  select(starts_with("c"))
## # A tibble: 142 x 2
## country continent
## <fct> <fct>
## 1 Afghanistan Asia
## 2 Albania Europe
## 3 Algeria Africa
## 4 Angola Africa
## 5 Argentina Americas
## 6 Australia Oceania
## 7 Austria Europe
## 8 Bahrain Asia
## 9 Bangladesh Asia
## 10 Belgium Europe
## # … with 132 more rows
# For the Americas in 2007, compute the row count, mean and median of every
# column whose name ends in "p". summarise_all() applies each function in
# the list to each selected column, yielding one output column per pair.
gap %>%
  filter(year == 2007, continent == "Americas") %>%
  select(ends_with("p")) %>%
  summarise_all(list(~n(), ~mean(.), ~median(.)))
## # A tibble: 1 x 12
## lifeExp_n pop_n gdpPercap_n gdp_n lifeExp_mean pop_mean
## <int> <int> <int> <int> <dbl> <dbl>
## 1 25 25 25 25 73.6 3.60e7
## # … with 6 more variables: gdpPercap_mean <dbl>,
## # gdp_mean <dbl>, lifeExp_median <dbl>, pop_median <int>,
## # gdpPercap_median <dbl>, gdp_median <dbl>
Combinando dplyr e ggplot
# Brazil's total GDP over time, drawn with different geoms and fills.
# Each pipeline filters the data with dplyr and hands it to ggplot().

# Scatter plot.
gap %>%
  filter(country == "Brazil") %>%
  ggplot(aes(x = year, y = gdp)) +
  geom_point()

# Line plot.
gap %>%
  filter(country == "Brazil") %>%
  ggplot(aes(x = year, y = gdp)) +
  geom_line()

# Bar (column) plot with the default fill.
gap %>%
  filter(country == "Brazil") %>%
  ggplot(aes(x = year, y = gdp)) +
  geom_col()

# Named fill colour.
gap %>%
  filter(country == "Brazil") %>%
  ggplot(aes(x = year, y = gdp)) +
  geom_col(fill = "Blue")

# Hex fill colours in #RRGGBBAA form; the last two digits set the opacity.
gap %>%
  filter(country == "Brazil") %>%
  ggplot(aes(x = year, y = gdp)) +
  geom_col(fill = "#111111bb")

gap %>%
  filter(country == "Brazil") %>%
  ggplot(aes(x = year, y = gdp)) +
  geom_col(fill = "#11119988")

# Same idea with the black-and-white theme applied.
gap %>%
  filter(country == "Brazil") %>%
  ggplot(aes(x = year, y = gdp)) +
  geom_col(fill = "#11111122") +
  theme_bw()

gap %>%
  filter(country == "Brazil") %>%
  ggplot(aes(x = year, y = gdp)) +
  geom_col(fill = "#88111144") +
  theme_bw()

# One line per country of the Americas.

# `group` draws a separate line per country without colouring them.
gap %>%
  filter(continent == "Americas") %>%
  ggplot(aes(x = year, y = gdp, group = country)) +
  geom_line()

# Mapping colour to country both separates and colours the lines.
gap %>%
  filter(continent == "Americas") %>%
  ggplot(aes(x = year, y = gdp, colour = country)) +
  geom_line()

# Same plot with thicker lines.
gap %>%
  filter(continent == "Americas") %>%
  ggplot(aes(x = year, y = gdp, colour = country)) +
  geom_line(size = 1)

# Life expectancy instead of GDP on the y axis.
gap %>%
  filter(continent == "Americas") %>%
  ggplot(aes(x = year, y = lifeExp, colour = country)) +
  geom_line()

# ===== Excluding the United States =====
# GDP over time for every country of the Americas except the United States,
# drawn as lines with a point at each observation.
gap %>%
  filter(continent == "Americas", country != "United States") %>%
  ggplot(aes(year, gdp, col = country)) +
  geom_line() +
  geom_point()

# === GDP of Oceania, 1952 to 2007
# No country exclusion is needed here: the United States is listed under
# "Americas", so filtering on Oceania already leaves it out.
gap %>%
  filter(continent == "Oceania") %>%
  ggplot(aes(year, gdp, col = country)) +
  geom_line() +
  geom_point()
