library(tidyverse)
library(gapminder)
# Keep a working copy of the gapminder data under a short name.
df1 <- gapminder

# Peek at the first rows (head() shows six by default).
head(df1)
## # A tibble: 6 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
# Peek at the last rows (tail() shows six by default).
tail(df1)
## # A tibble: 6 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Zimbabwe Africa 1982 60.4 7636524 789.
## 2 Zimbabwe Africa 1987 62.4 9216418 706.
## 3 Zimbabwe Africa 1992 60.4 10704340 693.
## 4 Zimbabwe Africa 1997 46.8 11404948 792.
## 5 Zimbabwe Africa 2002 40.0 11926563 672.
## 6 Zimbabwe Africa 2007 43.5 12311143 470.
# Compact column-by-column overview: names, types and leading values.
df1 %>%
  glimpse()
## Rows: 1,704
## Columns: 6
## $ country <fct> "Afghanistan", "Afghanistan", "Afghanistan", "Afghanistan", …
## $ continent <fct> Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, Asia, …
## $ year <int> 1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, …
## $ lifeExp <dbl> 28.801, 30.332, 31.997, 34.020, 36.088, 38.438, 39.854, 40.8…
## $ pop <int> 8425333, 9240934, 10267083, 11537966, 13079460, 14880372, 12…
## $ gdpPercap <dbl> 779.4453, 820.8530, 853.1007, 836.1971, 739.9811, 786.1134, …
# Number of rows (country-year observations) per continent.
count(df1, continent)
## # A tibble: 5 × 2
## continent n
## <fct> <int>
## 1 Africa 624
## 2 Americas 300
## 3 Asia 396
## 4 Europe 360
## 5 Oceania 24
# Scatter plots of life expectancy against GDP per capita, all rows of df1.

# 1. Default styling.
ggplot(data = df1, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point()

# 2. A fixed colour is set outside aes(), so every point is red.
ggplot(data = df1, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(color = "red") +
  labs(title = "Seeing red?")

# 3. Colour mapped inside aes(), so each continent gets its own colour
#    and a legend is added.
ggplot(
  data = df1,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point() +
  labs(title = "One color for each continent!")
# Preview only the rows for China (printed, not stored).
filter(df1, country == "China")
## # A tibble: 12 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 China Asia 1952 44 556263527 400.
## 2 China Asia 1957 50.5 637408000 576.
## 3 China Asia 1962 44.5 665770000 488.
## 4 China Asia 1967 58.4 754550000 613.
## 5 China Asia 1972 63.1 862030000 677.
## 6 China Asia 1977 64.0 943455000 741.
## 7 China Asia 1982 65.5 1000281000 962.
## 8 China Asia 1987 67.3 1084035000 1379.
## 9 China Asia 1992 68.7 1164970000 1656.
## 10 China Asia 1997 70.4 1230075000 2289.
## 11 China Asia 2002 72.0 1280400000 3119.
## 12 China Asia 2007 73.0 1318683096 4959.
# Store the China-only rows in their own data frame, df_China,
# so the plots below can reuse them.
df_China <- filter(df1, country == "China")
# Life expectancy in China over time, drawn three ways.

# 1. Markers only.
ggplot(data = df_China, mapping = aes(x = year, y = lifeExp)) +
  geom_point() +
  labs(title = "Points only!")

# 2. Connecting line only.
ggplot(data = df_China, mapping = aes(x = year, y = lifeExp)) +
  geom_line() +
  labs(title = "Line only!")

# 3. Layers stack: markers with the line drawn on top.
ggplot(data = df_China, mapping = aes(x = year, y = lifeExp)) +
  geom_point() +
  geom_line() +
  labs(title = "Both points and line!")
# Preview the rows for China and Brazil; %in% matches any value in the set.
filter(df1, country %in% c("China", "Brazil"))
## # A tibble: 24 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Brazil Americas 1952 50.9 56602560 2109.
## 2 Brazil Americas 1957 53.3 65551171 2487.
## 3 Brazil Americas 1962 55.7 76039390 3337.
## 4 Brazil Americas 1967 57.6 88049823 3430.
## 5 Brazil Americas 1972 59.5 100840058 4986.
## 6 Brazil Americas 1977 61.5 114313951 6660.
## 7 Brazil Americas 1982 63.3 128962939 7031.
## 8 Brazil Americas 1987 65.2 142938076 7807.
## 9 Brazil Americas 1992 67.1 155975974 6950.
## 10 Brazil Americas 1997 69.4 168546719 7958.
## # … with 14 more rows
## # ℹ Use `print(n = ...)` to see more rows
# Store the China and Brazil rows in their own data frame, df_two.
df_two <- filter(df1, country %in% c("China", "Brazil"))
# Both countries plotted together with no colour mapping, so the two
# series cannot be told apart. (Title typo "Sactterplot" corrected.)
ggplot(df_two, aes(x = year, y = lifeExp)) +
  geom_point() +
  labs(title = "Scatterplot, in one color!")
# Mapping colour to country separates the two series and adds a legend.

# Points, one colour per country.
ggplot(
  data = df_two,
  mapping = aes(x = year, y = lifeExp, color = country)
) +
  geom_point() +
  labs(title = "Scatterplot, but different colors")

# Same mapping drawn as lines, one line per country.
ggplot(
  data = df_two,
  mapping = aes(x = year, y = lifeExp, color = country)
) +
  geom_line() +
  labs(title = "Line charts; one color for each country")
# Presentation-ready line chart: life expectancy over time for China and
# Brazil, one coloured line per country, with full labelling.
ggplot(df_two, aes(x = year, y = lifeExp, color = country)) +
  geom_line() +
  labs(
    # NULL removes the x-axis title entirely; an empty string would still
    # draw a blank title and reserve space for it.
    x = NULL,
    y = "Life expectancy (years)",
    title = "Life expectancy in China and Brazil, 1952-2007",
    subtitle = "Wow, look at the increase over the period!",
    caption = "Source: World Bank via gapminder"
  )
# – The end –