library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(gapminder)
library(scales)
library(ggridges)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Load the gapminder dataset into the workspace and preview its first ten rows.
data("gapminder")
gapminder %>%
  head(n = 10)
## # A tibble: 10 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
## 7 Afghanistan Asia 1982 39.9 12881816 978.
## 8 Afghanistan Asia 1987 40.8 13867957 852.
## 9 Afghanistan Asia 1992 41.7 16317921 649.
## 10 Afghanistan Asia 1997 41.8 22227415 635.
# Preview the last six rows (the default for tail()).
gapminder %>%
  tail(n = 6L)
## # A tibble: 6 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Zimbabwe Africa 1982 60.4 7636524 789.
## 2 Zimbabwe Africa 1987 62.4 9216418 706.
## 3 Zimbabwe Africa 1992 60.4 10704340 693.
## 4 Zimbabwe Africa 1997 46.8 11404948 792.
## 5 Zimbabwe Africa 2002 40.0 11926563 672.
## 6 Zimbabwe Africa 2007 43.5 12311143 470.
# Report the table dimensions as (rows, columns).
c(nrow(gapminder), ncol(gapminder))
## [1] 1704 6
# Show the column types and the first few values of each column.
gapminder %>%
  str()
## tibble [1,704 × 6] (S3: tbl_df/tbl/data.frame)
## $ country : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ year : int [1:1704] 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
## $ lifeExp : num [1:1704] 28.8 30.3 32 34 36.1 ...
## $ pop : int [1:1704] 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
## $ gdpPercap: num [1:1704] 779 821 853 836 740 ...
# Per-column summary: level counts for factors, quantiles for numeric columns.
gapminder %>%
  summary()
## country continent year lifeExp
## Afghanistan: 12 Africa :624 Min. :1952 Min. :23.60
## Albania : 12 Americas:300 1st Qu.:1966 1st Qu.:48.20
## Algeria : 12 Asia :396 Median :1980 Median :60.71
## Angola : 12 Europe :360 Mean :1980 Mean :59.47
## Argentina : 12 Oceania : 24 3rd Qu.:1993 3rd Qu.:70.85
## Australia : 12 Max. :2007 Max. :82.60
## (Other) :1632
## pop gdpPercap
## Min. :6.001e+04 Min. : 241.2
## 1st Qu.:2.794e+06 1st Qu.: 1202.1
## Median :7.024e+06 Median : 3531.8
## Mean :2.960e+07 Mean : 7215.3
## 3rd Qu.:1.959e+07 3rd Qu.: 9325.5
## Max. :1.319e+09 Max. :113523.1
##
# Count the number of rows recorded for each continent.
table(gapminder[["continent"]])
##
## Africa Americas Asia Europe Oceania
## 624 300 396 360 24
# Rows for France (Europe) in the three most recent survey years.
filter(
  gapminder,
  country == "France",
  continent == "Europe",
  year %in% c(1997, 2002, 2007)
)
## # A tibble: 3 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 France Europe 1997 78.6 58623428 25890.
## 2 France Europe 2002 79.6 59925035 28926.
## 3 France Europe 2007 80.7 61083916 30470.
# Mean life expectancy for France in 2002. The summary column is left unnamed
# so that it keeps the auto-generated name `mean(lifeExp)`.
gapminder %>%
  filter(year == 2002 & continent == "Europe" & country == "France") %>%
  summarise(mean(lifeExp))
## # A tibble: 1 × 1
## `mean(lifeExp)`
## <dbl>
## 1 79.6
# Average life expectancy per continent in 2002.
filter(gapminder, year == 2002) %>%
  group_by(continent) %>%
  summarise(average_lifeExp = mean(lifeExp))
## # A tibble: 5 × 2
## continent average_lifeExp
## <fct> <dbl>
## 1 Africa 53.3
## 2 Americas 72.4
## 3 Asia 69.2
## 4 Europe 76.7
## 5 Oceania 79.7
# Total population per continent in 2002, largest first.
filter(gapminder, year == 2002) %>%
  group_by(continent) %>%
  summarise(total_pop = sum(pop)) %>%
  arrange(desc(total_pop))
## # A tibble: 5 × 2
## continent total_pop
## <fct> <dbl>
## 1 Asia 3601802203
## 2 Americas 849772762
## 3 Africa 833723916
## 4 Europe 578223869
## 5 Oceania 23454829
# Add total GDP (GDP per capita times population) for 2002 and preview the
# first ten rows.
filter(gapminder, year == 2002) %>%
  mutate(totalGDP = gdpPercap * pop) %>%
  slice_head(n = 10)
## # A tibble: 10 × 7
## country continent year lifeExp pop gdpPercap totalGDP
## <fct> <fct> <int> <dbl> <int> <dbl> <dbl>
## 1 Afghanistan Asia 2002 42.1 25268405 727. 18363410424.
## 2 Albania Europe 2002 75.7 3508512 4604. 16153932130.
## 3 Algeria Africa 2002 71.0 31287142 5288. 165447670333.
## 4 Angola Africa 2002 41.0 10866106 2773. 30134833901.
## 5 Argentina Americas 2002 74.3 38331121 8798. 337223430800.
## 6 Australia Oceania 2002 80.4 19546792 30688. 599847158654.
## 7 Austria Europe 2002 79.0 8148312 32418. 264148781752.
## 8 Bahrain Asia 2002 74.8 656397 23404. 15362026094.
## 9 Bangladesh Asia 2002 62.0 135656790 1136. 154159077921.
## 10 Belgium Europe 2002 78.3 10311970 30486. 314369518653.
# Keep only the 2002 observations for the plots below, then preview ten rows.
gapminder2002 <- filter(gapminder, year == 2002)
head(gapminder2002, n = 10)
## # A tibble: 10 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 2002 42.1 25268405 727.
## 2 Albania Europe 2002 75.7 3508512 4604.
## 3 Algeria Africa 2002 71.0 31287142 5288.
## 4 Angola Africa 2002 41.0 10866106 2773.
## 5 Argentina Americas 2002 74.3 38331121 8798.
## 6 Australia Oceania 2002 80.4 19546792 30688.
## 7 Austria Europe 2002 79.0 8148312 32418.
## 8 Bahrain Asia 2002 74.8 656397 23404.
## 9 Bangladesh Asia 2002 62.0 135656790 1136.
## 10 Belgium Europe 2002 78.3 10311970 30486.
# 2002 scatter plot of life expectancy against GDP per capita (log10 x axis),
# coloured by continent.
ggplot(gapminder2002, aes(x = gdpPercap, y = lifeExp, colour = continent)) +
  geom_point() +
  scale_x_log10()

# Same plot with point size mapped to population.
ggplot(
  gapminder2002,
  aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)
) +
  geom_point() +
  scale_x_log10()

# All years: life expectancy against GDP per capita (log10 x axis), with point
# size mapped to population and colour mapped to continent.
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, size = pop)) +
  geom_point(aes(colour = continent)) +
  # geom_smooth(method = "loess") +
  scale_x_log10()

# Same plot with axis labels and a bold, centred title.
ggplot(gapminder, aes(x = gdpPercap, y = lifeExp, size = pop)) +
  geom_point(aes(colour = continent)) +
  # geom_smooth(method = "loess") +
  scale_x_log10() +
  labs(
    x = " Log GDP per Capita",
    y = "Life Expectancy",
    title = "Association between GDP Per Capita and Life Expectancy"
  ) +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

library(ggthemes)
# All-years scatter plot styled with the Economist theme from ggthemes.
# theme_economist() is a complete theme: adding it replaces every theme
# setting made before it. It is therefore applied first, and the bold, centred
# title tweak is layered on top so that it actually takes effect.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(aes(color = continent)) +
  # geom_smooth(method = "loess") +
  scale_x_log10() +
  labs(x = " Log GDP per Capita", y = "Life Expectancy") +
  ggtitle("Association between GDP Per Capita and Life Expectancy") +
  theme_economist() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# 2002 subset used by the distribution plots below. Assigned with a leading
# `<-` so the target name is visible at the start of the pipeline rather than
# hidden at its end behind `->`.
gapminder2002 <- gapminder %>%
  filter(year == 2002)
gapminder2002 %>%
  head(n = 10)
## # A tibble: 10 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 2002 42.1 25268405 727.
## 2 Albania Europe 2002 75.7 3508512 4604.
## 3 Algeria Africa 2002 71.0 31287142 5288.
## 4 Angola Africa 2002 41.0 10866106 2773.
## 5 Argentina Americas 2002 74.3 38331121 8798.
## 6 Australia Oceania 2002 80.4 19546792 30688.
## 7 Austria Europe 2002 79.0 8148312 32418.
## 8 Bahrain Asia 2002 74.8 656397 23404.
## 9 Bangladesh Asia 2002 62.0 135656790 1136.
## 10 Belgium Europe 2002 78.3 10311970 30486.
# Histogram (20 bins) of GDP per capita across countries in 2002.
ggplot(gapminder2002, aes(x = gdpPercap)) +
  geom_histogram(bins = 20, colour = "red", fill = "#00adef") +
  labs(y = "Frequency", title = "Distribution of GDP per Capita in 2002")

# Histogram of GDP per capita in 2002 with bar heights shown as the share of
# countries per bin. after_stat() refers to the `count` variable computed by
# the histogram stat; it replaces the `..count..` notation deprecated in
# ggplot2 3.4.0.
ggplot(
  data = gapminder2002,
  mapping = aes(x = gdpPercap, y = after_stat(count / sum(count)))
) +
  geom_histogram(fill = "#00adef", color = "red", bins = 20) +
  scale_y_continuous(labels = percent) +
  labs(title = "Distribution of GDP per Capita in 2002", y = "Frequency")

# Overlaid, semi-transparent density curves of GDP per capita by continent.
ggplot(gapminder2002, aes(x = gdpPercap, fill = continent)) +
  geom_density(alpha = 0.7)

# Ridge plot of GDP per capita in 2002, one ridge per continent.
# The title text must be passed as `title =`; an unnamed argument to labs() is
# not attached to any label, so the plot would be drawn without a title.
ggplot(
  data = gapminder2002,
  mapping = aes(x = gdpPercap, y = continent, fill = continent)
) +
  geom_density_ridges(alpha = 0.7) +
  theme_ridges() +
  labs(title = "RidgePlot for GDPPerCap") +
  # The y axis already names each continent, so the fill legend is hidden.
  theme(legend.position = "none")
## Picking joint bandwidth of 2890

# Asian countries in 2002, with a ten-row preview.
asia <- filter(gapminder, continent == "Asia", year == 2002)
head(asia, n = 10)
## # A tibble: 10 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 2002 42.1 25268405 727.
## 2 Bahrain Asia 2002 74.8 656397 23404.
## 3 Bangladesh Asia 2002 62.0 135656790 1136.
## 4 Cambodia Asia 2002 56.8 12926707 896.
## 5 China Asia 2002 72.0 1280400000 3119.
## 6 Hong Kong, China Asia 2002 81.5 6762476 30209.
## 7 India Asia 2002 62.9 1034172547 1747.
## 8 Indonesia Asia 2002 68.6 211060000 2874.
## 9 Iran Asia 2002 69.5 66907826 9241.
## 10 Iraq Asia 2002 57.0 24001816 4391.
# European countries in 2002, with a ten-row preview.
europe <- filter(gapminder, continent == "Europe", year == 2002)
head(europe, n = 10)
## # A tibble: 10 × 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Albania Europe 2002 75.7 3508512 4604.
## 2 Austria Europe 2002 79.0 8148312 32418.
## 3 Belgium Europe 2002 78.3 10311970 30486.
## 4 Bosnia and Herzegovina Europe 2002 74.1 4165416 6019.
## 5 Bulgaria Europe 2002 72.1 7661799 7697.
## 6 Croatia Europe 2002 74.9 4481020 11628.
## 7 Czech Republic Europe 2002 75.5 10256295 17596.
## 8 Denmark Europe 2002 77.2 5374693 32167.
## 9 Finland Europe 2002 78.4 5193039 28205.
## 10 France Europe 2002 79.6 59925035 28926.
# Horizontal bar chart of 2002 life expectancy for each Asian country, in the
# default (factor level) order. geom_col() draws bars whose heights are the
# data values.
ggplot(asia, aes(x = country, y = lifeExp, fill = country)) +
  geom_col(width = 0.9) +
  coord_flip()

# Build a horizontal bar chart of life expectancy per country, with bars
# ordered by life expectancy and the fill legend hidden.
#
# data:    data frame with `country` and `lifeExp` columns.
# y_label: text for the life-expectancy axis.
# Returns the ggplot object.
life_exp_bars <- function(data, y_label) {
  ggplot(
    data = data,
    mapping = aes(x = reorder(country, lifeExp), y = lifeExp, fill = country)
  ) +
    geom_col(width = 0.9) +
    coord_flip() +
    # Each bar is already labelled on the axis, so the legend is dropped.
    theme(legend.position = "none") +
    labs(x = "", y = y_label)
}

graph1 <- life_exp_bars(asia, "Life Expectancy of Asia")
graph1

graph2 <- life_exp_bars(europe, "Life Expectancy of Europe")
graph2

# Show the two continents side by side.
grid.arrange(graph1, graph2, ncol = 2)
