library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(gapminder)
library(scales)
library(ggridges)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Load the gapminder dataset and preview its first ten rows.
data("gapminder")
gapminder %>%
  head(n = 10)
## # A tibble: 10 × 6
##    country     continent  year lifeExp      pop gdpPercap
##    <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.
##  2 Afghanistan Asia       1957    30.3  9240934      821.
##  3 Afghanistan Asia       1962    32.0 10267083      853.
##  4 Afghanistan Asia       1967    34.0 11537966      836.
##  5 Afghanistan Asia       1972    36.1 13079460      740.
##  6 Afghanistan Asia       1977    38.4 14880372      786.
##  7 Afghanistan Asia       1982    39.9 12881816      978.
##  8 Afghanistan Asia       1987    40.8 13867957      852.
##  9 Afghanistan Asia       1992    41.7 16317921      649.
## 10 Afghanistan Asia       1997    41.8 22227415      635.
# Preview the final rows of the dataset (tail() shows six by default).
gapminder %>%
  tail()
## # A tibble: 6 × 6
##   country  continent  year lifeExp      pop gdpPercap
##   <fct>    <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Zimbabwe Africa     1982    60.4  7636524      789.
## 2 Zimbabwe Africa     1987    62.4  9216418      706.
## 3 Zimbabwe Africa     1992    60.4 10704340      693.
## 4 Zimbabwe Africa     1997    46.8 11404948      792.
## 5 Zimbabwe Africa     2002    40.0 11926563      672.
## 6 Zimbabwe Africa     2007    43.5 12311143      470.
# Number of rows and columns in the dataset.
gapminder %>%
  dim()
## [1] 1704    6
# Compact overview of each column's type and first values.
gapminder %>%
  str()
## tibble [1,704 × 6] (S3: tbl_df/tbl/data.frame)
##  $ country  : Factor w/ 142 levels "Afghanistan",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ continent: Factor w/ 5 levels "Africa","Americas",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ year     : int [1:1704] 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 ...
##  $ lifeExp  : num [1:1704] 28.8 30.3 32 34 36.1 ...
##  $ pop      : int [1:1704] 8425333 9240934 10267083 11537966 13079460 14880372 12881816 13867957 16317921 22227415 ...
##  $ gdpPercap: num [1:1704] 779 821 853 836 740 ...
# Per-column summary: level counts for factors, quantiles for numerics.
gapminder %>%
  summary()
##         country        continent        year         lifeExp     
##  Afghanistan:  12   Africa  :624   Min.   :1952   Min.   :23.60  
##  Albania    :  12   Americas:300   1st Qu.:1966   1st Qu.:48.20  
##  Algeria    :  12   Asia    :396   Median :1980   Median :60.71  
##  Angola     :  12   Europe  :360   Mean   :1980   Mean   :59.47  
##  Argentina  :  12   Oceania : 24   3rd Qu.:1993   3rd Qu.:70.85  
##  Australia  :  12                  Max.   :2007   Max.   :82.60  
##  (Other)    :1632                                                
##       pop              gdpPercap       
##  Min.   :6.001e+04   Min.   :   241.2  
##  1st Qu.:2.794e+06   1st Qu.:  1202.1  
##  Median :7.024e+06   Median :  3531.8  
##  Mean   :2.960e+07   Mean   :  7215.3  
##  3rd Qu.:1.959e+07   3rd Qu.:  9325.5  
##  Max.   :1.319e+09   Max.   :113523.1  
## 
# Number of observations recorded for each continent.
table(gapminder[["continent"]])
## 
##   Africa Americas     Asia   Europe  Oceania 
##      624      300      396      360       24
# Rows for France (Europe) in 1997, 2002 and 2007.
gapminder %>%
  filter(
    country == "France" &
      continent == "Europe" &
      year %in% c(1997, 2002, 2007)
  )
## # A tibble: 3 × 6
##   country continent  year lifeExp      pop gdpPercap
##   <fct>   <fct>     <int>   <dbl>    <int>     <dbl>
## 1 France  Europe     1997    78.6 58623428    25890.
## 2 France  Europe     2002    79.6 59925035    28926.
## 3 France  Europe     2007    80.7 61083916    30470.
# Mean life expectancy over the rows for France in 2002.
gapminder %>%
  filter(country == "France" & continent == "Europe" & year == 2002) %>%
  summarise(mean(lifeExp))
## # A tibble: 1 × 1
##   `mean(lifeExp)`
##             <dbl>
## 1            79.6
# Average life expectancy per continent, restricted to 2002.
gapminder %>%
  group_by(continent) %>%
  filter(year == 2002) %>%
  summarise(average_lifeExp = mean(lifeExp))
## # A tibble: 5 × 2
##   continent average_lifeExp
##   <fct>               <dbl>
## 1 Africa               53.3
## 2 Americas             72.4
## 3 Asia                 69.2
## 4 Europe               76.7
## 5 Oceania              79.7
# Total population per continent in 2002, largest first.
# `pop` is an integer column and Asia's total (about 3.6 billion) is larger
# than the 32-bit integer maximum, so the values are summed as double to rule
# out integer overflow.
gapminder %>%
  filter(year == 2002) %>%
  group_by(continent) %>%
  summarise(total_pop = sum(as.numeric(pop))) %>%
  arrange(desc(total_pop))
## # A tibble: 5 × 2
##   continent  total_pop
##   <fct>          <dbl>
## 1 Asia      3601802203
## 2 Americas   849772762
## 3 Africa     833723916
## 4 Europe     578223869
## 5 Oceania     23454829
# Add total GDP (population times GDP per capita) to the 2002 rows and
# preview the first ten.
gapminder %>%
  filter(year == 2002) %>%
  mutate(totalGDP = pop * gdpPercap) %>%
  head(n = 10)
## # A tibble: 10 × 7
##    country     continent  year lifeExp       pop gdpPercap      totalGDP
##    <fct>       <fct>     <int>   <dbl>     <int>     <dbl>         <dbl>
##  1 Afghanistan Asia       2002    42.1  25268405      727.  18363410424.
##  2 Albania     Europe     2002    75.7   3508512     4604.  16153932130.
##  3 Algeria     Africa     2002    71.0  31287142     5288. 165447670333.
##  4 Angola      Africa     2002    41.0  10866106     2773.  30134833901.
##  5 Argentina   Americas   2002    74.3  38331121     8798. 337223430800.
##  6 Australia   Oceania    2002    80.4  19546792    30688. 599847158654.
##  7 Austria     Europe     2002    79.0   8148312    32418. 264148781752.
##  8 Bahrain     Asia       2002    74.8    656397    23404.  15362026094.
##  9 Bangladesh  Asia       2002    62.0 135656790     1136. 154159077921.
## 10 Belgium     Europe     2002    78.3  10311970    30486. 314369518653.
# Keep only the 2002 observations for the plots that follow, then preview them.
gapminder2002 <- filter(gapminder, year == 2002)
head(gapminder2002, n = 10)
## # A tibble: 10 × 6
##    country     continent  year lifeExp       pop gdpPercap
##    <fct>       <fct>     <int>   <dbl>     <int>     <dbl>
##  1 Afghanistan Asia       2002    42.1  25268405      727.
##  2 Albania     Europe     2002    75.7   3508512     4604.
##  3 Algeria     Africa     2002    71.0  31287142     5288.
##  4 Angola      Africa     2002    41.0  10866106     2773.
##  5 Argentina   Americas   2002    74.3  38331121     8798.
##  6 Australia   Oceania    2002    80.4  19546792    30688.
##  7 Austria     Europe     2002    79.0   8148312    32418.
##  8 Bahrain     Asia       2002    74.8    656397    23404.
##  9 Bangladesh  Asia       2002    62.0 135656790     1136.
## 10 Belgium     Europe     2002    78.3  10311970    30486.
# Life expectancy against GDP per capita (log10 x axis) in 2002,
# coloured by continent.
ggplot(gapminder2002, aes(gdpPercap, lifeExp, colour = continent)) +
  geom_point() +
  scale_x_log10()

# Same 2002 scatter plot, with point size mapped to population.
ggplot(
  gapminder2002,
  aes(gdpPercap, lifeExp, colour = continent, size = pop)
) +
  geom_point() +
  scale_x_log10()

# All years together: point size follows population and colour marks the
# continent (colour is mapped on the point layer only).
ggplot(gapminder, aes(gdpPercap, lifeExp, size = pop)) +
  geom_point(aes(colour = continent)) +
  # geom_smooth(method = "loess") +  (smoother left disabled)
  scale_x_log10()

# All-years scatter plot with axis labels and a bold, centred title.
# The x axis is drawn on a log10 scale; point size follows population and
# colour marks the continent.
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, size = pop)
) +
  geom_point(aes(color = continent)) +
  # geom_smooth(method = "loess") +
  scale_x_log10() +
  labs(
    title = "Association between GDP Per Capita and Life Expectancy",
    x = "Log GDP per Capita",
    y = "Life Expectancy"
  ) +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

library(ggthemes)
# All-years scatter plot styled with the Economist theme.
# theme_economist() is a complete theme and replaces any theme settings added
# before it, so it is applied first and the bold, centred title is layered on
# top of it.
ggplot(data = gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(aes(color = continent)) +
  # geom_smooth(method = "loess") +
  scale_x_log10() +
  labs(
    title = "Association between GDP Per Capita and Life Expectancy",
    x = "Log GDP per Capita",
    y = "Life Expectancy"
  ) +
  theme_economist() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))

# Subset of 2002 observations used by the distribution plots below.
# Left-assignment keeps the object name at the start of the statement.
gapminder2002 <- gapminder %>%
  filter(year == 2002)
gapminder2002 %>%
  head(n = 10)
## # A tibble: 10 × 6
##    country     continent  year lifeExp       pop gdpPercap
##    <fct>       <fct>     <int>   <dbl>     <int>     <dbl>
##  1 Afghanistan Asia       2002    42.1  25268405      727.
##  2 Albania     Europe     2002    75.7   3508512     4604.
##  3 Algeria     Africa     2002    71.0  31287142     5288.
##  4 Angola      Africa     2002    41.0  10866106     2773.
##  5 Argentina   Americas   2002    74.3  38331121     8798.
##  6 Australia   Oceania    2002    80.4  19546792    30688.
##  7 Austria     Europe     2002    79.0   8148312    32418.
##  8 Bahrain     Asia       2002    74.8    656397    23404.
##  9 Bangladesh  Asia       2002    62.0 135656790     1136.
## 10 Belgium     Europe     2002    78.3  10311970    30486.
# Histogram (20 bins) of GDP per capita across countries in 2002.
ggplot(gapminder2002, aes(x = gdpPercap)) +
  geom_histogram(bins = 20, fill = "#00adef", colour = "red") +
  labs(title = "Distribution of GDP per Capita in 2002", y = "Frequency")

# Same histogram with bar heights expressed as each bin's share of all rows,
# shown as percentages on the y axis.
# after_stat() replaces the dot-dot notation deprecated in ggplot2 3.4.0.
ggplot(
  data = gapminder2002,
  mapping = aes(x = gdpPercap, y = after_stat(count / sum(count)))
) +
  geom_histogram(fill = "#00adef", color = "red", bins = 20) +
  scale_y_continuous(labels = percent) +
  labs(title = "Distribution of GDP per Capita in 2002", y = "Frequency")

# Overlaid, semi-transparent density curves of GDP per capita per continent.
ggplot(gapminder2002, aes(x = gdpPercap, fill = continent)) +
  geom_density(alpha = 0.7)

# Ridgeline plot of GDP per capita in 2002, one ridge per continent.
# The legend is hidden because the y axis already names each continent.
ggplot(
  data = gapminder2002,
  mapping = aes(x = gdpPercap, y = continent, fill = continent)
) +
  geom_density_ridges(alpha = 0.7) +
  theme_ridges() +
  # labs() only uses named arguments, so the title must be passed as `title =`.
  labs(title = "RidgePlot for GDPPerCap") +
  theme(legend.position = "none")
## Picking joint bandwidth of 2890

# Asian countries in 2002, followed by a ten-row preview.
asia <- filter(gapminder, continent == "Asia", year == 2002)
head(asia, n = 10)
## # A tibble: 10 × 6
##    country          continent  year lifeExp        pop gdpPercap
##    <fct>            <fct>     <int>   <dbl>      <int>     <dbl>
##  1 Afghanistan      Asia       2002    42.1   25268405      727.
##  2 Bahrain          Asia       2002    74.8     656397    23404.
##  3 Bangladesh       Asia       2002    62.0  135656790     1136.
##  4 Cambodia         Asia       2002    56.8   12926707      896.
##  5 China            Asia       2002    72.0 1280400000     3119.
##  6 Hong Kong, China Asia       2002    81.5    6762476    30209.
##  7 India            Asia       2002    62.9 1034172547     1747.
##  8 Indonesia        Asia       2002    68.6  211060000     2874.
##  9 Iran             Asia       2002    69.5   66907826     9241.
## 10 Iraq             Asia       2002    57.0   24001816     4391.
# European countries in 2002, followed by a ten-row preview.
europe <- filter(gapminder, continent == "Europe", year == 2002)
head(europe, n = 10)
## # A tibble: 10 × 6
##    country                continent  year lifeExp      pop gdpPercap
##    <fct>                  <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Albania                Europe     2002    75.7  3508512     4604.
##  2 Austria                Europe     2002    79.0  8148312    32418.
##  3 Belgium                Europe     2002    78.3 10311970    30486.
##  4 Bosnia and Herzegovina Europe     2002    74.1  4165416     6019.
##  5 Bulgaria               Europe     2002    72.1  7661799     7697.
##  6 Croatia                Europe     2002    74.9  4481020    11628.
##  7 Czech Republic         Europe     2002    75.5 10256295    17596.
##  8 Denmark                Europe     2002    77.2  5374693    32167.
##  9 Finland                Europe     2002    78.4  5193039    28205.
## 10 France                 Europe     2002    79.6 59925035    28926.
# Horizontal bars of 2002 life expectancy for each Asian country, in the
# default country order.
ggplot(asia, aes(country, lifeExp, fill = country)) +
  geom_bar(width = 0.9, stat = "identity") +
  coord_flip()

# Horizontal bars of 2002 life expectancy for Asian countries, ordered by
# life expectancy, with the per-country fill legend hidden.
# Assigned with `<-` so the object name is visible at the start of the chain.
graph1 <- ggplot(
  data = asia,
  mapping = aes(x = reorder(country, lifeExp), y = lifeExp, fill = country)
) +
  geom_bar(stat = "identity", width = 0.9) +
  coord_flip() +
  theme(legend.position = "none") +
  labs(x = "", y = "Life Expectancy of Asia")
graph1

# Horizontal bars of 2002 life expectancy for European countries, ordered by
# life expectancy, with the per-country fill legend hidden.
# Assigned with `<-` so the object name is visible at the start of the chain.
graph2 <- ggplot(
  data = europe,
  mapping = aes(x = reorder(country, lifeExp), y = lifeExp, fill = country)
) +
  geom_bar(stat = "identity", width = 0.9) +
  coord_flip() +
  theme(legend.position = "none") +
  labs(x = "", y = "Life Expectancy of Europe")
graph2

# Show the Asia and Europe bar charts side by side.
grid.arrange(grobs = list(graph1, graph2), ncol = 2)