#0520_gapminder
#Gapminder is an independent educational non-profit fighting global misconceptions.
#Hans Rosling在TED談全球健康發展
#https://www.ted.com/talks/hans_rosling_the_best_stats_you_ve_ever_seen?language=zh-tw


#欄位簡介
# "country" 國家
# "continent" 洲
# "year"年
# "lifeExp"預期壽命
# "pop" 人口
# "gdpPercap"人均GDP

#一、先安裝下列套件
#install.packages("gapminder")
#install.packages("gridExtra")
#install.packages("ggplot2")
#install.packages("tidyverse")

#二、呼叫gapminder套件
library(gapminder)
# The gapminder package provides a data set that is also named `gapminder`.
# Copy it into a workspace variable of the same name. The package is named
# explicitly so the right-hand side always reads the packaged data, even when
# a (possibly modified) `gapminder` object is already present in the
# workspace from an earlier run of this script.
gapminder <- gapminder::gapminder
# Check the size of the data: number of rows and number of columns.
dim(gapminder)
## [1] 1704    6
# Print summary statistics for each column.
summary(gapminder)
##         country        continent        year         lifeExp     
##  Afghanistan:  12   Africa  :624   Min.   :1952   Min.   :23.60  
##  Albania    :  12   Americas:300   1st Qu.:1966   1st Qu.:48.20  
##  Algeria    :  12   Asia    :396   Median :1980   Median :60.71  
##  Angola     :  12   Europe  :360   Mean   :1980   Mean   :59.47  
##  Argentina  :  12   Oceania : 24   3rd Qu.:1993   3rd Qu.:70.85  
##  Australia  :  12                  Max.   :2007   Max.   :82.60  
##  (Other)    :1632                                                
##       pop              gdpPercap       
##  Min.   :6.001e+04   Min.   :   241.2  
##  1st Qu.:2.794e+06   1st Qu.:  1202.1  
##  Median :7.024e+06   Median :  3531.8  
##  Mean   :2.960e+07   Mean   :  7215.3  
##  3rd Qu.:1.959e+07   3rd Qu.:  9325.5  
##  Max.   :1.319e+09   Max.   :113523.1  
## 
# Show the first few rows of the data (six in the output recorded below).
head(gapminder)
## # A tibble: 6 x 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Afghanistan Asia       1952    28.8  8425333      779.
## 2 Afghanistan Asia       1957    30.3  9240934      821.
## 3 Afghanistan Asia       1962    32.0 10267083      853.
## 4 Afghanistan Asia       1967    34.0 11537966      836.
## 5 Afghanistan Asia       1972    36.1 13079460      740.
## 6 Afghanistan Asia       1977    38.4 14880372      786.
# List the column names.
colnames(gapminder)
## [1] "country"   "continent" "year"      "lifeExp"   "pop"       "gdpPercap"
# List every distinct country contained in the data.
unique(gapminder$country)
##   [1] Afghanistan              Albania                  Algeria                 
##   [4] Angola                   Argentina                Australia               
##   [7] Austria                  Bahrain                  Bangladesh              
##  [10] Belgium                  Benin                    Bolivia                 
##  [13] Bosnia and Herzegovina   Botswana                 Brazil                  
##  [16] Bulgaria                 Burkina Faso             Burundi                 
##  [19] Cambodia                 Cameroon                 Canada                  
##  [22] Central African Republic Chad                     Chile                   
##  [25] China                    Colombia                 Comoros                 
##  [28] Congo, Dem. Rep.         Congo, Rep.              Costa Rica              
##  [31] Cote d'Ivoire            Croatia                  Cuba                    
##  [34] Czech Republic           Denmark                  Djibouti                
##  [37] Dominican Republic       Ecuador                  Egypt                   
##  [40] El Salvador              Equatorial Guinea        Eritrea                 
##  [43] Ethiopia                 Finland                  France                  
##  [46] Gabon                    Gambia                   Germany                 
##  [49] Ghana                    Greece                   Guatemala               
##  [52] Guinea                   Guinea-Bissau            Haiti                   
##  [55] Honduras                 Hong Kong, China         Hungary                 
##  [58] Iceland                  India                    Indonesia               
##  [61] Iran                     Iraq                     Ireland                 
##  [64] Israel                   Italy                    Jamaica                 
##  [67] Japan                    Jordan                   Kenya                   
##  [70] Korea, Dem. Rep.         Korea, Rep.              Kuwait                  
##  [73] Lebanon                  Lesotho                  Liberia                 
##  [76] Libya                    Madagascar               Malawi                  
##  [79] Malaysia                 Mali                     Mauritania              
##  [82] Mauritius                Mexico                   Mongolia                
##  [85] Montenegro               Morocco                  Mozambique              
##  [88] Myanmar                  Namibia                  Nepal                   
##  [91] Netherlands              New Zealand              Nicaragua               
##  [94] Niger                    Nigeria                  Norway                  
##  [97] Oman                     Pakistan                 Panama                  
## [100] Paraguay                 Peru                     Philippines             
## [103] Poland                   Portugal                 Puerto Rico             
## [106] Reunion                  Romania                  Rwanda                  
## [109] Sao Tome and Principe    Saudi Arabia             Senegal                 
## [112] Serbia                   Sierra Leone             Singapore               
## [115] Slovak Republic          Slovenia                 Somalia                 
## [118] South Africa             Spain                    Sri Lanka               
## [121] Sudan                    Swaziland                Sweden                  
## [124] Switzerland              Syria                    Taiwan                  
## [127] Tanzania                 Thailand                 Togo                    
## [130] Trinidad and Tobago      Tunisia                  Turkey                  
## [133] Uganda                   United Kingdom           United States           
## [136] Uruguay                  Venezuela                Vietnam                 
## [139] West Bank and Gaza       Yemen, Rep.              Zambia                  
## [142] Zimbabwe                
## 142 Levels: Afghanistan Albania Algeria Angola Argentina Australia ... Zimbabwe
# List every distinct year contained in the data.
unique(gapminder$year)
##  [1] 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 2002 2007
#呼叫tidyverse套件
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.0.6     ✓ dplyr   1.0.5
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# 1. Distribution of life expectancy within each continent (2007 only).
# One box per continent, filled with that continent's colour.
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = continent, y = lifeExp, fill = continent)) +
  geom_boxplot()

# 2. Which countries belong to Oceania? Only the 2007 rows are kept, so each
# country is listed once.
gapminder %>%
  filter(continent == "Oceania", year == 2007)
## # A tibble: 2 x 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Australia   Oceania    2007    81.2 20434176    34435.
## 2 New Zealand Oceania    2007    80.2  4115771    25185.
# 3. The ten countries with the highest life expectancy in 1952.
# Sort from longest- to shortest-lived, keep the first ten rows, then draw
# horizontal bars ordered by life expectancy.
gapminder %>%
  filter(year == 1952) %>%
  arrange(desc(lifeExp)) %>%
  head(n = 10) %>%
  ggplot(aes(x = reorder(country, lifeExp), y = lifeExp)) +
  geom_col() +
  coord_flip()

# 4. Life expectancy versus GDP per capita by continent (2007).
# Each continent is drawn as a text label placed at its median GDP per
# capita (x) and median life expectancy (y); the label size is scaled by the
# continent's total population.
gapminder %>%
  filter(year == 2007) %>%
  group_by(continent, year) %>%
  summarize(
    medianLifeExp = median(lifeExp),      # median life expectancy
    medianGdpPercap = median(gdpPercap),  # median GDP per capita
    # Total population. `pop` is an integer column and Asia's 2007 total is
    # larger than the biggest value an integer can hold (China and India
    # alone exceed it), so sum(pop) would overflow to NA with a warning and
    # Asia's label would be dropped. Summing as double avoids that.
    sumpop = sum(as.numeric(pop)),
    # Return an ungrouped result; this also stops dplyr from printing its
    # "grouped output" message.
    .groups = "drop"
  ) %>%
  ggplot() +
  geom_text(
    aes(
      x = medianGdpPercap,
      y = medianLifeExp,
      label = continent,
      size = sumpop
    )
  )

# 5. GDP per capita over time for China and Taiwan: one coloured line per
# country.
gapminder %>%
  filter(country %in% c("China", "Taiwan")) %>%
  ggplot() +
  geom_line(aes(x = year, y = gdpPercap, colour = country, group = country))

# 6. Compare how the world's countries looked in 1952 and in 2007.
# Keep only the 1952 rows and store them in `gapminder_1952`.
gapminder_1952 <- filter(gapminder, year == 1952)

# Scatter plot of the 1952 data, stored in `g1`.
g1 <- gapminder_1952 %>%
  ggplot(
    mapping = aes(
      x = pop,             # x axis: population
      y = lifeExp,         # y axis: life expectancy
      colour = continent,  # point colour distinguishes continents
      shape = continent,   # point shape distinguishes continents
      size = gdpPercap     # point size follows GDP per capita
    )
  ) +
  geom_point() +
  scale_x_log10() +
  # Other commonly used themes: theme_bw(), theme_classic(), theme_dark(),
  # theme_grey(), theme_linedraw().
  theme_minimal()

# Evaluating `g1` at top level draws the plot.
g1

# Keep only the 2007 rows and store them in `gapminder_2007`.
gapminder_2007 <- filter(gapminder, year == 2007)

# Scatter plot of the 2007 data, stored in `g2`.
g2 <- gapminder_2007 %>%
  ggplot(
    mapping = aes(
      x = pop,             # x axis: population
      y = lifeExp,         # y axis: life expectancy
      colour = continent,  # point colour distinguishes continents
      shape = continent,   # point shape distinguishes continents
      size = gdpPercap     # point size follows GDP per capita
    )
  ) +
  geom_point() +
  scale_x_log10() +
  # Other commonly used themes: theme_bw(), theme_minimal(), theme_dark(),
  # theme_grey(), theme_linedraw().
  theme_classic()

# Evaluating `g2` at top level draws the plot.
g2

# 繪畫多個圖形於一個畫布上,同時比較兩個年份的差異
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Draw both plots side by side (one row, two columns) for comparison.
grid.arrange(grobs = list(g1, g2), nrow = 1, ncol = 2)

# 7. How the countries of each continent changed between 1952 and 2007,
# using facet_grid(): one panel per continent (rows) and year (columns).
# Points are coloured by continent and sized by population.
gapminder %>%
  filter(year == 1952 | year == 2007) %>%
  ggplot() +
  geom_point(aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)) +
  facet_grid(continent ~ year)

# Exercise 1. Distribution of GDP per capita within each continent
# (2007 rows only): one box per continent, filled with its colour.
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = continent, y = gdpPercap, fill = continent)) +
  geom_boxplot()

# Exercise 2. The ten countries with the highest life expectancy in 2007.
# Sort from longest- to shortest-lived, keep the first ten rows, then draw
# horizontal bars ordered by life expectancy.
gapminder %>%
  filter(year == 2007) %>%
  arrange(desc(lifeExp)) %>%
  head(n = 10) %>%
  ggplot(aes(x = reorder(country, lifeExp), y = lifeExp)) +
  geom_col() +
  coord_flip()

# Exercise 3. Population versus GDP per capita by continent (2007).
# Each continent is drawn as a text label placed at its median GDP per
# capita (x) and median country population (y); the label size is scaled by
# the continent's total population.
gapminder %>%
  filter(year == 2007) %>%
  group_by(continent, year) %>%
  summarize(
    medianpop = median(pop),              # median country population
    medianGdpPercap = median(gdpPercap),  # median GDP per capita
    # Total population. `pop` is an integer column and Asia's 2007 total is
    # larger than the biggest value an integer can hold (China and India
    # alone exceed it), so sum(pop) would overflow to NA with a warning and
    # Asia's label would be dropped. Summing as double avoids that.
    sumpop = sum(as.numeric(pop)),
    # Return an ungrouped result; this also stops dplyr from printing its
    # "grouped output" message.
    .groups = "drop"
  ) %>%
  ggplot() +
  geom_text(
    aes(
      x = medianGdpPercap,
      y = medianpop,
      label = continent,
      size = sumpop
    )
  )

# Exercise 4. Pick any two countries and compare how their GDP per capita
# changed over the years (here: Oman and Spain), one coloured line each.
gapminder %>%
  filter(country %in% c("Oman", "Spain")) %>%
  ggplot() +
  geom_line(aes(x = year, y = gdpPercap, colour = country, group = country))

# Exercise 5 (free practice 1). Which countries belong to Asia? Only the
# 2007 rows are kept, so each country is listed once.
# Note: the tibble print shows just the first 10 rows (the recorded output
# below has 10 of 33).
gapminder %>%
  filter(continent == "Asia", year == 2007)
## # A tibble: 33 x 6
##    country          continent  year lifeExp        pop gdpPercap
##    <fct>            <fct>     <int>   <dbl>      <int>     <dbl>
##  1 Afghanistan      Asia       2007    43.8   31889923      975.
##  2 Bahrain          Asia       2007    75.6     708573    29796.
##  3 Bangladesh       Asia       2007    64.1  150448339     1391.
##  4 Cambodia         Asia       2007    59.7   14131858     1714.
##  5 China            Asia       2007    73.0 1318683096     4959.
##  6 Hong Kong, China Asia       2007    82.2    6980412    39725.
##  7 India            Asia       2007    64.7 1110396331     2452.
##  8 Indonesia        Asia       2007    70.6  223547000     3541.
##  9 Iran             Asia       2007    71.0   69453570    11606.
## 10 Iraq             Asia       2007    59.5   27499638     4471.
## # … with 23 more rows
# Exercise 6 (free practice 2). GDP per capita over time for Japan and
# Taiwan: one coloured line per country.
gapminder %>%
  filter(country %in% c("Japan", "Taiwan")) %>%
  ggplot() +
  geom_line(aes(x = year, y = gdpPercap, colour = country, group = country))