# 欄位簡介
# "country" 國家
# "continent" 洲
# "year" 年
# "lifeExp" 預期壽命
# "pop" 人口
# "gdpPercap" 人均GDP

# Step 1: make sure the packages used by this script are installed.
# Only packages that are not yet available get installed, so re-running the
# script does not download and reinstall everything each time.
# Wrapped in local() so the helper variables do not leak into the workspace.
local({
  required_pkgs <- c("gapminder", "ggplot2", "tidyverse", "gridExtra")
  is_installed <- vapply(
    required_pkgs,
    requireNamespace,
    logical(1),
    quietly = TRUE
  )
  missing_pkgs <- required_pkgs[!is_installed]
  if (length(missing_pkgs) > 0) {
    install.packages(missing_pkgs)
  }
})
# Step 2: attach the gapminder package.
library("gapminder")
# The package ships a single data set, also named `gapminder`.
# Inspect it: dimensions first, then the first six rows, then a summary
# of every column.
dim(gapminder)
## [1] 1704    6
head(gapminder, n = 6)
## # A tibble: 6 x 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Afghanistan Asia       1952    28.8  8425333      779.
## 2 Afghanistan Asia       1957    30.3  9240934      821.
## 3 Afghanistan Asia       1962    32.0 10267083      853.
## 4 Afghanistan Asia       1967    34.0 11537966      836.
## 5 Afghanistan Asia       1972    36.1 13079460      740.
## 6 Afghanistan Asia       1977    38.4 14880372      786.
summary(object = gapminder)
##         country        continent        year         lifeExp     
##  Afghanistan:  12   Africa  :624   Min.   :1952   Min.   :23.60  
##  Albania    :  12   Americas:300   1st Qu.:1966   1st Qu.:48.20  
##  Algeria    :  12   Asia    :396   Median :1980   Median :60.71  
##  Angola     :  12   Europe  :360   Mean   :1980   Mean   :59.47  
##  Argentina  :  12   Oceania : 24   3rd Qu.:1993   3rd Qu.:70.85  
##  Australia  :  12                  Max.   :2007   Max.   :82.60  
##  (Other)    :1632                                                
##       pop              gdpPercap       
##  Min.   :6.001e+04   Min.   :   241.2  
##  1st Qu.:2.794e+06   1st Qu.:  1202.1  
##  Median :7.024e+06   Median :  3531.8  
##  Mean   :2.960e+07   Mean   :  7215.3  
##  3rd Qu.:1.959e+07   3rd Qu.:  9325.5  
##  Max.   :1.319e+09   Max.   :113523.1  
## 
# Copy the package's data set into a workspace variable of the same name.
gapminder <- gapminder::gapminder
# Look at the first few rows.
head(gapminder, n = 6)
## # A tibble: 6 x 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Afghanistan Asia       1952    28.8  8425333      779.
## 2 Afghanistan Asia       1957    30.3  9240934      821.
## 3 Afghanistan Asia       1962    32.0 10267083      853.
## 4 Afghanistan Asia       1967    34.0 11537966      836.
## 5 Afghanistan Asia       1972    36.1 13079460      740.
## 6 Afghanistan Asia       1977    38.4 14880372      786.
# List the column names.
names(gapminder)
## [1] "country"   "continent" "year"      "lifeExp"   "pop"       "gdpPercap"
# Attach the tidyverse; the rest of the script uses its dplyr verbs, the
# %>% pipe and ggplot2.
library("tidyverse")
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.3     ✓ purrr   0.3.4
## ✓ tibble  3.1.1     ✓ dplyr   1.0.5
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# 1. Distribution of life expectancy within each continent (2007 rows only).
# One box per continent, filled by continent.
ggplot(
  data = filter(gapminder, year == 2007),
  mapping = aes(x = continent, y = lifeExp, fill = continent)
) +
  geom_boxplot()

# 2. List the countries that belong to Oceania (taken from the 2007 rows).
# Both conditions must hold, so they are given to a single filter() call.
filter(gapminder, year == 2007, continent == "Oceania")
## # A tibble: 2 x 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Australia   Oceania    2007    81.2 20434176    34435.
## 2 New Zealand Oceania    2007    80.2  4115771    25185.
# 3. The ten countries with the highest life expectancy in 1952.
# Rows are sorted by descending life expectancy and the first ten are kept;
# bars are ordered by life expectancy and flipped to run horizontally.
ggplot(
  data = gapminder %>%
    filter(year == 1952) %>%
    arrange(desc(lifeExp)) %>%
    slice(seq_len(10)),
  mapping = aes(x = reorder(country, lifeExp), y = lifeExp)
) +
  geom_col() +
  coord_flip()

# 4. Life expectancy versus GDP per capita, by continent, in 2007.
# Each continent is drawn as a text label placed at its median GDP per capita
# (x) and median life expectancy (y); label size is mapped to total population.
gapminder %>%
  filter(year == 2007) %>%
  group_by(continent, year) %>%
  summarize(
    medianLifeExp = median(lifeExp),
    medianGdpPercap = median(gdpPercap),
    # `pop` is an integer column; Asia's 2007 total exceeds the largest
    # representable integer, so sum as double to avoid an NA from overflow.
    sumpop = sum(as.numeric(pop)),
    # Drop the grouping explicitly: no "grouped output" message, and ggplot
    # receives an ungrouped tibble.
    .groups = "drop"
  ) %>%
  ggplot() +
  geom_text(
    aes(x = medianGdpPercap, y = medianLifeExp, label = continent, size = sumpop)
  )

# 5. How GDP per capita changes over the years for the rows labelled
# "China" and "Taiwan": one line per country, coloured by country.
ggplot(
  data = filter(gapminder, country %in% c("China", "Taiwan")),
  mapping = aes(x = year, y = gdpPercap, group = country, colour = country)
) +
  geom_line()

# 6. State of development of every country in 2007.
# Keep only the 2007 rows; both plots below are built from this subset.
gapminder_2007 <- filter(gapminder, year == 2007)

# g1: life expectancy against population (log10 x axis). Continent drives both
# colour and point shape, GDP per capita drives point size; minimal theme.
g1 <- ggplot(
  data = gapminder_2007,
  mapping = aes(
    x = pop,
    y = lifeExp,
    colour = continent,
    size = gdpPercap,
    shape = continent
  )
) +
  geom_point() +
  scale_x_log10() +
  theme_minimal()

# g2: the same scatter without the shape mapping, drawn with the classic theme.
g2 <- ggplot(
  data = gapminder_2007,
  mapping = aes(
    x = pop,
    y = lifeExp,
    colour = continent,
    size = gdpPercap
  )
) +
  geom_point() +
  scale_x_log10() +
  theme_classic()
# Draw several plots on one canvas: g1 and g2 side by side in a single row.
library("gridExtra")
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
grid.arrange(grobs = list(g1, g2), nrow = 1, ncol = 2)

#常用的幾種風格,包括:theme_bw(), theme_classic(), theme_dark(), theme_minimal(), theme_grey(), theme_linedraw()。

# 7. How the countries of each continent changed between 1952 and 2007.
# Life expectancy against GDP per capita; facet rows are continents, facet
# columns are the two years; point size is mapped to population.
ggplot(
  data = filter(gapminder, year %in% c(1952, 2007)),
  mapping = aes(x = gdpPercap, y = lifeExp, colour = continent, size = pop)
) +
  geom_point() +
  facet_grid(rows = vars(continent), cols = vars(year))

# 1952 versus 2007 scatter of life expectancy against GDP per capita, faceted
# by continent (rows) and year (columns); point size is left unmapped here.
gapminder %>%
  filter(year %in% c(1952, 2007)) %>%
  ggplot(mapping = aes(x = gdpPercap, y = lifeExp, colour = continent)) +
  geom_point() +
  facet_grid(continent ~ year)

# Exercise 1: distribution of GDP per capita within each continent (2007).
# One box per continent: continent goes on the x axis and GDP per capita on
# the y axis, so each box summarises that continent's GDP-per-capita values.
gapminder %>%
  filter(year == 2007) %>%
  ggplot() +
  geom_boxplot(aes(x = continent, y = gdpPercap, fill = continent))

# Exercise 2: the ten countries with the highest life expectancy in 2007.
# Sort by descending life expectancy, keep the first ten rows, then draw
# horizontal bars ordered by life expectancy.
ggplot(
  data = gapminder %>%
    filter(year == 2007) %>%
    arrange(desc(lifeExp)) %>%
    slice(seq_len(10)),
  mapping = aes(x = reorder(country, lifeExp), y = lifeExp)
) +
  geom_col() +
  coord_flip()

# Exercise 3: population versus GDP per capita, by continent, in 2007.
# Each continent is drawn as a text label placed at its median GDP per capita
# (x) and median population (y); label size is mapped to total population.
gapminder %>%
  filter(year == 2007) %>%
  group_by(continent, year) %>%
  summarize(
    medianpop = median(pop),
    medianGdpPercap = median(gdpPercap),
    # `pop` is an integer column; Asia's 2007 total exceeds the largest
    # representable integer, so sum as double to avoid an NA from overflow.
    sumpop = sum(as.numeric(pop)),
    # Drop the grouping explicitly: no "grouped output" message, and ggplot
    # receives an ungrouped tibble.
    .groups = "drop"
  ) %>%
  ggplot() +
  geom_text(
    aes(x = medianGdpPercap, y = medianpop, label = continent, size = sumpop)
  )

# Exercise 4: pick two countries and compare how their GDP per capita changes
# over the years (here Brazil and China): one line per country, coloured by
# country.
ggplot(
  filter(gapminder, country %in% c("Brazil", "China")),
  aes(x = year, y = gdpPercap, group = country, colour = country)
) +
  geom_line()