#0520_gapminder
#Gapminder is an independent educational non-profit fighting global misconceptions.
#Hans Rosling在TED談全球健康發展
#https://www.ted.com/talks/hans_rosling_the_best_stats_you_ve_ever_seen?language=zh-tw
#欄位簡介
# "country" 國家
# "continent" 洲
# "year"年
# "lifeExp"預期壽命
# "pop" 人口
# "gdpPercap"人均GDP
#一、先安裝下列套件
#install.packages("gapminder")
#install.packages("gridExtra")
#install.packages("ggplot2")
#install.packages("tidyverse")
# Step 2: attach the gapminder package.
library(gapminder)
# The package ships a single dataset that is also named `gapminder`.
# Copy it into a workspace variable of the same name so it can be inspected.
gapminder <- gapminder::gapminder
# Size of the data: number of rows, then number of columns.
dim(gapminder)
## [1] 1704 6
# Per-column summary statistics.
summary(gapminder)
## country continent year lifeExp
## Afghanistan: 12 Africa :624 Min. :1952 Min. :23.60
## Albania : 12 Americas:300 1st Qu.:1966 1st Qu.:48.20
## Algeria : 12 Asia :396 Median :1980 Median :60.71
## Angola : 12 Europe :360 Mean :1980 Mean :59.47
## Argentina : 12 Oceania : 24 3rd Qu.:1993 3rd Qu.:70.85
## Australia : 12 Max. :2007 Max. :82.60
## (Other) :1632
## pop gdpPercap
## Min. :6.001e+04 Min. : 241.2
## 1st Qu.:2.794e+06 1st Qu.: 1202.1
## Median :7.024e+06 Median : 3531.8
## Mean :2.960e+07 Mean : 7215.3
## 3rd Qu.:1.959e+07 3rd Qu.: 9325.5
## Max. :1.319e+09 Max. :113523.1
##
# Preview the first six rows of the data.
head(gapminder, n = 6)
## # A tibble: 6 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 1952 28.8 8425333 779.
## 2 Afghanistan Asia 1957 30.3 9240934 821.
## 3 Afghanistan Asia 1962 32.0 10267083 853.
## 4 Afghanistan Asia 1967 34.0 11537966 836.
## 5 Afghanistan Asia 1972 36.1 13079460 740.
## 6 Afghanistan Asia 1977 38.4 14880372 786.
# Column names of the dataset.
names(gapminder)
## [1] "country" "continent" "year" "lifeExp" "pop" "gdpPercap"
# Every distinct country in the data (printed as a factor with its levels).
unique(gapminder[["country"]])
## [1] Afghanistan Albania Algeria
## [4] Angola Argentina Australia
## [7] Austria Bahrain Bangladesh
## [10] Belgium Benin Bolivia
## [13] Bosnia and Herzegovina Botswana Brazil
## [16] Bulgaria Burkina Faso Burundi
## [19] Cambodia Cameroon Canada
## [22] Central African Republic Chad Chile
## [25] China Colombia Comoros
## [28] Congo, Dem. Rep. Congo, Rep. Costa Rica
## [31] Cote d'Ivoire Croatia Cuba
## [34] Czech Republic Denmark Djibouti
## [37] Dominican Republic Ecuador Egypt
## [40] El Salvador Equatorial Guinea Eritrea
## [43] Ethiopia Finland France
## [46] Gabon Gambia Germany
## [49] Ghana Greece Guatemala
## [52] Guinea Guinea-Bissau Haiti
## [55] Honduras Hong Kong, China Hungary
## [58] Iceland India Indonesia
## [61] Iran Iraq Ireland
## [64] Israel Italy Jamaica
## [67] Japan Jordan Kenya
## [70] Korea, Dem. Rep. Korea, Rep. Kuwait
## [73] Lebanon Lesotho Liberia
## [76] Libya Madagascar Malawi
## [79] Malaysia Mali Mauritania
## [82] Mauritius Mexico Mongolia
## [85] Montenegro Morocco Mozambique
## [88] Myanmar Namibia Nepal
## [91] Netherlands New Zealand Nicaragua
## [94] Niger Nigeria Norway
## [97] Oman Pakistan Panama
## [100] Paraguay Peru Philippines
## [103] Poland Portugal Puerto Rico
## [106] Reunion Romania Rwanda
## [109] Sao Tome and Principe Saudi Arabia Senegal
## [112] Serbia Sierra Leone Singapore
## [115] Slovak Republic Slovenia Somalia
## [118] South Africa Spain Sri Lanka
## [121] Sudan Swaziland Sweden
## [124] Switzerland Syria Taiwan
## [127] Tanzania Thailand Togo
## [130] Trinidad and Tobago Tunisia Turkey
## [133] Uganda United Kingdom United States
## [136] Uruguay Venezuela Vietnam
## [139] West Bank and Gaza Yemen, Rep. Zambia
## [142] Zimbabwe
## 142 Levels: Afghanistan Albania Algeria Angola Argentina Australia ... Zimbabwe
# Every distinct year covered by the data.
unique(gapminder[["year"]])
## [1] 1952 1957 1962 1967 1972 1977 1982 1987 1992 1997 2002 2007
# Attach the tidyverse (dplyr and ggplot2 are used by everything below).
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.3 ✓ purrr 0.3.4
## ✓ tibble 3.0.6 ✓ dplyr 1.0.5
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# 1. Distribution of life expectancy within each continent (2007 snapshot),
#    drawn as one box per continent.
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = continent, y = lifeExp, fill = continent)) +
  geom_boxplot()

# 2. List the countries that belong to Oceania.
#    Restricting to a single year (2007) yields one row per country.
gapminder %>%
  filter(year == 2007, continent == "Oceania")
## # A tibble: 2 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Australia Oceania 2007 81.2 20434176 34435.
## 2 New Zealand Oceania 2007 80.2 4115771 25185.
# 3. The ten countries with the highest life expectancy in 1952,
#    shown as a horizontal bar chart ordered by life expectancy.
gapminder %>%
  filter(year == 1952) %>%
  arrange(desc(lifeExp)) %>%
  head(10) %>%
  ggplot() +
  geom_col(aes(x = reorder(country, lifeExp), y = lifeExp)) +
  coord_flip()

# 4. Life expectancy versus GDP per capita by continent, 2007.
#    Each continent is drawn as a text label positioned at its median GDP per
#    capita (x) and median life expectancy (y); the label size reflects the
#    continent's total population.
gapminder %>%
  filter(year == 2007) %>%
  group_by(continent, year) %>%
  summarize(
    medianLifeExp = median(lifeExp),      # median life expectancy
    medianGdpPercap = median(gdpPercap),  # median GDP per capita
    # Total population. `pop` is an integer column and Asia's total exceeds
    # the largest 32-bit integer, so sum(pop) would return NA (with a warning)
    # and Asia's label would be dropped from the plot. Sum as double instead.
    sumpop = sum(as.numeric(pop)),
    # Return an ungrouped tibble; this also avoids the summarise() message
    # about grouped output.
    .groups = "drop"
  ) %>%
  ggplot() +
  geom_text(aes(
    x = medianGdpPercap,
    y = medianLifeExp,
    label = continent,
    size = sumpop
  ))

# 5. GDP per capita over time: Taiwan compared with mainland China,
#    one line per country.
gapminder %>%
  filter(country %in% c("China", "Taiwan")) %>%
  ggplot() +
  geom_line(aes(x = year, y = gdpPercap, group = country, color = country))

# 6. Compare how the world's countries developed between 1952 and 2007.
# Keep only the 1952 observations in `gapminder_1952`.
gapminder_1952 <- filter(gapminder, year == 1952)
# Scatter plot of the 1952 data, stored in `g1`.
g1 <- gapminder_1952 %>%
  ggplot(aes(
    x = pop,            # x axis: population (log10 scale, set below)
    y = lifeExp,        # y axis: life expectancy
    color = continent,  # point color distinguishes continents
    size = gdpPercap,   # point size follows GDP per capita
    shape = continent   # point shape also distinguishes continents
  )) +
  geom_point() +
  scale_x_log10() +
  # Other commonly used themes: theme_bw(), theme_classic(), theme_dark(),
  # theme_grey(), theme_linedraw().
  theme_minimal()
# Evaluating the object at top level displays the chart.
g1

# Keep only the 2007 observations in `gapminder_2007`.
gapminder_2007 <- filter(gapminder, year == 2007)
# Scatter plot of the 2007 data, stored in `g2`.
g2 <- gapminder_2007 %>%
  ggplot(aes(
    x = pop,            # x axis: population (log10 scale, set below)
    y = lifeExp,        # y axis: life expectancy
    color = continent,  # point color distinguishes continents
    size = gdpPercap,   # point size follows GDP per capita
    shape = continent   # point shape also distinguishes continents
  )) +
  geom_point() +
  scale_x_log10() +
  # Other commonly used themes: theme_bw(), theme_dark(), theme_minimal(),
  # theme_grey(), theme_linedraw().
  theme_classic()
# Evaluating the object at top level displays the chart.
g2

# Draw both charts on a single canvas (one row, two columns) so the two
# years can be compared side by side.
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
grid.arrange(
  g1, g2,
  ncol = 2,
  nrow = 1
)

# 7. Change within each continent between 1952 and 2007, using facet_grid():
#    one row of panels per continent and one column of panels per year.
gapminder %>%
  filter(year %in% c(1952, 2007)) %>%
  ggplot() +
  geom_point(aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)) +
  facet_grid(rows = vars(continent), cols = vars(year))

# Exercise 1: distribution of GDP per capita within each continent
# (2007 snapshot), drawn as one box per continent.
gapminder %>%
  filter(year == 2007) %>%
  ggplot(aes(x = continent, y = gdpPercap, fill = continent)) +
  geom_boxplot()

# Exercise 2: the ten countries with the highest life expectancy in 2007,
# shown as a horizontal bar chart ordered by life expectancy.
gapminder %>%
  filter(year == 2007) %>%
  arrange(desc(lifeExp)) %>%
  head(10) %>%
  ggplot() +
  geom_col(aes(x = reorder(country, lifeExp), y = lifeExp)) +
  coord_flip()

# Exercise 3: population versus GDP per capita by continent, 2007.
# Each continent is drawn as a text label positioned at its median GDP per
# capita (x) and median country population (y); the label size reflects the
# continent's total population.
gapminder %>%
  filter(year == 2007) %>%
  group_by(continent, year) %>%
  summarize(
    medianpop = median(pop),              # median country population
    medianGdpPercap = median(gdpPercap),  # median GDP per capita
    # Total population. `pop` is an integer column and Asia's total exceeds
    # the largest 32-bit integer, so sum(pop) would return NA (with a warning)
    # and Asia's label would be dropped from the plot. Sum as double instead.
    sumpop = sum(as.numeric(pop)),
    # Return an ungrouped tibble; this also avoids the summarise() message
    # about grouped output.
    .groups = "drop"
  ) %>%
  ggplot() +
  geom_text(aes(
    x = medianGdpPercap,
    y = medianpop,
    label = continent,
    size = sumpop
  ))

# Exercise 4: pick any two countries and compare their GDP per capita over
# time (here Oman and Spain), one line per country.
gapminder %>%
  filter(country %in% c("Oman", "Spain")) %>%
  ggplot() +
  geom_line(aes(x = year, y = gdpPercap, group = country, color = country))

# Exercise 5 (free practice 1): list the countries that belong to Asia.
# Restricting to a single year (2007) yields one row per country.
gapminder %>%
  filter(year == 2007, continent == "Asia")
## # A tibble: 33 x 6
## country continent year lifeExp pop gdpPercap
## <fct> <fct> <int> <dbl> <int> <dbl>
## 1 Afghanistan Asia 2007 43.8 31889923 975.
## 2 Bahrain Asia 2007 75.6 708573 29796.
## 3 Bangladesh Asia 2007 64.1 150448339 1391.
## 4 Cambodia Asia 2007 59.7 14131858 1714.
## 5 China Asia 2007 73.0 1318683096 4959.
## 6 Hong Kong, China Asia 2007 82.2 6980412 39725.
## 7 India Asia 2007 64.7 1110396331 2452.
## 8 Indonesia Asia 2007 70.6 223547000 3541.
## 9 Iran Asia 2007 71.0 69453570 11606.
## 10 Iraq Asia 2007 59.5 27499638 4471.
## # … with 23 more rows
# Exercise 6 (free practice 2): GDP per capita over time for Japan and
# Taiwan, following the same approach as example 5; one line per country.
gapminder %>%
  filter(country %in% c("Japan", "Taiwan")) %>%
  ggplot() +
  geom_line(aes(x = year, y = gdpPercap, group = country, color = country))
