The Dataset
country_code = c('AUS', 'AUT', 'BEL', 'BGR', 'BLR', 'CAN', 'CHE', 'CHL',
'CZE', 'DEU', 'DNK', 'ESP', 'EST', 'FIN', 'FRA', 'GBR',
'GRC', 'HKG', 'HRV', 'HUN', 'IRL', 'ISL', 'ISR', 'ITA',
'JPN', 'KOR', 'LTU', 'LUX', 'LVA', 'NLD', 'NOR', 'NZL',
'POL', 'PRT', 'RUS', 'SVK', 'SVN', 'SWE', 'TWN', 'UKR',
'USA')
# Scrape Female Data
life_table = NULL
for (i in 1:length(country_code)) {
link = paste('https://raw.githubusercontent.com/QuanNguyenIU/Human-Mortality/main/',
country_code[i], '_LT_F.txt', sep = '')
df = read.table(link, header = TRUE) %>% select (year = Year, age = Age, qx, lx, dx)
df[df == '110+'] = '110'
df$gender = 'F'
df$country = country_code[i]
if (i == 1) {
life_table = df
} else {
life_table = rbind(life_table, df)
}
}
# Scrape Male Data
for (i in 1:length(country_code)) {
link = paste('https://raw.githubusercontent.com/QuanNguyenIU/Human-Mortality/main/',
country_code[i], '_LT_M.txt', sep = '')
df = read.table(link, header = TRUE) %>% select (year = Year, age = Age, qx, lx, dx)
df[df == '110+'] = '110'
df$gender = 'M'
df$country = country_code[i]
life_table = rbind(life_table, df)
}
# Preprocessing
life_table = transform(life_table, age = as.numeric(age), qx = as.numeric(qx),
lx = as.numeric(lx), dx = as.numeric(dx)) %>% drop_na()
life_table$px = 1 - life_table$qx
# Overview
glimpse(life_table)
## Rows: 820,290
## Columns: 8
## $ year <int> 1921, 1921, 1921, 1921, 1921, 1921, 1921, 1921, 1921, 1921, 19~
## $ age <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ~
## $ qx <dbl> 0.05750, 0.01199, 0.00576, 0.00288, 0.00325, 0.00251, 0.00248,~
## $ lx <dbl> 100000, 94250, 93120, 92583, 92316, 92016, 91785, 91557, 91391~
## $ dx <dbl> 5750, 1130, 537, 267, 300, 231, 228, 166, 126, 125, 114, 105, ~
## $ gender <chr> "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F~
## $ country <chr> "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS",~
## $ px <dbl> 0.94250, 0.98801, 0.99424, 0.99712, 0.99675, 0.99749, 0.99752,~
Finland, the Happiest Country
FIN_1999 = life_table %>% filter(country == 'FIN', year == '1999')
ggplot(FIN_1999, aes(x = age, y = qx, color = gender)) + geom_line() +
labs(x = "Age", y = "Mortality Rate", title = "Mortality Rates, Finland 1999")

ggplot(FIN_1999, aes(x = age, y = log(qx), color = gender)) + geom_line() +
labs(x = "Age", y = "Mortality Rate",
title = "Log Mortality Rates, Finland 1999")

kpx = cumprod((FIN_1999 %>% filter(age %in% (18:100), gender == 'F'))$px)
FIN_1999_sp = data.frame(x = 1:length(kpx), kpx, le = rev(cumsum(rev(kpx))),
gender = 'F')
kpx = cumprod((FIN_1999 %>% filter(age %in% (18:100), gender == 'M'))$px)
FIN_1999_sp = rbind(FIN_1999_sp, data.frame(x = 1:length(kpx), kpx,
le = rev(cumsum(rev(kpx))),
gender = 'M'))
ggplot(FIN_1999_sp, aes(x, y = kpx, color = gender)) + geom_line() +
labs(x = "Minimum Survival Years", y = "Probability",
title = "Survival Probabilities for 18-Year-Old People, Finland 1999")

ggplot(FIN_1999_sp, aes(x, y = le, color = gender)) + geom_line() +
labs(x = "Age", y = "Future Lifetime",
title = "Future Lifetime by Age, Finland 1999")

FIN = life_table %>% filter(country == 'FIN')
ggplot(FIN, aes(x = age, y = qx, color = year)) + geom_line(aes(group = year)) +
facet_wrap(~gender) + labs(x ="Age", y = "Mortality Rates",
title = "Historical Mortality Rates, Finland")

FIN_le = FIN %>% group_by(gender, year) %>%
mutate(kpx = cumprod(px), le = sum(kpx)) %>% filter(age==0) %>%
select(country, year, gender, le)
ggplot(FIN_le, aes(x = year, y = le, color = gender)) + geom_line() +
labs(x = "Year", y = "Life Expectancy",
title = "Historical Life Expectancy, Finland")

Global Observation
all_le = life_table %>% group_by(year, country, gender) %>%
mutate(kpx = cumprod(px), le = sum(kpx)) %>%
filter(age == 0,year >= min(FIN_le$year)) %>% ungroup() %>%
group_by(year,gender) %>% summarize(le = mean(le)) %>%
mutate(country = "all")
ggplot(rbind(FIN_le, all_le),
aes(x = year, y = le, color = gender, linetype = country)) +
geom_line() + labs(x = "Year", y = "Life Expectancy",
title = "Historical Life Expectancy, Finland versus Global")

all_le = life_table %>% group_by(year, country, gender) %>%
mutate(kpx = cumprod(px), le = sum(kpx)) %>% filter(age==0) %>% ungroup()
ggplot(all_le, aes(x = year, y = le, color = country)) +
geom_point(size = 0.5) + facet_wrap(~gender) +
labs(x = "Year", y = "Life Expectancy",
title = "Historical Life Expectancy By Countries")

empty = element_blank()
ggplot(all_le %>% filter(year %in% (1900:1960)),
aes(x = year, y = le, color = gender)) + geom_line() +
facet_wrap(~country) + theme(axis.ticks.x = empty, axis.text.x = empty,
axis.ticks.y = empty, axis.text.y = empty) +
labs(x = "Year", y = "Life Expectancy",
title = "Historical Life Expectancy By Countries, 1900 - 1960")

countries = c('AUS', 'BEL', 'CAN', 'CHE', 'DNK',
'ESP', 'FIN', 'FRA', 'GBR', 'ISL',
'ITA', 'NLD', 'NOR', 'SWE', 'USA')
ggplot(all_le %>% filter(year %in% (1900:1960), country %in% countries),
aes(x = year, y = le, color = gender)) + geom_line() +
facet_wrap(~country) + theme(axis.ticks.x = empty, axis.text.x = empty) +
labs(x = "Year", y = "Life Expectancy",
title = "Historical Life Expectancy By Selected Countries, 1900 - 1960")
