Human Mortality Database - Life Expectancy

Instructor: Dr. Le Nhat Tan

1 The Dataset

country_code = c('AUS', 'AUT', 'BEL', 'BGR', 'BLR', 'CAN', 'CHE', 'CHL', 
                 'CZE', 'DEU', 'DNK', 'ESP', 'EST', 'FIN', 'FRA', 'GBR',
                 'GRC', 'HKG', 'HRV', 'HUN', 'IRL', 'ISL', 'ISR', 'ITA',
                 'JPN', 'KOR', 'LTU', 'LUX', 'LVA', 'NLD', 'NOR', 'NZL',
                 'POL', 'PRT', 'RUS', 'SVK', 'SVN', 'SWE', 'TWN', 'UKR',
                 'USA')

# Scrape Female Data
life_table = NULL
for (i in 1:length(country_code)) {
  link = paste('https://raw.githubusercontent.com/QuanNguyenIU/Human-Mortality/main/', 
               country_code[i], '_LT_F.txt', sep = '')
  df = read.table(link, header = TRUE) %>% select (year = Year, age = Age, qx, lx, dx)
  df[df == '110+'] = '110'
  df$gender = 'F'
  df$country = country_code[i]
  if (i == 1) {
    life_table = df
  } else {
    life_table = rbind(life_table, df)
  }
}

# Scrape Male Data
for (i in 1:length(country_code)) {
  link = paste('https://raw.githubusercontent.com/QuanNguyenIU/Human-Mortality/main/', 
               country_code[i], '_LT_M.txt', sep = '')
  df = read.table(link, header = TRUE) %>% select (year = Year, age = Age, qx, lx, dx)
  df[df == '110+'] = '110'
  df$gender = 'M'
  df$country = country_code[i]
  life_table = rbind(life_table, df)
}

# Preprocessing
life_table = transform(life_table, age = as.numeric(age), qx = as.numeric(qx),
                       lx = as.numeric(lx), dx = as.numeric(dx)) %>% drop_na()
life_table$px = 1 - life_table$qx
# Overview
glimpse(life_table)
## Rows: 820,290
## Columns: 8
## $ year    <int> 1921, 1921, 1921, 1921, 1921, 1921, 1921, 1921, 1921, 1921, 19~
## $ age     <dbl> 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, ~
## $ qx      <dbl> 0.05750, 0.01199, 0.00576, 0.00288, 0.00325, 0.00251, 0.00248,~
## $ lx      <dbl> 100000, 94250, 93120, 92583, 92316, 92016, 91785, 91557, 91391~
## $ dx      <dbl> 5750, 1130, 537, 267, 300, 231, 228, 166, 126, 125, 114, 105, ~
## $ gender  <chr> "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F", "F~
## $ country <chr> "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS", "AUS",~
## $ px      <dbl> 0.94250, 0.98801, 0.99424, 0.99712, 0.99675, 0.99749, 0.99752,~

2 Finland, the Happiest Country

FIN_1999 = life_table %>% filter(country == 'FIN', year == '1999')
ggplot(FIN_1999, aes(x = age, y = qx, color = gender)) + geom_line() + 
  labs(x = "Age", y = "Mortality Rate", title = "Mortality Rates, Finland 1999")

ggplot(FIN_1999, aes(x = age, y = log(qx), color = gender)) + geom_line() + 
  labs(x = "Age", y = "Mortality Rate", 
       title = "Log Mortality Rates, Finland 1999")

kpx = cumprod((FIN_1999 %>% filter(age %in% (18:100), gender == 'F'))$px)
FIN_1999_sp = data.frame(x = 1:length(kpx), kpx, le = rev(cumsum(rev(kpx))),
                         gender = 'F')
kpx = cumprod((FIN_1999 %>% filter(age %in% (18:100), gender == 'M'))$px)
FIN_1999_sp = rbind(FIN_1999_sp, data.frame(x = 1:length(kpx), kpx,
                                            le = rev(cumsum(rev(kpx))),
                                            gender = 'M'))
ggplot(FIN_1999_sp, aes(x, y = kpx, color = gender)) + geom_line() + 
  labs(x = "Minimum Survival Years", y = "Probability", 
       title = "Survival Probabilities for 18-Year-Old People, Finland 1999")

ggplot(FIN_1999_sp, aes(x, y = le, color = gender)) + geom_line() + 
  labs(x = "Age", y = "Future Lifetime", 
       title = "Future Lifetime by Age, Finland 1999")

FIN = life_table %>% filter(country == 'FIN')
ggplot(FIN, aes(x = age, y = qx, color = year)) + geom_line(aes(group = year)) + 
  facet_wrap(~gender) + labs(x ="Age", y = "Mortality Rates", 
                             title = "Historical Mortality Rates, Finland")

FIN_le = FIN %>% group_by(gender, year) %>% 
  mutate(kpx = cumprod(px), le = sum(kpx)) %>% filter(age==0) %>% 
  select(country, year, gender, le)
ggplot(FIN_le, aes(x = year, y = le, color = gender)) + geom_line() +
  labs(x = "Year", y = "Life Expectancy", 
       title = "Historical Life Expectancy, Finland")

3 Global Observation

all_le = life_table %>% group_by(year, country, gender) %>% 
  mutate(kpx = cumprod(px), le = sum(kpx)) %>% 
  filter(age == 0,year >= min(FIN_le$year)) %>% ungroup() %>% 
  group_by(year,gender) %>%  summarize(le = mean(le)) %>% 
  mutate(country = "all")
ggplot(rbind(FIN_le, all_le), 
       aes(x = year, y = le, color = gender, linetype = country)) + 
  geom_line() + labs(x = "Year", y = "Life Expectancy",
                     title = "Historical Life Expectancy, Finland versus Global")

all_le = life_table %>% group_by(year, country, gender) %>% 
  mutate(kpx = cumprod(px), le = sum(kpx)) %>% filter(age==0) %>% ungroup()
ggplot(all_le, aes(x = year, y = le, color = country)) + 
  geom_point(size = 0.5) + facet_wrap(~gender) + 
  labs(x = "Year", y = "Life Expectancy",
       title = "Historical Life Expectancy By Countries")

empty = element_blank()
ggplot(all_le %>% filter(year %in% (1900:1960)), 
       aes(x = year, y = le, color = gender)) + geom_line() +
  facet_wrap(~country) + theme(axis.ticks.x = empty, axis.text.x = empty,
                               axis.ticks.y = empty, axis.text.y = empty) +
  labs(x = "Year", y = "Life Expectancy", 
       title = "Historical Life Expectancy By Countries, 1900 - 1960")

countries = c('AUS', 'BEL', 'CAN', 'CHE', 'DNK', 
              'ESP', 'FIN', 'FRA', 'GBR', 'ISL', 
              'ITA', 'NLD', 'NOR', 'SWE', 'USA')
ggplot(all_le %>% filter(year %in% (1900:1960), country %in% countries), 
       aes(x = year, y = le, color = gender)) + geom_line() +
  facet_wrap(~country) + theme(axis.ticks.x = empty, axis.text.x = empty) +
  labs(x = "Year", y = "Life Expectancy", 
       title = "Historical Life Expectancy By Selected Countries, 1900 - 1960")