HMD

Harold Nelson

3/14/2022

Human Mortality Database

This is an initial exploration of the Human Mortality Database, which is at https://www.mortality.org/.

Setup

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(plotly)

## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout

USA Male Data

Load the data for USA males into the dataframe USAM. Add a variable country and set it to “USA”. Change the name of qx to qxm Select country, Year, Age and qxm.

Make Age numeric.

Eliminate any missing data.

Solution

USAM <- read_table("/Users/haroldnelson/Dropbox/HMD/lt_male/mltper_1x1/USA.mltper_1x1.txt", skip = 2) %>% 
mutate(country = "USA") %>% 
rename(qxm = qx) %>% 
select(country, Year, Age, qxm) %>% 
mutate(Age = as.numeric(Age)) %>% 
drop_na()

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Year = col_double(),
##   Age = col_character(),
##   mx = col_double(),
##   qx = col_double(),
##   ax = col_double(),
##   lx = col_double(),
##   dx = col_double(),
##   Lx = col_double(),
##   Tx = col_double(),
##   ex = col_double()
## )

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Age = as.numeric(Age)`.
## Caused by warning:
## ! NAs introduced by coercion

Male Infant Mortality

g = USAM %>% 
  filter(Age == 0) %>% 
  ggplot(aes(x = Year, y = qxm)) +
  geom_point()

ggplotly(g)

Male Aged Probability of Death

g = USAM %>% 
  filter(Age == 75) %>% 
  ggplot(aes(x = Year, y = qxm)) +
  geom_point()

ggplotly(g)

USA Female Data

Load the data for USA females into the dataframe USAF. Add a variable country and set it to “USA”. Change the name of qx to qxf Select country, Year, Age and qxf.

Make Age numeric.

Eliminate any missing data.

Solution

USAF <- read_table("/Users/haroldnelson/Dropbox/HMD/lt_female/fltper_1x1/USA.fltper_1x1.txt", skip = 2) %>% 
mutate(country = "USA") %>% 
rename(qxf = qx) %>% 
select(country, Year, Age, qxf) %>% 
mutate(Age = as.numeric(Age)) %>% 
drop_na()

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Year = col_double(),
##   Age = col_character(),
##   mx = col_double(),
##   qx = col_double(),
##   ax = col_double(),
##   lx = col_double(),
##   dx = col_double(),
##   Lx = col_double(),
##   Tx = col_double(),
##   ex = col_double()
## )

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Age = as.numeric(Age)`.
## Caused by warning:
## ! NAs introduced by coercion

Join USAM and USAF into USA

USA = USAM %>% 
  inner_join(USAF)

## Joining with `by = join_by(country, Year, Age)`

head(USA)

## # A tibble: 6 × 5
##   country  Year   Age     qxm     qxf
##   <chr>   <dbl> <dbl>   <dbl>   <dbl>
## 1 USA      1933     0 0.0649  0.0521 
## 2 USA      1933     1 0.00999 0.00883
## 3 USA      1933     2 0.00466 0.00402
## 4 USA      1933     3 0.00333 0.00287
## 5 USA      1933     4 0.00253 0.00223
## 6 USA      1933     5 0.00209 0.00185

Look at Ratio

g= USA %>% 
  filter(Age == 21) %>% 
  mutate(Ratio = qxm/qxf) %>% 
  ggplot(aes(x = Year, y = Ratio)) +
  geom_point()

ggplotly(g)

Male and Female Death Rates at Age 21

g = USA %>% 
  filter(Age == 21) %>% 
  ggplot((aes(x = Year))) +
  geom_point(aes(y = qxm), color = "blue") +
  geom_point(aes(y = qxf), color = "red")
ggplotly(g)

Canada

Do the same for Canada.

Solution

CANM <- read_table("/Users/haroldnelson/Dropbox/HMD/lt_male/mltper_1x1/CAN.mltper_1x1.txt", skip = 2) %>% 
  mutate(country = "Canada") %>% 
  rename(qxm = qx) %>% 
  select(country, Year, Age, qxm) %>% 
  mutate(Age = as.numeric(Age)) %>% 
  drop_na()

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Year = col_double(),
##   Age = col_character(),
##   mx = col_double(),
##   qx = col_double(),
##   ax = col_double(),
##   lx = col_double(),
##   dx = col_double(),
##   Lx = col_double(),
##   Tx = col_double(),
##   ex = col_double()
## )

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Age = as.numeric(Age)`.
## Caused by warning:
## ! NAs introduced by coercion

CANF <- read_table("/Users/haroldnelson/Dropbox/HMD/lt_female/fltper_1x1/CAN.fltper_1x1.txt", skip = 2) %>% 
  mutate(country = "Canada") %>% 
  rename(qxf = qx) %>% 
  select(country, Year, Age, qxf) %>% 
  mutate(Age = as.numeric(Age)) %>% 
  drop_na()

## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   Year = col_double(),
##   Age = col_character(),
##   mx = col_double(),
##   qx = col_double(),
##   ax = col_double(),
##   lx = col_double(),
##   dx = col_double(),
##   Lx = col_double(),
##   Tx = col_double(),
##   ex = col_double()
## )

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Age = as.numeric(Age)`.
## Caused by warning:
## ! NAs introduced by coercion

Join CANM and CANF into CAN

CAN = CANM %>% 
  inner_join(CANF)

## Joining with `by = join_by(country, Year, Age)`

head(CAN)

## # A tibble: 6 × 5
##   country  Year   Age     qxm     qxf
##   <chr>   <dbl> <dbl>   <dbl>   <dbl>
## 1 Canada   1921     0 0.126   0.0987 
## 2 Canada   1921     1 0.0176  0.0155 
## 3 Canada   1921     2 0.00848 0.00738
## 4 Canada   1921     3 0.00609 0.00543
## 5 Canada   1921     4 0.00473 0.00455
## 6 Canada   1921     5 0.00382 0.00343

Combine

Combine the two dataframes into USA_CAN using rbind().

Solution

USA_CAN = rbind(USA, CAN)
str(USA_CAN)

## tibble [21,010 × 5] (S3: tbl_df/tbl/data.frame)
##  $ country: chr [1:21010] "USA" "USA" "USA" "USA" ...
##  $ Year   : num [1:21010] 1933 1933 1933 1933 1933 ...
##  $ Age    : num [1:21010] 0 1 2 3 4 5 6 7 8 9 ...
##  $ qxm    : num [1:21010] 0.06486 0.00999 0.00466 0.00333 0.00253 ...
##  $ qxf    : num [1:21010] 0.05208 0.00883 0.00402 0.00287 0.00223 ...

Infant Mortality USA and Canada

Create this graph beginning in 1940.

Solution

USA_CAN %>% 
  filter(Age == 0 & Year > 1940) %>% 
  ggplot(aes(x = Year, y = qxm, color = country)) +
  geom_point() +
  ggtitle("Male Infant Mortality - USA and Canada")

USA/Canada 2

Create a graph comparing USA and Canadian mortality at age 79.

Solution

USA_CAN %>% 
  filter(Age == 79 & Year > 1940) %>% 
  ggplot(aes(x = Year, y = qxm, color = country)) +
  geom_point() +
  ggtitle("Age 75 Male Mortality - USA and Canada")

Questions

Imagine the questions we could answer with the HMD.

Do other countries have the same pattern of excess male deaths?

Has the USA always had this pattern?

What should we look at for Friday?

Male and Female Death Rates at Age 21

g = USA_CAN %>% 
  filter(Age == 21) %>% 
  ggplot((aes(x = Year))) +
  geom_point(aes(y = qxm), color = "blue") +
  geom_point(aes(y = qxf), color = "red") +
  facet_wrap(~country)
ggplotly(g)

Ratio at Age 21 USA and Canada

g = USA_CAN %>% 
  filter(Age ==21) %>% 
  mutate(Ratio =qxm/qxf) %>% 
  ggplot(aes(x = Year, y = Ratio, color = country)) +
  geom_point() 

ggplotly(g)