Setup

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Problem 1

Prepare your analysis dataframe using data extracted from the Human Mortality Database. Begin by selecting another country (not Canada), which you will compare with the USA. When you are done, your dataframe should have the following variables: country, Year, Age, qxm, and qxf.

Run str() on the dataframe and create a table of the values in the variable country. There should be two values in country: USA and whatever other country you select.

# Iceland, males: read the life table and keep country, Year, Age and qxm.
# Age is read as character; as.numeric() turns any non-numeric age label
# into NA (hence the coercion warning) and drop_na() then removes every
# row that contains an NA.
islm <- read_table("ISL.mltper_1x1.txt", skip = 2) %>%
  mutate(country = "Iceland", Age = as.numeric(Age)) %>%
  select(country, Year, Age, qxm) %>%
  drop_na()
## 
## -- Column specification --------------------------------------------------------
## cols(
##   Year = col_double(),
##   Age = col_character(),
##   mx = col_double(),
##   qxm = col_double(),
##   ax = col_double(),
##   lx = col_double(),
##   dx = col_double(),
##   Lx = col_double(),
##   Tx = col_double(),
##   ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
# Iceland, females: read the life table and keep country, Year, Age and qxf.
# Age is read as character; as.numeric() turns any non-numeric age label
# into NA (hence the coercion warning) and drop_na() then removes every
# row that contains an NA.
islf <- read_table("ISL.fltper_1x1.txt", skip = 2) %>%
  mutate(country = "Iceland", Age = as.numeric(Age)) %>%
  select(country, Year, Age, qxf) %>%
  drop_na()
## 
## -- Column specification --------------------------------------------------------
## cols(
##   Year = col_double(),
##   Age = col_character(),
##   mx = col_double(),
##   qxf = col_double(),
##   ax = col_double(),
##   lx = col_double(),
##   dx = col_double(),
##   Lx = col_double(),
##   Tx = col_double(),
##   ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
# Iceland: pair the male and female rows. No `by` is given, so the join
# uses every column the two tables share (reported in the message below).
isl <- inner_join(islm, islf)
## Joining, by = c("country", "Year", "Age")
head(isl)
## # A tibble: 6 x 5
##   country  Year   Age    qxm    qxf
##   <chr>   <dbl> <dbl>  <dbl>  <dbl>
## 1 Iceland  1838     0 0.321  0.294 
## 2 Iceland  1838     1 0.0442 0.0429
## 3 Iceland  1838     2 0.0286 0.0279
## 4 Iceland  1838     3 0.0231 0.0199
## 5 Iceland  1838     4 0.0184 0.0144
## 6 Iceland  1838     5 0.0128 0.0118
# USA, males: read the life table and keep country, Year, Age and qxm.
# Age is read as character; as.numeric() turns any non-numeric age label
# into NA (hence the coercion warning) and drop_na() then removes every
# row that contains an NA.
USAM <- read_table("USA.mltper_1x1.txt", skip = 2) %>%
  mutate(country = "USA", Age = as.numeric(Age)) %>%
  select(country, Year, Age, qxm) %>%
  drop_na()
## 
## -- Column specification --------------------------------------------------------
## cols(
##   Year = col_double(),
##   Age = col_character(),
##   mx = col_double(),
##   qxm = col_double(),
##   ax = col_double(),
##   lx = col_double(),
##   dx = col_double(),
##   Lx = col_double(),
##   Tx = col_double(),
##   ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
# USA, females: read the life table and keep country, Year, Age and qxf.
# Age is read as character; as.numeric() turns any non-numeric age label
# into NA (hence the coercion warning) and drop_na() then removes every
# row that contains an NA.
USAF <- read_table("USA.fltper_1x1.txt", skip = 2) %>%
  mutate(country = "USA", Age = as.numeric(Age)) %>%
  select(country, Year, Age, qxf) %>%
  drop_na()
## 
## -- Column specification --------------------------------------------------------
## cols(
##   Year = col_double(),
##   Age = col_character(),
##   mx = col_double(),
##   qxf = col_double(),
##   ax = col_double(),
##   lx = col_double(),
##   dx = col_double(),
##   Lx = col_double(),
##   Tx = col_double(),
##   ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
# USA: pair the male and female rows. No `by` is given, so the join
# uses every column the two tables share (reported in the message below).
USA <- inner_join(USAM, USAF)
## Joining, by = c("country", "Year", "Age")
head(USA)
## # A tibble: 6 x 5
##   country  Year   Age     qxm     qxf
##   <chr>   <dbl> <dbl>   <dbl>   <dbl>
## 1 USA      1933     0 0.0649  0.0521 
## 2 USA      1933     1 0.00999 0.00883
## 3 USA      1933     2 0.00466 0.00402
## 4 USA      1933     3 0.00333 0.00287
## 5 USA      1933     4 0.00253 0.00223
## 6 USA      1933     5 0.00209 0.00185
# Stack the USA and Iceland tables into the analysis data frame.
# bind_rows() matches columns by name rather than by position.
usa_isl <- bind_rows(USA, isl)

# Table of the values in `country`, as Problem 1 asks for; it should show
# exactly two values, "Iceland" and "USA".
table(usa_isl$country)

# Structure of the combined data frame.
str(usa_isl)
## tibble [29,480 x 5] (S3: tbl_df/tbl/data.frame)
##  $ country: chr [1:29480] "USA" "USA" "USA" "USA" ...
##  $ Year   : num [1:29480] 1933 1933 1933 1933 1933 ...
##  $ Age    : num [1:29480] 0 1 2 3 4 5 6 7 8 9 ...
##  $ qxm    : num [1:29480] 0.06486 0.00999 0.00466 0.00333 0.00253 ...
##  $ qxf    : num [1:29480] 0.05208 0.00883 0.00402 0.00287 0.00223 ...

Problem 2

Create a graph of infant mortality for both countries over as many years as you have. Be sure to comment on the results. Use plotly.

# Infant mortality (Age == 0) by year, males and females together, with one
# panel per country. Colour is mapped inside aes() so that the plot carries
# a legend identifying the sexes; the manual scale keeps the original
# blue (male) / red (female) colours. The y-axis gets a neutral label
# because both qxm and qxf are drawn on it.
gi <- usa_isl %>%
  filter(Age == 0) %>%
  ggplot(aes(x = Year)) +
  geom_point(aes(y = qxm, color = "Male")) +
  geom_point(aes(y = qxf, color = "Female")) +
  scale_color_manual(values = c(Male = "blue", Female = "red")) +
  facet_wrap(~country) +
  labs(
    title = "Infant Mortality - USA and Iceland",
    y = "Mortality (qx)",
    color = "Sex"
  )

ggplotly(gi)
# The infant mortality rates for both countries are very similar

Problem 3

Create a graph of mortality for your age and gender for both countries over as many years as you have. Be sure to comment on the results. Use plotly.

# Male mortality (qxm) at age 37 by year, one colour per country.
gm <- ggplot(
  data = filter(usa_isl, Age == 37),
  mapping = aes(x = Year, y = qxm, color = country)
) +
  geom_point() +
  labs(title = "Age 37 Male Mortality - USA and Iceland")

ggplotly(gm)
# Men my age have a higher mortality rate in the US than in Iceland.

Problem 4

Create a graph of mortality at age 21 for both genders against year. Use facet_wrap() to do this for both countries side-by-side. Be sure to comment. Use plotly.

# Mortality at age 21 by year, males and females together, with the two
# countries side by side via facet_wrap(). Colour is mapped inside aes() so
# that the plot carries a legend identifying the sexes; the manual scale
# keeps the original blue (male) / red (female) colours. The y-axis gets a
# neutral label because both qxm and qxf are drawn on it.
gf <- usa_isl %>%
  filter(Age == 21) %>%
  ggplot(aes(x = Year)) +
  geom_point(aes(y = qxm, color = "Male")) +
  geom_point(aes(y = qxf, color = "Female")) +
  scale_color_manual(values = c(Male = "blue", Female = "red")) +
  facet_wrap(~country) +
  labs(
    title = "Age 21 Male & Female Mortality - USA and Iceland",
    y = "Mortality (qx)",
    color = "Sex"
  )

ggplotly(gf)
# Women age 21 have a higher mortality rate in the US than in Iceland.

Problem 5

Create a graph of the ratio qxm/qxf at age 21 for both countries in a single plot. Be sure to comment. Use plotly.

# Male-to-female mortality ratio (qxm / qxf) at age 21 by year, both
# countries in a single plot, one colour per country.
gr <- usa_isl %>%
  filter(Age == 21) %>%
  mutate(Ratio = qxm / qxf) %>%
  # qxf == 0 makes the ratio Inf (or NaN when qxm is also 0); drop those
  # rows so only defined ratios are plotted.
  filter(is.finite(Ratio)) %>%
  ggplot(aes(x = Year, y = Ratio, color = country)) +
  geom_point() +
  labs(title = "Age 21 Male/Female Ratio Mortality - USA and Iceland")

ggplotly(gr)
# The US has a fairly steady M/F ratio of mortality, while Iceland's seems to be more sporadic.