library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
## Warning: package 'plotly' was built under R version 4.1.3
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Problem 1
Prepare your analysis dataframe using data extracted from the Human Mortality Database. Begin by selecting another country(not Canada), which you will compare with the USA. When you are done, your dataframe should have the following variables: country, Year, Age, qxm, and qxf.
library(tidyverse)
library(plotly)
JPN <- read_table("C:/Users/Tedy/Downloads/JPN/JPN/STATS/mltper_1x1.txt", skip = 2) %>%
mutate(country = "JPN") %>%
select(country, Year, Age, qx) %>%
mutate(Age = as.numeric(Age)) %>%
drop_na()
##
## -- Column specification --------------------------------------------------------
## cols(
## Year = col_double(),
## Age = col_character(),
## mx = col_double(),
## qx = col_double(),
## ax = col_double(),
## lx = col_double(),
## dx = col_double(),
## Lx = col_double(),
## Tx = col_double(),
## ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
JPNM <- read_table("C:/Users/Tedy/Downloads/JPN/JPN/STATS/mltper_1x1.txt", skip = 2) %>%
mutate(country = "JPN") %>%
rename(qxm = qx) %>%
select(country, Year, Age, qxm) %>%
mutate(Age = as.numeric(Age)) %>%
drop_na()
##
## -- Column specification --------------------------------------------------------
## cols(
## Year = col_double(),
## Age = col_character(),
## mx = col_double(),
## qx = col_double(),
## ax = col_double(),
## lx = col_double(),
## dx = col_double(),
## Lx = col_double(),
## Tx = col_double(),
## ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
JPNF <- read_table("C:/Users/Tedy/Downloads/JPN/JPN/STATS/fltper_1x1.txt", skip = 2)%>%
mutate(country = "JPN") %>%
rename(qxf = qx) %>%
select(country, Year, Age, qxf) %>%
mutate(Age = as.numeric(Age)) %>%
drop_na()
##
## -- Column specification --------------------------------------------------------
## cols(
## Year = col_double(),
## Age = col_character(),
## mx = col_double(),
## qx = col_double(),
## ax = col_double(),
## lx = col_double(),
## dx = col_double(),
## Lx = col_double(),
## Tx = col_double(),
## ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
JPN = JPNM %>% inner_join(JPNF)
## Joining, by = c("country", "Year", "Age")
head(JPN)
## # A tibble: 6 x 5
## country Year Age qxm qxf
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 JPN 1947 0 0.0895 0.0791
## 2 JPN 1947 1 0.0364 0.0350
## 3 JPN 1947 2 0.0171 0.0171
## 4 JPN 1947 3 0.0113 0.0112
## 5 JPN 1947 4 0.00745 0.007
## 6 JPN 1947 5 0.0051 0.00482
USA <- read_table("C:/Users/Tedy/Downloads/USA/USA/STATS/mltper_1x1.txt", skip = 2) %>%
mutate(country = "USA") %>%
select(country, Year, Age, qx) %>%
mutate(Age = as.numeric(Age)) %>%
drop_na()
##
## -- Column specification --------------------------------------------------------
## cols(
## Year = col_double(),
## Age = col_character(),
## mx = col_double(),
## qx = col_double(),
## ax = col_double(),
## lx = col_double(),
## dx = col_double(),
## Lx = col_double(),
## Tx = col_double(),
## ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
USAM <- read_table("C:/Users/Tedy/Downloads/USA/USA/STATS/mltper_1x1.txt", skip = 2) %>%
mutate(country = "USA") %>%
rename(qxm = qx) %>%
select(country, Year, Age, qxm) %>%
mutate(Age = as.numeric(Age)) %>%
drop_na()
##
## -- Column specification --------------------------------------------------------
## cols(
## Year = col_double(),
## Age = col_character(),
## mx = col_double(),
## qx = col_double(),
## ax = col_double(),
## lx = col_double(),
## dx = col_double(),
## Lx = col_double(),
## Tx = col_double(),
## ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
USAF <- read_table("C:/Users/Tedy/Downloads/USA/USA/STATS/fltper_1x1.txt", skip = 2) %>%
mutate(country = "USA") %>%
rename(qxf = qx) %>%
select(country, Year, Age, qxf) %>%
mutate(Age = as.numeric(Age)) %>%
drop_na()
##
## -- Column specification --------------------------------------------------------
## cols(
## Year = col_double(),
## Age = col_character(),
## mx = col_double(),
## qx = col_double(),
## ax = col_double(),
## lx = col_double(),
## dx = col_double(),
## Lx = col_double(),
## Tx = col_double(),
## ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
USA = USAM %>%
inner_join(USAF)
## Joining, by = c("country", "Year", "Age")
head(USA)
## # A tibble: 6 x 5
## country Year Age qxm qxf
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 USA 1933 0 0.0649 0.0521
## 2 USA 1933 1 0.00999 0.00883
## 3 USA 1933 2 0.00466 0.00402
## 4 USA 1933 3 0.00333 0.00287
## 5 USA 1933 4 0.00253 0.00223
## 6 USA 1933 5 0.00209 0.00185
Do an str() on the dataframe and create a table of the values in the variable country. There should be two values in country, USA and whatever other country you select.
USA_JPN = rbind(USA,JPN)
str(USA_JPN)
## tibble [17,710 x 5] (S3: tbl_df/tbl/data.frame)
## $ country: chr [1:17710] "USA" "USA" "USA" "USA" ...
## $ Year : num [1:17710] 1933 1933 1933 1933 1933 ...
## $ Age : num [1:17710] 0 1 2 3 4 5 6 7 8 9 ...
## $ qxm : num [1:17710] 0.06486 0.00999 0.00466 0.00333 0.00253 ...
## $ qxf : num [1:17710] 0.05208 0.00883 0.00402 0.00287 0.00223 ...
Problem 2
Create a graph of infant mortality for both countries over as many years as you have. Be sure to comment on the results. Use plotly.
USA_JPN %>%
filter(Age == 0 & Year > 1933) %>%
ggplot(aes(x = Year, y = qxm + qxf, color = country)) +
geom_point() +
ggtitle("Infant Mortality - USA and Japan")
#The graph shows that early in the years Japan had a higher infant mortality rate than the US. It wasnt until between 1962 and 1963 when Japan crossed over USA and got a lower infant mortality rate. From the graph, we can see that Japan and USA both are decreasing in terms of infant mortality rates as the years contine, probably due to more technological medical advancements and medicinal discoveries to help save more infants. However, Japan to this year continues to have a lower infant mortality rate than the U.S.
Problem 4
Create a graph of mortality at age 21 for both genders against year. Use facet_wrap() to do this for both countries side-by-side. Be sure to comment. Use plotly.
USA_JPN %>%
filter(Age == 21 & Year > 1933) %>%
ggplot(aes(x = Year, y = qxm + qxf, color = country)) +
geom_point() +
facet_wrap(vars(country)) +
ggtitle("Age 21 Male Mortality - USA and Japan")
#The graph shows a comparison between Japan’s and USA’s mortality rate at age 21. We can see that Japan’s mortality rate was spread through out prior to the 1950s and was not seen as often as the US. However, the mortality rate of 21 year olds for both countries are recorded more often after the 1950s and is decreasing.
Problem 5
Create a graph of the ratio qxm/qxf at age 21 for both countries in a single plot. Be sure to comment. Use plotly.
USA_JPN %>%
filter(Age == 21) %>%
mutate(Ratio = qxm/qxf) %>%
ggplot(aes(x = Year, y = Ratio)) + geom_point() + facet_wrap(vars(country))
#From the graphs, we can see that the ratio between males’ and females’ mortality rate in the USA had a turning point in the 1975 where it began to decrease then. With Japan, the ratio gradually increase till at some point near the 1970s, it started to spread throughout the years. In some years, there is high ratio while in other years it is low — the years are often times not consecutive as well.