library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Problem 1
Prepare your analysis dataframe using data extracted from the Human Mortality Database. Begin by selecting another country(not Canada), which you will compare with the USA. When you are done, your dataframe should have the following variables: country, Year, Age, qxm, and qxf.
Do an str() on the dataframe and create a table of the values in the variable country. There should be two values in country, USA and whatever other country you select.
library(tidyverse)
USAM <- read_table("C:\\linux\\CSC530 Data Analysis\\Projects\\Week 1\\USA.mltper_1x1.txt", skip = 2) %>%
mutate(country = "USA") %>%
rename(qxm = qx) %>%
select(country, Year, Age, qxm) %>%
mutate(Age = as.numeric(Age)) %>%
drop_na()
##
## -- Column specification --------------------------------------------------------
## cols(
## Year = col_double(),
## Age = col_character(),
## mx = col_double(),
## qx = col_double(),
## ax = col_double(),
## lx = col_double(),
## dx = col_double(),
## Lx = col_double(),
## Tx = col_double(),
## ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
USAF <- read_table("C:\\linux\\CSC530 Data Analysis\\Projects\\Week 1\\USA.fltper_1x1.txt", skip = 2) %>%
mutate(country = "USA") %>%
rename(qxf = qx) %>%
select(country, Year, Age, qxf) %>%
mutate(Age = as.numeric(Age)) %>%
drop_na()
##
## -- Column specification --------------------------------------------------------
## cols(
## Year = col_double(),
## Age = col_character(),
## mx = col_double(),
## qx = col_double(),
## ax = col_double(),
## lx = col_double(),
## dx = col_double(),
## Lx = col_double(),
## Tx = col_double(),
## ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
JPNM <- read_table("C:\\linux\\CSC530 Data Analysis\\Projects\\Week 1\\JPN.mltper_1x1.txt", skip = 2) %>%
mutate(country = "Japan") %>%
rename(qxm = qx) %>%
select(country, Year, Age, qxm) %>%
mutate(Age = as.numeric(Age)) %>%
drop_na()
##
## -- Column specification --------------------------------------------------------
## cols(
## Year = col_double(),
## Age = col_character(),
## mx = col_double(),
## qx = col_double(),
## ax = col_double(),
## lx = col_double(),
## dx = col_double(),
## Lx = col_double(),
## Tx = col_double(),
## ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
JPNF <- read_table("C:\\linux\\CSC530 Data Analysis\\Projects\\Week 1\\USA.fltper_1x1.txt", skip = 2) %>%
mutate(country = "Japan") %>%
rename(qxf = qx) %>%
select(country, Year, Age, qxf) %>%
mutate(Age = as.numeric(Age)) %>%
drop_na()
##
## -- Column specification --------------------------------------------------------
## cols(
## Year = col_double(),
## Age = col_character(),
## mx = col_double(),
## qx = col_double(),
## ax = col_double(),
## lx = col_double(),
## dx = col_double(),
## Lx = col_double(),
## Tx = col_double(),
## ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
USA = USAM %>%
inner_join(USAF)
## Joining, by = c("country", "Year", "Age")
JPN = JPNM %>%
inner_join(JPNF)
## Joining, by = c("country", "Year", "Age")
USA_JPN = rbind(USA,JPN)
str(USA_JPN)
## tibble [17,600 x 5] (S3: tbl_df/tbl/data.frame)
## $ country: chr [1:17600] "USA" "USA" "USA" "USA" ...
## $ Year : num [1:17600] 1933 1933 1933 1933 1933 ...
## $ Age : num [1:17600] 0 1 2 3 4 5 6 7 8 9 ...
## $ qxm : num [1:17600] 0.06486 0.00999 0.00466 0.00333 0.00253 ...
## $ qxf : num [1:17600] 0.05208 0.00883 0.00402 0.00287 0.00223 ...
Problem 2
Create a graph of infant mortality for both countries over as many years as you heve. Be sure to comment on the results. Use plotly.
USA_JPN %>%
filter(Age == 0 & Year > 1933) %>%
ggplot(aes(x = Year, y = qxm + qxf, color = country)) +
geom_point() +
ggtitle("Infant Mortality - USA and Japan")
This graph compares the combined male and female infant mortality for the USA and Japan. I used gglpot since Plotly doesn’t show up when I knit to HTML.
Create a graph of mortality for your age and gender for both countries over as many years as you have. Be sure to comment on the results. Use plotly.
USA_JPN %>%
filter(Age == 49 & Year > 1933) %>%
ggplot(aes(x = Year, y = qxm, color = country)) +
geom_point() +
ggtitle("Age 49 Male Mortality - USA and Japan")
This graph compares the male mortality at age 49 for the USA and Japan. I used gglpot since Plotly doesn’t show up when I knit to HTML.
Create a graph of mortality at age 21 for both genders against year. Use facet_wrap() to do this for both countries side-by-side. Be sure to comment. Use plotly.
USA_JPN %>%
filter(Age == 21) %>%
ggplot((aes(x = Year))) +
geom_point(aes(y = qxm), color = "blue") +
geom_point(aes(y = qxf), color = "red") +
facet_wrap(~country)
This graph compares male and female mortality at age 21 for USA and Japan since 1933. I used ggplot since plotly doesn’t show up when I knit to html
## Problem 5
Create a graph of the ratio qxm/qxf at age 21 for both countries in a single plot. Be sure to comment. Use plotly.
USA_JPN %>%
filter(Age ==21) %>%
mutate(Ratio =qxm/qxf) %>%
ggplot(aes(x = Year, y = Ratio, color = country)) +
geom_point()
This graph compares the ratio of male and female mortality at age 21 for USA and Japan since 1933. I used ggplot since plotly doesn’t show up when I knit to html.