library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
#USA Data Frame
USAM <- read_table("~/R/USA.mltper_1x1.txt", skip = 2) %>%
mutate(country = "USA") %>%
rename(qxm = qx) %>%
select(country, Year, Age, qxm) %>%
mutate(Age = as.numeric(Age)) %>%
drop_na()
##
## -- Column specification --------------------------------------------------------
## cols(
## Year = col_double(),
## Age = col_character(),
## mx = col_double(),
## qx = col_double(),
## ax = col_double(),
## lx = col_double(),
## dx = col_double(),
## Lx = col_double(),
## Tx = col_double(),
## ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
USAF <- read_table("~/R/USA.fltper_1x1.txt", skip = 2) %>%
mutate(country = "USA") %>%
rename(qxf = qx) %>%
select(country, Year, Age, qxf) %>%
mutate(Age = as.numeric(Age)) %>%
drop_na()
##
## -- Column specification --------------------------------------------------------
## cols(
## Year = col_double(),
## Age = col_character(),
## mx = col_double(),
## qx = col_double(),
## ax = col_double(),
## lx = col_double(),
## dx = col_double(),
## Lx = col_double(),
## Tx = col_double(),
## ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
USA = USAM %>%
inner_join(USAF)
## Joining, by = c("country", "Year", "Age")
#Poland Data Frame
POLM <- read_table("~/R/POL.mltper_1x1.txt", skip = 2) %>%
mutate(country = "POL") %>%
rename(qxm = qx) %>%
select(country, Year, Age, qxm) %>%
mutate(Age = as.numeric(Age)) %>%
drop_na()
##
## -- Column specification --------------------------------------------------------
## cols(
## Year = col_double(),
## Age = col_character(),
## mx = col_double(),
## qx = col_double(),
## ax = col_double(),
## lx = col_double(),
## dx = col_double(),
## Lx = col_double(),
## Tx = col_double(),
## ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
POLF <- read_table("~/R/POL.fltper_1x1.txt", skip = 2) %>%
mutate(country = "POL") %>%
rename(qxf = qx) %>%
select(country, Year, Age, qxf) %>%
mutate(Age = as.numeric(Age)) %>%
drop_na()
##
## -- Column specification --------------------------------------------------------
## cols(
## Year = col_double(),
## Age = col_character(),
## mx = col_double(),
## qx = col_double(),
## ax = col_double(),
## lx = col_double(),
## dx = col_double(),
## Lx = col_double(),
## Tx = col_double(),
## ex = col_double()
## )
## Warning in mask$eval_all_mutate(quo): NAs introduced by coercion
POL = POLM %>%
inner_join(POLF)
## Joining, by = c("country", "Year", "Age")
#Bind
USA_POL2 = rbind(USA, POL)
str(USA_POL2)
## tibble [16,390 x 5] (S3: tbl_df/tbl/data.frame)
## $ country: chr [1:16390] "USA" "USA" "USA" "USA" ...
## $ Year : num [1:16390] 1933 1933 1933 1933 1933 ...
## $ Age : num [1:16390] 0 1 2 3 4 5 6 7 8 9 ...
## $ qxm : num [1:16390] 0.06486 0.00999 0.00466 0.00333 0.00253 ...
## $ qxf : num [1:16390] 0.05208 0.00883 0.00402 0.00287 0.00223 ...
#Problem 1
Prepare your analysis dataframe using data extracted from the Human Mortality Database. Begin by selecting another country(not Canada), which you will compare with the USA. When you are done, your dataframe should have the following variables: country, Year, Age, qxm, and qxf.
Do an str() on the dataframe and create a table of the values in the variable country. There should be two values in country, USA and whatever other country you select.
library(tidyverse)
library(plotly)
library(readr)
POL_mltper_1x1 <- read_csv("~/R/POL.mltper_1x1.txt")
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 6883 Columns: 3
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): Poland
## lgl (2): Life tables (period 1x1), Males Last modified: 13 Apr 2021; Method...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(POL_mltper_1x1)
library(readr)
USA_mltper_1x1 <- read_csv("~/R/USA.mltper_1x1.txt")
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 9658 Columns: 3
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (1): The United States of America
## lgl (2): Life tables (period 1x1), Males Last modified: 17 Mar 2021; Method...
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(USA_mltper_1x1)
USA <- read_table("~/R/USA.mltper_1x1.txt", skip = 2) %>%
mutate(country = "USA") %>%
select(country, Year, Age, qx) %>%
drop_na()
##
## -- Column specification --------------------------------------------------------
## cols(
## Year = col_double(),
## Age = col_character(),
## mx = col_double(),
## qx = col_double(),
## ax = col_double(),
## lx = col_double(),
## dx = col_double(),
## Lx = col_double(),
## Tx = col_double(),
## ex = col_double()
## )
POL <- read_table("~/R/POL.mltper_1x1.txt", skip = 2) %>%
mutate(country = "POL") %>%
select(country, Year, Age, qx) %>%
drop_na()
##
## -- Column specification --------------------------------------------------------
## cols(
## Year = col_double(),
## Age = col_character(),
## mx = col_double(),
## qx = col_double(),
## ax = col_double(),
## lx = col_double(),
## dx = col_double(),
## Lx = col_double(),
## Tx = col_double(),
## ex = col_double()
## )
USA_POL = rbind(USA,POL)
#Problem 1 str
str(USA_POL)
## tibble [16,539 x 4] (S3: tbl_df/tbl/data.frame)
## $ country: chr [1:16539] "USA" "USA" "USA" "USA" ...
## $ Year : num [1:16539] 1933 1933 1933 1933 1933 ...
## $ Age : chr [1:16539] "0" "1" "2" "3" ...
## $ qx : num [1:16539] 0.06486 0.00999 0.00466 0.00333 0.00253 ...
#Problem 2
Create a graph of infant mortality for both countries over as many years as you heve. Be sure to comment on the results. Use plotly.
USA_POL %>%
filter(Age == 0) %>%
ggplot(aes(x = Year, y = qx, color = country)) +
geom_point() +
ggtitle("Male Infant Mortality - USA and Poland")
#Comment: Both countries have the same negative linear graph to infant mortality rates. It seems that both countries have declined greatly in a similar rate around the 2000's. However, beforehadn the US had high mortality rates for infants in the 1950's and 1965 for Poland respectively.
Create a graph of mortality for your age and gender for both countries over as many years as you have. Be sure to comment on the results. Use plotly.
USA_POL2 %>%
filter(Age == 20) %>%
ggplot((aes(x = Year))) +
geom_point(aes(y=qxf), color = "purple") +
geom_point(aes(y=qxf), color = "blue") +
facet_wrap(~country)
ggtitle("Age 20 Female Mortality - USA and Poland")
## $title
## [1] "Age 20 Female Mortality - USA and Poland"
##
## attr(,"class")
## [1] "labels"
#Results, USA had a higher mortality rate for females in the age of 20 around the 1940's in comaprison to Poland's mortality rate for the same demographics.
Create a graph of mortality at age 21 for both genders against year. Use facet_wrap() to do this for both countries side-by-side. Be sure to comment. Use plotly.
g = USA_POL2 %>%
filter(Age == 21) %>%
ggplot((aes(x = Year))) +
geom_point(aes(y = qxm), color = "blue", size =.7) +
geom_point(aes(y = qxf), color = "red", size=.7) +
facet_wrap(~country)
ggplotly(g)
#Comment: USA has a very high rate of mortality rates compared to Poland, espicially for both females and males during the 1950's. I am starting to see a trend in the 1950's to have a high death rates, I wonder what occured then. While Poland has had a steady decline since late 1950's.
Create a graph of the ratio qxm/qxf at age 21 for both countries in a single plot. Be sure to comment. Use plotly.
c = USA_POL2 %>%
filter(Age ==21) %>%
mutate(Ratio =qxm/qxf) %>%
ggplot(aes(x = Year, y = Ratio, color = country)) +
geom_point()
ggplotly(c)
#Comment: Poland's ratio seems to be more scattered than the linear graph of the US. Both countries seem to increase.