http://apps.who.int/gho/data/view.main.LIFEWBGLOBAL?lang=en
The World Health Organization characterizes the raw data used in this analysis as "Mortality and global health estimates", taken from the Life tables by World Bank Income Group (Global).
This rudimentary analysis looks at the 22 age group brackets for each of the 4 years considered in the WHO’s super-set.
A histogram of mortality and a boxplot contingency matrix of age bracket to mortality, faceted by year, are shown.
# Load the raw WHO life-table export without treating any row as a header:
# the header rows are read in as ordinary data rows (header = FALSE) and are
# turned into column names later on. Empty cells are read as NA.
l <- read.csv(
  file = "/Users/scottkarr/IS607Spring2016/project2/more/mortality.csv",
  header = FALSE,
  sep = ",",
  na.strings = "",
  blank.lines.skip = TRUE,
  stringsAsFactors = FALSE,
  # Placeholder names for the 14 columns: two identifier columns followed by
  # four repeats of the three sex columns.
  col.names = c(
    "Indicator", "Age Group",
    rep(c("Both Sexes", "Female", "Male"), times = 4)
  )
)
# Working copy used by the rest of the analysis.
df <- data.frame(l)
# Reshape the raw sheet into a tidy (Age.Group, year, rates) table holding the
# combined-sex nqx mortality rates.

# Rows 1 and 2 contain the header names; paste them pairwise to build one name
# per data column (columns 3 to 14).
names(df) <- c('Indicator', 'Age.Group', paste(df[1,3:14], df[2,3:14]))
# Remove rows 1 and 2 now that their content lives in names(df).
df <- df[-c(1, 2), ]
names(df) <- gsub("Both sexes:", "", names(df))
#kable(head(df), align = 'l')

# Keep only the combined-sex columns, then only the rows whose Indicator
# mentions nqx, then drop Indicator since it is no longer needed. The column
# selection deliberately stays ahead of the row filter, as in the original.
df <- df %>%
  select(Indicator, Age.Group, ends_with("sexes")) %>%
  filter(grepl("nqx", Indicator)) %>%
  select(Age.Group, ends_with("sexes"))

# Go from wide to long format, then split the former column name into year and
# gender. The name has more than two whitespace-separated pieces, which made
# separate() warn about "too many values" on every row; extra = "merge" keeps
# the first piece as the year and folds the remainder into gender.
# Gender is then dropped: only combined-sex mortality is analysed.
df_tidy <- df %>%
  gather(year, rates, -Age.Group) %>%
  arrange(Age.Group) %>%
  separate(
    year,
    into = c("year", "gender"),
    sep = "\\s",
    extra = "merge"
  ) %>%
  select(Age.Group, year, rates)

# Make year and rates numeric.
df_tidy$year <- as.numeric(df_tidy$year)
df_tidy$rates <- as.numeric(df_tidy$rates)

# Give the open-ended 100+ bracket the same "low-high" shape as the other
# labels. Rows are matched on the label itself rather than on fixed row
# positions, which depended on the collation used by arrange() and could
# overwrite a different age bracket.
is_oldest_bracket <- grepl("100+", df_tidy$Age.Group, fixed = TRUE)
df_tidy$Age.Group[is_oldest_bracket] <- '100-100+'

# Present data nicely.
kable(head(df_tidy), align = 'l')
Age.Group | year | rates |
---|---|---|
<1 year | 2013 | 0.034089 |
<1 year | 2012 | 0.035079 |
<1 year | 2000 | 0.053165 |
<1 year | 1990 | 0.063140 |
1-4 years | 2013 | 0.012828 |
1-4 years | 2012 | 0.013414 |
# Distribution of all mortality rates, pooled over age brackets and years.
hist(df_tidy$rates)

# Age.Group is a character column, so plotting it directly orders the brackets
# alphabetically ("1-4", "10-14", "100-100+", "15-19", ...) rather than by age.
# Build a plotting copy whose Age.Group is a factor levelled by the first
# number found in each label; a label starting with "<" is placed ahead of a
# label sharing the same number. Labels without digits (if any) sort last.
age_labels <- unique(df_tidy$Age.Group)
has_digits <- grepl("[0-9]", age_labels)
age_start <- rep(NA_real_, length(age_labels))
age_start[has_digits] <- as.numeric(
  sub("^[^0-9]*([0-9]+).*$", "\\1", age_labels[has_digits])
)
age_levels <- age_labels[order(age_start, !grepl("^<", age_labels))]
plot_df <- df_tidy
plot_df$Age.Group <- factor(plot_df$Age.Group, levels = age_levels)

# One box per age bracket, pooling the years, in increasing age order.
boxplot(
  rates ~ Age.Group,
  data = plot_df,
  xlab = "Increasing Age Group",
  ylab = "Mortality rate (nqx)",
  main = "Mortality Rates 4 years"
)

# Same comparison with ggplot2. The text layer inherits x and y from the plot
# aesthetics; NOTE(review): size = 0.1 makes these labels practically
# invisible -- confirm whether the layer is wanted at all.
bp <- ggplot(data = plot_df, aes(x = Age.Group, y = rates)) +
  geom_boxplot() +
  geom_text(
    aes(label = Age.Group, x = Age.Group, y = rates),
    size = 0.1,
    vjust = 1,
    hjust = 0.5
  )

# Split in horizontal direction: one panel per year.
bp + facet_grid(. ~ year)