library(tidyverse)
library(lubridate)
# f<-function(year=2020)
# {
# d<- data.frame(year,read_csv(sprintf("https://www.populationpyramid.net/api/pp/826/%s/?csv=true",year)))
# d
# }
#
# year<-1950:2050
# d<-do.call("rbind",lapply(year,f))
# d %>% separate(Age, into=c("lower", "upper")) -> d
# d %>% mutate(total=M+F) -> d
# write_csv(d, "uk_demo.csv")
d<- read_csv("uk_demo.csv")
demo<-d
theme_set(theme_bw())
The age structure of the UK population is not static from year to year. There are some consistent and some more esoteric patterns in the demographic structure. The overall increase in the numbers of people falling into the elderly age classes should be taken into account when calculating excess mortality. Data is avalailable from https://www.populationpyramid.net
f<-function(Year=1950){
d %>% filter(year==Year) %>% filter(lower >74) ->dd
tit<- sum(dd$total)/1000000
tit<- round(tit,2)
tit<- sprintf("Total population over the age of 75 = %s million in %s",tit, Year)
d %>% filter(year==Year) %>% filter(lower >10) %>%
ggplot(aes(x=as.factor(lower))) + ggtitle(tit) +
geom_col (aes(y=-M/1000000), fill="lightblue") +geom_label (aes(y=- 0.2-M/1000000,label=round(M/1000000,2))) +geom_col (aes(y=F/1000000), fill="pink") +geom_label (aes(y=0.2+F/1000000,label=round(F/1000000,2))) +
labs(title = tit,
caption = "Source https://www.populationpyramid.net/api/pp/826",
subtitle ="Numbers after 2019 are projections",
x = "Age",
y="Millions") +
coord_flip()
}
f(1950)
f(1960)
f(1970)
f(1980)
f(1990)
f(2000)
f(2010)
f(2015)
f(2020)
f(2030)
f(2040)
d %>% filter(lower > 74) %>% filter(year > 2010) %>% filter(year < 2031) %>% group_by(year) %>% summarise(over75=sum(total)/1000000) %>%
arrange(year) %>%
mutate(diff = over75 - lag(over75, default = first(over75))) ->over75
ggplot(over75,aes(x=year,y=over75, label=round(over75,1))) +geom_col() +geom_label() + labs(title = "Number of people over 75 years of age in the UK",
subtitle ="Numbers after 2019 are projections",
caption = "Source https://www.populationpyramid.net/api/pp/826",
x = "Year",
y="Number in millions") + scale_x_continuous(breaks=2010:2030) + theme(axis.text.x=element_text(angle=45, hjust=1))
# d<-read.csv("https://www.mortality.org/Public/STMF/Outputs/stmf.csv",skip=1)
#
# filter(d, Year >2009) -> d
# d %>% filter(d$CountryCode %in% c("GBRTENW","GBR_NIR", "GBR_SCO")) -> d
# d$date<-paste(d$Year,d$Week,1,sep="-")
# d$date<-as.Date(d$date, "%Y-%U-%u")
# filter(d, Sex=="b") -> both
# both[,c(1,20,5:10)] -> deaths
# pivot_longer(deaths,cols=3:8) -> deaths
# deaths$name<-gsub("D","",deaths$name)
# deaths$year<-year(deaths$date)
# deaths$week<-week(deaths$date)
# write_csv(deaths,"deaths_uk.csv")
deaths<-read_csv("deaths_uk.csv")
deaths %>% group_by(date,year,week, name) %>% summarise(sum=sum(value))->uk
uk %>% separate (name, into=c("lower","upper")) -> uk
uk$lower<-aqm::clean(uk$lower)
Mortality data is avaliable from https://www.mortality.org/Public/STMF/Outputs/stmf.csv. This has been aggregated for the UK from the numbers provided for England and Wales, Scotland and Northern Ireland.
uk %>% filter(is.na(lower)) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year)) + scale_color_manual(values = c( 'red','#595959')) +
labs(title = "Total all cause mortality in the UK",
caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
subtitle ="Weekly deaths",
x = "Week",
y="Numbers of deaths per week") + scale_x_continuous(breaks=seq(0,52,by=2)) + theme(axis.text.x=element_text(angle=45, hjust=1))
uk %>% filter(lower==15) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year)) + scale_color_manual(values = c( 'red','#595959')) +
labs(title = "All cause mortality in the UK in the 15 to 65 age group",
caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
subtitle ="Weekly deaths",
x = "Week",
y="Numbers of deaths per week") + scale_x_continuous(breaks=seq(0,52,by=2)) + theme(axis.text.x=element_text(angle=45, hjust=1))
uk %>% filter(lower==65) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year)) + scale_color_manual(values = c( 'red','#595959')) +labs(title = "All cause mortality in the UK in the 65 to 75 age group",
caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
subtitle ="Weekly deaths",
x = "Week",
y="Numbers of deaths per week") + scale_x_continuous(breaks=seq(0,52,by=2)) + theme(axis.text.x=element_text(angle=45, hjust=1))
uk %>% filter(lower==75) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year)) + scale_color_manual(values = c( 'red','#595959')) + labs(title = "All cause mortality in the UK in the 75 to 85 age group",
caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
subtitle ="Weekly deaths",
x = "Week",
y="Numbers of deaths per week") + scale_x_continuous(breaks=seq(0,52,by=2)) + theme(axis.text.x=element_text(angle=45, hjust=1))
uk %>% filter(lower==85) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year)) + scale_color_manual(values = c( 'red','#595959')) + labs(title = "All cause mortality in the UK in the over 85 age group",
caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
subtitle ="Weekly deaths",
x = "Week",
y="Numbers of deaths per week") + scale_x_continuous(breaks=seq(0,52,by=2)) + theme(axis.text.x=element_text(angle=45, hjust=1))
uk %>% group_by(year,lower) %>% arrange(week) %>% summarise(week=week,sum=cumsum(sum)) ->uk_cumulative
uk %>%
pivot_wider(id_cols=c(1,2,3),names_from=lower, values_from=sum) -> uk_wide
uk %>% group_by(date,year,week) %>% summarise(lower,percent=100*sum/max(sum)) ->uk_percent
uk_cumulative %>% filter(is.na(lower)) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year)) + scale_color_manual(values = c( 'red','#595959')) +
labs(title = "Total cumulative all cause mortality in the UK",
caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
subtitle ="Cumulative deaths",
x = "Week",
y="Total numbers of deaths") + scale_x_continuous(breaks=seq(0,52,by=2)) + theme(axis.text.x=element_text(angle=45, hjust=1))
uk_cumulative %>% filter(lower==15) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year)) + scale_color_manual(values = c( 'red','#595959')) +
labs(title = "Cumulative all cause mortality in the UK 15 to 65 age group",
caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
subtitle ="Cumulative deaths",
x = "Week",
y="Total numbers of deaths") + scale_x_continuous(breaks=seq(0,52,by=2)) + theme(axis.text.x=element_text(angle=45, hjust=1))
uk_cumulative %>% filter(lower==65) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year)) + scale_color_manual(values = c( 'red','#595959')) +
labs(title = "Cumulative all cause mortality in the UK 65 to 75 age group",
caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
subtitle ="Cumulative deaths",
x = "Week",
y="Total numbers of deaths") + scale_x_continuous(breaks=seq(0,52,by=2)) + theme(axis.text.x=element_text(angle=45, hjust=1))
uk_cumulative %>% filter(lower==75) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year)) + scale_color_manual(values = c( 'red','#595959')) +
labs(title = "Cumulative all cause mortality in the UK 75 to 85 age group",
caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
subtitle ="Cumulative deaths",
x = "Week",
y="Total numbers of deaths") + scale_x_continuous(breaks=seq(0,52,by=2)) + theme(axis.text.x=element_text(angle=45, hjust=1))
uk_cumulative %>% filter(lower==85) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year)) + scale_color_manual(values = c( 'red','#595959')) +
labs(title = "Cumulative all cause mortality in the UK over 85 age group",
caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
subtitle ="Cumulative deaths",
x = "Week",
y="Total numbers of deaths") + scale_x_continuous(breaks=seq(0,52,by=2)) + theme(axis.text.x=element_text(angle=45, hjust=1))
The percentage of the total deaths occurring in a week that fall within an age class may reveal unusual mortality events.
uk_percent %>% filter(lower==15) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
ggplot(aes(x=week,y=percent, group=as.factor(year))) +geom_line(aes(color = Year)) + scale_color_manual(values = c( 'red','#595959')) +
labs(title = "Percentage of all weekly deaths occurring in the 15 to 65 year age group",
caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
subtitle ="Number of deaths in the age class divided by the weekly total expressed as a percentage",
x = "Week",
y="Percent deaths") + scale_x_continuous(breaks=seq(0,52,by=2)) + theme(axis.text.x=element_text(angle=45, hjust=1))
uk_percent %>% filter(lower==65) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
ggplot(aes(x=week,y=percent, group=as.factor(year))) +geom_line(aes(color = Year)) + scale_color_manual(values = c( 'red','#595959')) + labs(title = "Percentage of all weekly deaths occurring in the 65 to 75 year age group",
caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
subtitle ="Number of deaths in the age class divided by the weekly total expressed as a percentage",
x = "Week",
y="Percent deaths") + scale_x_continuous(breaks=seq(0,52,by=2)) + theme(axis.text.x=element_text(angle=45, hjust=1))
uk_percent %>% filter(lower==75) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
ggplot(aes(x=week,y=percent, group=as.factor(year))) +geom_line(aes(color = Year)) + scale_color_manual(values = c( 'red','#595959')) + labs(title = "Percentage of all weekly deaths occurring in the 75 to 85 year age group",
caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
subtitle ="Number of deaths in the age class divided by the weekly total expressed as a percentage",
x = "Week",
y="Percent deaths") + scale_x_continuous(breaks=seq(0,52,by=2)) + theme(axis.text.x=element_text(angle=45, hjust=1))
uk_percent %>% filter(lower==85) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
ggplot(aes(x=week,y=percent, group=as.factor(year))) +geom_line(aes(color = Year)) + scale_color_manual(values = c( 'red','#595959')) + labs(title = "Percentage of all weekly deaths occurring in the over 85 age group",
caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
subtitle ="Number of deaths in the age class divided by the weekly total expressed as a percentage",
x = "Week",
y="Percent deaths") + scale_x_continuous(breaks=seq(0,52,by=2)) + theme(axis.text.x=element_text(angle=45, hjust=1))
In order to calculate this metric the data from the demographic pyramid has to be aggregated into age classes and combined with the mortality data using the same age classes.
uk %>% group_by(year, lower) %>% summarise(total=sum(sum)) ->uk_year
uk_year %>% filter(!is.na(lower)) -> uk_year
demo %>% filter(year %in% unique(uk_year$year)) -> demo
demo$age_class<-cut(demo$lower, c(-1,14,64,74,84,100))
demo %>% group_by(year, age_class) %>% summarise(n = sum(total)) -> demo
uk_year$age_class<-cut(uk_year$lower, c(-1,14,64,74,84,100))
uk_year %>% inner_join(demo, by=c("age_class","year")) -> combined
combined %>% mutate(percent=100*(total/n)) %>% mutate(age_class = recode(age_class,
"(-1,14]" = "0-15",
"(14,64]" = "15-65",
"(64,74]" = "65-75",
"(74,84]" = "75-85",
"(84,100]" = "Over 85")) -> combined
combined %>% filter(lower>74) %>% ggplot(aes(x=year,y=percent)) + geom_col() + facet_wrap(~age_class) + scale_x_continuous(breaks=2010:2020) + theme(axis.text.x=element_text(angle=45, hjust=1)) + labs(title = "Percentage of deaths within each age class",
caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
subtitle ="The number of deaths occurring divided by the number of people in the age class",
x = "Year",
y="Percent")
combined %>% filter(lower<74) %>% ggplot(aes(x=year,y=percent)) + geom_col() + facet_wrap(~age_class) + scale_x_continuous(breaks=2010:2020) + theme(axis.text.x=element_text(angle=45, hjust=1)) + labs(title = "Percentage of deaths within each age class",
caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
subtitle ="The number of deaths occurring divided by the number of people in the age class",
x = "Year",
y="Percent")
Excess deaths in 2020 can be calculated in the following manner.
combined %>% filter(year<2020) %>% group_by(age_class) %>% summarise(mean_percent=mean(percent)) %>% inner_join(combined) %>% mutate(expected=(mean_percent/100)*n) %>% mutate(excess=total-expected) -> excess
excess %>% group_by(year) %>% summarise(total=round(sum(total)),excess=round(sum(excess))) %>% arrange(-excess)%>% aqm::dt()
excess %>% group_by(year,age_class) %>% summarise(excess=round(sum(excess))) %>% pivot_wider(names_from = age_class,values_from =excess) %>% aqm::dt()
library(ggsci)
excess %>% group_by(year,age_class) %>% summarise(excess=round(sum(excess))) %>% ggplot(aes(x=year,y=excess,col=age_class)) + geom_line() +geom_point() + scale_x_continuous(breaks=2010:2020) + theme(axis.text.x=element_text(angle=45, hjust=1)) + labs(title = "Deaths compared to expected number within each age class",
caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv \n https://www.populationpyramid.net/api",
subtitle ="Calculated from demographic data using 9 year mean prior to 2020",
x = "Year",
y="Number of additional deaths ") + scale_color_uchicago()
aqm::dt(excess)