library(tidyverse)
library(lubridate)
# f<-function(year=2020)
# {
#   d<- data.frame(year,read_csv(sprintf("https://www.populationpyramid.net/api/pp/826/%s/?csv=true",year)))
#   d
# }
# 
# year<-1950:2050
# d<-do.call("rbind",lapply(year,f))
# d %>% separate(Age, into=c("lower", "upper")) -> d
# d %>% mutate(total=M+F) -> d
# write_csv(d, "uk_demo.csv")
d<- read_csv("uk_demo.csv")
demo<-d
theme_set(theme_bw())

Demographic pyramids

The age structure of the UK population is not static from year to year. There are some consistent and some more esoteric patterns in the demographic structure. The overall increase in the numbers of people falling into the elderly age classes should be taken into account when calculating excess mortality. Data is avalailable from https://www.populationpyramid.net

1950

f<-function(Year=1950){
  d %>% filter(year==Year) %>% filter(lower >74) ->dd
  tit<- sum(dd$total)/1000000
   tit<- round(tit,2)
 tit<- sprintf("Total population over the age of 75 = %s million in %s",tit, Year)
  d %>% filter(year==Year) %>% filter(lower >10) %>%
  ggplot(aes(x=as.factor(lower))) + ggtitle(tit) +
  geom_col (aes(y=-M/1000000), fill="lightblue") +geom_label (aes(y=- 0.2-M/1000000,label=round(M/1000000,2))) +geom_col (aes(y=F/1000000), fill="pink") +geom_label (aes(y=0.2+F/1000000,label=round(F/1000000,2))) +
  
  labs(title = tit, 
       caption = "Source https://www.populationpyramid.net/api/pp/826",
       subtitle ="Numbers after 2019 are projections",   
       x = "Age",
       y="Millions") +
  coord_flip()
}

f(1950)

1960

f(1960)

1970

f(1970)

1980

f(1980)

1990

f(1990)

2000

f(2000)

2010

f(2010)

2015

f(2015)

2020

f(2020)

2030

f(2030)

2040

f(2040)

Total over 75s

d %>% filter(lower > 74) %>% filter(year > 2010) %>% filter(year < 2031) %>% group_by(year) %>% summarise(over75=sum(total)/1000000) %>%
 
    arrange(year) %>%
    mutate(diff = over75 - lag(over75, default = first(over75))) ->over75

ggplot(over75,aes(x=year,y=over75, label=round(over75,1))) +geom_col() +geom_label()  + labs(title = "Number of people over 75 years of age in the UK", 
      subtitle ="Numbers after 2019 are projections",                                                                                   
       caption = "Source https://www.populationpyramid.net/api/pp/826",
       x = "Year",
       y="Number in millions") + scale_x_continuous(breaks=2010:2030)  + theme(axis.text.x=element_text(angle=45, hjust=1))

# d<-read.csv("https://www.mortality.org/Public/STMF/Outputs/stmf.csv",skip=1)
# 
# filter(d, Year >2009) -> d
# d %>% filter(d$CountryCode %in% c("GBRTENW","GBR_NIR", "GBR_SCO")) -> d
# d$date<-paste(d$Year,d$Week,1,sep="-")
# d$date<-as.Date(d$date, "%Y-%U-%u")
# filter(d, Sex=="b") -> both
# both[,c(1,20,5:10)] -> deaths
# pivot_longer(deaths,cols=3:8) -> deaths
# deaths$name<-gsub("D","",deaths$name)
# deaths$year<-year(deaths$date)
# deaths$week<-week(deaths$date)
# write_csv(deaths,"deaths_uk.csv")

deaths<-read_csv("deaths_uk.csv")
deaths %>% group_by(date,year,week, name) %>% summarise(sum=sum(value))->uk
uk %>% separate (name, into=c("lower","upper")) -> uk
uk$lower<-aqm::clean(uk$lower)

All cause mortality

Mortality data is avaliable from https://www.mortality.org/Public/STMF/Outputs/stmf.csv. This has been aggregated for the UK from the numbers provided for England and Wales, Scotland and Northern Ireland.

Total mortality

uk %>% filter(is.na(lower)) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
 ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year))  + scale_color_manual(values = c( 'red','#595959'))  +
  labs(title = "Total all cause mortality in the UK", 
       caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
       subtitle ="Weekly deaths",   
       x = "Week",
       y="Numbers of deaths per week") + scale_x_continuous(breaks=seq(0,52,by=2))  + theme(axis.text.x=element_text(angle=45, hjust=1))

15 to 65

uk %>% filter(lower==15) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
 ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year))  + scale_color_manual(values = c( 'red','#595959')) +
  labs(title = "All cause mortality in the UK in the 15 to 65 age group", 
       caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
       subtitle ="Weekly deaths",   
       x = "Week",
       y="Numbers of deaths per week") + scale_x_continuous(breaks=seq(0,52,by=2))  + theme(axis.text.x=element_text(angle=45, hjust=1))

65 to 75

uk %>% filter(lower==65) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
 ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year))  + scale_color_manual(values = c( 'red','#595959')) +labs(title = "All cause mortality in the UK in the 65 to 75 age group", 
       caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
       subtitle ="Weekly deaths",   
       x = "Week",
       y="Numbers of deaths per week") + scale_x_continuous(breaks=seq(0,52,by=2))  + theme(axis.text.x=element_text(angle=45, hjust=1))

75 to 85

uk %>% filter(lower==75) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
 ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year))  + scale_color_manual(values = c( 'red','#595959')) + labs(title = "All cause mortality in the UK in the 75 to 85 age group", 
       caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
       subtitle ="Weekly deaths",   
       x = "Week",
       y="Numbers of deaths per week") + scale_x_continuous(breaks=seq(0,52,by=2))  + theme(axis.text.x=element_text(angle=45, hjust=1))

Over 85

uk %>% filter(lower==85) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
 ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year))  + scale_color_manual(values = c( 'red','#595959')) + labs(title = "All cause mortality in the UK in the over 85 age group", 
       caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
       subtitle ="Weekly deaths",   
       x = "Week",
       y="Numbers of deaths per week") + scale_x_continuous(breaks=seq(0,52,by=2))  + theme(axis.text.x=element_text(angle=45, hjust=1))

Cumulative mortality

Cumulative all age mortality

uk %>% group_by(year,lower) %>% arrange(week) %>% summarise(week=week,sum=cumsum(sum)) ->uk_cumulative
uk  %>%
  pivot_wider(id_cols=c(1,2,3),names_from=lower, values_from=sum) -> uk_wide
uk %>% group_by(date,year,week) %>% summarise(lower,percent=100*sum/max(sum)) ->uk_percent 
uk_cumulative %>% filter(is.na(lower)) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
 ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year))  + scale_color_manual(values = c( 'red','#595959')) +
labs(title = "Total cumulative all cause mortality in the UK", 
       caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
       subtitle ="Cumulative deaths",   
       x = "Week",
       y="Total numbers of deaths") + scale_x_continuous(breaks=seq(0,52,by=2))  + theme(axis.text.x=element_text(angle=45, hjust=1))

Cumulative 15 to 65

uk_cumulative %>% filter(lower==15) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
 ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year))  + scale_color_manual(values = c( 'red','#595959')) +
labs(title = "Cumulative all cause mortality in the UK 15 to 65 age group", 
       caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
       subtitle ="Cumulative deaths",   
       x = "Week",
       y="Total numbers of deaths") + scale_x_continuous(breaks=seq(0,52,by=2))  + theme(axis.text.x=element_text(angle=45, hjust=1))

Cumulative 65 to 75

uk_cumulative %>% filter(lower==65) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
 ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year))  + scale_color_manual(values = c( 'red','#595959')) +
labs(title = "Cumulative all cause mortality in the UK 65 to 75 age group", 
       caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
       subtitle ="Cumulative deaths",   
       x = "Week",
       y="Total numbers of deaths") + scale_x_continuous(breaks=seq(0,52,by=2))  + theme(axis.text.x=element_text(angle=45, hjust=1))

75 to 85

uk_cumulative %>% filter(lower==75) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
 ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year))  + scale_color_manual(values = c( 'red','#595959')) +
labs(title = "Cumulative all cause mortality in the UK 75 to 85 age group", 
       caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
       subtitle ="Cumulative deaths",   
       x = "Week",
       y="Total numbers of deaths") + scale_x_continuous(breaks=seq(0,52,by=2))  + theme(axis.text.x=element_text(angle=45, hjust=1))

Over 85

uk_cumulative %>% filter(lower==85) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
 ggplot(aes(x=week,y=sum, group=as.factor(year))) +geom_line(aes(color = Year))  + scale_color_manual(values = c( 'red','#595959')) +
labs(title = "Cumulative all cause mortality in the UK over 85 age group", 
       caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
       subtitle ="Cumulative deaths",   
       x = "Week",
       y="Total numbers of deaths") + scale_x_continuous(breaks=seq(0,52,by=2))  + theme(axis.text.x=element_text(angle=45, hjust=1))

Percentage of deaths in each age group

The percentage of the total deaths occurring in a week that fall within an age class may reveal unusual mortality events.

15 to 65

uk_percent %>% filter(lower==15) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
 ggplot(aes(x=week,y=percent, group=as.factor(year))) +geom_line(aes(color = Year))  + scale_color_manual(values = c( 'red','#595959')) +
labs(title = "Percentage of all weekly deaths occurring in the 15 to 65 year age group", 
       caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
       subtitle ="Number of deaths in the age class divided by the weekly total expressed as a percentage",   
       x = "Week",
       y="Percent deaths") + scale_x_continuous(breaks=seq(0,52,by=2))  + theme(axis.text.x=element_text(angle=45, hjust=1))

65 to 75

uk_percent %>% filter(lower==65) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
 ggplot(aes(x=week,y=percent, group=as.factor(year))) +geom_line(aes(color = Year))  + scale_color_manual(values = c( 'red','#595959')) + labs(title = "Percentage of all weekly deaths occurring in the 65 to 75 year age group", 
       caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
       subtitle ="Number of deaths in the age class divided by the weekly total expressed as a percentage",   
       x = "Week",
       y="Percent deaths") + scale_x_continuous(breaks=seq(0,52,by=2))  + theme(axis.text.x=element_text(angle=45, hjust=1))

75 to 85

uk_percent %>% filter(lower==75) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
 ggplot(aes(x=week,y=percent, group=as.factor(year))) +geom_line(aes(color = Year))  + scale_color_manual(values = c( 'red','#595959')) + labs(title = "Percentage of all weekly deaths occurring in the 75 to 85 year age group", 
       caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
       subtitle ="Number of deaths in the age class divided by the weekly total expressed as a percentage",   
       x = "Week",
       y="Percent deaths") + scale_x_continuous(breaks=seq(0,52,by=2))  + theme(axis.text.x=element_text(angle=45, hjust=1))

Over 85

uk_percent %>% filter(lower==85) %>% mutate(Year = ifelse(year==2020, "2020","Other")) %>%
 ggplot(aes(x=week,y=percent, group=as.factor(year))) +geom_line(aes(color = Year))  + scale_color_manual(values = c( 'red','#595959')) + labs(title = "Percentage of all weekly deaths occurring in the over 85 age group", 
       caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
       subtitle ="Number of deaths in the age class divided by the weekly total expressed as a percentage",   
       x = "Week",
       y="Percent deaths") + scale_x_continuous(breaks=seq(0,52,by=2))  + theme(axis.text.x=element_text(angle=45, hjust=1))

Percentage of people in the age classes dying each year

In order to calculate this metric the data from the demographic pyramid has to be aggregated into age classes and combined with the mortality data using the same age classes.

uk %>% group_by(year, lower) %>% summarise(total=sum(sum)) ->uk_year
uk_year %>% filter(!is.na(lower)) -> uk_year


demo %>% filter(year %in% unique(uk_year$year)) -> demo
demo$age_class<-cut(demo$lower, c(-1,14,64,74,84,100))
demo %>% group_by(year, age_class) %>% summarise(n = sum(total)) -> demo

uk_year$age_class<-cut(uk_year$lower, c(-1,14,64,74,84,100))
uk_year %>% inner_join(demo, by=c("age_class","year")) -> combined
combined %>% mutate(percent=100*(total/n)) %>% mutate(age_class = recode(age_class,
      "(-1,14]" = "0-15",
      "(14,64]" = "15-65",
      "(64,74]" = "65-75",
      "(74,84]" = "75-85",
      "(84,100]" = "Over 85")) -> combined

over 75

combined %>% filter(lower>74) %>% ggplot(aes(x=year,y=percent)) + geom_col() + facet_wrap(~age_class) + scale_x_continuous(breaks=2010:2020)  + theme(axis.text.x=element_text(angle=45, hjust=1)) + labs(title = "Percentage of deaths within each age class", 
       caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
       subtitle ="The number of deaths occurring divided by the number of people in the age class",   
       x = "Year",
       y="Percent")

Under 75

combined %>% filter(lower<74) %>% ggplot(aes(x=year,y=percent)) + geom_col() + facet_wrap(~age_class) + scale_x_continuous(breaks=2010:2020)  + theme(axis.text.x=element_text(angle=45, hjust=1)) + labs(title = "Percentage of deaths within each age class", 
       caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv",
       subtitle ="The number of deaths occurring divided by the number of people in the age class",   
       x = "Year",
       y="Percent")

Calculating excess deaths

Excess deaths in 2020 can be calculated in the following manner.

  1. Calculate the mean proportion of deaths in each age class for the preceding 9 years.
  2. Multiply the total population in each age class for each year, now including 2020, by the expected proportion.
  3. Subtract the expected number from the observed number.
  4. Sum over all age groups to find the total difference between the observed and expected.
combined %>% filter(year<2020) %>% group_by(age_class) %>% summarise(mean_percent=mean(percent)) %>% inner_join(combined) %>% mutate(expected=(mean_percent/100)*n) %>% mutate(excess=total-expected) -> excess

Total excess deaths

excess %>% group_by(year) %>% summarise(total=round(sum(total)),excess=round(sum(excess))) %>% arrange(-excess)%>% aqm::dt()

Excess in each age class

excess %>% group_by(year,age_class) %>% summarise(excess=round(sum(excess))) %>% pivot_wider(names_from = age_class,values_from =excess) %>% aqm::dt()

Plot

library(ggsci)
excess %>% group_by(year,age_class) %>% summarise(excess=round(sum(excess))) %>% ggplot(aes(x=year,y=excess,col=age_class)) + geom_line() +geom_point()  + scale_x_continuous(breaks=2010:2020)  + theme(axis.text.x=element_text(angle=45, hjust=1)) + labs(title = "Deaths compared to expected number within each age class", 
       caption = "Source https://www.mortality.org/Public/STMF/Outputs/stmf.csv \n https://www.populationpyramid.net/api",
       subtitle ="Calculated from demographic data using 9 year mean prior to 2020",   
       x = "Year",
       y="Number of additional deaths ") + scale_color_uchicago()

Raw data

aqm::dt(excess)