library(tidyverse)
library(reshape2)
library(janitor)
library(readxl)
library(countrycode)
library(scales)
library(hrbrthemes)
library(ggrepel)
library(ggpmisc)
I queried and downloaded data from WHO’s
Global Health Expenditure database and World
Bank International Comparison Project. The WHO provides current
health expenditure (HCE) estimates in local currency units and as a
share of GDP. I download them both as a cross-check just in case there
are any major discrepancies in currency units quoted by WHO versus the
local currency units provided by the ICP for AIC and GDP.
# from WHO, import SHA current health expenditures in millions of local currency units
# and as share of GDP. These can be used to cross-check the results just
# in case there's any issue with the currency the figures are denominated.
sha_health=read_xlsx(
"NHA indicators.xlsx",
skip=2,
col_names = c("country","indicator","units",sprintf("%d",2000:2019))
) %>%
mutate(
type=case_when(
str_detect(indicator,"as %") ~ 'pct_gdp',
str_detect(indicator,"^Gross Domestic Product") ~ 'gdp_lcu',
T ~ 'hce_lcu'
)
) %>%
select( -indicator,-units) %>%
melt(id.vars= c("country","type")) %>%
mutate(
year=as.integer(as.character(variable))
) %>%
dcast(country+year~type) %>%
mutate(
country_code=countrycode(country,'country.name','iso3c'),
country_code=coalesce(country_code,ifelse(country=='Türkiye','TUR',NA)), # fix turkey's country code
hce_lcu=hce_lcu/1000, # billions
gdp_lcu=gdp_lcu/1000 # billions
) %>%
rename( who_country = country )
# From WorldBank International Comparison Project
# Import AIC and GDP in PPP adjusted per capita and as aggregates of local currency units
icpraw=read_csv("icp_2011_through_2017.csv",show_col_types =F) %>%
clean_names() %>%
mutate(
wb_country=country_name,
country_code,
series=ifelse(str_detect(series_name,'ACTUAL INDIVIDUAL CONSUMPTION'),'AIC','GDP'),
variable=sprintf("%s_%s",series,classification_code),
year=as.integer(time)
) %>%
filter(
!is.na(value),
!is.na(country_code)
) %>%
dcast(country_code+wb_country+year~variable,value.var='value') %>%
clean_names()
icp_sha=merge(sha_health,icpraw,by=c('year','country_code'),all=T) %>%
mutate(
hce_share_aic=hce_lcu/aic_cn, # share of AIC (LCU over LCU)
hce_share_gdp=hce_lcu/gdp_cn, # share of GDP (LCU over LCU)
hce_pc=hce_share_aic*aic_pcap_pp, # per capita at PPPs for AIC
hce_lcu_alt=gdp_cn * ( pct_gdp / 100 ), # calculate from WHO's quoted HCE/GDP and WorldBank's GDP in LCU
hce_share_aic_alt = hce_lcu_alt / aic_cn, # share of AIC, alternate
hce_pc_alt = hce_share_aic_alt * aic_pcap_pp
)
icp_sha %>%
filter(
year==2017
) %>%
ggplot(aes(hce_share_aic,hce_share_aic_alt)) +
geom_abline(slope=1) +
geom_point(size=2,color='purple',alpha=0.6) +
theme_ipsum() +
geom_text_repel(aes(label=country_code)) +
labs(
title='Comparing two different methods of calculating SHA health spending',
y='Health share, numerator derived from WHO\'s estimated GDP share',
x='Health share, numerator derived from WHO\'s estimated expenditures in LCU'
) +
stat_poly_eq(size=6) +
scale_y_percent() +
scale_x_percent()

NA
myform <- y ~ poly(x, 2, raw = TRUE)
icp_sha %>%
filter(
year==2017,
aic_pcap_pp > 12000
) %>%
ggplot(aes(aic_pcap_pp,hce_share_aic)) +
geom_point(size=3,color='purple',alpha=0.6) +
theme_ipsum() +
theme(
panel.grid.minor = element_blank()
) +
geom_text_repel(aes(label=country_code),color='gray') +
scale_y_percent(breaks=seq(0,1,.02)) +
scale_x_continuous(breaks=seq(0,1e5,5000),label=scales::dollar_format()) +
geom_smooth(method=lm,formula='y~poly(x,2)',fullrange=F,se=T) +
stat_poly_eq(formula=myform,size=6) +
labs(
title='The health share of consumption rises rapidly with income',
x='2017 Actual Individual Consumption per capita',
y='2017 Current health expenditures as share of consumption (AIC)',
caption=paste(
'source: World Health Organization Global Health Expenditure database',
'and World Bank International Comparison Project',
sep='\n'
)
) +
coord_cartesian(expand=T)

NA
NA
icp_sha %>%
filter(
year==2017,
aic_pcap_pp > 15000
) %>%
ggplot(aes(aic_pcap_pp,hce_pc)) +
geom_point(size=2,color='purple',alpha=0.6) +
theme_ipsum() +
theme(
panel.grid.minor = element_blank()
) +
geom_text_repel(aes(label=country_code)) +
scale_x_log10(
label=scales::dollar_format(accuracy=1),
breaks=seq(0,50000,5000),
sec.axis=sec_axis(
~log(.),
breaks=seq(0,20,.1),
name='log AIC/person'
)
) +
scale_y_log10(
label=scales::dollar_format(accuracy=1),
breaks=seq(0,50000,1000),
sec.axis=sec_axis(
~log(.),
breaks=seq(0,20,.1),
name='log hce/person'
)
) +
geom_smooth(method=lm,formula=y~x) +
stat_poly_eq(aes(label = paste(stat(eq.label),
stat(rr.label), sep = "~~~~")),
formula = y ~ x, rr.digits = 2, coef.digits = 3,size=6,
parse = TRUE) +
labs(
title='The health share of consumption rises rapidly with income',
x='Actual Individual Consumption per capita',
y='Current health expenditures per capita at PPPs for AIC'
) +
coord_cartesian(expand=T)

icp_sha %>%
filter(
year==2017,
!country_code %in% c('MRT'), # Mauritania is probably an error and just clutters this
T
) %>%
ggplot(aes(log(aic_pcap_pp),log(hce_pc) )) +
geom_point(size=2,color='purple',alpha=0.6) +
theme_ipsum() +
theme(
) +
scale_x_continuous(breaks=seq(1,20,.5)) +
scale_y_continuous(breaks=seq(1,20,.5)) +
geom_smooth(method=lm,se=F,formula=y~x) +
geom_smooth(formula=y~poly(x,2),method=lm,color='black',se=T) +
stat_poly_eq(aes(label = paste(stat(eq.label),
stat(rr.label), sep = "~~~~")),
formula = y ~ x, rr.digits = 2, coef.digits = 3,size=6,
parse = TRUE) +
labs(
title='The slope is not quite linear even in log-log terms',
subtitle='(it increases at an increasing rate as income increases)',
x='log AIC per capita, PPP-adjusted',
y='log HCE per capita, PPP-adjusted'
) +
coord_cartesian(expand=T)

---
title: "AIC and SHA health expenditure comparison"
output:
  html_notebook:
    code_folding: hide
---





```{r message=FALSE, warning=FALSE, paged.print=FALSE}
library(tidyverse)
library(reshape2)
library(janitor)
library(readxl)
library(countrycode)
library(scales)
library(hrbrthemes)
library(ggrepel)
library(ggpmisc)

```



I queried and downloaded data from [WHO's Global Health Expenditure database](https://apps.who.int/nha/database/Select/Indicators/en) and [World Bank International Comparison Project](https://databank.worldbank.org/2017-AIC-and-GDP-and-LCU-and-per-capita-PPPs/id/ea31b157).  The WHO provides current health expenditure (HCE) estimates in local currency units and as a share of GDP.  I download them both as a cross-check just in case there are any major discrepancies in currency units quoted by WHO versus the local currency units provided by the ICP for AIC and GDP.





```{r message=FALSE, warning=FALSE}



# from WHO, import SHA current health expenditures in millions of local currency units
# and as share of GDP. These can be used to cross-check the results just 
# in case there's any issue with the currency the figures are denominated.

sha_health=read_xlsx(
    "NHA indicators.xlsx",
    skip=2,
    col_names = c("country","indicator","units",sprintf("%d",2000:2019))
  ) %>%
  mutate(
    type=case_when(
      str_detect(indicator,"as %") ~ 'pct_gdp',
      str_detect(indicator,"^Gross Domestic Product") ~ 'gdp_lcu',
      T ~ 'hce_lcu'
    )
  ) %>%
  select( -indicator,-units) %>%
  melt(id.vars= c("country","type")) %>%
  mutate(
    year=as.integer(as.character(variable))
  ) %>%
  dcast(country+year~type) %>%
  mutate(
    country_code=countrycode(country,'country.name','iso3c'),
    country_code=coalesce(country_code,ifelse(country=='Türkiye','TUR',NA)), # fix turkey's country code
    hce_lcu=hce_lcu/1000, # billions
    gdp_lcu=gdp_lcu/1000  # billions
  ) %>%
  rename( who_country = country )


# From WorldBank International Comparison Project
#  Import AIC and GDP in PPP adjusted per capita and as aggregates of local currency units

icpraw=read_csv("icp_2011_through_2017.csv",show_col_types =F) %>%
  clean_names() %>%
  mutate(
    wb_country=country_name,
    country_code,
    series=ifelse(str_detect(series_name,'ACTUAL INDIVIDUAL CONSUMPTION'),'AIC','GDP'),
    variable=sprintf("%s_%s",series,classification_code),
    year=as.integer(time)
  ) %>%
  filter(
    !is.na(value),
    !is.na(country_code)
  ) %>%
  dcast(country_code+wb_country+year~variable,value.var='value') %>%
  clean_names()



icp_sha=merge(sha_health,icpraw,by=c('year','country_code'),all=T) %>%
  mutate(
    hce_share_aic=hce_lcu/aic_cn,                # share of AIC (LCU over LCU)
    hce_share_gdp=hce_lcu/gdp_cn,                # share of GDP (LCU over LCU)
    hce_pc=hce_share_aic*aic_pcap_pp,            # per capita at PPPs for AIC
    hce_lcu_alt=gdp_cn * ( pct_gdp / 100 ),      # calculate from WHO's quoted HCE/GDP and WorldBank's GDP in LCU
    hce_share_aic_alt = hce_lcu_alt / aic_cn,    # share of AIC, alternate
    hce_pc_alt = hce_share_aic_alt * aic_pcap_pp
  )



```


```{r fig.height=8, fig.width=8, message=FALSE, warning=FALSE, paged.print=FALSE}

icp_sha %>%
  filter(
    year==2017
  ) %>%
  ggplot(aes(hce_share_aic,hce_share_aic_alt)) +
    geom_abline(slope=1) +
    geom_point(size=2,color='purple',alpha=0.6) +
    theme_ipsum() +
    geom_text_repel(aes(label=country_code)) +
    labs(
      title='Comparing two different methods of calculating SHA health spending',
      y='Health share, numerator derived from WHO\'s estimated GDP share',
      x='Health share, numerator derived from WHO\'s estimated expenditures in LCU'
    ) +
    stat_poly_eq(size=6) +
    scale_y_percent() +
    scale_x_percent()
    
```





```{r fig.height=7, fig.width=9, message=FALSE, warning=FALSE}


myform <- y ~ poly(x, 2, raw = TRUE)


icp_sha %>%
  filter(
    year==2017,
    aic_pcap_pp > 12000
  ) %>%
  ggplot(aes(aic_pcap_pp,hce_share_aic)) +
    geom_point(size=3,color='purple',alpha=0.6) +
    theme_ipsum() +
    theme(
      panel.grid.minor = element_blank()
    ) +
    geom_text_repel(aes(label=country_code),color='gray') +
    scale_y_percent(breaks=seq(0,1,.02)) +
    scale_x_continuous(breaks=seq(0,1e5,5000),label=scales::dollar_format()) +
    geom_smooth(method=lm,formula='y~poly(x,2)',fullrange=F,se=T) +
    stat_poly_eq(formula=myform,size=6) +
    labs(
      title='The health share of consumption rises rapidly with income',
      x='2017 Actual Individual Consumption per capita',
      y='2017 Current health expenditures as share of consumption (AIC)',
      caption=paste(
        'source: World Health Organization Global Health Expenditure database',
        'and World Bank International Comparison Project',
        sep='\n'
      )
    ) +
    coord_cartesian(expand=T)
    

```




```{r fig.height=9, fig.width=9, message=FALSE, warning=FALSE, paged.print=FALSE}


icp_sha %>%
  filter(
    year==2017,
    aic_pcap_pp > 15000
  ) %>%
  ggplot(aes(aic_pcap_pp,hce_pc)) +
    geom_point(size=2,color='purple',alpha=0.6) +
    theme_ipsum() +
    theme(
      panel.grid.minor = element_blank()
    ) +  
    geom_text_repel(aes(label=country_code)) +
    scale_x_log10(
      label=scales::dollar_format(accuracy=1),
      breaks=seq(0,50000,5000),      
      sec.axis=sec_axis(
        ~log(.),
        breaks=seq(0,20,.1),
        name='log AIC/person'
      )
    ) +
    scale_y_log10(
      label=scales::dollar_format(accuracy=1),
      breaks=seq(0,50000,1000),
      sec.axis=sec_axis(
        ~log(.),
        breaks=seq(0,20,.1),
        name='log hce/person'
      )
      
    ) +
    geom_smooth(method=lm,formula=y~x) +
    stat_poly_eq(aes(label =  paste(stat(eq.label),
                                    stat(rr.label), sep = "~~~~")),
                 formula = y ~ x, rr.digits = 2, coef.digits = 3,size=6,
                 parse = TRUE) +  
    labs(
      title='The health share of consumption rises rapidly with income',
      x='Actual Individual Consumption per capita',
      y='Current health expenditures per capita at PPPs for AIC'
    ) +
    coord_cartesian(expand=T)

```




```{r fig.height=8, fig.width=8, message=FALSE, warning=FALSE}

icp_sha %>%
  filter(
    year==2017,
    !country_code %in% c('MRT'),  # Mauritania is probably an error and just clutters this
    T
  ) %>%
  ggplot(aes(log(aic_pcap_pp),log(hce_pc) )) +
    geom_point(size=2,color='purple',alpha=0.6) +
    theme_ipsum() +
    theme(
    ) +    
    scale_x_continuous(breaks=seq(1,20,.5)) +
    scale_y_continuous(breaks=seq(1,20,.5)) +
  
    geom_smooth(method=lm,se=F,formula=y~x) +
    geom_smooth(formula=y~poly(x,2),method=lm,color='black',se=T) +
    stat_poly_eq(aes(label =  paste(stat(eq.label),
                                    stat(rr.label), sep = "~~~~")),
                 formula = y ~ x, rr.digits = 2, coef.digits = 3,size=6,
                 parse = TRUE) +  
    labs(
      title='The slope is not quite linear even in log-log terms',
      subtitle='(it increases at an increasing rate as income increases)',
      x='log AIC per capita, PPP-adjusted',
      y='log HCE per capita, PPP-adjusted'
    ) +
    coord_cartesian(expand=T)
```