Packages.

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(ggplot2)
library(reshape2)

## 
## Attaching package: 'reshape2'

## The following object is masked from 'package:tidyr':
## 
##     smiths

Load data.

df <- read.csv("insurance.csv")
colnames(df)[1] <- "Year"

Remove commas.

df_cleaned <- as.data.frame(lapply(df, function(x) gsub(",+","",x)))
df_cleaned <- as.data.frame(lapply(df_cleaned, function(x) as.numeric(as.character(x))))

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

## Warning in FUN(X[[i]], ...): NAs introduced by coercion

str(df_cleaned)

## 'data.frame':    56 obs. of  38 variables:
##  $ Year                                            : num  1960 1961 1962 1963 1964 ...
##  $ Total.National.Health.Expenditures              : num  27214 29138 31842 34595 38394 ...
##  $ Out.of.pocket                                   : num  12949 13357 14255 15311 16928 ...
##  $ Health.Insurance                                : num  7497 8236 8999 9892 10971 ...
##  $ Private.Health.Insurance                        : num  5812 6468 7178 7952 9052 ...
##  $ Medicare                                        : num  NA NA NA NA NA ...
##  $ Medicaid..Title.XIX.                            : num  NA NA NA NA NA ...
##  $ Medicaid..Title.XIX..Federal                    : num  NA NA NA NA NA ...
##  $ Medicaid..Title.XIX..State.and.Local            : num  NA NA NA NA NA ...
##  $ CHIP..Title.XIX.and.Title.XXI.                  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ CHIP..Title.XIX.and.Title.XXI..Federal          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ CHIP..Title.XIX.and.Title.XXI..State.and.Local  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Department.of.Defense                           : num  788 836 860 925 861 ...
##  $ Department.of.Veterans.Affairs                  : num  897 933 961 1015 1058 ...
##  $ Other.Third.Party.Payers.and.Programs           : num  3889 4329 4703 5206 5659 ...
##  $ Worksite.Health.Care                            : num  62 66 71 78 87 97 110 128 147 171 ...
##  $ Other.Private.Revenues                          : num  295 330 365 421 486 ...
##  $ Indian.Health.Services                          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Workers..Compensation                           : num  615 644 705 742 807 ...
##  $ General.Assistance                              : num  102 110 102 103 112 122 182 240 299 359 ...
##  $ Maternal.Child.Health                           : num  116 172 180 201 220 244 291 324 377 435 ...
##  $ Maternal.Child.Health.Federal                   : num  27 44 48 50 60 80 114 142 167 190 ...
##  $ Maternal.Child.Health.State.and.Local           : num  89 127 133 150 159 164 176 182 210 244 ...
##  $ Vocational.Rehabilitation                       : num  20 22 25 29 33 41 58 88 115 131 ...
##  $ Vocational.RehabilitationFederal                : num  12 14 15 18 20 27 42 66 86 100 ...
##  $ Vocational.Rehabilitation.State.and.Local       : num  8 8 10 11 13 15 16 22 29 31 ...
##  $ Other.Federal.Programs.                         : num  337 447 598 715 750 831 653 463 489 621 ...
##  $ SAMHSA                                          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Other.State.and.Local.Programs..                : num  2221 2399 2510 2773 3013 ...
##  $ School.Health                                   : num  122 140 147 143 151 168 187 215 242 263 ...
##  $ Public.Health.Activity                          : num  371 409 456 509 572 ...
##  $ Public.Health.Activity.Federal                  : num  102 108 140 208 226 214 294 401 470 564 ...
##  $ Public.Health.Activity.State.and.Local          : num  269 302 316 301 347 407 439 460 499 610 ...
##  $ Investment                                      : num  2508 2806 3427 3677 4264 ...
##  $ Research                                        : num  694 886 1068 1219 1366 ...
##  $ Structures...Equipment                          : num  1814 1921 2360 2458 2898 ...
##  $ Total.CMS.Programs..Medicaid..CHIP.and.Medicare.: num  NA NA NA NA NA ...
##  $ POPULATION                                      : num  186 189 192 195 197 200 202 204 206 208 ...

Total health expenditures over time.

plot1 <- ggplot(df_cleaned, aes(Year, Total.National.Health.Expenditures)) + geom_area(fill="#228B22")
last_plot() + ggtitle("Total National Healthcare Expenditures") + labs(x="Year",y="Amount")

Does this at all correlate with population growth?

df_shaved <- select(df_cleaned, Year, Total.National.Health.Expenditures, POPULATION)
colnames(df_shaved) <- c("Year", "National_Health_Expenditures", "Population")

Scale expenditures to billions.

df_shaved$National_Health_Expenditures <-  df_shaved$National_Health_Expenditures/1000

Reshape.

df_tall <- melt(df_shaved, id="Year")
colnames(df_tall) <- c("Year", "Statistic", "Value")

Plot.

plot2 <- ggplot(df_tall, aes(Year, Value, color=Statistic)) + geom_line() + scale_color_manual(labels=c("Healthcare Expenditures ($B)", "Population (M)"), values=c("#228B22", "#4169E1"))
last_plot() + ggtitle(bquote(atop(.("National Health Expenditures and Population"))))  + labs(x="Year",y="Amount")

Expenditures are in billions, population in millions. Hm, not so correlated.

To be more accurate, we can look at growth rates.

growth <- function(x) x/lag(x)-1

df_growth <- df_shaved %>% mutate(National_Health_Expenditures_Change = growth(National_Health_Expenditures))
df_growth <- df_growth %>% mutate(Population_Change = growth(Population))
df_growth_shaved <- select(df_growth, Year, National_Health_Expenditures_Change, Population_Change)

df_growth_tall <- melt(df_growth_shaved, id="Year")
colnames(df_growth_tall) <- c("Year", "Statistic", "Value")

plot3 <- ggplot(na.omit(df_growth_tall), aes(Year, Value, color=Statistic)) + geom_line() + scale_color_manual(labels=c("% Change in Expenditures", "% Change in Population"), values=c("#228B22", "#4169E1"))

last_plot() + ggtitle("Growth in Healthcare Expenditures and Population")  + labs(x="Year",y="Amount")

So our hunch was totally correct. Interestingly though, the growth rate of expenditures was by far the highest around 1980, and then decreased for a while. We’ll come back to what happened there later.

We can make overlayed graphs for each line item, first in absolute terms and then in percentage change.

df_all_tall <- melt(df_cleaned, id="Year")
colnames(df_all_tall) <- c("Year", "Statistic", "Value")

plot4 <- ggplot(df_all_tall, aes(Year, Value, color=Statistic)) + geom_line()
last_plot() + ggtitle("Hi")

## Warning: Removed 174 rows containing missing values (geom_path).

To decide which ones to use, we’ll make a threshold / cutoff - anything below 2 standard deviations from the mean.

means <- data.frame(Variable = colnames(df_cleaned), Mean = colMeans(na.omit(df_cleaned)))
quantiles <- quantile(means$Mean)

Here’s what lies in the 75th percentile.

top75 <- filter(means, Mean >= quantiles[4])
top75names <- as.vector(as.character(top75$Variable))

top75names

##  [1] "Total.National.Health.Expenditures"              
##  [2] "Out.of.pocket"                                   
##  [3] "Health.Insurance"                                
##  [4] "Private.Health.Insurance"                        
##  [5] "Medicare"                                        
##  [6] "Medicaid..Title.XIX."                            
##  [7] "Medicaid..Title.XIX..Federal"                    
##  [8] "Medicaid..Title.XIX..State.and.Local"            
##  [9] "Other.Third.Party.Payers.and.Programs"           
## [10] "Total.CMS.Programs..Medicaid..CHIP.and.Medicare."

Remove 1 and 10, as they’re total figures.

df_shaved <- select(df_cleaned, Year, one_of(top75names[2:9]))
df_tall <- melt(df_shaved, id="Year")
colnames(df_tall) <- c("Year", "Statistic", "Value")

plot5 <- ggplot(na.omit(df_tall), aes(Year, Value, color=Statistic)) + geom_line()
last_plot() + ggtitle("Top Line Items") + labs(x="Year",y="Amount")

More legible.

We can also look at average growth rate over the years. We’ll find the growth rate for all first to see if anything is out of the norm, then separate into groups.

df_all_growth <- as.data.frame(lapply(df_cleaned, function(x) growth(x)))

Now, the annualized growth rate over the period should be the average of each column.

growth_means <- colMeans(df_all_growth, 1)

#Select the top 10 to graph
df_growth_graph1 <- data.frame(Statistic = colnames(df_cleaned), Growth = growth_means)
df_growth_top10 <- arrange(df_growth_graph1, desc(Growth))[1:10,]

plot6 <- ggplot(df_growth_top10, aes(reorder(Statistic, -Growth), Growth, fill=Statistic)) + geom_bar(stat="identity") + theme(axis.text.x=element_text(angle=45, hjust=1)) + guides(fill=FALSE)
last_plot() + labs(title="Top 10 Annualized Growth Rates", x="", y="Growth")

What are these top 3 items that are growing much faster than everything else? CHIP stands for Children’s Health Insurance Program. It gives low cost coverage to kids who’s families make too much to quality for medicaid.

top_3_growth <- select(df_cleaned, Year, CHIP..Title.XIX.and.Title.XXI..Federal, CHIP..Title.XIX.and.Title.XXI., CHIP..Title.XIX.and.Title.XXI..State.and.Local)

The first year this thing came into being is:

top_3_growth[!is.na(top_3_growth$CHIP..Title.XIX.and.Title.XXI.),][1,"Year"]

## [1] 1998

And apparently it has been growing rather quickly. But, at least as of 2015, the total for the program is tiny compared to other line items here.

mean(na.omit(top_3_growth$CHIP..Title.XIX.and.Title.XXI.))

## [1] 8449.167

But it’s definitely something to keep an eye on.

Otherwise, the growth rates of all of our items don’t actually vary that much.

df_normal_growth <- df_cleaned %>% select(-CHIP..Title.XIX.and.Title.XXI., -CHIP..Title.XIX.and.Title.XXI..Federal, -CHIP..Title.XIX.and.Title.XXI..State.and.Local)
df_normal_growth <- as.data.frame(lapply(na.omit(df_normal_growth), function(x) growth(x)))
normal_growth_means <- colMeans(df_normal_growth,1)

#Standard deviation of growth rates
sd(normal_growth_means)

## [1] 0.02792628

It makes sense to look at things in terms of groups: e.x. (private + medicare + medicaid), (federal + state and local).

str(df_cleaned)

## 'data.frame':    56 obs. of  38 variables:
##  $ Year                                            : num  1960 1961 1962 1963 1964 ...
##  $ Total.National.Health.Expenditures              : num  27214 29138 31842 34595 38394 ...
##  $ Out.of.pocket                                   : num  12949 13357 14255 15311 16928 ...
##  $ Health.Insurance                                : num  7497 8236 8999 9892 10971 ...
##  $ Private.Health.Insurance                        : num  5812 6468 7178 7952 9052 ...
##  $ Medicare                                        : num  NA NA NA NA NA ...
##  $ Medicaid..Title.XIX.                            : num  NA NA NA NA NA ...
##  $ Medicaid..Title.XIX..Federal                    : num  NA NA NA NA NA ...
##  $ Medicaid..Title.XIX..State.and.Local            : num  NA NA NA NA NA ...
##  $ CHIP..Title.XIX.and.Title.XXI.                  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ CHIP..Title.XIX.and.Title.XXI..Federal          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ CHIP..Title.XIX.and.Title.XXI..State.and.Local  : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Department.of.Defense                           : num  788 836 860 925 861 ...
##  $ Department.of.Veterans.Affairs                  : num  897 933 961 1015 1058 ...
##  $ Other.Third.Party.Payers.and.Programs           : num  3889 4329 4703 5206 5659 ...
##  $ Worksite.Health.Care                            : num  62 66 71 78 87 97 110 128 147 171 ...
##  $ Other.Private.Revenues                          : num  295 330 365 421 486 ...
##  $ Indian.Health.Services                          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Workers..Compensation                           : num  615 644 705 742 807 ...
##  $ General.Assistance                              : num  102 110 102 103 112 122 182 240 299 359 ...
##  $ Maternal.Child.Health                           : num  116 172 180 201 220 244 291 324 377 435 ...
##  $ Maternal.Child.Health.Federal                   : num  27 44 48 50 60 80 114 142 167 190 ...
##  $ Maternal.Child.Health.State.and.Local           : num  89 127 133 150 159 164 176 182 210 244 ...
##  $ Vocational.Rehabilitation                       : num  20 22 25 29 33 41 58 88 115 131 ...
##  $ Vocational.RehabilitationFederal                : num  12 14 15 18 20 27 42 66 86 100 ...
##  $ Vocational.Rehabilitation.State.and.Local       : num  8 8 10 11 13 15 16 22 29 31 ...
##  $ Other.Federal.Programs.                         : num  337 447 598 715 750 831 653 463 489 621 ...
##  $ SAMHSA                                          : num  NA NA NA NA NA NA NA NA NA NA ...
##  $ Other.State.and.Local.Programs..                : num  2221 2399 2510 2773 3013 ...
##  $ School.Health                                   : num  122 140 147 143 151 168 187 215 242 263 ...
##  $ Public.Health.Activity                          : num  371 409 456 509 572 ...
##  $ Public.Health.Activity.Federal                  : num  102 108 140 208 226 214 294 401 470 564 ...
##  $ Public.Health.Activity.State.and.Local          : num  269 302 316 301 347 407 439 460 499 610 ...
##  $ Investment                                      : num  2508 2806 3427 3677 4264 ...
##  $ Research                                        : num  694 886 1068 1219 1366 ...
##  $ Structures...Equipment                          : num  1814 1921 2360 2458 2898 ...
##  $ Total.CMS.Programs..Medicaid..CHIP.and.Medicare.: num  NA NA NA NA NA ...
##  $ POPULATION                                      : num  186 189 192 195 197 200 202 204 206 208 ...

Groups to check out: • Private / Medicare / Medicaid

df_group1 <- df_cleaned %>% select(Year, Private.Health.Insurance, Medicare, Medicaid..Title.XIX.) %>% mutate(Total = Private.Health.Insurance + Medicare + Medicaid..Title.XIX.) %>% mutate(Private.Health.Insurance.Share = Private.Health.Insurance / Total, Medicare.Share = Medicare / Total, Medicaid..Title.XIX.Share = Medicaid..Title.XIX. / Total) %>% select(Year, Private.Health.Insurance.Share, Medicare.Share, Medicaid..Title.XIX.Share)

First we can plot how these shares shifted over time, and the a pie chart of the average over the time period.

df_group1_tall <- melt(df_group1, id="Year")
colnames(df_group1_tall) <- c("Year", "Item", "Share")

plot7 <- ggplot(na.omit(df_group1_tall), aes(Year, Share, color=Item)) + geom_line()
last_plot() + scale_color_manual(name="Insurance Type", labels=c("Private", "Medicare", "Medicaid"),values=c("#228B22", "#4169E1","#ff0000")) + ggtitle("Share of Total Expenditures by Insurance Type")

Average over the period:

shares <- data.frame(Type = colnames(df_group1)[2:4], Share = colMeans(df_group1, 1)[2:4])

#Plot
plot8 <- ggplot(shares, aes("", Share, fill=Type)) + geom_bar(width = 1, stat="identity")
plot8 + coord_polar(theta = "y") + scale_fill_manual(name="Insurance Type", labels=c("Medicaid", "Medicare", "Private"),values=c("#ff0000", "#4169E1", "#228B22")) + ggtitle("Share of Total Expenditures by Insurance Type") + theme(axis.text = element_blank(), axis.ticks = element_blank(), panel.grid  = element_blank(), axis.title.x = element_blank(), axis.title.y = element_blank())

Summary: • Things growing very fast • Expenditures growing much faster than population • Expenditures growth not even, around 15% in the 80’s, has been increasing lately • Aside from 3 small programs, everything growing at a similar rate • Medicare / Medicaid have been increasing their share of total insurance expenditures