In recent years, political figures have debated what type of health insurance American citizens should have. Often, the debate is whether health insurance should remain private and be paid out of pocket or through a paycheck, or whether it should be public through a government marketplace. The maps I will be looking at show the percentage change in uninsured people ages 18 through 64 between the end of President Obama’s first term (2012) and the end of his second term (2016). I want to see how the percentage of uninsured people changed after the enactment of the Affordable Care Act, which was signed into law by President Obama on March 23, 2010. The data on the percentage of uninsured people by county are from Social Explorer.

Loading packages

library(tidyverse)
library(dplyr)
library(sf)
library(tmap)
library(tigris)
library(spdep)
library(tidycensus)

Reading in map file

# Ask tigris to return its downloads as sf objects.
options(tigris_class = "sf")
# County polygons come from tigris' cartographic-boundary file (cb = TRUE).
# The previous st_read() of a local shapefile was dropped: its result was
# overwritten by this call before being used, and it tied the script to a
# machine-specific path.
counties <- counties(cb = TRUE)

Reading in social explorer data

library(readr)
# 2016 Social Explorer extract: expose the county FIPS code as the join key
# `fips` and copy SE_T006_002 into `health2016`.
healthinsurance2016 <- dplyr::mutate(
  read_csv(
    "C:/Users/abbys/Downloads/R12140862_SL050.csv",
    col_types = cols(
      Geo_COUNTY = col_number(),
      Geo_FIPS = col_integer(),
      STATEFP = col_number()
    )
  ),
  fips = Geo_FIPS,
  health2016 = SE_T006_002
)

# 2012 Social Explorer extract: same join key, with SE_T006_001 copied into
# `health2012`.
healthinsurance2012 <- dplyr::mutate(
  read_csv(
    "C:/Users/abbys/Downloads/R12140759_SL050.csv",
    col_types = cols(
      Geo_COUNTY = col_number(),
      Geo_FIPS = col_integer(),
      STATEFP = col_number()
    )
  ),
  fips = Geo_FIPS,
  health2012 = SE_T006_001
)

Combining both social explorer files to each other and to the map file; cleaning data

# Attach the 2012 columns to the 2016 rows, matching on the county FIPS key.
combo_health <- healthinsurance2016 %>%
  left_join(healthinsurance2012, by = "fips")
head(combo_health)
## # A tibble: 6 x 23
##   Geo_FIPS.x Geo_NAME.x Geo_QNAME.x Geo_STATE.x Geo_COUNTY.x SE_T006_001.x
##        <int> <chr>      <chr>       <chr>              <dbl>         <dbl>
## 1       1001 Autauga C~ Autauga Co~ 01                     1          3.86
## 2       4001 Apache Co~ Apache Cou~ 04                     1         14.6 
## 3       5001 Arkansas ~ Arkansas C~ 05                     1          6.26
## 4       6001 Alameda C~ Alameda Co~ 06                     1          5.95
## 5       8001 Adams Cou~ Adams Coun~ 08                     1         10.7 
## 6       9001 Fairfield~ Fairfield ~ 09                     1          5.98
## # ... with 17 more variables: SE_T006_002.x <dbl>, SE_T006_003 <dbl>,
## #   SE_NV005_001 <dbl>, SE_NV005_002 <dbl>, SE_NV005_003 <dbl>,
## #   fips <int>, health2016 <dbl>, Geo_FIPS.y <int>, Geo_NAME.y <chr>,
## #   Geo_QNAME.y <chr>, Geo_STATE.y <chr>, Geo_COUNTY.y <dbl>,
## #   SE_T006_001.y <dbl>, SE_T006_002.y <dbl>, SE_NV007_001 <dbl>,
## #   SE_NV007_002 <dbl>, health2012 <dbl>
# Derive an integer `fips` key from GEOID so it matches the integer key in
# the Social Explorer tables.
counties <- counties %>%
  dplyr::mutate(fips = parse_integer(GEOID))
# Attach the insurance data to the county polygons. The earlier call passed
# `counties` a second time; with the pipe supplying the first argument, that
# extra positional argument landed in left_join()'s `copy` parameter.
combined_data <- counties %>%
  left_join(combo_health, by = "fips")

# Restrict the map to the contiguous United States by dropping these state
# FIPS codes: 02 (Alaska), 15 (Hawaii), 60 (American Samoa), 66 (Guam),
# 69 (Northern Mariana Islands), 72 (Puerto Rico), 78 (U.S. Virgin Islands).
combined_data2 <- filter(
  combined_data,
  STATEFP != "02",
  STATEFP != "15",
  STATEFP != "60",
  STATEFP != "66",
  STATEFP != "69",
  STATEFP != "72",
  STATEFP != "78"
)
head(combined_data2)
## Simple feature collection with 6 features and 32 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -102.042 ymin: 37.38839 xmax: -84.79633 ymax: 43.49961
## epsg (SRID):    4269
## proj4string:    +proj=longlat +datum=NAD83 +no_defs
##   STATEFP COUNTYFP COUNTYNS       AFFGEOID GEOID      NAME LSAD      ALAND
## 1      19      107 00465242 0500000US19107 19107    Keokuk   06 1500067253
## 2      19      189 00465283 0500000US19189 19189 Winnebago   06 1037261946
## 3      20      093 00485011 0500000US20093 20093    Kearny   06 2254696689
## 4      20      123 00485026 0500000US20123 20123  Mitchell   06 1817632928
## 5      20      187 00485055 0500000US20187 20187   Stanton   06 1762104518
## 6      21      005 00516849 0500000US21005 21005  Anderson   06  522745702
##     AWATER  fips Geo_FIPS.x       Geo_NAME.x               Geo_QNAME.x
## 1  1929323 19107      19107    Keokuk County       Keokuk County, Iowa
## 2  3182052 19189      19189 Winnebago County    Winnebago County, Iowa
## 3  1133601 20093      20093    Kearny County     Kearny County, Kansas
## 4 44979981 20123      20123  Mitchell County   Mitchell County, Kansas
## 5   178555 20187      20187   Stanton County    Stanton County, Kansas
## 6  6311537 21005      21005  Anderson County Anderson County, Kentucky
##   Geo_STATE.x Geo_COUNTY.x SE_T006_001.x SE_T006_002.x SE_T006_003
## 1          19          107      5.750408      13.06944   10.906204
## 2          19          189      4.407228      10.83502    9.065015
## 3          20           93     12.209302      24.63768   20.352134
## 4          20          123      6.365503      13.19063   11.120298
## 5          20          187     14.375000      31.14618   25.482625
## 6          21            5      6.876948      18.32197   15.047139
##   SE_NV005_001 SE_NV005_002 SE_NV005_003 health2016 Geo_FIPS.y Geo_NAME.y
## 1          141          766          893   13.06944      19107     Keokuk
## 2          100          641          732   10.83502      19189  Winnebago
## 3          147          544          682   24.63768      20093     Kearny
## 4           93          456          538   13.19063      20123   Mitchell
## 5           92          375          462   31.14618      20187    Stanton
## 6          375         2472         2809   18.32197      21005   Anderson
##                 Geo_QNAME.y Geo_STATE.y Geo_COUNTY.y SE_T006_001.y
## 1       Keokuk County, Iowa          19        19107          13.3
## 2    Winnebago County, Iowa          19        19189          10.8
## 3     Kearny County, Kansas          20        20093          25.4
## 4   Mitchell County, Kansas          20        20123          16.2
## 5    Stanton County, Kansas          20        20187          29.0
## 6 Anderson County, Kentucky          21        21005          17.6
##   SE_T006_002.y SE_NV007_001 SE_NV007_002 health2012
## 1          10.9          780          905       13.3
## 2           9.0          654          750       10.8
## 3          22.5          602          815       25.4
## 4          14.2          580          698       16.2
## 5          25.5          335          450       29.0
## 6          14.3         2343         2659       17.6
##                         geometry
## 1 MULTIPOLYGON (((-92.41199 4...
## 2 MULTIPOLYGON (((-93.97076 4...
## 3 MULTIPOLYGON (((-101.5419 3...
## 4 MULTIPOLYGON (((-98.49007 3...
## 5 MULTIPOLYGON (((-102.0419 3...
## 6 MULTIPOLYGON (((-85.16919 3...
library(tmaptools)
# Aggregate the county features by STATEFP; `states` is used below to draw
# state outlines on top of the county maps.
states <- aggregate_map(combined_data2, by = "STATEFP")

2012 and 2016 Maps

Looking at the maps for the 2 years, I see that in 2016, the map gets lighter (on the legend, 0-10=lightest) showing that there were fewer uninsured people in 2016 than in 2012.

2012

# County choropleth of `health2012` with state outlines and a "2012" panel
# label.
tm_shape(combined_data2, projection = 2163) +
  tm_polygons(
    "health2012",
    palette = "Reds",
    midpoint = 50,
    border.col = "beige"
  ) +
  tm_shape(states) +
  tm_borders(lwd = .36, col = "black", alpha = 1) +
  tm_layout(panel.show = TRUE) +
  tm_layout(
    legend.position = c("left", "bottom"),
    frame = FALSE,
    inner.margins = c(0.1, 0.1, 0.05, 0.05)
  ) +
  tm_layout(panel.labels = c("2012"))

2016

# County choropleth of `health2016` with state outlines and a "2016" panel
# label.
tm_shape(combined_data2, projection = 2163) +
  tm_polygons(
    "health2016",
    palette = "Reds",
    midpoint = 50,
    border.col = "beige"
  ) +
  tm_shape(states) +
  tm_borders(lwd = .36, col = "black", alpha = 1) +
  tm_layout(panel.show = TRUE) +
  tm_layout(
    legend.position = c("left", "bottom"),
    frame = FALSE,
    inner.margins = c(0.1, 0.1, 0.05, 0.05)
  ) +
  tm_layout(panel.labels = c("2016"))

Non-spatial data visualization:

I am taking the mean of 2012’s uninsured percentages and 2016’s uninsured percentages to visualize and compare non-spatially.

According to the bar graph below, the mean county uninsured rate was about 1 percentage point higher in 2012 than in 2016. This isn’t a large difference, and the bar graph doesn’t show specifically where the changes occurred throughout the country.

library(ggplot2)
# Unweighted mean of the county-level uninsured percentages for each year,
# stored on every row of the county table.
combined_data2a <- combined_data2 %>%
  dplyr::mutate(
    health2016mean = mean(combined_data2$health2016, na.rm = TRUE),
    health2012mean = mean(combined_data2$health2012, na.rm = TRUE)
  )

# Build the bar-chart data from the computed means instead of hand-copied
# constants, so the chart stays in sync with the data that was loaded.
temp <- data.frame(
  name = c("Health 2012", "Health 2016"),
  value = c(
    mean(combined_data2$health2012, na.rm = TRUE),
    mean(combined_data2$health2016, na.rm = TRUE)
  )
)
ggplot(temp, aes(name, value)) +
  geom_col(fill = "pink") +
  xlab("Year") +
  ylab("Mean % of people uninsured")

Non-spatial difference between 2012 and 2016:

Here, I am subtracting 2012 from 2016 to see how large the differences are by county and state.

library(dplyr)
# Per-county change in the uninsured percentage, computed as 2016 minus 2012
# (negative values mean a lower uninsured rate in 2016). The result remains
# grouped by GEOID.
combined_data3 <- dplyr::mutate(
  group_by(combined_data2, GEOID),
  Uninsured_Diff = (health2016 - health2012)
)
head(combined_data3)
## Simple feature collection with 6 features and 33 fields
## geometry type:  MULTIPOLYGON
## dimension:      XY
## bbox:           xmin: -102.042 ymin: 37.38839 xmax: -84.79633 ymax: 43.49961
## epsg (SRID):    4269
## proj4string:    +proj=longlat +datum=NAD83 +no_defs
## # A tibble: 6 x 34
## # Groups:   GEOID [6]
##   STATEFP COUNTYFP COUNTYNS AFFGEOID GEOID NAME  LSAD   ALAND AWATER  fips
##   <chr>   <chr>    <chr>    <chr>    <chr> <chr> <chr>  <dbl>  <dbl> <int>
## 1 19      107      00465242 0500000~ 19107 Keok~ 06    1.50e9 1.93e6 19107
## 2 19      189      00465283 0500000~ 19189 Winn~ 06    1.04e9 3.18e6 19189
## 3 20      093      00485011 0500000~ 20093 Kear~ 06    2.25e9 1.13e6 20093
## 4 20      123      00485026 0500000~ 20123 Mitc~ 06    1.82e9 4.50e7 20123
## 5 20      187      00485055 0500000~ 20187 Stan~ 06    1.76e9 1.79e5 20187
## 6 21      005      00516849 0500000~ 21005 Ande~ 06    5.23e8 6.31e6 21005
## # ... with 24 more variables: Geo_FIPS.x <int>, Geo_NAME.x <chr>,
## #   Geo_QNAME.x <chr>, Geo_STATE.x <chr>, Geo_COUNTY.x <dbl>,
## #   SE_T006_001.x <dbl>, SE_T006_002.x <dbl>, SE_T006_003 <dbl>,
## #   SE_NV005_001 <dbl>, SE_NV005_002 <dbl>, SE_NV005_003 <dbl>,
## #   health2016 <dbl>, Geo_FIPS.y <int>, Geo_NAME.y <chr>,
## #   Geo_QNAME.y <chr>, Geo_STATE.y <chr>, Geo_COUNTY.y <dbl>,
## #   SE_T006_001.y <dbl>, SE_T006_002.y <dbl>, SE_NV007_001 <dbl>,
## #   SE_NV007_002 <dbl>, health2012 <dbl>, geometry <MULTIPOLYGON [°]>,
## #   Uninsured_Diff <dbl>

The histogram below shows the difference in uninsured people between 2012 and 2016. I see the non-spatial distribution of percentage differences in uninsured people between 2012 and 2016. Some percentage rate differences are greater and some are less. In some areas around the country, the difference can be a 10% increase in people uninsured, and in others, a 15% decrease in uninsured people. However, most areas show little to no change.

library(ggplot2)
# Histogram of the county-level 2016 - 2012 differences.
ggplot(combined_data3, aes(Uninsured_Diff, fill = "Uninsured_Diff")) +
  geom_histogram() +
  xlab("Difference in % of People Without Health Insurance in USA Between 2012 and 2016")

However, the non-spatial visuals are limited since they only show these differences numerically. I want to see where these differences appear by county and state, and perhaps find some trends on the maps.

Difference in uninsured people between 2012 and 2016, county level:

I want to see how each county changed between 2012 and 2016. Looking at the maps, I see that most counties had small changes. The largest extremes appear to be in the west (darkest greens and darkest reds).

# County choropleth of the 2016 - 2012 difference, with county outlines
# drawn from the full `counties` layer.
tm_shape(combined_data3, projection = 2163) +
  tm_polygons(
    "Uninsured_Diff",
    midpoint = -3,
    border.col = "grey",
    border.alpha = .5,
    title = "Percentage Difference by County"
  ) +
  tm_shape(counties) +
  tm_borders(lwd = .36, col = "grey", alpha = .6) +
  tm_layout(inner.margins = c(0.1, 0.1, 0.05, 0.05))

Adding state borders

Most of the country shows small changes by county (between -5 and 5%). However, some states west of the Mississippi show different trends than those in the east. It appears that Texas and Idaho show the greatest extremes.

# Same county difference map, this time overlaid with state outlines.
tm_shape(combined_data3, projection = 2163) +
  tm_polygons(
    "Uninsured_Diff",
    midpoint = 0,
    border.col = "grey",
    border.alpha = .5,
    title = "Percentage Difference by County"
  ) +
  tm_shape(states) +
  tm_borders(lwd = .36, col = "black", alpha = 1) +
  tm_layout(inner.margins = c(0.1, 0.1, 0.05, 0.05))

Texas

Several counties in Texas show large differences. Looking at some of the darkest red areas, I see that the largest differences by county are in approximately Kenedy County, Karnes County, and Concho County, each showing a decrease in uninsured people of 15 to 20%.

# Texas counties only. STATEFP is a character column, so `==` compares it
# against the numeric 48 after coercing 48 to "48".
tx <- subset(combined_data3, STATEFP == 48)
# `tx` is reused: from here on it holds the tmap object, not the data.
tx <- tm_shape(tx, projection = 2163) +
  tm_polygons(
    "Uninsured_Diff",
    id = "Geo_QNAME.x",
    midpoint = 30,
    border.col = "grey",
    border.alpha = 1,
    title = "Uninsured Difference %"
  ) +
  tm_text("NAME", size = "AREA") +
  tm_borders(lwd = 1, col = "black", alpha = .5) +
  tm_layout(legend.text.size = .5)

# Render the Texas map as an interactive leaflet widget.
tmap_leaflet(tx)

Idaho

Idaho is the only state that has a dark green county which is Clark County, showing a 10 to 15 % increase in people uninsured.

# Idaho counties only. STATEFP is a character column, so `==` compares it
# against the numeric 16 after coercing 16 to "16".
idaho <- subset(combined_data3, STATEFP == 16)

# `idaho` is reused: from here on it holds the tmap object, not the data.
idaho <- tm_shape(idaho) +
  tm_polygons(
    "Uninsured_Diff",
    id = "Geo_QNAME.x",
    midpoint = 30,
    border.col = "grey",
    border.alpha = 1,
    title = "Uninsured Difference %"
  ) +
  tm_text("NAME", size = "AREA") +
  tm_borders(lwd = .28, col = "black", alpha = 1) +
  tm_layout(legend.text.size = 2)
# Render the Idaho map as an interactive leaflet widget.
tmap_leaflet(idaho)

Spatial vs Non-Spatial Differences: Summary

Considering the United States as a whole, the change is relatively small. For example, when examining the maps on a county by county basis, I can see that there are some changes that cannot be seen on a non-spatial plot. The spatial maps allow me to localize these changes on a map and evaluate the significance and see where the changes occur.

The non-spatial plots give me a total picture of these changes but are limited in that they only display numerical evaluations; they do not show the entire picture and leave out part of the story, where the maps fill in the gaps.

Changing CB to FALSE

What happens when I change CB to false?

Changing CB to FALSE Example:

I see that the map below is much less detailed and there are counties added in bodies of water such as the Great Lakes, and the Long Island Sound is left out because Suffolk County is there instead. Therefore, I can conclude that changing cb to FALSE makes the map less detailed than cb = TRUE.

library(tidyverse)
library(dplyr)
library(sf)
library(tmap)
library(tigris)
library(spdep)
library(tidycensus)
# Ask tigris to return its downloads as sf objects.
options(tigris_class = "sf")
# County polygons from tigris with cb = FALSE, for comparison with the
# cb = TRUE maps above. The previous st_read() of a local shapefile was
# dropped: its result was overwritten by this call before being used, and it
# tied the script to a machine-specific path.
counties <- counties(cb = FALSE)
library(readr)
# 2016 Social Explorer extract: expose the county FIPS code as the join key
# `fips` and copy SE_T006_002 into `health2016`.
healthinsurance2016 <- dplyr::mutate(
  read_csv(
    "C:/Users/abbys/Downloads/R12140862_SL050.csv",
    col_types = cols(
      Geo_COUNTY = col_number(),
      Geo_FIPS = col_integer(),
      STATEFP = col_number()
    )
  ),
  fips = Geo_FIPS,
  health2016 = SE_T006_002
)

# 2012 Social Explorer extract: same join key, with SE_T006_001 copied into
# `health2012`.
healthinsurance2012 <- dplyr::mutate(
  read_csv(
    "C:/Users/abbys/Downloads/R12140759_SL050.csv",
    col_types = cols(
      Geo_COUNTY = col_number(),
      Geo_FIPS = col_integer(),
      STATEFP = col_number()
    )
  ),
  fips = Geo_FIPS,
  health2012 = SE_T006_001
)

# Attach the 2012 columns to the 2016 rows, matching on the county FIPS key.
combo_health <- left_join(healthinsurance2016, healthinsurance2012, by = "fips")

# Derive an integer `fips` key from GEOID so it matches the integer key in
# the Social Explorer tables.
counties <- counties %>%
  dplyr::mutate(fips = parse_integer(GEOID))
# Attach the insurance data to the county polygons. The earlier call passed
# `counties` a second time; with the pipe supplying the first argument, that
# extra positional argument landed in left_join()'s `copy` parameter.
combined_data <- counties %>%
  left_join(combo_health, by = "fips")

# Restrict the map to the contiguous United States by dropping these state
# FIPS codes: 02 (Alaska), 15 (Hawaii), 60 (American Samoa), 66 (Guam),
# 69 (Northern Mariana Islands), 72 (Puerto Rico), 78 (U.S. Virgin Islands).
combined_data2 <- filter(
  combined_data,
  STATEFP != "02",
  STATEFP != "15",
  STATEFP != "60",
  STATEFP != "66",
  STATEFP != "69",
  STATEFP != "72",
  STATEFP != "78"
)
library(tmaptools)
# Aggregate the county features by STATEFP for the state outlines.
states <- aggregate_map(combined_data2, by = "STATEFP")
# Per-county change in the uninsured percentage (2016 minus 2012); the
# result remains grouped by GEOID.
combined_data3 <- dplyr::mutate(
  group_by(combined_data2, GEOID),
  Uninsured_Diff = health2016 - health2012
)

# County difference map built from the cb = FALSE boundaries, with state
# outlines on top.
tm_shape(combined_data3, projection = 2163) +
  tm_polygons(
    "Uninsured_Diff",
    midpoint = 0,
    border.col = "grey"
  ) +
  tm_shape(states) +
  tm_borders(lwd = .36, col = "black", alpha = .4)