Notes

This report was generated on 2018-03-08 14:22:39. Output is available at https://graveja0.github.io/sccs

GitHub

The code and primary data for this project is available at https://github.com/graveja0/sccs.

Preparations

# from https://mran.revolutionanalytics.com/web/packages/checkpoint/vignettes/using-checkpoint-with-knitr.html
# if you don't need a package, remove it from here (commenting is probably not sufficient)
# tidyverse: see https://blog.rstudio.org/2016/09/15/tidyverse-1-0-0/

needed_packages <- c(
  "tidyverse","covr", "devtools", "rlang", "roxygen2", "shiny", "testthat",
  "purrr", "repurrrsive", "rstudioapi", "usethis",
  "rlang","MatchIt","Hmisc","rms","magrittr","stringr",
  "readxl","scales","jsonlite","forcats","lintr","haven",
  "janitor","stargazer","ggthemes","mice","VIM","DiagrammeR",
  "survminer","directlabels","knitr","usethis","devtools",
  "quantreg","rms","mice","rlang","designmatch","Matching","rgenoud",
  "cem","here","plotly","broom","patchwork"
)
source("./scripts/setup.R")
cat(paste0("library(",needed_packages,")\n"),file = "manifest.R")

# For .gitignore in ./analysis/
# .httr-oauth
# .google_API_keys
# *.Rproj
# *.Rproj.user
# *.Rhistory
# *.Rprofile
# *.html
# auth
# .~lock*
# manifest.R
# output/ignore/*
# input/ignore/*
# scripts/ignore/*

source("./scripts/checkpoint.R")
source("manifest.R")
unlink("manifest.R")
select <- dplyr::select
sessionInfo()

## R version 3.4.3 (2017-11-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.3
## 
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] tcltk     grid      stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] patchwork_0.0.1           broom_0.4.3              
##  [3] plotly_4.7.1              here_0.1                 
##  [5] cem_1.1.17                rgenoud_5.8-1.0          
##  [7] Matching_4.9-2            designmatch_0.3.0        
##  [9] Rglpk_0.6-3               slam_0.1-42              
## [11] MASS_7.3-48               quantreg_5.34            
## [13] knitr_1.19                directlabels_2017.03.31  
## [15] survminer_0.4.2           ggpubr_0.1.6             
## [17] DiagrammeR_0.9.2          VIM_4.7.0                
## [19] data.table_1.10.4-3       colorspace_1.3-2         
## [21] mice_2.46.0               ggthemes_3.4.0           
## [23] stargazer_5.2.1           janitor_0.3.1            
## [25] haven_1.1.1               lintr_1.0.2              
## [27] jsonlite_1.5              scales_0.5.0.9000        
## [29] readxl_1.0.0              magrittr_1.5             
## [31] rms_5.1-2                 SparseM_1.77             
## [33] Hmisc_4.1-1               Formula_1.2-2            
## [35] survival_2.41-3           lattice_0.20-35          
## [37] MatchIt_3.0.2             usethis_1.2.0            
## [39] rstudioapi_0.7            repurrrsive_0.1.0        
## [41] testthat_2.0.0            shiny_1.0.5              
## [43] roxygen2_6.0.1            rlang_0.1.6.9003         
## [45] devtools_1.13.4           covr_3.0.1               
## [47] forcats_0.2.0             stringr_1.2.0            
## [49] dplyr_0.7.4               purrr_0.2.4              
## [51] readr_1.1.1               tidyr_0.8.0              
## [53] tibble_1.4.2              ggplot2_2.2.1.9000       
## [55] tidyverse_1.2.1           sccs.functions_0.0.0.9000
## 
## loaded via a namespace (and not attached):
##  [1] backports_1.1.2     plyr_1.8.4          igraph_1.1.2       
##  [4] lazyeval_0.2.1      sp_1.2-7            splines_3.4.3      
##  [7] TH.data_1.0-8       digest_0.6.15       htmltools_0.3.6    
## [10] viridis_0.5.0       checkmate_1.8.5     memoise_1.1.0      
## [13] cluster_2.0.6       modelr_0.1.1        sandwich_2.4-0     
## [16] rvest_0.3.2         crayon_1.3.4        lme4_1.1-15        
## [19] bindr_0.1           brew_1.0-6          zoo_1.8-1          
## [22] glue_1.2.0          gtable_0.2.0        MatrixModels_0.4-1 
## [25] car_2.1-6           Rook_1.1-1          DEoptimR_1.0-8     
## [28] mvtnorm_1.0-7       Rcpp_0.12.15        viridisLite_0.3.0  
## [31] xtable_1.8-2        cmprsk_2.2-7        laeken_0.4.6       
## [34] htmlTable_1.11.2    foreign_0.8-69      km.ci_0.5-2        
## [37] vcd_1.4-4           htmlwidgets_1.0     rex_1.1.2          
## [40] httr_1.3.1          RColorBrewer_1.1-2  acepack_1.4.1      
## [43] pkgconfig_2.0.1     XML_3.98-1.9        nnet_7.3-12        
## [46] reshape2_1.4.3      munsell_0.4.3       cellranger_1.1.0   
## [49] tools_3.4.3         visNetwork_2.0.3    downloader_0.4     
## [52] cli_1.0.0           evaluate_0.10.1     yaml_2.1.16        
## [55] robustbase_0.92-8   survMisc_0.5.4      randomForest_4.6-12
## [58] bindrcpp_0.2        nlme_3.1-131        mime_0.5           
## [61] xml2_1.2.0          compiler_3.4.3      pbkrtest_0.4-7     
## [64] rgexf_0.15.3        e1071_1.6-8         stringi_1.1.6      
## [67] Matrix_1.2-12       commonmark_1.4      psych_1.7.8        
## [70] nloptr_1.0.4        KMsurv_0.1-5        pillar_1.1.0       
## [73] combinat_0.0-8      lmtest_0.9-35       httpuv_1.3.5       
## [76] R6_2.2.2            latticeExtra_0.6-28 gridExtra_2.3      
## [79] codetools_0.2-15    polspline_1.1.12    boot_1.3-20        
## [82] assertthat_0.2.0    rprojroot_1.3-2     withr_2.1.1.9000   
## [85] mnormt_1.5-5        multcomp_1.4-8      mgcv_1.8-23        
## [88] expm_0.999-2        parallel_3.4.3      hms_0.4.1          
## [91] influenceR_0.1.0    quadprog_1.5-5      rpart_4.1-12       
## [94] class_7.3-14        minqa_1.2.4         rmarkdown_1.8      
## [97] lubridate_1.7.1     base64enc_0.1-3

Data Set Up

Read in and Tidy Data

nr.weight.ver <- "1-0" 
imputation.ver <- "1-0"
impute.data <- FALSE # This only needs to be run once
source("./scripts/read-in-and-tidy-data.R")

Define Baseline, Outcome and other Key Variables

source("./scripts/define-baseline-variables.R")
# TK: This needs to be redone to reflect the final samples -- 
  source("./scripts/consort-diagram-main-samples.R")

Imputation

# TK: These variables still need to be imputed.
    # ASPIRIN
    # FIBROIDSF1
    # Parkinsonsf1
    # msf1

# Imputation of Missing Values
xx.preimp <- xx0 %>% filter(idnumber %in% c(ids.ss.within.state,ids.ss.across.state.nonelderly))
impute.data = FALSE
source("./scripts/impute-missing-values.R")

# Adding in non-imputed variables 
not.in.xx <- c("idnumber",names(xx.preimp)[-(which(names(xx.preimp) %in% intersect(names(xx.preimp),names(xx.i))))])
xx <- xx.i %>% left_join(xx.preimp[,not.in.xx],"idnumber")

Define Samples

In this section we will define some key samples to use later in the analysis. Note that ALL samples are defined among FU1 responders in the 12 SCCS states, and include both FU3 responders as well as those who were lost to follow-up due to death. The FU3 survey nonresponders will be dealt with via nonresponse weights.

The Nonelderly Between-State sample are individuals aged 64 or younger as of the last vital status update (12/31/15).
The Within-State Medicare sample is broadly comprised of FU1 and FU3 responders (plus those who died). We identify Medicare status as of FU1 to ensure consistency in insurance coverage over time. W
The Within-State Nonelderly Medicare sample is a subset of the within-state Medicare sample: only those who were 64 or less as of the last vital status update (12/31/15).
The Within-State Nonelderly General Population and Clinic Population sample is also a subset of the within-state sample: only individuals 64 and younger who were either recruited at the health clinics, or via the general population mail survey.
We define a few sub-samples for robustness checks by only using those identified with FU1 income <400% FPL.

source("./scripts/define-samples.R")

# Merge in the Nonresponse weight
# TK This should probably be redone if we want to use the nonresponse weights in any analysis. 
# source("./scripts/nonresponse-weights.R")
load(file.path(fileoutputdir,paste0("nonresponse-weight-",nr.weight.ver,".RData")))
xx <-  xx %>% left_join(nr.weight, "idnumber") %>%
  mutate(w.FU3 = ifelse(is.na(w.FU3), 0, w.FU3))

xx <-
  xx %>% 
  mutate(
    insurancef1 = factor(insurancecoveragef1,labels=c("Uninsured","Insured")),
    insurancef2 = factor(ifelse(AFU1_DFU2_DFU3==1,3,insurancecoveragef2),
                         labels=c("Uninsured","Insured","Death")),
    insurancef3 = factor(ifelse(AFU1_DFU2_DFU3==1|AFU1_AFU2_DFU3==1,3,insurancecoveragef3),
                         labels=c("Uninsured","Insured","Death"))
  ) %>% 
  mutate(education=factor(education),
       sex = factor(sex),
       maritalstatus=factor(maritalstatus),
       employed = factor(employed),
       raceanalysis = factor(raceanalysis),
       insurancecoverage = factor(insurancecoverage),
       sf12v2healthf1 = factor(sf12v2healthf1))  %>% 
  # Survival censored at 2014
  mutate(jan1_2010_to_vs_months_c14 = jan1_2010_to_vs_months) %>% 
  mutate(vs_c14 = vitalstatus) %>% 
  mutate(vs_c14 = ifelse(jan1_2010_to_vs_months>480,"A",vs_c14)) %>% 
  mutate(jan1_2010_to_vs_months_c14 = ifelse(jan1_2010_to_vs_months>480,48,jan1_2010_to_vs_months))

Propensity Scores and Matching

# Set the following to true to run the various 
# matching algorithms (e.g., propensity score, 
# genetic matching, coarsened exact matching, etc.)

# TK rematch after final matching variables are chosen. 
perform_match <- FALSE
source("./scripts/match-samples.R")

Balance Diagnostics: Pre-Intervention Survival Differences

Between-State Nonelderly Sample

dd <- xx %>% mutate(surv = jan1_2010_to_vs_months_c14,
       cens = as.integer(vs_c14=="D"),
       tx = expansion_state,
       ps_weight = sample(1:1000,nrow(xx),replace=TRUE),
       cem_weight = sample(1:1000,nrow(xx),replace=TRUE)) %>% datadist() 
options(datadist = "dd")
par(mfrow=c(2,4))
xx %>% look_at_survival_by_sample(match = mm.across.state.nonelderly, treatment = expansion_state)

Pre-Intervention Survival: Across State Comparisons

Within-State Sample: Newly-Elderly Medicare Controls

par(mfrow=c(2,4))
xx %>% look_at_survival_by_sample(match = mm.newly.elderly.medicare, treatment = intervention)

Pre-Intervention Survival: Within-State Sample, Newly-Elderly Medicare Controls

The following plot shows how the survival differences change when pre-intervention general health status is not used in the matching or estimation of the propensity score.

par(mfrow=c(2,4))
xx %>% look_at_survival_by_sample(match = mm.newly.elderly.medicare.nohealth, treatment = intervention)

NO PRE-INTERVENTION HEALTH STATUS: Pre-Intervention Survival: Within-State Sample, Newly-Elderly Medicare Controls

Within-State Sample: Higher-Income Nonelderly Controls

par(mfrow=c(2,4))
xx %>% look_at_survival_by_sample(match = mm.genpop, treatment = intervention)

Pre-Intervention Survival: Within-State Sample, Higher Income Controls

Within-State Sample: Nonelderly Medicare Controls

par(mfrow=c(2,4))
xx %>% look_at_survival_by_sample(match = mm.nonelderly.medicare, treatment = intervention)

Pre-Intervention Survival: Within-State Sample, Nonelderly Medicare Controls

** Given that the nonelderly Medicare sample does not appear to yield sufficient balance, I’m going to ignore them in the analysis for now.**

# Add composite health status.
source("./scripts/composite-sf12-score.R")
xx <-
  xx %>% left_join(sf12f1[,c("idnumber","sf12_physicalf1","sf12_mentalf1")], "idnumber") %>% left_join(sf12f3[,c("idnumber","sf12_physicalf3","sf12_mentalf3")], "idnumber")

sf12f1.unimputed %>% select(starts_with("i")) %>% select(-idnumber) %>% filter(row_number() %in% sample(seq(nrow(sf12f1)),200)) %>% ggplot_missing() -> p.sf12f1.unimp

sf12f1 %>% select(starts_with("i")) %>% select(-idnumber) %>% filter(row_number() %in% sample(seq(nrow(sf12f1)),200)) %>% ggplot_missing() ->
  p.sf12f1.imp

# Define samples 
ss.ps.neareld <- mm.newly.elderly.medicare %>% 
  pluck("df.ps.match") %>% 
  left_join(xx,"idnumber") %>% 
  mutate(orig_idnumber = idnumber) %>% 
  mutate(idnumber = paste0(idnumber,"-",row_number()))

ss.ps.genpop <- mm.genpop %>% 
  pluck("df.ps.match") %>% 
  left_join(xx,"idnumber") %>% 
  mutate(orig_idnumber = idnumber) %>% 
  mutate(idnumber = paste0(idnumber,"-",row_number()))

ss.ps.across <- mm.across.state.nonelderly %>% 
  pluck("df.ps.match") %>% 
  left_join(xx,"idnumber") %>% 
  mutate(orig_idnumber = idnumber) %>% 
  mutate(idnumber = paste0(idnumber,"-",row_number()))
ss.ps.across.full <- mm.across.state.nonelderly %>% 
  pluck("df.unweighted") %>% 
  left_join(xx,"idnumber") %>% 
  mutate(orig_idnumber = idnumber) %>% 
  mutate(idnumber = paste0(idnumber,"-",row_number()))

ss.ps.neareld.nohealth <- mm.newly.elderly.medicare.nohealth %>% 
  pluck("df.ps.match") %>% 
  left_join(xx,"idnumber") %>% 
  mutate(orig_idnumber = idnumber) %>% 
  mutate(idnumber = paste0(idnumber,"-",row_number()))
ss.ps.neareld.full <- mm.newly.elderly.medicare %>% 
  pluck("df.unweighted") %>% 
  left_join(xx,"idnumber") %>% 
  mutate(orig_idnumber = idnumber) %>% 
  mutate(idnumber = paste0(idnumber,"-",row_number()))
ss.ps.genpop.full <- mm.genpop %>% 
  pluck("df.unweighted") %>% 
  left_join(xx,"idnumber") %>% 
  mutate(orig_idnumber = idnumber) %>% 
  mutate(idnumber = paste0(idnumber,"-",row_number()))

ss <- list(
      ss.ps.neareld = ss.ps.neareld,
      ss.ps.neareld.nohealth  = ss.ps.neareld.nohealth,
      ss.ps.neareld.full = ss.ps.neareld.full,
      ss.ps.genpop = ss.ps.genpop,
      ss.ps.genpop.full = ss.ps.genpop.full
)

Between-State (Medicaid Expansion) Analyses

Outcome: Insurance Coverage

We will first consider an between-state sample regression to estimate the differential impact of Medicaid expansion on a binary measure of being insured.

This is based on teh following regression specification:

\[ h(E(Y_{itk})) = \beta_0 + \beta_1 \textrm{Year}_t + \beta_2 \textrm{State}_k + \\ \beta_3 \textrm{Post-Expansion}_t \times \textrm{Expansion_State}_k + \beta_4 X_i \] where \(i\) indexes individuals, \(k\) indexes states, and \(t\) indexes time. \(\beta_3\) is the coefficient of interest. \(X_i\) are baseline patient characteristics (e.g., education, gender, race/ethnicity, etc.).

Due to the small number of clusters in our data (i.e., 4 expansion states and 8 nonexpansions states) we implement a permutation-based inference procedure whereby we permute state expansion status \(M = 300\) times. The density distribution of \(\hat \beta_3^m\) values from this procedure is then plotted, and the estimated coefficient in the observed data, i.e., \(\beta_3^{obs}\) is denoted by a vertical line.

The plot below provides the distribution of coefficient values for a linear probability regression specification for the binary insurance outcome that includes (black) and does not include (blue) baseline patient demographic characteristics (e.g., education, gender, marital status, employment status, race/ethnicity).

load_all("../sccs.functions")
RHS.unadj <- c("state")
RHS.adj <- c("education","sex","maritalstatus","employed","raceanalysis","state","vitalstatus_age")
f.insurance01.unadj <- fit_dd_across(insurance01, scale = FALSE, RHS = RHS.unadj, M = 300)
f.insurance01.adj <- fit_dd_across(insurance01, scale = FALSE, RHS = RHS.adj, M = 300)

f.insurance01.unadj$df %>% 
 ggplot(aes(x=estimate))+geom_density(colour="blue") + theme_tufte_revised()  +
    geom_line(data=f.insurance01.unadj$est_df,aes(x=estimate,y=y),lty=2,colour="blue")+
    xlab("Value of Difference-\nIn-Difference Estimate") + ylab("Density")+
    ggtitle(health_outcomes_lut["insurance01"])+
    geom_dl(data = f.insurance01.unadj$est_df , aes(label = label,y=y),method=list("last.points",rot=0),
            colour= "blue") + 
  geom_density(data=f.insurance01.adj$df,colour="black") +
  geom_line(data=f.insurance01.adj$est_df,aes(x=estimate,y=y),lty=2,colour="black")+
  geom_dl(data = f.insurance01.adj$est_df , aes(label = label,y=y),method=list("last.points",rot=0),colour="black") + xlim(c(-.1,.1))

General Health Outcomes

Towards an (Interpretable) DD Strategy for Categorial Health Outcomes

analysis_ver <- "between-ver1-0"
mainDir <- file.path(here::here(),"analysis")
subDir <- file.path(mainDir,"output","fit",analysis_ver)
run_analysis <- FALSE

if (!file.exists(subDir)){
    dir.create(subDir)
    run_analysis <- TRUE
}

usualsource <- 
  c("cat_1" = "CHC or Free Clinic",
    "cat_2" = "Private MD Office",
    "cat_3" = "Emergency Room",
    "cat_4" = "VA",
    "cat_5" = "Hospital (Not ED)",
    "cat_6" = "Other",
    "cat_7" = "No USOC")

ex_vg_g_fr_pr <- 
  c("cat_1" = "1. Excellent",
  "cat_2" = "2. Very Good",
  "cat_3" = "3. Good" ,
  "cat_4" =  "4. Fair",
  "cat_5" = "5. Poor")

all_most_some_little_none <- 
  c("cat_1" = "1 = All of the Time",
    "cat_2" = "2 = Most of the Time",
    "cat_3" = "3 = Some of the Time",
    "cat_4" = "4 = A Little of the Time",
    "cat_5" = "5 = None of the Time")

not_little_mod_quite_extreme <-
  c("cat_1" = "1 = Not at all",
    "cat_2" = "2 = A little bit",
    "cat_3" = "3 = Moderately",
    "cat_4" = "4 = Quite a bit",
    "cat_5" = "5 = Extremely")

lot_little_not <- 
  c("cat_1" = "1 = Limited a lot", 
    "cat_2" = "2 = Limited a little", 
    "cat_3" = "3 = Not limited at all")

load_all("../sccs.functions")

## Loading sccs.functions

if (run_analysis) {
  
  # Binary Outcomes
  f.insurance01 <- fit_dd_across(insurance01, scale = FALSE, RHS = RHS.adj, M = 300)
    saveRDS(f.insurance01,file=file.path(subDir,"fit_insurance01.rds"))
    
  f.fr_pr <- fit_dd_across(sf12v2health_fr_pr,scale = FALSE, M = 300, RHS = RHS.adj)
    saveRDS(f.fr_pr,file=file.path(subDir,"fit_sf12v2health_fr_pr.rds"))
  f.social <- fit_dd_across(sf12v2social_all_most_some,scale = FALSE, M = 300, RHS = RHS.adj)
    saveRDS(f.social,file=file.path(subDir,"fit_sf12v2social_all_most_some.rds"))
  
  f.pain <- fit_dd_across(sf12v2pain_mod_extreme,scale = FALSE, M = 300, RHS = RHS.adj)
    saveRDS(f.pain,file=file.path(subDir,"fit_sf12v2pain_mod_extreme.rds"))
  f.limitact <- fit_dd_across(sf12v2limitact_lot_little,scale = FALSE, M = 300, RHS = RHS.adj)
    saveRDS(f.limitact,file=file.path(subDir,"fit_sf12v2limitact_lot_little.rds"))
  f.limitstairs <- fit_dd_across(sf12v2limitstairs_lot_little,scale = FALSE, M = 300, RHS = RHS.adj)
    saveRDS(f.limitstairs,file=file.path(subDir,"fit_sf12v2limitstairs_lot_little.rds"))
  f.phyaccomplish <- fit_dd_across(sf12v2phyaccomplish_all_most_some,scale = FALSE, M = 300, RHS = RHS.adj)
    saveRDS(f.phyaccomplish,file=file.path(subDir,"fit_sf12v2phyaccomplish_all_most_some.rds"))
  f.phylimit <- fit_dd_across(sf12v2phylimit_all_most_some,scale = FALSE, M = 300, RHS = RHS.adj)
    saveRDS(f.phylimit,file=file.path(subDir,"fit_sf12v2phylimit_all_most_some.rds"))
    
  f.emaccomplish <- fit_dd_across(sf12v2emaccomplish_all_most_some,scale = FALSE, M = 300, RHS = RHS.adj)
    saveRDS(f.emaccomplish,file=file.path(subDir,"fit_sf12v2emaccomplish_all_most_some.rds"))
  f.emcareful <- fit_dd_across(sf12v2emcareful_all_most_some,scale = FALSE, M = 300, RHS = RHS.adj)
    saveRDS(f.emcareful,file=file.path(subDir,"fit_sf12v2emcareful_all_most_some.rds"))
  f.calm <- fit_dd_across(sf12v2calm_all_most_some,scale = FALSE, M = 300, RHS = RHS.adj)
    saveRDS(f.calm,file=file.path(subDir,"fit_sf12v2calm_all_most_some.rds"))
  f.energy <- fit_dd_across(sf12v2energy_all_most_some,scale = FALSE, M = 300, RHS = RHS.adj)
    saveRDS(f.energy,file=file.path(subDir,"fit_sf12v2energy_all_most_some.rds"))
  f.depressed <- fit_dd_across(sf12v2depressed_all_most_some,scale = FALSE, M = 300, RHS = RHS.adj)
    saveRDS(f.depressed,file=file.path(subDir,"fit_sf12v2depressed_all_most_some.rds"))

  f.nodoccost <- fit_dd_across(nodoctorcost, scale = FALSE, M = 300, RHS = RHS.adj)
    saveRDS(f.nodoccost,file=file.path(subDir,"fit_nodoctorcost.rds"))
  f.usualsource <- fit_dd_across(usualsourceofcare_not_ed,scale = FALSE, M = 300, RHS = RHS.adj)
    saveRDS(f.usualsource,file=file.path(subDir,"fit_usualsourceofcare_not_ed.rds"))

# Categorical Outcomes
  f.sf12v2health <- fit_dd_transition_across(
    df = ss.ps.across.full  %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
    outcome = sf12v2health,
    lut = ex_vg_g_fr_pr,
    M = 100
  )
  saveRDS(f.sf12v2health,file=file.path(subDir,"fit_sf12v2health.rds"))
  
  sf12v2social <- fit_dd_transition_across(
    df = ss.ps.across.full  %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
    outcome = sf12v2social,
    lut = all_most_some_little_none,
    M = 100
  )
  saveRDS(sf12v2social,file=file.path(subDir,"fit_sf12v2social.rds"))
  
  sf12v2pain <- fit_dd_transition_across(
    df = ss.ps.across.full  %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
    outcome = sf12v2pain,
    lut = not_little_mod_quite_extreme,
    M = 100
  )
  saveRDS(sf12v2pain,file=file.path(subDir,"fit_sf12v2pain.rds"))
  
  sf12v2limitact <- fit_dd_transition_across(
    df = ss.ps.across.full  %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
    outcome = sf12v2limitact,
    lut = lot_little_not,
    M = 100
  )
  saveRDS(sf12v2limitact,file=file.path(subDir,"fit_sf12v2limitact.rds"))
  
  sf12v2limitstairs <- fit_dd_transition_across(
    df = ss.ps.across.full  %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
    outcome = sf12v2limitstairs,
    lut = lot_little_not,
    M = 100
  )
  saveRDS(sf12v2limitstairs,file=file.path(subDir,"fit_sf12v2limitstairs.rds"))
  
  sf12v2phyaccomplish <- fit_dd_transition_across(
    df = ss.ps.across.full  %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
    outcome = sf12v2phyaccomplish,
    lut = all_most_some_little_none,
    M = 100
  )
  saveRDS(sf12v2phyaccomplish,file=file.path(subDir,"fit_sf12v2phyaccomplish.rds"))
  
  sf12v2phylimit <- fit_dd_transition_across(
    df = ss.ps.across.full  %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
    outcome = sf12v2phylimit,
    lut = all_most_some_little_none,
    M = 100
  )
  saveRDS(sf12v2phylimit,file=file.path(subDir,"fit_sf12v2phylimit.rds"))
  
  
  sf12v2emaccomplish <- fit_dd_transition_across(
    df = ss.ps.across.full  %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
    outcome = sf12v2emaccomplish,
    lut = all_most_some_little_none,
    M = 100
  )
  saveRDS(sf12v2emaccomplish,file=file.path(subDir,"fit_sf12v2emaccomplish.rds"))
  
  sf12v2emcareful <- fit_dd_transition_across(
    df = ss.ps.across.full  %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
    outcome = sf12v2emcareful,
    lut = all_most_some_little_none,
    M = 100
  )
  saveRDS(sf12v2emcareful,file=file.path(subDir,"fit_sf12v2emcareful.rds"))
  
  sf12v2calm <- fit_dd_transition_across(
    df = ss.ps.across.full  %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
    outcome = sf12v2calm,
    lut = all_most_some_little_none,
    M = 100
  )
  saveRDS(sf12v2calm,file=file.path(subDir,"fit_sf12v2calm.rds"))
  
  sf12v2energy <- fit_dd_transition_across(
    df = ss.ps.across.full  %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
    outcome = sf12v2energy,
    lut = all_most_some_little_none,
    M = 100
  )
  saveRDS(sf12v2energy,file=file.path(subDir,"fit_sf12v2energy.rds"))
  
  sf12v2depressed <- fit_dd_transition_across(
    df = ss.ps.across.full  %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
    outcome = sf12v2depressed,
    lut = all_most_some_little_none,
    M = 100
  )
  saveRDS(sf12v2depressed,file=file.path(subDir,"fit_sf12v2depressed.rds"))

  usualsourceofcare <- fit_dd_transition_across(
    df = ss.ps.across.full  %>% filter(vitalstatus == "A" & r_1==1 & r_3==1 & usualsourceofcaref3<8),
    outcome = usualsourceofcare,
    lut = usualsource,
    M = 100
  )
  saveRDS(usualsourceofcare ,file=file.path(subDir,"fit_usualsourceofcare.rds"))
  
} else {
  # Binary Outcomes
  fit_insurance01 <- readRDS(file.path(subDir,"fit_insurance01.rds"))
  fit_sf12v2health_fr_pr <- readRDS(file.path(subDir,"fit_sf12v2health_fr_pr.rds"))
  fit_sf12v2social_all_most_some <- readRDS(file.path(subDir,"fit_sf12v2social_all_most_some.rds"))
  fit_sf12v2pain_mod_extreme <- readRDS(file.path(subDir,"fit_sf12v2pain_mod_extreme.rds"))
  fit_f12v2limitact_lot_little <- readRDS(file.path(subDir,"fit_sf12v2limitact_lot_little.rds"))
  fit_sf12v2limitstairs_lot_little <- readRDS(file.path(subDir,"fit_sf12v2limitstairs_lot_little.rds"))
  fit_sf12v2phyaccomplish_all_most_some <- readRDS(file.path(subDir,"fit_sf12v2phyaccomplish_all_most_some.rds"))
  fit_sf12v2phylimit_all_most_some <- readRDS(file.path(subDir,"fit_sf12v2phylimit_all_most_some.rds"))
  fit_sf12v2emaccomplish_all_most_some <- readRDS(file.path(subDir,"fit_sf12v2emaccomplish_all_most_some.rds"))
  fit_sf12v2emcareful_all_most_some <- readRDS(file.path(subDir,"fit_sf12v2emcareful_all_most_some.rds"))
  fit_sf12v2calm_all_most_some <- readRDS(file.path(subDir,"fit_sf12v2calm_all_most_some.rds"))
  fit_sf12v2energy_all_most_some <- readRDS(file.path(subDir,"fit_sf12v2energy_all_most_some.rds"))
  fit_sf12v2depressed_all_most_some <- readRDS(file.path(subDir,"fit_sf12v2depressed_all_most_some.rds"))
  fit_usualsourceofcare_not_ed <- readRDS(file.path(subDir,"fit_usualsourceofcare_not_ed.rds"))
  fit_nodoctorcost <- readRDS(file.path(subDir,"fit_nodoctorcost.rds"))
  
  # Categorical Outcomes
  fit_sf12v2health <- readRDS(file.path(subDir,"fit_sf12v2health.rds"))
  fit_sf12v2social <- readRDS(file.path(subDir,"fit_sf12v2social.rds"))
  fit_sf12v2pain <- readRDS(file.path(subDir,"fit_sf12v2pain.rds"))
  fit_sf12v2limitact <- readRDS(file.path(subDir,"fit_sf12v2limitact.rds"))
  fit_sf12v2limitstairs <- readRDS(file.path(subDir,"fit_sf12v2limitstairs.rds"))
  fit_sf12v2phyaccomplish <- readRDS(file.path(subDir,"fit_sf12v2phyaccomplish.rds"))
  fit_sf12v2phylimit <- readRDS(file.path(subDir,"fit_sf12v2phylimit.rds"))
  fit_sf12v2emaccomplish <- readRDS(file.path(subDir,"fit_sf12v2emaccomplish.rds"))
  fit_sf12v2emcareful <- readRDS(file.path(subDir,"fit_sf12v2emcareful.rds"))
  fit_sf12v2calm <- readRDS(file.path(subDir,"fit_sf12v2calm.rds"))
  fit_sf12v2energy <- readRDS(file.path(subDir,"fit_sf12v2energy.rds"))
  fit_sf12v2depressed <- readRDS(file.path(subDir,"fit_sf12v2depressed.rds"))
  
  fit_usualsourceofcare <- readRDS(file.path(subDir,"fit_usualsourceofcare.rds"))
}

We’ll begin by considering a health outcome variable that maps nicely to a binary variable, so that we can see the difference between the binary vs. categorical outcome approach.

The SF12 question on activity limitations asks whether an individul’s health now limits them in moderate activities. The possible answers are:

Yes, limited a lot
Yes, limited a little
No, not limited at all.

Let’s first define binary outcome for whether the individual answered that a physical activity limitation limited them a lot or a little. We fit this binary outcome using the standard DD regression on the between-state sample, and then compare the estimted DD coefficient to the distribution of DD coefficients we obtain by permuting state expansion status \(M = 300\) times. For illustration I will fit a DD model with only state and year controls, but no other patient level controls.

Here, we can see that our estimate of -0.025 is relatively far out in the state cluster-based estimate of the sampling distribution. (Note that the adjsuted estimate of -.023 is quite similar.)

fit_binary <- fit_dd_across(sf12v2limitact_lot_little,scale = FALSE, M = 300, RHS = RHS.unadj)
fit_binary$plot

Next we will consider the longitudinal transitions among different possible “states” for this matrix.

We begin by simply looking at the marginal distribution of the variable by expansion state status at baseline (which for our purposes is follow-up 1). What’s clear here is that expansoin vs. nonexpansion groups are nicely balanced.

tt$marginal %>% set_names(c("Expansion State",lut)) %>% kable(caption = "Marginal (Baseline) Distribution of Physical Limitation Variable")

Marginal (Baseline) Distribution of Physical Limitation Variable
Expansion State	1 = Limited a lot	2 = Limited a little	3 = Not limited at all
0	0.23408	0.27576	0.49016
1	0.22363	0.28210	0.49428

Next let’s consider the transitions from baseline to follwup by expansion state grouping. First is the observed (unadusted) transition matrix for individuals in the expansion states:

tt$health %>% filter(tx==1) %>% select(-tx) %>% mutate(health = lut) %>% 
  set_names(c("Physical Limitation",lut)) %>% kable(caption = "Transition Matrix: Expansion States")

Transition Matrix: Expansion States
Physical Limitation	1 = Limited a lot	2 = Limited a little	3 = Not limited at all
1 = Limited a lot	0.58388	0.28322	0.13290
2 = Limited a little	0.27547	0.46028	0.26425
3 = Not limited at all	0.10843	0.22967	0.66190

Next is the observed transition matrix for individuals in the nonexpansion states.

tt$health %>% filter(tx==0) %>% select(-tx) %>% mutate(health = lut) %>% 
  set_names(c("Physical Limitation",lut)) %>% kable(caption = "Transition Matrix: Nonexpansion States")

Transition Matrix: Nonexpansion States
Physical Limitation	1 = Limited a lot	2 = Limited a little	3 = Not limited at all
1 = Limited a lot	0.53658	0.33366	0.12976
2 = Limited a little	0.30374	0.46839	0.22787
3 = Not limited at all	0.12536	0.23886	0.63579

Finally, we will take the difference between these two.

tt$diff %>% select(-tx) %>% mutate(health = lut) %>% 
  set_names(c("Difference",lut)) %>% kable(caption = "Transition Difference Matrix")

Transition Difference Matrix
Difference	1 = Limited a lot	2 = Limited a little	3 = Not limited at all
1 = Limited a lot	0.04730	-0.05044	0.00314
2 = Limited a little	-0.02826	-0.00811	0.03638
3 = Not limited at all	-0.01693	-0.00919	0.02611

Now we will fit a difference-in-difference model without any covariates to try to recover and match this transition difference matrix. One way to think about this is that we could fit a separate DD model on binary outcomes for each of the transition types. However, there is a more systematic way to do it using a standard regression specification, fully interacted with the baseline health outcome measure, and fit using multiple multivariate (linear) regression (MMR). I will call this approach a multiple multivariate difference-in-differences model, or *MMDD**.

For now the MMDD model is essentially the standard \(\textrm{post} + \textrm{treatment} + \textrm{post} * \textrm{treatment}\) model, which is then fully interacted with the baseline health outcome value. We fit this model to the outcome, then utilize the predicted values from this model to construct the full (adjusted) transition probability matrix.

The transition matrix below uses a linear probability MMDD model without any patient controls. As can be seen, this approach fully recovers the observed transition probability matrix above.

dd$diff_adj %>% mutate(category = lut) %>% set_colnames(c("Adjusted Difference",lut)) %>% kable(caption = "Adjusted Transition Difference Matrix")

Adjusted Transition Difference Matrix
Adjusted Difference	1 = Limited a lot	2 = Limited a little	3 = Not limited at all
1 = Limited a lot	0.04730	-0.05044	0.00314
2 = Limited a little	-0.02826	-0.00811	0.03638
3 = Not limited at all	-0.01693	-0.00919	0.02611

The next step in the process is to combine the marginal outcome distributions with the transition probability matricies to be able to say something about how the likelihood of being in a given category changes with and without Medicaid expansion.

We can obtain from the table above the marginal distribution (at baseline) of the outcome for the treated group. We then use matrix algebra and multiply this by the transtion probability matrix for expansion states to recover the (observed) ex post distribution of the outcome.

A %*% B %>% set_rownames("Ex Post (Observed)") %>% set_colnames(lut) %>% kable(caption = "Observed Ex Post Distribution")

Observed Ex Post Distribution
	1 = Limited a lot	2 = Limited a little	3 = Not limited at all
Ex Post (Observed)	0.26188	0.3067	0.43143

Similarly, we can construct an estimate of the counterfactual by subtracting our differences-in-differences estimate of the impact of expansion on the transitions.

A %*% (B - DD) %>% set_rownames("Ex Post (Counterfactual)") %>% set_colnames(lut) %>% kable(caption = "Counterfactual Ex Post Distribution")

Counterfactual Ex Post Distribution
	1 = Limited a lot	2 = Limited a little	3 = Not limited at all
Ex Post (Counterfactual)	0.26764	0.32481	0.40755

The difference between these two provides an estimate of the impact of the expansion on each of the outcome categories:

(A %*% B - A %*% (B - DD) ) %>% set_rownames("Ex Post (Counterfactual)") %>% set_colnames(lut) %>% kable(caption = "Expansion Effect")

Expansion Effect
	1 = Limited a lot	2 = Limited a little	3 = Not limited at all
Ex Post (Counterfactual)	-0.00576	-0.01811	0.02387

Note that the estimated effect on the “Not Limited At All Category” of 0.024 is nearly identical to the DD estimate we obtained based on the binary outcome analysis above (-0.025). (The sign is different because that binary outcome was defined in terms of having a physical limitation, not in terms of not having a limitation.)

The next step in the process is just to fold in the state cluster-based permutation inference procedure to say something about how extreme these estimates are relative to the overall sampling distribution. This procedure takes a decent amount of time to run so the below plot is only based on M = 100 permutations; I plan to update with more for next time.

fit_sf12v2limitact$plot

Finally, we can also run repeated applications of this process to see where things converge to. (This is similar to the anlaysis that Laura did a few months back).

est_df <- fit_sf12v2limitact$estimate %>% mutate()

fit_sf12v2limitact$estimate %>% 
    gather(key,value,-markov_iteration,-category) %>%
    mutate(label = case_when (
      key == "Y_Tx1" ~ "Expansion",
      key == "Y_Tx0" ~ "No\nExpansion",
      key == "Y_diff" ~ "Difference"
    )) %>% 
    ggplot(aes(x=markov_iteration)) +
    geom_line(aes(y = value, lty=key)) + theme_tufte_revised() +
    facet_wrap(~category) +
    geom_hline(aes(yintercept = 0), lty=1, colour="grey") +
    scale_linetype_manual(labels = c("Difference","Without Expansion","With Expansion"), values = c(3,2,1), guide= FALSE) +
    geom_dl(aes(label = label,y=value),method = list("maxvar.qp",cex=.4)) + xlim(c(-1,12))

Usual Source of Care

One interesting thing we can do is look to see how any changes in usual source of care came about. The following is the (adjusted) transition DD matrix among categories of usual source of care.

dd$diff_adj %>% mutate(category = lut) %>% set_colnames(c("Adjusted Difference",lut)) %>% kable(caption = "Adjusted Transition Difference Matrix")

Adjusted Transition Difference Matrix
Adjusted Difference	CHC or Free Clinic	Private MD Office	Emergency Room	VA	Hospital (Not ED)	Other	No USOC
CHC or Free Clinic	0.01861	0.01634	-0.00996	-0.00048	-0.00752	0.00066	-0.01764
Private MD Office	-0.00947	0.03729	-0.00491	-0.00487	-0.00910	-0.00218	-0.00676
Emergency Room	0.02794	-0.01014	0.00426	-0.02259	0.01734	0.02350	-0.04033
VA	-0.01891	-0.04129	0.00439	0.05147	-0.00005	0.00109	0.00331
Hospital (Not ED)	0.06549	0.08299	0.00099	-0.01263	-0.10174	-0.01435	-0.02076
Other	0.01152	0.01906	0.01049	-0.02381	-0.05598	0.02426	0.01447
No USOC	-0.11040	0.13053	-0.01648	0.00510	0.00148	0.02623	-0.03647

fit_usualsourceofcare$plot + ggtitle(health_outcomes_lut["usualsourceofcare"])

Did Not See MD Due to Cost

fit_nodoctorcost$plot + ggtitle(health_outcomes_lut["nodoctorcost"])

SF12 Measures

fit_sf12v2health$plot + ggtitle(health_outcomes_lut["sf12v2health"])

## Warning: Removed 60 rows containing non-finite values (stat_density).

fit_sf12v2limitact$plot + ggtitle(health_outcomes_lut["sf12v2limitact"])

fit_sf12v2limitstairs$plot + ggtitle(health_outcomes_lut["sf12v2limitstairs"])

fit_sf12v2phyaccomplish$plot + ggtitle(health_outcomes_lut["sf12v2phyaccomplish"])

fit_sf12v2phylimit$plot + ggtitle(health_outcomes_lut["sf12v2phylimit"])

fit_sf12v2emaccomplish$plot + ggtitle(health_outcomes_lut["sf12v2emaccomplish"])

fit_sf12v2emcareful$plot + ggtitle(health_outcomes_lut["sf12v2emcareful"])

fit_sf12v2pain$plot + ggtitle(health_outcomes_lut["sf12v2pain"])

fit_sf12v2calm$plot + ggtitle(health_outcomes_lut["sf12v2calm"])

fit_sf12v2energy$plot + ggtitle(health_outcomes_lut["sf12v2energy"])

fit_sf12v2depressed$plot + ggtitle(health_outcomes_lut["sf12v2depressed"])

fit_sf12v2social$plot + ggtitle(health_outcomes_lut["sf12v2social"])

Effects of Expanded Coverage on Access, Health Care and Health in the South

Primary Analytic File