This report was generated on 2018-03-08 14:22:39. Output is available at https://graveja0.github.io/sccs
The code and primary data for this project is available at https://github.com/graveja0/sccs.
# from https://mran.revolutionanalytics.com/web/packages/checkpoint/vignettes/using-checkpoint-with-knitr.html
# if you don't need a package, remove it from here (commenting is probably not sufficient)
# tidyverse: see https://blog.rstudio.org/2016/09/15/tidyverse-1-0-0/
needed_packages <- c(
"tidyverse","covr", "devtools", "rlang", "roxygen2", "shiny", "testthat",
"purrr", "repurrrsive", "rstudioapi", "usethis",
"rlang","MatchIt","Hmisc","rms","magrittr","stringr",
"readxl","scales","jsonlite","forcats","lintr","haven",
"janitor","stargazer","ggthemes","mice","VIM","DiagrammeR",
"survminer","directlabels","knitr","usethis","devtools",
"quantreg","rms","mice","rlang","designmatch","Matching","rgenoud",
"cem","here","plotly","broom","patchwork"
)
source("./scripts/setup.R")
cat(paste0("library(",needed_packages,")\n"),file = "manifest.R")
# For .gitignore in ./analysis/
# .httr-oauth
# .google_API_keys
# *.Rproj
# *.Rproj.user
# *.Rhistory
# *.Rprofile
# *.html
# auth
# .~lock*
# manifest.R
# output/ignore/*
# input/ignore/*
# scripts/ignore/*
source("./scripts/checkpoint.R")
source("manifest.R")
unlink("manifest.R")
select <- dplyr::select
sessionInfo()
## R version 3.4.3 (2017-11-30)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: macOS High Sierra 10.13.3
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] tcltk grid stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] patchwork_0.0.1 broom_0.4.3
## [3] plotly_4.7.1 here_0.1
## [5] cem_1.1.17 rgenoud_5.8-1.0
## [7] Matching_4.9-2 designmatch_0.3.0
## [9] Rglpk_0.6-3 slam_0.1-42
## [11] MASS_7.3-48 quantreg_5.34
## [13] knitr_1.19 directlabels_2017.03.31
## [15] survminer_0.4.2 ggpubr_0.1.6
## [17] DiagrammeR_0.9.2 VIM_4.7.0
## [19] data.table_1.10.4-3 colorspace_1.3-2
## [21] mice_2.46.0 ggthemes_3.4.0
## [23] stargazer_5.2.1 janitor_0.3.1
## [25] haven_1.1.1 lintr_1.0.2
## [27] jsonlite_1.5 scales_0.5.0.9000
## [29] readxl_1.0.0 magrittr_1.5
## [31] rms_5.1-2 SparseM_1.77
## [33] Hmisc_4.1-1 Formula_1.2-2
## [35] survival_2.41-3 lattice_0.20-35
## [37] MatchIt_3.0.2 usethis_1.2.0
## [39] rstudioapi_0.7 repurrrsive_0.1.0
## [41] testthat_2.0.0 shiny_1.0.5
## [43] roxygen2_6.0.1 rlang_0.1.6.9003
## [45] devtools_1.13.4 covr_3.0.1
## [47] forcats_0.2.0 stringr_1.2.0
## [49] dplyr_0.7.4 purrr_0.2.4
## [51] readr_1.1.1 tidyr_0.8.0
## [53] tibble_1.4.2 ggplot2_2.2.1.9000
## [55] tidyverse_1.2.1 sccs.functions_0.0.0.9000
##
## loaded via a namespace (and not attached):
## [1] backports_1.1.2 plyr_1.8.4 igraph_1.1.2
## [4] lazyeval_0.2.1 sp_1.2-7 splines_3.4.3
## [7] TH.data_1.0-8 digest_0.6.15 htmltools_0.3.6
## [10] viridis_0.5.0 checkmate_1.8.5 memoise_1.1.0
## [13] cluster_2.0.6 modelr_0.1.1 sandwich_2.4-0
## [16] rvest_0.3.2 crayon_1.3.4 lme4_1.1-15
## [19] bindr_0.1 brew_1.0-6 zoo_1.8-1
## [22] glue_1.2.0 gtable_0.2.0 MatrixModels_0.4-1
## [25] car_2.1-6 Rook_1.1-1 DEoptimR_1.0-8
## [28] mvtnorm_1.0-7 Rcpp_0.12.15 viridisLite_0.3.0
## [31] xtable_1.8-2 cmprsk_2.2-7 laeken_0.4.6
## [34] htmlTable_1.11.2 foreign_0.8-69 km.ci_0.5-2
## [37] vcd_1.4-4 htmlwidgets_1.0 rex_1.1.2
## [40] httr_1.3.1 RColorBrewer_1.1-2 acepack_1.4.1
## [43] pkgconfig_2.0.1 XML_3.98-1.9 nnet_7.3-12
## [46] reshape2_1.4.3 munsell_0.4.3 cellranger_1.1.0
## [49] tools_3.4.3 visNetwork_2.0.3 downloader_0.4
## [52] cli_1.0.0 evaluate_0.10.1 yaml_2.1.16
## [55] robustbase_0.92-8 survMisc_0.5.4 randomForest_4.6-12
## [58] bindrcpp_0.2 nlme_3.1-131 mime_0.5
## [61] xml2_1.2.0 compiler_3.4.3 pbkrtest_0.4-7
## [64] rgexf_0.15.3 e1071_1.6-8 stringi_1.1.6
## [67] Matrix_1.2-12 commonmark_1.4 psych_1.7.8
## [70] nloptr_1.0.4 KMsurv_0.1-5 pillar_1.1.0
## [73] combinat_0.0-8 lmtest_0.9-35 httpuv_1.3.5
## [76] R6_2.2.2 latticeExtra_0.6-28 gridExtra_2.3
## [79] codetools_0.2-15 polspline_1.1.12 boot_1.3-20
## [82] assertthat_0.2.0 rprojroot_1.3-2 withr_2.1.1.9000
## [85] mnormt_1.5-5 multcomp_1.4-8 mgcv_1.8-23
## [88] expm_0.999-2 parallel_3.4.3 hms_0.4.1
## [91] influenceR_0.1.0 quadprog_1.5-5 rpart_4.1-12
## [94] class_7.3-14 minqa_1.2.4 rmarkdown_1.8
## [97] lubridate_1.7.1 base64enc_0.1-3
nr.weight.ver <- "1-0"
imputation.ver <- "1-0"
impute.data <- FALSE # This only needs to be run once
source("./scripts/read-in-and-tidy-data.R")
source("./scripts/define-baseline-variables.R")
# TK: This needs to be redone to reflect the final samples --
source("./scripts/consort-diagram-main-samples.R")
# TK: These variables still need to be imputed.
# ASPIRIN
# FIBROIDSF1
# Parkinsonsf1
# msf1
# Imputation of Missing Values
xx.preimp <- xx0 %>% filter(idnumber %in% c(ids.ss.within.state,ids.ss.across.state.nonelderly))
impute.data = FALSE
source("./scripts/impute-missing-values.R")
# Adding in non-imputed variables
not.in.xx <- c("idnumber",names(xx.preimp)[-(which(names(xx.preimp) %in% intersect(names(xx.preimp),names(xx.i))))])
xx <- xx.i %>% left_join(xx.preimp[,not.in.xx],"idnumber")
In this section we will define some key samples to use later in the analysis. Note that ALL samples are defined among FU1 responders in the 12 SCCS states, and include both FU3 responders as well as those who were lost to follow-up due to death. The FU3 survey nonresponders will be dealt with via nonresponse weights.
The Nonelderly Between-State sample are individuals aged 64 or younger as of the last vital status update (12/31/15).
The Within-State Medicare sample is broadly comprised of FU1 and FU3 responders (plus those who died). We identify Medicare status as of FU1 to ensure consistency in insurance coverage over time. W
The Within-State Nonelderly Medicare sample is a subset of the within-state Medicare sample: only those who were 64 or less as of the last vital status update (12/31/15).
The Within-State Nonelderly General Population and Clinic Population sample is also a subset of the within-state sample: only individuals 64 and younger who were either recruited at the health clinics, or via the general population mail survey.
We define a few sub-samples for robustness checks by only using those identified with FU1 income <400% FPL.
source("./scripts/define-samples.R")
# Merge in the Nonresponse weight
# TK This should probably be redone if we want to use the nonresponse weights in any analysis.
# source("./scripts/nonresponse-weights.R")
load(file.path(fileoutputdir,paste0("nonresponse-weight-",nr.weight.ver,".RData")))
xx <- xx %>% left_join(nr.weight, "idnumber") %>%
mutate(w.FU3 = ifelse(is.na(w.FU3), 0, w.FU3))
xx <-
xx %>%
mutate(
insurancef1 = factor(insurancecoveragef1,labels=c("Uninsured","Insured")),
insurancef2 = factor(ifelse(AFU1_DFU2_DFU3==1,3,insurancecoveragef2),
labels=c("Uninsured","Insured","Death")),
insurancef3 = factor(ifelse(AFU1_DFU2_DFU3==1|AFU1_AFU2_DFU3==1,3,insurancecoveragef3),
labels=c("Uninsured","Insured","Death"))
) %>%
mutate(education=factor(education),
sex = factor(sex),
maritalstatus=factor(maritalstatus),
employed = factor(employed),
raceanalysis = factor(raceanalysis),
insurancecoverage = factor(insurancecoverage),
sf12v2healthf1 = factor(sf12v2healthf1)) %>%
# Survival censored at 2014
mutate(jan1_2010_to_vs_months_c14 = jan1_2010_to_vs_months) %>%
mutate(vs_c14 = vitalstatus) %>%
mutate(vs_c14 = ifelse(jan1_2010_to_vs_months>480,"A",vs_c14)) %>%
mutate(jan1_2010_to_vs_months_c14 = ifelse(jan1_2010_to_vs_months>480,48,jan1_2010_to_vs_months))
# Set the following to true to run the various
# matching algorithms (e.g., propensity score,
# genetic matching, coarsened exact matching, etc.)
# TK rematch after final matching variables are chosen.
perform_match <- FALSE
source("./scripts/match-samples.R")
dd <- xx %>% mutate(surv = jan1_2010_to_vs_months_c14,
cens = as.integer(vs_c14=="D"),
tx = expansion_state,
ps_weight = sample(1:1000,nrow(xx),replace=TRUE),
cem_weight = sample(1:1000,nrow(xx),replace=TRUE)) %>% datadist()
options(datadist = "dd")
par(mfrow=c(2,4))
xx %>% look_at_survival_by_sample(match = mm.across.state.nonelderly, treatment = expansion_state)
Pre-Intervention Survival: Across State Comparisons
par(mfrow=c(2,4))
xx %>% look_at_survival_by_sample(match = mm.newly.elderly.medicare, treatment = intervention)
Pre-Intervention Survival: Within-State Sample, Newly-Elderly Medicare Controls
The following plot shows how the survival differences change when pre-intervention general health status is not used in the matching or estimation of the propensity score.
par(mfrow=c(2,4))
xx %>% look_at_survival_by_sample(match = mm.newly.elderly.medicare.nohealth, treatment = intervention)
NO PRE-INTERVENTION HEALTH STATUS: Pre-Intervention Survival: Within-State Sample, Newly-Elderly Medicare Controls
par(mfrow=c(2,4))
xx %>% look_at_survival_by_sample(match = mm.genpop, treatment = intervention)
Pre-Intervention Survival: Within-State Sample, Higher Income Controls
par(mfrow=c(2,4))
xx %>% look_at_survival_by_sample(match = mm.nonelderly.medicare, treatment = intervention)
Pre-Intervention Survival: Within-State Sample, Nonelderly Medicare Controls
** Given that the nonelderly Medicare sample does not appear to yield sufficient balance, I’m going to ignore them in the analysis for now.**
# Add composite health status.
source("./scripts/composite-sf12-score.R")
xx <-
xx %>% left_join(sf12f1[,c("idnumber","sf12_physicalf1","sf12_mentalf1")], "idnumber") %>% left_join(sf12f3[,c("idnumber","sf12_physicalf3","sf12_mentalf3")], "idnumber")
sf12f1.unimputed %>% select(starts_with("i")) %>% select(-idnumber) %>% filter(row_number() %in% sample(seq(nrow(sf12f1)),200)) %>% ggplot_missing() -> p.sf12f1.unimp
sf12f1 %>% select(starts_with("i")) %>% select(-idnumber) %>% filter(row_number() %in% sample(seq(nrow(sf12f1)),200)) %>% ggplot_missing() ->
p.sf12f1.imp
# Define samples
ss.ps.neareld <- mm.newly.elderly.medicare %>%
pluck("df.ps.match") %>%
left_join(xx,"idnumber") %>%
mutate(orig_idnumber = idnumber) %>%
mutate(idnumber = paste0(idnumber,"-",row_number()))
ss.ps.genpop <- mm.genpop %>%
pluck("df.ps.match") %>%
left_join(xx,"idnumber") %>%
mutate(orig_idnumber = idnumber) %>%
mutate(idnumber = paste0(idnumber,"-",row_number()))
ss.ps.across <- mm.across.state.nonelderly %>%
pluck("df.ps.match") %>%
left_join(xx,"idnumber") %>%
mutate(orig_idnumber = idnumber) %>%
mutate(idnumber = paste0(idnumber,"-",row_number()))
ss.ps.across.full <- mm.across.state.nonelderly %>%
pluck("df.unweighted") %>%
left_join(xx,"idnumber") %>%
mutate(orig_idnumber = idnumber) %>%
mutate(idnumber = paste0(idnumber,"-",row_number()))
ss.ps.neareld.nohealth <- mm.newly.elderly.medicare.nohealth %>%
pluck("df.ps.match") %>%
left_join(xx,"idnumber") %>%
mutate(orig_idnumber = idnumber) %>%
mutate(idnumber = paste0(idnumber,"-",row_number()))
ss.ps.neareld.full <- mm.newly.elderly.medicare %>%
pluck("df.unweighted") %>%
left_join(xx,"idnumber") %>%
mutate(orig_idnumber = idnumber) %>%
mutate(idnumber = paste0(idnumber,"-",row_number()))
ss.ps.genpop.full <- mm.genpop %>%
pluck("df.unweighted") %>%
left_join(xx,"idnumber") %>%
mutate(orig_idnumber = idnumber) %>%
mutate(idnumber = paste0(idnumber,"-",row_number()))
ss <- list(
ss.ps.neareld = ss.ps.neareld,
ss.ps.neareld.nohealth = ss.ps.neareld.nohealth,
ss.ps.neareld.full = ss.ps.neareld.full,
ss.ps.genpop = ss.ps.genpop,
ss.ps.genpop.full = ss.ps.genpop.full
)
We will first consider an between-state sample regression to estimate the differential impact of Medicaid expansion on a binary measure of being insured.
This is based on teh following regression specification:
\[ h(E(Y_{itk})) = \beta_0 + \beta_1 \textrm{Year}_t + \beta_2 \textrm{State}_k + \\ \beta_3 \textrm{Post-Expansion}_t \times \textrm{Expansion_State}_k + \beta_4 X_i \] where \(i\) indexes individuals, \(k\) indexes states, and \(t\) indexes time. \(\beta_3\) is the coefficient of interest. \(X_i\) are baseline patient characteristics (e.g., education, gender, race/ethnicity, etc.).
Due to the small number of clusters in our data (i.e., 4 expansion states and 8 nonexpansions states) we implement a permutation-based inference procedure whereby we permute state expansion status \(M = 300\) times. The density distribution of \(\hat \beta_3^m\) values from this procedure is then plotted, and the estimated coefficient in the observed data, i.e., \(\beta_3^{obs}\) is denoted by a vertical line.
The plot below provides the distribution of coefficient values for a linear probability regression specification for the binary insurance outcome that includes (black) and does not include (blue) baseline patient demographic characteristics (e.g., education, gender, marital status, employment status, race/ethnicity).
load_all("../sccs.functions")
RHS.unadj <- c("state")
RHS.adj <- c("education","sex","maritalstatus","employed","raceanalysis","state","vitalstatus_age")
f.insurance01.unadj <- fit_dd_across(insurance01, scale = FALSE, RHS = RHS.unadj, M = 300)
f.insurance01.adj <- fit_dd_across(insurance01, scale = FALSE, RHS = RHS.adj, M = 300)
f.insurance01.unadj$df %>%
ggplot(aes(x=estimate))+geom_density(colour="blue") + theme_tufte_revised() +
geom_line(data=f.insurance01.unadj$est_df,aes(x=estimate,y=y),lty=2,colour="blue")+
xlab("Value of Difference-\nIn-Difference Estimate") + ylab("Density")+
ggtitle(health_outcomes_lut["insurance01"])+
geom_dl(data = f.insurance01.unadj$est_df , aes(label = label,y=y),method=list("last.points",rot=0),
colour= "blue") +
geom_density(data=f.insurance01.adj$df,colour="black") +
geom_line(data=f.insurance01.adj$est_df,aes(x=estimate,y=y),lty=2,colour="black")+
geom_dl(data = f.insurance01.adj$est_df , aes(label = label,y=y),method=list("last.points",rot=0),colour="black") + xlim(c(-.1,.1))
analysis_ver <- "between-ver1-0"
mainDir <- file.path(here::here(),"analysis")
subDir <- file.path(mainDir,"output","fit",analysis_ver)
run_analysis <- FALSE
if (!file.exists(subDir)){
dir.create(subDir)
run_analysis <- TRUE
}
usualsource <-
c("cat_1" = "CHC or Free Clinic",
"cat_2" = "Private MD Office",
"cat_3" = "Emergency Room",
"cat_4" = "VA",
"cat_5" = "Hospital (Not ED)",
"cat_6" = "Other",
"cat_7" = "No USOC")
ex_vg_g_fr_pr <-
c("cat_1" = "1. Excellent",
"cat_2" = "2. Very Good",
"cat_3" = "3. Good" ,
"cat_4" = "4. Fair",
"cat_5" = "5. Poor")
all_most_some_little_none <-
c("cat_1" = "1 = All of the Time",
"cat_2" = "2 = Most of the Time",
"cat_3" = "3 = Some of the Time",
"cat_4" = "4 = A Little of the Time",
"cat_5" = "5 = None of the Time")
not_little_mod_quite_extreme <-
c("cat_1" = "1 = Not at all",
"cat_2" = "2 = A little bit",
"cat_3" = "3 = Moderately",
"cat_4" = "4 = Quite a bit",
"cat_5" = "5 = Extremely")
lot_little_not <-
c("cat_1" = "1 = Limited a lot",
"cat_2" = "2 = Limited a little",
"cat_3" = "3 = Not limited at all")
load_all("../sccs.functions")
## Loading sccs.functions
if (run_analysis) {
# Binary Outcomes
f.insurance01 <- fit_dd_across(insurance01, scale = FALSE, RHS = RHS.adj, M = 300)
saveRDS(f.insurance01,file=file.path(subDir,"fit_insurance01.rds"))
f.fr_pr <- fit_dd_across(sf12v2health_fr_pr,scale = FALSE, M = 300, RHS = RHS.adj)
saveRDS(f.fr_pr,file=file.path(subDir,"fit_sf12v2health_fr_pr.rds"))
f.social <- fit_dd_across(sf12v2social_all_most_some,scale = FALSE, M = 300, RHS = RHS.adj)
saveRDS(f.social,file=file.path(subDir,"fit_sf12v2social_all_most_some.rds"))
f.pain <- fit_dd_across(sf12v2pain_mod_extreme,scale = FALSE, M = 300, RHS = RHS.adj)
saveRDS(f.pain,file=file.path(subDir,"fit_sf12v2pain_mod_extreme.rds"))
f.limitact <- fit_dd_across(sf12v2limitact_lot_little,scale = FALSE, M = 300, RHS = RHS.adj)
saveRDS(f.limitact,file=file.path(subDir,"fit_sf12v2limitact_lot_little.rds"))
f.limitstairs <- fit_dd_across(sf12v2limitstairs_lot_little,scale = FALSE, M = 300, RHS = RHS.adj)
saveRDS(f.limitstairs,file=file.path(subDir,"fit_sf12v2limitstairs_lot_little.rds"))
f.phyaccomplish <- fit_dd_across(sf12v2phyaccomplish_all_most_some,scale = FALSE, M = 300, RHS = RHS.adj)
saveRDS(f.phyaccomplish,file=file.path(subDir,"fit_sf12v2phyaccomplish_all_most_some.rds"))
f.phylimit <- fit_dd_across(sf12v2phylimit_all_most_some,scale = FALSE, M = 300, RHS = RHS.adj)
saveRDS(f.phylimit,file=file.path(subDir,"fit_sf12v2phylimit_all_most_some.rds"))
f.emaccomplish <- fit_dd_across(sf12v2emaccomplish_all_most_some,scale = FALSE, M = 300, RHS = RHS.adj)
saveRDS(f.emaccomplish,file=file.path(subDir,"fit_sf12v2emaccomplish_all_most_some.rds"))
f.emcareful <- fit_dd_across(sf12v2emcareful_all_most_some,scale = FALSE, M = 300, RHS = RHS.adj)
saveRDS(f.emcareful,file=file.path(subDir,"fit_sf12v2emcareful_all_most_some.rds"))
f.calm <- fit_dd_across(sf12v2calm_all_most_some,scale = FALSE, M = 300, RHS = RHS.adj)
saveRDS(f.calm,file=file.path(subDir,"fit_sf12v2calm_all_most_some.rds"))
f.energy <- fit_dd_across(sf12v2energy_all_most_some,scale = FALSE, M = 300, RHS = RHS.adj)
saveRDS(f.energy,file=file.path(subDir,"fit_sf12v2energy_all_most_some.rds"))
f.depressed <- fit_dd_across(sf12v2depressed_all_most_some,scale = FALSE, M = 300, RHS = RHS.adj)
saveRDS(f.depressed,file=file.path(subDir,"fit_sf12v2depressed_all_most_some.rds"))
f.nodoccost <- fit_dd_across(nodoctorcost, scale = FALSE, M = 300, RHS = RHS.adj)
saveRDS(f.nodoccost,file=file.path(subDir,"fit_nodoctorcost.rds"))
f.usualsource <- fit_dd_across(usualsourceofcare_not_ed,scale = FALSE, M = 300, RHS = RHS.adj)
saveRDS(f.usualsource,file=file.path(subDir,"fit_usualsourceofcare_not_ed.rds"))
# Categorical Outcomes
f.sf12v2health <- fit_dd_transition_across(
df = ss.ps.across.full %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
outcome = sf12v2health,
lut = ex_vg_g_fr_pr,
M = 100
)
saveRDS(f.sf12v2health,file=file.path(subDir,"fit_sf12v2health.rds"))
sf12v2social <- fit_dd_transition_across(
df = ss.ps.across.full %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
outcome = sf12v2social,
lut = all_most_some_little_none,
M = 100
)
saveRDS(sf12v2social,file=file.path(subDir,"fit_sf12v2social.rds"))
sf12v2pain <- fit_dd_transition_across(
df = ss.ps.across.full %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
outcome = sf12v2pain,
lut = not_little_mod_quite_extreme,
M = 100
)
saveRDS(sf12v2pain,file=file.path(subDir,"fit_sf12v2pain.rds"))
sf12v2limitact <- fit_dd_transition_across(
df = ss.ps.across.full %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
outcome = sf12v2limitact,
lut = lot_little_not,
M = 100
)
saveRDS(sf12v2limitact,file=file.path(subDir,"fit_sf12v2limitact.rds"))
sf12v2limitstairs <- fit_dd_transition_across(
df = ss.ps.across.full %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
outcome = sf12v2limitstairs,
lut = lot_little_not,
M = 100
)
saveRDS(sf12v2limitstairs,file=file.path(subDir,"fit_sf12v2limitstairs.rds"))
sf12v2phyaccomplish <- fit_dd_transition_across(
df = ss.ps.across.full %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
outcome = sf12v2phyaccomplish,
lut = all_most_some_little_none,
M = 100
)
saveRDS(sf12v2phyaccomplish,file=file.path(subDir,"fit_sf12v2phyaccomplish.rds"))
sf12v2phylimit <- fit_dd_transition_across(
df = ss.ps.across.full %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
outcome = sf12v2phylimit,
lut = all_most_some_little_none,
M = 100
)
saveRDS(sf12v2phylimit,file=file.path(subDir,"fit_sf12v2phylimit.rds"))
sf12v2emaccomplish <- fit_dd_transition_across(
df = ss.ps.across.full %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
outcome = sf12v2emaccomplish,
lut = all_most_some_little_none,
M = 100
)
saveRDS(sf12v2emaccomplish,file=file.path(subDir,"fit_sf12v2emaccomplish.rds"))
sf12v2emcareful <- fit_dd_transition_across(
df = ss.ps.across.full %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
outcome = sf12v2emcareful,
lut = all_most_some_little_none,
M = 100
)
saveRDS(sf12v2emcareful,file=file.path(subDir,"fit_sf12v2emcareful.rds"))
sf12v2calm <- fit_dd_transition_across(
df = ss.ps.across.full %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
outcome = sf12v2calm,
lut = all_most_some_little_none,
M = 100
)
saveRDS(sf12v2calm,file=file.path(subDir,"fit_sf12v2calm.rds"))
sf12v2energy <- fit_dd_transition_across(
df = ss.ps.across.full %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
outcome = sf12v2energy,
lut = all_most_some_little_none,
M = 100
)
saveRDS(sf12v2energy,file=file.path(subDir,"fit_sf12v2energy.rds"))
sf12v2depressed <- fit_dd_transition_across(
df = ss.ps.across.full %>% filter(vitalstatus == "A" & r_1==1 & r_3==1),
outcome = sf12v2depressed,
lut = all_most_some_little_none,
M = 100
)
saveRDS(sf12v2depressed,file=file.path(subDir,"fit_sf12v2depressed.rds"))
usualsourceofcare <- fit_dd_transition_across(
df = ss.ps.across.full %>% filter(vitalstatus == "A" & r_1==1 & r_3==1 & usualsourceofcaref3<8),
outcome = usualsourceofcare,
lut = usualsource,
M = 100
)
saveRDS(usualsourceofcare ,file=file.path(subDir,"fit_usualsourceofcare.rds"))
} else {
# Binary Outcomes
fit_insurance01 <- readRDS(file.path(subDir,"fit_insurance01.rds"))
fit_sf12v2health_fr_pr <- readRDS(file.path(subDir,"fit_sf12v2health_fr_pr.rds"))
fit_sf12v2social_all_most_some <- readRDS(file.path(subDir,"fit_sf12v2social_all_most_some.rds"))
fit_sf12v2pain_mod_extreme <- readRDS(file.path(subDir,"fit_sf12v2pain_mod_extreme.rds"))
fit_f12v2limitact_lot_little <- readRDS(file.path(subDir,"fit_sf12v2limitact_lot_little.rds"))
fit_sf12v2limitstairs_lot_little <- readRDS(file.path(subDir,"fit_sf12v2limitstairs_lot_little.rds"))
fit_sf12v2phyaccomplish_all_most_some <- readRDS(file.path(subDir,"fit_sf12v2phyaccomplish_all_most_some.rds"))
fit_sf12v2phylimit_all_most_some <- readRDS(file.path(subDir,"fit_sf12v2phylimit_all_most_some.rds"))
fit_sf12v2emaccomplish_all_most_some <- readRDS(file.path(subDir,"fit_sf12v2emaccomplish_all_most_some.rds"))
fit_sf12v2emcareful_all_most_some <- readRDS(file.path(subDir,"fit_sf12v2emcareful_all_most_some.rds"))
fit_sf12v2calm_all_most_some <- readRDS(file.path(subDir,"fit_sf12v2calm_all_most_some.rds"))
fit_sf12v2energy_all_most_some <- readRDS(file.path(subDir,"fit_sf12v2energy_all_most_some.rds"))
fit_sf12v2depressed_all_most_some <- readRDS(file.path(subDir,"fit_sf12v2depressed_all_most_some.rds"))
fit_usualsourceofcare_not_ed <- readRDS(file.path(subDir,"fit_usualsourceofcare_not_ed.rds"))
fit_nodoctorcost <- readRDS(file.path(subDir,"fit_nodoctorcost.rds"))
# Categorical Outcomes
fit_sf12v2health <- readRDS(file.path(subDir,"fit_sf12v2health.rds"))
fit_sf12v2social <- readRDS(file.path(subDir,"fit_sf12v2social.rds"))
fit_sf12v2pain <- readRDS(file.path(subDir,"fit_sf12v2pain.rds"))
fit_sf12v2limitact <- readRDS(file.path(subDir,"fit_sf12v2limitact.rds"))
fit_sf12v2limitstairs <- readRDS(file.path(subDir,"fit_sf12v2limitstairs.rds"))
fit_sf12v2phyaccomplish <- readRDS(file.path(subDir,"fit_sf12v2phyaccomplish.rds"))
fit_sf12v2phylimit <- readRDS(file.path(subDir,"fit_sf12v2phylimit.rds"))
fit_sf12v2emaccomplish <- readRDS(file.path(subDir,"fit_sf12v2emaccomplish.rds"))
fit_sf12v2emcareful <- readRDS(file.path(subDir,"fit_sf12v2emcareful.rds"))
fit_sf12v2calm <- readRDS(file.path(subDir,"fit_sf12v2calm.rds"))
fit_sf12v2energy <- readRDS(file.path(subDir,"fit_sf12v2energy.rds"))
fit_sf12v2depressed <- readRDS(file.path(subDir,"fit_sf12v2depressed.rds"))
fit_usualsourceofcare <- readRDS(file.path(subDir,"fit_usualsourceofcare.rds"))
}
We’ll begin by considering a health outcome variable that maps nicely to a binary variable, so that we can see the difference between the binary vs. categorical outcome approach.
The SF12 question on activity limitations asks whether an individul’s health now limits them in moderate activities. The possible answers are:
Let’s first define binary outcome for whether the individual answered that a physical activity limitation limited them a lot or a little. We fit this binary outcome using the standard DD regression on the between-state sample, and then compare the estimted DD coefficient to the distribution of DD coefficients we obtain by permuting state expansion status \(M = 300\) times. For illustration I will fit a DD model with only state and year controls, but no other patient level controls.
Here, we can see that our estimate of -0.025 is relatively far out in the state cluster-based estimate of the sampling distribution. (Note that the adjsuted estimate of -.023 is quite similar.)
fit_binary <- fit_dd_across(sf12v2limitact_lot_little,scale = FALSE, M = 300, RHS = RHS.unadj)
fit_binary$plot
Next we will consider the longitudinal transitions among different possible “states” for this matrix.
We begin by simply looking at the marginal distribution of the variable by expansion state status at baseline (which for our purposes is follow-up 1). What’s clear here is that expansoin vs. nonexpansion groups are nicely balanced.
tt$marginal %>% set_names(c("Expansion State",lut)) %>% kable(caption = "Marginal (Baseline) Distribution of Physical Limitation Variable")
Expansion State | 1 = Limited a lot | 2 = Limited a little | 3 = Not limited at all |
---|---|---|---|
0 | 0.23408 | 0.27576 | 0.49016 |
1 | 0.22363 | 0.28210 | 0.49428 |
Next let’s consider the transitions from baseline to follwup by expansion state grouping. First is the observed (unadusted) transition matrix for individuals in the expansion states:
tt$health %>% filter(tx==1) %>% select(-tx) %>% mutate(health = lut) %>%
set_names(c("Physical Limitation",lut)) %>% kable(caption = "Transition Matrix: Expansion States")
Physical Limitation | 1 = Limited a lot | 2 = Limited a little | 3 = Not limited at all |
---|---|---|---|
1 = Limited a lot | 0.58388 | 0.28322 | 0.13290 |
2 = Limited a little | 0.27547 | 0.46028 | 0.26425 |
3 = Not limited at all | 0.10843 | 0.22967 | 0.66190 |
Next is the observed transition matrix for individuals in the nonexpansion states.
tt$health %>% filter(tx==0) %>% select(-tx) %>% mutate(health = lut) %>%
set_names(c("Physical Limitation",lut)) %>% kable(caption = "Transition Matrix: Nonexpansion States")
Physical Limitation | 1 = Limited a lot | 2 = Limited a little | 3 = Not limited at all |
---|---|---|---|
1 = Limited a lot | 0.53658 | 0.33366 | 0.12976 |
2 = Limited a little | 0.30374 | 0.46839 | 0.22787 |
3 = Not limited at all | 0.12536 | 0.23886 | 0.63579 |
Finally, we will take the difference between these two.
tt$diff %>% select(-tx) %>% mutate(health = lut) %>%
set_names(c("Difference",lut)) %>% kable(caption = "Transition Difference Matrix")
Difference | 1 = Limited a lot | 2 = Limited a little | 3 = Not limited at all |
---|---|---|---|
1 = Limited a lot | 0.04730 | -0.05044 | 0.00314 |
2 = Limited a little | -0.02826 | -0.00811 | 0.03638 |
3 = Not limited at all | -0.01693 | -0.00919 | 0.02611 |
Now we will fit a difference-in-difference model without any covariates to try to recover and match this transition difference matrix. One way to think about this is that we could fit a separate DD model on binary outcomes for each of the transition types. However, there is a more systematic way to do it using a standard regression specification, fully interacted with the baseline health outcome measure, and fit using multiple multivariate (linear) regression (MMR). I will call this approach a multiple multivariate difference-in-differences model, or *MMDD**.
For now the MMDD model is essentially the standard \(\textrm{post} + \textrm{treatment} + \textrm{post} * \textrm{treatment}\) model, which is then fully interacted with the baseline health outcome value. We fit this model to the outcome, then utilize the predicted values from this model to construct the full (adjusted) transition probability matrix.
The transition matrix below uses a linear probability MMDD model without any patient controls. As can be seen, this approach fully recovers the observed transition probability matrix above.
dd$diff_adj %>% mutate(category = lut) %>% set_colnames(c("Adjusted Difference",lut)) %>% kable(caption = "Adjusted Transition Difference Matrix")
Adjusted Difference | 1 = Limited a lot | 2 = Limited a little | 3 = Not limited at all |
---|---|---|---|
1 = Limited a lot | 0.04730 | -0.05044 | 0.00314 |
2 = Limited a little | -0.02826 | -0.00811 | 0.03638 |
3 = Not limited at all | -0.01693 | -0.00919 | 0.02611 |
The next step in the process is to combine the marginal outcome distributions with the transition probability matricies to be able to say something about how the likelihood of being in a given category changes with and without Medicaid expansion.
We can obtain from the table above the marginal distribution (at baseline) of the outcome for the treated group. We then use matrix algebra and multiply this by the transtion probability matrix for expansion states to recover the (observed) ex post distribution of the outcome.
A %*% B %>% set_rownames("Ex Post (Observed)") %>% set_colnames(lut) %>% kable(caption = "Observed Ex Post Distribution")
1 = Limited a lot | 2 = Limited a little | 3 = Not limited at all | |
---|---|---|---|
Ex Post (Observed) | 0.26188 | 0.3067 | 0.43143 |
Similarly, we can construct an estimate of the counterfactual by subtracting our differences-in-differences estimate of the impact of expansion on the transitions.
A %*% (B - DD) %>% set_rownames("Ex Post (Counterfactual)") %>% set_colnames(lut) %>% kable(caption = "Counterfactual Ex Post Distribution")
1 = Limited a lot | 2 = Limited a little | 3 = Not limited at all | |
---|---|---|---|
Ex Post (Counterfactual) | 0.26764 | 0.32481 | 0.40755 |
The difference between these two provides an estimate of the impact of the expansion on each of the outcome categories:
(A %*% B - A %*% (B - DD) ) %>% set_rownames("Ex Post (Counterfactual)") %>% set_colnames(lut) %>% kable(caption = "Expansion Effect")
1 = Limited a lot | 2 = Limited a little | 3 = Not limited at all | |
---|---|---|---|
Ex Post (Counterfactual) | -0.00576 | -0.01811 | 0.02387 |
Note that the estimated effect on the “Not Limited At All Category” of 0.024 is nearly identical to the DD estimate we obtained based on the binary outcome analysis above (-0.025). (The sign is different because that binary outcome was defined in terms of having a physical limitation, not in terms of not having a limitation.)
The next step in the process is just to fold in the state cluster-based permutation inference procedure to say something about how extreme these estimates are relative to the overall sampling distribution. This procedure takes a decent amount of time to run so the below plot is only based on M = 100 permutations; I plan to update with more for next time.
fit_sf12v2limitact$plot
Finally, we can also run repeated applications of this process to see where things converge to. (This is similar to the anlaysis that Laura did a few months back).
est_df <- fit_sf12v2limitact$estimate %>% mutate()
fit_sf12v2limitact$estimate %>%
gather(key,value,-markov_iteration,-category) %>%
mutate(label = case_when (
key == "Y_Tx1" ~ "Expansion",
key == "Y_Tx0" ~ "No\nExpansion",
key == "Y_diff" ~ "Difference"
)) %>%
ggplot(aes(x=markov_iteration)) +
geom_line(aes(y = value, lty=key)) + theme_tufte_revised() +
facet_wrap(~category) +
geom_hline(aes(yintercept = 0), lty=1, colour="grey") +
scale_linetype_manual(labels = c("Difference","Without Expansion","With Expansion"), values = c(3,2,1), guide= FALSE) +
geom_dl(aes(label = label,y=value),method = list("maxvar.qp",cex=.4)) + xlim(c(-1,12))
One interesting thing we can do is look to see how any changes in usual source of care came about. The following is the (adjusted) transition DD matrix among categories of usual source of care.
dd$diff_adj %>% mutate(category = lut) %>% set_colnames(c("Adjusted Difference",lut)) %>% kable(caption = "Adjusted Transition Difference Matrix")
Adjusted Difference | CHC or Free Clinic | Private MD Office | Emergency Room | VA | Hospital (Not ED) | Other | No USOC |
---|---|---|---|---|---|---|---|
CHC or Free Clinic | 0.01861 | 0.01634 | -0.00996 | -0.00048 | -0.00752 | 0.00066 | -0.01764 |
Private MD Office | -0.00947 | 0.03729 | -0.00491 | -0.00487 | -0.00910 | -0.00218 | -0.00676 |
Emergency Room | 0.02794 | -0.01014 | 0.00426 | -0.02259 | 0.01734 | 0.02350 | -0.04033 |
VA | -0.01891 | -0.04129 | 0.00439 | 0.05147 | -0.00005 | 0.00109 | 0.00331 |
Hospital (Not ED) | 0.06549 | 0.08299 | 0.00099 | -0.01263 | -0.10174 | -0.01435 | -0.02076 |
Other | 0.01152 | 0.01906 | 0.01049 | -0.02381 | -0.05598 | 0.02426 | 0.01447 |
No USOC | -0.11040 | 0.13053 | -0.01648 | 0.00510 | 0.00148 | 0.02623 | -0.03647 |
fit_usualsourceofcare$plot + ggtitle(health_outcomes_lut["usualsourceofcare"])
fit_nodoctorcost$plot + ggtitle(health_outcomes_lut["nodoctorcost"])
fit_sf12v2health$plot + ggtitle(health_outcomes_lut["sf12v2health"])
## Warning: Removed 60 rows containing non-finite values (stat_density).
fit_sf12v2limitact$plot + ggtitle(health_outcomes_lut["sf12v2limitact"])
fit_sf12v2limitstairs$plot + ggtitle(health_outcomes_lut["sf12v2limitstairs"])
fit_sf12v2phyaccomplish$plot + ggtitle(health_outcomes_lut["sf12v2phyaccomplish"])
fit_sf12v2phylimit$plot + ggtitle(health_outcomes_lut["sf12v2phylimit"])
fit_sf12v2emaccomplish$plot + ggtitle(health_outcomes_lut["sf12v2emaccomplish"])
fit_sf12v2emcareful$plot + ggtitle(health_outcomes_lut["sf12v2emcareful"])
fit_sf12v2pain$plot + ggtitle(health_outcomes_lut["sf12v2pain"])
fit_sf12v2calm$plot + ggtitle(health_outcomes_lut["sf12v2calm"])
fit_sf12v2energy$plot + ggtitle(health_outcomes_lut["sf12v2energy"])
fit_sf12v2depressed$plot + ggtitle(health_outcomes_lut["sf12v2depressed"])
fit_sf12v2social$plot + ggtitle(health_outcomes_lut["sf12v2social"])