library(knitr)
library(kableExtra)
library(readxl)
library(tidyverse)
-- Attaching packages ---------------------------------------------------------------------- tidyverse 1.2.1 --
v ggplot2 3.1.0     v purrr   0.2.5
v tibble  1.4.2     v dplyr   0.7.8
v tidyr   0.8.2     v stringr 1.3.1
v readr   1.1.1     v forcats 0.3.0
-- Conflicts ------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()

Original Import from Public Use File

I downloaded the 2015 Public Use File in R format from http://grc.osu.edu/OMAS/2015Survey. I then exported it to a comma-separated file, which I named omas2015.csv, so that I could import it here in a more familiar way. Note that this is a very large file.

# Import the CSV export of the OMAS 2015 Public Use File.
# No column types are supplied, so read_csv() guesses them.
omas2015 <- read_csv(file = "omas_data/omas2015.csv")
Parsed with column specification:
cols(
  .default = col_integer(),
  S10C = col_character(),
  NOCHILD_CK = col_character(),
  B4C2CON = col_character(),
  B4C2AGE = col_character(),
  B4I_7M2 = col_character(),
  B20AM3 = col_character(),
  C26CON = col_character(),
  E65A = col_character(),
  BF_28 = col_character(),
  BF_31 = col_character(),
  BF_32 = col_character(),
  G72CM6 = col_character(),
  G72CM7 = col_character(),
  J100BCON = col_character(),
  J100G1M3 = col_character(),
  POSTJ113 = col_character(),
  NJ117AM1 = col_character(),
  NJ117AM2 = col_character(),
  K98 = col_character(),
  K98A = col_character()
  # ... with 41 more columns
)
See spec(...) for full column specifications.

I then selected the observations and variables from the original data set that met several inclusion and exclusion criteria, creating the “omas_431raw” data set, as shown below. I also saved this data set as a .csv file, which I can provide as needed.

# Build the raw 431 subset of the OMAS 2015 Public Use File.
# A row is kept only when every comma-separated condition in filter() is TRUE;
# rows where a condition evaluates to NA are dropped as well.
omas_431raw <- omas2015 %>%
    filter(A1 == 1, B29BC == 1, D30 < 98, D30I < 98,
           D30BINC > 47, D30BINC < 84, D30A_UNIT == 1,
           D30A_VALUE < 998, D45 < 98, D46 < 98,
           !is.na(E59DAYS), E60 < 98, E62 < 98,
           !is.na(E63DAYS), F69 < 98, H76 < 98, H77 < 98,
           S9_REGION < 8, S14_REC_85 < 90, S15 < 3,
           # at least one of the four insurance-source items must be coded 1
           B4A == 1 | B4B == 1 | B4C == 1 | B4E == 1,
           B4A < 9, B4B < 9, B4C < 9, B4E < 9,
           H84_A3 > 100, H84_A3 < 600000) %>%
    # Arbitrary sequential IDs starting at 1001. Derived from the row position
    # rather than a hard-coded 1001:2007, so the pipeline does not error if
    # the number of rows passing the filter ever changes.
    mutate(resp_ID = 1000L + row_number()) %>%
    select(resp_ID, A1, B4A, B4B, B4C, B4E, B29BC,
           D30, D30I, D30BINC, D30A_UNIT, D30A_VALUE,
           D45, D46, E59DAYS, E60, E62, E63DAYS, F69,
           H76, H77, H84_A3, Region,
           S9_REGION, S14_REC_85, S15)

# Save the raw subset next to the report so it can be shared.
write_csv(omas_431raw, "omas_431raw.csv")

Cleanup

Then, I built the actual data set to use in 431, by recasting many variables.

# Derive the analysis-ready omas_431 tibble from omas_431raw: coded survey
# items become labelled factors, quantitative items get readable names, and
# bmi and income (in thousands) are computed. The recoding is staged by theme;
# the closing select() fixes the column order of the exported file.
omas_431 <- omas_431raw %>%
    # Access to care and insurance coverage
    mutate(prob_access = factor(B29BC) %>% fct_recode(Yes = "1"),
           insurance = factor(A1) %>% fct_recode(Yes = "1"),
           ins_employer = factor(B4A) %>% fct_recode(Yes = "1", No = "2"),
           ins_medicare = factor(B4B) %>% fct_recode(Yes = "1", No = "2"),
           ins_medicaid = factor(B4C) %>% fct_recode(Yes = "1", No = "2"),
           ins_private = factor(B4E) %>% fct_recode(Yes = "1", No = "2")) %>%
    # Health status, body measurements and health behaviours
    mutate(health_stat = factor(D30) %>%
               fct_recode(E = "1", VG = "2", G = "3", F = "4", P = "5"),
           mental_30 = D30I,
           height = D30BINC,
           weight = D30A_VALUE,
           # 703 is the conversion factor for BMI from pounds and inches
           # (the codebook lists weight in pounds and height in inches)
           bmi = 703 * weight / (height^2),
           smoke_100 = factor(D45) %>% fct_recode(Yes = "1", No = "2"),
           alcohol_30 = D46) %>%
    # Health care utilisation
    mutate(doc_days = E59DAYS,
           hospital = E60,
           er_visits = E62,
           dent_days = E63DAYS,
           # recode first, then put the levels in Easier / Same / Harder order
           care_now = factor(F69) %>%
               fct_recode(Easier = "1", Harder = "2", Same = "3") %>%
               fct_relevel("Easier", "Same", "Harder")) %>%
    # Demographics and geography
    mutate(marital = factor(H76) %>%
               fct_recode(Married = "1", Divorced = "2", Widowed = "3",
                          Separated = "4", Never = "5", Coupled = "6"),
           # codes 2-3 and 5-6 are collapsed into single education levels
           education = factor(H77) %>%
               fct_recode(BelowHSGrad = "2", BelowHSGrad = "3",
                          HSGrad = "4", SomeCollege = "5", SomeCollege = "6",
                          CollegeGrad = "7", PostCollege = "8"),
           income = H84_A3/1000,
           county_type = factor(Region) %>%
               fct_recode(Rural_App = "1", Metro = "2",
                          Rural_NonApp = "3", Suburban = "4"),
           ohio_region = factor(S9_REGION) %>%
               fct_recode(N_Cent = "1", NE = "2", NE_Cent = "3",
                          NW = "4", S_Cent = "5", SE = "6", SW = "7"),
           age = S14_REC_85,
           gender = factor(S15) %>% fct_recode(M = "1", F = "2")) %>%
    select(resp_ID, prob_access, insurance, ins_employer,
           ins_medicare, ins_medicaid, ins_private, age,
           gender, marital, education, income, county_type,
           ohio_region, care_now, health_stat, mental_30, height,
           weight, bmi, smoke_100, alcohol_30, doc_days,
           dent_days, hospital, er_visits)


# Save the cleaned data set next to the report.
write_csv(omas_431, "omas_431.csv")

Inclusion/Exclusion Criteria

The omas_431 file contains data from the 2015 Public Use File of the Ohio Medicaid Assessment Survey for 1007 respondents, with 26 variables describing each respondent.

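To be included in this omas_431 data frame, a respondent needed to meet all of the following criteria (these mirror the filter() conditions used to build omas_431raw above):

- covered by health insurance (A1 = 1), and reported problems getting needed care in the past 12 months (B29BC = 1)
- answered Yes (code 1) to at least one of the four insurance-source items B4A (employer or union), B4B (Medicare), B4C (Medicaid) and B4E (private plan), with a code below 9 on all four
- codes below 98 on D30, D30I, D45, D46, E60, E62, F69, H76 and H77
- height (D30BINC) above 47 and below 84 inches, and weight unit code D30A_UNIT = 1 with D30A_VALUE below 998
- non-missing values for E59DAYS and E63DAYS
- S9_REGION below 8, S14_REC_85 below 90, and S15 below 3
- family income (H84_A3) above 100 and below 600,000 dollars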

All 1007 respondents meeting these criteria are included in omas_431.

Detailed Codebook

I built a code book in Excel, and display it here, as follows.

# Load the hand-built Excel codebook and display it as a styled table
# (striped rows, hover highlighting).
omas_431_codes <- read_xlsx(path = "omas_data/omas_431_codebook.xlsx")

kable_styling(kable(omas_431_codes),
              bootstrap_options = c("striped", "hover"))
Variable Name type Description Source
resp_ID character Identification Code - arbitrary (codes are numerical: 1001 - 2007) Dr. Love
prob_access Yes/No Problems getting the care you needed in the past 12 months? (All responses are Yes) B29BC
insurance Yes/No Are you covered by health insurance? (All responses are Yes) A1
ins_employer Yes/No Insurance through an employer or union? B4A
ins_medicare Yes/No Insurance through Medicare? B4B
ins_medicaid Yes/No Insurance through Medicaid? B4C
ins_private Yes/No Insurance through a private plan? B4E
age quantitative Age in years (19-85, 85 and above are reported as 85) S14_REC_85
gender 2-level factor F = female, M = male S15
marital 6-level factor Married, Divorced, Widowed, Separated, Never (been married), Coupled (but unmarried) H76
education 5-level factor BelowHSGrad, HSGrad, SomeCollege, CollegeGrad, PostCollege H77 (some collapsing)
income quantitative Total family income in 2014, in thousands of dollars (observed range = 0.500 to 500.000) H84 (divided by 1000)
county_type 4-level factor Metro, Suburban, Rural_App (Rural Appalachian), Rural_NonApp (Rural Non-Appalachian) Region
ohio_region 7-level factor N_Cent = North Central, NE, NE_Cent = North East Central, NW, S_Cent, SE, SW S9_REGION
care_now 3-level factor Is health care now easier to get than 3 years ago? Easier, Same (as 3 years ago), Harder F69
health_stat 5-level factor Self-reported overall health: E = Excellent, VG = Very Good, G = Good, F = Fair, P = Poor D30
mental_30 quantitative # of days in the past 30 where your mental health prevented you from doing normal activities D30I
height quantitative height in inches D30BINC
weight quantitative weight in pounds D30A_VALUE, D30A_UNIT
bmi quantitative body-mass index Calculated
smoke_100 Yes/No Have you smoked 100 cigarettes in your life? D45
alcohol_30 quantitative # of days in the past 30 where you consumed alcohol D46
doc_days quantitative # of days since last non-emergency visit to a doctor / care professional about your health E59DAYS
dent_days quantitative # of days since you last visited a dentist E63DAYS
hospital quantitative # of times in the past 12 months where you were admitted to a hospital for an overnight stay E60
er_visits quantitative # of times in the past 12 months where you were a patient in a hospital emergency room E62

Description of omas_431

# Print a per-variable summary of omas_431 (n, missing, distinct values, and
# quantiles or frequency tables). Hmisc is called via :: rather than attached.
Hmisc::describe(omas_431)
omas_431 

 26  Variables      1007  Observations
---------------------------------------------------------------------------
resp_ID 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1007        0     1007        1     1504      336     1051     1102 
     .25      .50      .75      .90      .95 
    1252     1504     1756     1906     1957 

lowest : 1001 1002 1003 1004 1005, highest: 2003 2004 2005 2006 2007
---------------------------------------------------------------------------
prob_access 
       n  missing distinct    value 
    1007        0        1      Yes 
               
Value       Yes
Frequency  1007
Proportion    1
---------------------------------------------------------------------------
insurance 
       n  missing distinct    value 
    1007        0        1      Yes 
               
Value       Yes
Frequency  1007
Proportion    1
---------------------------------------------------------------------------
ins_employer 
       n  missing distinct 
    1007        0        2 
                      
Value        Yes    No
Frequency    446   561
Proportion 0.443 0.557
---------------------------------------------------------------------------
ins_medicare 
       n  missing distinct 
    1007        0        2 
                      
Value        Yes    No
Frequency    398   609
Proportion 0.395 0.605
---------------------------------------------------------------------------
ins_medicaid 
       n  missing distinct 
    1007        0        2 
                      
Value        Yes    No
Frequency    340   667
Proportion 0.338 0.662
---------------------------------------------------------------------------
ins_private 
       n  missing distinct 
    1007        0        2 
                      
Value        Yes    No
Frequency    145   862
Proportion 0.144 0.856
---------------------------------------------------------------------------
age 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1007        0       66        1    51.65    16.53     26.3     31.0 
     .25      .50      .75      .90      .95 
    40.5     53.0     62.0     70.0     75.0 

lowest : 19 20 21 22 23, highest: 80 81 82 84 85
---------------------------------------------------------------------------
gender 
       n  missing distinct 
    1007        0        2 
                      
Value          M     F
Frequency    362   645
Proportion 0.359 0.641
---------------------------------------------------------------------------
marital 
       n  missing distinct 
    1007        0        6 
                                                                      
Value        Married  Divorced   Widowed Separated     Never   Coupled
Frequency        436       244        81        42       157        47
Proportion     0.433     0.242     0.080     0.042     0.156     0.047
---------------------------------------------------------------------------
education 
       n  missing distinct 
    1007        0        5 
                                                                      
Value      BelowHSGrad      HSGrad SomeCollege CollegeGrad PostCollege
Frequency           78         290         371         163         105
Proportion       0.077       0.288       0.368       0.162       0.104
---------------------------------------------------------------------------
income 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1007        0      207        1    43.58    42.93     4.59     8.06 
     .25      .50      .75      .90      .95 
   14.00    30.00    54.50    95.00   128.40 

lowest :   0.500   0.700   0.714   0.731   0.733
highest: 250.000 300.000 315.000 450.000 500.000
---------------------------------------------------------------------------
county_type 
       n  missing distinct 
    1007        0        4 
                                                              
Value         Rural_App        Metro Rural_NonApp     Suburban
Frequency           176          538          137          156
Proportion        0.175        0.534        0.136        0.155
---------------------------------------------------------------------------
ohio_region 
       n  missing distinct 
    1007        0        7 
                                                                  
Value       N_Cent      NE NE_Cent      NW  S_Cent      SE      SW
Frequency       73     265      77      49     193      91     259
Proportion   0.072   0.263   0.076   0.049   0.192   0.090   0.257
---------------------------------------------------------------------------
care_now 
       n  missing distinct 
    1007        0        3 
                               
Value      Easier   Same Harder
Frequency     106    345    556
Proportion  0.105  0.343  0.552
---------------------------------------------------------------------------
health_stat 
       n  missing distinct 
    1007        0        5 
                                        
Value          E    VG     G     F     P
Frequency     59   204   301   274   169
Proportion 0.059 0.203 0.299 0.272 0.168
---------------------------------------------------------------------------
mental_30 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1007        0       24    0.705    5.725    9.042        0        0 
     .25      .50      .75      .90      .95 
       0        0        5       30       30 

lowest :  0  1  2  3  4, highest: 26 27 28 29 30
---------------------------------------------------------------------------
height 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1007        0       28    0.994    66.43    4.533     60.3     62.0 
     .25      .50      .75      .90      .95 
    64.0     66.0     69.0     72.0     74.0 

lowest : 53.97 54.00 55.00 56.00 57.00, highest: 75.00 76.00 77.00 78.00 79.00
---------------------------------------------------------------------------
weight 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1007        0      172    0.999    187.6    55.19      120      130 
     .25      .50      .75      .90      .95 
     150      180      214      250      285 

lowest :  82  85  86  95  98, highest: 365 370 381 420 426
---------------------------------------------------------------------------
bmi 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1007        0      616        1    29.82    8.006    20.22    21.50 
     .25      .50      .75      .90      .95 
   24.40    28.79    33.30    39.70    42.79 

lowest : 14.52406 15.33173 16.05886 16.35804 16.49898
highest: 59.06864 61.56450 63.50342 66.71374 76.21811
---------------------------------------------------------------------------
smoke_100 
       n  missing distinct 
    1007        0        2 
                      
Value        Yes    No
Frequency    600   407
Proportion 0.596 0.404
---------------------------------------------------------------------------
alcohol_30 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1007        0       24    0.803    2.833    4.582        0        0 
     .25      .50      .75      .90      .95 
       0        0        3       10       15 

lowest :  0  1  2  3  4, highest: 25 27 28 29 30
---------------------------------------------------------------------------
doc_days 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1007        0       57    0.992      152    244.8        1        3 
     .25      .50      .75      .90      .95 
       7       30       90      312      540 

lowest :     1     2     3     4     5, highest:  2920  3650  4380  5110 22630
---------------------------------------------------------------------------
dent_days 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1007        0       69    0.997     1108     1668        7       21 
     .25      .50      .75      .90      .95 
      90      240     1095     2555     4270 

lowest :     0     1     2     3     4, highest: 25550 25915 26280 28105 36427
---------------------------------------------------------------------------
hospital 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1007        0       13    0.565   0.4826   0.8197        0        0 
     .25      .50      .75      .90      .95 
       0        0        0        2        2 
                                                                      
Value          0     1     2     3     4     5     7     8     9    10
Frequency    761   143    59    23     3     8     1     2     1     2
Proportion 0.756 0.142 0.059 0.023 0.003 0.008 0.001 0.002 0.001 0.002
                            
Value         11    13    14
Frequency      1     1     2
Proportion 0.001 0.001 0.002
---------------------------------------------------------------------------
er_visits 
       n  missing distinct     Info     Mean      Gmd      .05      .10 
    1007        0       17    0.809    1.148    1.744        0        0 
     .25      .50      .75      .90      .95 
       0        0        1        3        5 
                                                                      
Value          0     1     2     3     4     5     6     7     8     9
Frequency    571   203    92    53    27    13    15     8     6     2
Proportion 0.567 0.202 0.091 0.053 0.027 0.013 0.015 0.008 0.006 0.002
                                                    
Value         10    12    13    15    18    20    21
Frequency      8     2     2     1     1     1     2
Proportion 0.008 0.002 0.002 0.001 0.001 0.001 0.002
---------------------------------------------------------------------------

Session Information

# Record the R version, platform, locale and package versions used for this
# report, for reproducibility.
sessionInfo()
R version 3.5.1 (2018-07-02)
Platform: x86_64-w64-mingw32/x64 (64-bit)
Running under: Windows 10 x64 (build 17134)

Matrix products: default

locale:
[1] LC_COLLATE=English_United States.1252 
[2] LC_CTYPE=English_United States.1252   
[3] LC_MONETARY=English_United States.1252
[4] LC_NUMERIC=C                          
[5] LC_TIME=English_United States.1252    

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] bindrcpp_0.2.2   forcats_0.3.0    stringr_1.3.1    dplyr_0.7.8     
 [5] purrr_0.2.5      readr_1.1.1      tidyr_0.8.2      tibble_1.4.2    
 [9] ggplot2_3.1.0    tidyverse_1.2.1  readxl_1.1.0     kableExtra_0.9.0
[13] knitr_1.20      

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.0          lubridate_1.7.4     lattice_0.20-35    
 [4] assertthat_0.2.0    rprojroot_1.3-2     digest_0.6.18      
 [7] R6_2.3.0            cellranger_1.1.0    plyr_1.8.4         
[10] backports_1.1.2     acepack_1.4.1       evaluate_0.12      
[13] httr_1.3.1          highr_0.7           pillar_1.3.0       
[16] rlang_0.3.0.1       lazyeval_0.2.1      data.table_1.11.8  
[19] rstudioapi_0.8      rpart_4.1-13        Matrix_1.2-14      
[22] checkmate_1.8.5     rmarkdown_1.10      splines_3.5.1      
[25] foreign_0.8-70      htmlwidgets_1.3     munsell_0.5.0      
[28] broom_0.5.0         compiler_3.5.1      modelr_0.1.2       
[31] pkgconfig_2.0.2     base64enc_0.1-3     htmltools_0.3.6    
[34] nnet_7.3-12         tidyselect_0.2.5    htmlTable_1.12     
[37] gridExtra_2.3       Hmisc_4.1-1         viridisLite_0.3.0  
[40] crayon_1.3.4        withr_2.1.2         grid_3.5.1         
[43] nlme_3.1-137        jsonlite_1.5        gtable_0.2.0       
[46] magrittr_1.5        scales_1.0.0        cli_1.0.1          
[49] stringi_1.2.4       latticeExtra_0.6-28 xml2_1.2.0         
[52] Formula_1.2-3       RColorBrewer_1.1-2  tools_3.5.1        
[55] glue_1.3.0          hms_0.4.2           survival_2.43-1    
[58] yaml_2.2.0          colorspace_1.3-2    cluster_2.0.7-1    
[61] rvest_0.3.2         bindr_0.1.1         haven_1.1.2