Overview

This file is provided as a preliminary resource until this data is added to the critstats package. You may also use this code to gather data related to your class project, thesis, or other academic tasks beyond what is provided below. Content in this file comes from a host of different sources, which you should be familiar with prior to accessing and analyzing any data.

An important first step is to read the codebook for the data. More information can be viewed at the bottom of the file in the references section. This file will be updated periodically.

Set up your work environment

Open up a new .Rmd file.

Use {r setup, include=FALSE} in your first code chunk.

# Set the default chunk option so each chunk's source code is echoed in the
# knitted document.
knitr::opts_chunk$set(echo = TRUE)

library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(readr) 
library(dplyr)
# access data files from the MEPS site
# https://meps.ahrq.gov/mepsweb/data_stats/download_data_files.jsp
# read source file
# https://meps.ahrq.gov/mepsweb/data_stats/download_data/pufs/h242/h242ru.txt

Medical Expenditure Panel Survey (MEPS) data

# place the h242.dat file in the same folder as this .Rmd file.

# run the code below
# Path to the local MEPS HC-242 fixed-width data file. It must be defined
# before source() is called: the sourced script reads the file through this
# variable and creates the `h242` tibble.
meps_path <- "h242.dat"

# Fail early, before any download, if the data file is not where we expect it.
if (!file.exists(meps_path)) {
  stop(
    "Cannot find '", meps_path,
    "'. Place the file in the same folder as this .Rmd file.",
    call. = FALSE
  )
}

# NOTE(review): source() downloads and executes R code from a remote URL.
# Only run this against the official AHRQ address below; consider saving a
# reviewed local copy of the script for reproducibility.
source(
  "https://meps.ahrq.gov/mepsweb/data_stats/download_data/pufs/h242/h242ru.txt")

# view the first rows of the imported data
head(h242)

## # A tibble: 6 × 56
##   EPCPIDX    DUPERSID PHLDRIDX ESTBIDX EPRSIDX InsurPrivIDEX PANEL    RN JOBSIDX
##   <chr>      <chr>    <chr>    <chr>   <chr>   <chr>         <dbl> <dbl> <chr>  
## 1 246001001… 2460010… 2460010… 246001… 246001… 246001001061…    24     7 246001…
## 2 246001001… 2460010… 2460010… 246001… 246001… 246001001061…    24     8 246001…
## 3 246001001… 2460010… 2460010… 246001… 246001… 246001001061…    24     9 246001…
## 4 246001801… 2460018… 2460018… 246001… 246001… 246001801041…    24     7 246001…
## 5 246001801… 2460018… 2460018… 246001… 246001… 246001801041…    24     8 246001…
## 6 246001801… 2460018… 2460018… 246001… 246001… 246001801041…    24     9 246001…
## # ℹ 47 more variables: JOBSINFR <dbl>, JOBSFILE <dbl>, FYFLG <dbl>,
## #   CMJINS <dbl>, EMPLSTAT <dbl>, PHOLDER <dbl>, DEPNDNT <dbl>, EVALCOVR <dbl>,
## #   STAT1 <dbl>, STAT2 <dbl>, STAT3 <dbl>, STAT4 <dbl>, STAT5 <dbl>,
## #   STAT6 <dbl>, STAT7 <dbl>, STAT8 <dbl>, STAT9 <dbl>, STAT10 <dbl>,
## #   STAT11 <dbl>, STAT12 <dbl>, DECPHLDR <dbl>, OUTPHLDR <dbl>, NOPUFLG <dbl>,
## #   COVROUT_M18 <dbl>, TYPEFLAG_M23 <dbl>, STEXCH <dbl>, PrivateCat_M23 <dbl>,
## #   HOSPINSX <dbl>, MSUPINSX <dbl>, DENTLINS <dbl>, DENTLINX <dbl>, …

# view the last rows of the imported data
tail(h242)

## # A tibble: 6 × 56
##   EPCPIDX    DUPERSID PHLDRIDX ESTBIDX EPRSIDX InsurPrivIDEX PANEL    RN JOBSIDX
##   <chr>      <chr>    <chr>    <chr>   <chr>   <chr>         <dbl> <dbl> <chr>  
## 1 279969401… 2799694… 2799694… 279969… 279969… 279969401031…    27     1 279969…
## 2 279969401… 2799694… 2799694… 279969… 279969… 279969401031…    27     1 279969…
## 3 279969401… 2799694… 2799694… 279969… 279969… 279969401031…    27     2 279969…
## 4 279969401… 2799694… 2799694… 279969… 279969… 279969401031…    27     2 279969…
## 5 279969401… 2799694… 2799694… 279969… 279969… 279969401031…    27     3 279969…
## 6 279969401… 2799694… 2799694… 279969… 279969… 279969401031…    27     3 279969…
## # ℹ 47 more variables: JOBSINFR <dbl>, JOBSFILE <dbl>, FYFLG <dbl>,
## #   CMJINS <dbl>, EMPLSTAT <dbl>, PHOLDER <dbl>, DEPNDNT <dbl>, EVALCOVR <dbl>,
## #   STAT1 <dbl>, STAT2 <dbl>, STAT3 <dbl>, STAT4 <dbl>, STAT5 <dbl>,
## #   STAT6 <dbl>, STAT7 <dbl>, STAT8 <dbl>, STAT9 <dbl>, STAT10 <dbl>,
## #   STAT11 <dbl>, STAT12 <dbl>, DECPHLDR <dbl>, OUTPHLDR <dbl>, NOPUFLG <dbl>,
## #   COVROUT_M18 <dbl>, TYPEFLAG_M23 <dbl>, STEXCH <dbl>, PrivateCat_M23 <dbl>,
## #   HOSPINSX <dbl>, MSUPINSX <dbl>, DENTLINS <dbl>, DENTLINX <dbl>, …

Code from the source file

Running the code from the source file.

# Define start and end positions to read fixed-width file  


# Layout of the fixed-width file: 56 fields described by four parallel vectors
# (first column, last column, variable name, readr column-type code).

# First character column of each field.
pos_start <- c(
    1,  36,  46,  56,  67,  92, 106, 108,
  109, 123, 125, 128, 129, 131, 134, 135,
  136, 138, 140, 142, 144, 146, 148, 150,
  152, 154, 156, 158, 160, 162, 163, 164,
  165, 167, 169, 171, 173, 175, 177, 179,
  181, 183, 185, 188, 190, 191, 192, 200,
  207, 215, 217, 220, 222, 224, 226, 229
)

# Last character column of each field (inclusive).
pos_end <- c(
   35,  45,  55,  66,  91, 105, 107, 108,
  122, 124, 127, 128, 130, 133, 134, 135,
  137, 139, 141, 143, 145, 147, 149, 151,
  153, 155, 157, 159, 161, 162, 163, 164,
  166, 168, 170, 172, 174, 176, 178, 180,
  182, 184, 187, 189, 190, 191, 199, 206,
  214, 216, 219, 221, 223, 225, 228, 230
)

# Variable names, in file order.
var_names <- c(
  "EPCPIDX", "DUPERSID", "PHLDRIDX", "ESTBIDX", "EPRSIDX", "InsurPrivIDEX",
  "PANEL", "RN", "JOBSIDX", "JOBSINFR", "JOBSFILE", "FYFLG", "CMJINS",
  "EMPLSTAT", "PHOLDER", "DEPNDNT", "EVALCOVR",
  paste0("STAT", 1:12),
  "DECPHLDR", "OUTPHLDR", "NOPUFLG", "COVROUT_M18", "TYPEFLAG_M23", "STEXCH",
  "PrivateCat_M23", "HOSPINSX", "MSUPINSX", "DENTLINS", "DENTLINX",
  "VISIONIN", "PMEDINS", "COBRA", "PLANMETL", "COVTYPIN", "OOPELIG",
  "OOPPREM", "OOPPREMX", "OOPX12X", "OOPFLAG", "PREMLEVX", "PREMSUBZ",
  "ANNDEDCTP", "HSAACCT", "UPRHMO_M23", "NAMECHNG"
)

# Column-type codes: "c" for the six leading ID fields and JOBSIDX,
# "n" for every other field.
var_types <- c(rep("c", 6), "n", "n", "c", rep("n", 47))

# Name each type code after its variable so types are matched by column name.
names(var_types) <- var_names

# IMPORT ASCII file -----------------------

# Read the fixed-width ASCII file at `meps_path` into the tibble `h242`.
# Field boundaries and names come from pos_start / pos_end / var_names;
# column types come from the named vector var_types.
h242 <- read_fwf(
  file = meps_path,
  col_positions = fwf_positions(
    start = pos_start,
    end = pos_end,
    col_names = var_names
  ),
  col_types = var_types
)

Notice that the output = FALSE parameter is added to the code chunk to prevent printing of the R output values. To work with the data locally, be sure to run the code chunk manually.

We will then save the data file. Uncomment the line below to run a manual save.

# save(h242, file ="h242.Rdata")

Working with the data

# Work on a copy of the imported data. `as_tibble()` replaces
# `as_data_frame()`, which was deprecated in tibble 2.0.0.
meps <- as_tibble(h242)

# Convert character variables to numeric.
# `mutate(across(where(...)))` replaces the superseded `mutate_if()`.
# NOTE(review): the character columns are the ID fields. EPCPIDX is 35
# characters wide (columns 1-35), which is more digits than a double can
# store exactly, so its numeric value is only approximate. Keep the original
# character IDs from `h242` if exact identifiers are needed, e.g. for joins.
meps <- meps %>%
  mutate(across(where(is.character), as.numeric))

# Check the structure of the modified data frame
str(meps)

## tibble [41,617 × 56] (S3: tbl_df/tbl/data.frame)
##  $ EPCPIDX       : num [1:41617] 2.46e+34 2.46e+34 2.46e+34 2.46e+34 2.46e+34 ...
##  $ DUPERSID      : num [1:41617] 2.46e+09 2.46e+09 2.46e+09 2.46e+09 2.46e+09 ...
##  $ PHLDRIDX      : num [1:41617] 2.46e+09 2.46e+09 2.46e+09 2.46e+09 2.46e+09 ...
##  $ ESTBIDX       : num [1:41617] 2.46e+10 2.46e+10 2.46e+10 2.46e+10 2.46e+10 ...
##  $ EPRSIDX       : num [1:41617] 2.46e+24 2.46e+24 2.46e+24 2.46e+24 2.46e+24 ...
##  $ InsurPrivIDEX : num [1:41617] 2.46e+13 2.46e+13 2.46e+13 2.46e+13 2.46e+13 ...
##  $ PANEL         : num [1:41617] 24 24 24 24 24 24 24 24 24 24 ...
##  $ RN            : num [1:41617] 7 8 9 7 8 9 7 7 8 8 ...
##  $ JOBSIDX       : num [1:41617] 2.46e+13 2.46e+13 2.46e+13 2.46e+13 2.46e+13 ...
##  $ JOBSINFR      : num [1:41617] 0 0 0 0 0 0 0 0 0 0 ...
##  $ JOBSFILE      : num [1:41617] 237 237 237 237 237 237 237 237 237 237 ...
##  $ FYFLG         : num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
##  $ CMJINS        : num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
##  $ EMPLSTAT      : num [1:41617] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ PHOLDER       : num [1:41617] 1 1 1 1 1 1 1 0 1 0 ...
##  $ DEPNDNT       : num [1:41617] 0 0 0 0 0 0 0 1 0 1 ...
##  $ EVALCOVR      : num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
##  $ STAT1         : num [1:41617] 1 -1 -1 1 1 -1 1 1 1 1 ...
##  $ STAT2         : num [1:41617] 1 -1 -1 -1 1 -1 -1 -1 1 1 ...
##  $ STAT3         : num [1:41617] 1 1 -1 -1 1 -1 -1 -1 1 1 ...
##  $ STAT4         : num [1:41617] -1 1 -1 -1 1 -1 -1 -1 1 1 ...
##  $ STAT5         : num [1:41617] -1 1 -1 -1 1 -1 -1 -1 1 1 ...
##  $ STAT6         : num [1:41617] -1 1 -1 -1 1 -1 -1 -1 1 1 ...
##  $ STAT7         : num [1:41617] -1 1 -1 -1 1 1 -1 -1 1 1 ...
##  $ STAT8         : num [1:41617] -1 1 1 -1 -1 1 -1 -1 1 1 ...
##  $ STAT9         : num [1:41617] -1 -1 1 -1 -1 1 -1 -1 1 1 ...
##  $ STAT10        : num [1:41617] -1 -1 1 -1 -1 1 -1 -1 1 1 ...
##  $ STAT11        : num [1:41617] -1 -1 1 -1 -1 1 -1 -1 -1 -1 ...
##  $ STAT12        : num [1:41617] -1 -1 1 -1 -1 1 -1 -1 -1 -1 ...
##  $ DECPHLDR      : num [1:41617] 2 2 2 2 2 2 2 2 2 2 ...
##  $ OUTPHLDR      : num [1:41617] 2 2 2 2 2 2 2 2 2 2 ...
##  $ NOPUFLG       : num [1:41617] 2 2 2 2 2 2 2 2 2 2 ...
##  $ COVROUT_M18   : num [1:41617] 2 2 2 2 2 2 -1 -1 -1 -1 ...
##  $ TYPEFLAG_M23  : num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
##  $ STEXCH        : num [1:41617] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ PrivateCat_M23: num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
##  $ HOSPINSX      : num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
##  $ MSUPINSX      : num [1:41617] 2 2 2 2 2 2 2 2 2 2 ...
##  $ DENTLINS      : num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
##  $ DENTLINX      : num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
##  $ VISIONIN      : num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
##  $ PMEDINS       : num [1:41617] 2 2 2 1 1 1 1 1 1 1 ...
##  $ COBRA         : num [1:41617] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ PLANMETL      : num [1:41617] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ COVTYPIN      : num [1:41617] 1 1 1 1 1 1 2 2 2 2 ...
##  $ OOPELIG       : num [1:41617] 1 2 2 1 2 2 1 1 2 2 ...
##  $ OOPPREM       : num [1:41617] 358 -1 -1 -8 -1 ...
##  $ OOPPREMX      : num [1:41617] 358 -1 -1 216 -1 ...
##  $ OOPX12X       : num [1:41617] 4290 -1 -1 2591 -1 ...
##  $ OOPFLAG       : num [1:41617] 0 -1 -1 1 -1 -1 0 0 -1 -1 ...
##  $ PREMLEVX      : num [1:41617] 2 -1 -1 2 -1 -1 4 4 -1 -1 ...
##  $ PREMSUBZ      : num [1:41617] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
##  $ ANNDEDCTP     : num [1:41617] 3 -1 -1 4 -1 -1 5 5 -1 -1 ...
##  $ HSAACCT       : num [1:41617] 2 -1 -1 2 -1 -1 -1 -1 -1 -1 ...
##  $ UPRHMO_M23    : num [1:41617] 2 2 2 2 2 2 1 1 1 1 ...
##  $ NAMECHNG      : num [1:41617] -1 2 2 2 2 2 2 2 2 2 ...

Subset data for only those variables you would like to use.

# Keep only the columns of interest, in this order.
meps_subset <- meps %>%
  dplyr::select(
    DUPERSID, PANEL,
    OOPX12X, OOPPREMX, OOPPREM,
    OOPELIG, OOPFLAG, HSAACCT
  )
meps_subset # will print first few rows of the new dataframe

## # A tibble: 41,617 × 8
##      DUPERSID PANEL OOPX12X OOPPREMX OOPPREM OOPELIG OOPFLAG HSAACCT
##         <dbl> <dbl>   <dbl>    <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1 2460010101    24   4290      358.    358.       1       0       2
##  2 2460010101    24     -1       -1      -1        2      -1      -1
##  3 2460010101    24     -1       -1      -1        2      -1      -1
##  4 2460018101    24   2591.     216.     -8        1       1       2
##  5 2460018101    24     -1       -1      -1        2      -1      -1
##  6 2460018101    24     -1       -1      -1        2      -1      -1
##  7 2460026101    24      0        0       0        1       0      -1
##  8 2460026103    24      0        0       0        1       0      -1
##  9 2460026101    24     -1       -1      -1        2      -1      -1
## 10 2460026103    24     -1       -1      -1        2      -1      -1
## # ℹ 41,607 more rows

We then rearrange the data by key variables.

# Drop PANEL, then order the rows from the largest OOPX12X to the smallest.
meps_subset <- meps_subset %>%
  dplyr::select(!PANEL) %>%
  dplyr::arrange(dplyr::desc(OOPX12X))

meps_subset

## # A tibble: 41,617 × 7
##      DUPERSID OOPX12X OOPPREMX OOPPREM OOPELIG OOPFLAG HSAACCT
##         <dbl>   <dbl>    <dbl>   <dbl>   <dbl>   <dbl>   <dbl>
##  1 2683204101  55000.    4583.     -8        1       1       2
##  2 2683204105  55000.    4583.     -8        1       1       2
##  3 2795834101  55000.    4583.   4583.       1       0       1
##  4 2795834102  55000.    4583.   4583.       1       0       1
##  5 2683096101  36000     3000    3000        1       0       1
##  6 2683096102  36000     3000    3000        1       0       1
##  7 2683096103  36000     3000    3000        1       0       1
##  8 2683096104  36000     3000    3000        1       0       1
##  9 2683096105  36000     3000    3000        1       0       1
## 10 2683096106  36000     3000    3000        1       0       1
## # ℹ 41,607 more rows

Subset specific inquiries. Here I subset high out-of-pocket expenses.

# Keep the high out-of-pocket records: OOPX12X of 35,999 or more.
high_oopx <- dplyr::filter(meps_subset, OOPX12X >= 35999)

# Five-number summary (plus mean) of OOPX12X within that subset.
summary(high_oopx[["OOPX12X"]])

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   36000   36000   36000   41429   50250   55000

# Summarize every column of the high out-of-pocket subset.
# NOTE(review): OOPPREM has a minimum of -8 in this subset; negative values
# look like reserved survey codes rather than dollar amounts. Confirm against
# the MEPS codebook before interpreting the OOPPREM mean.
summary(high_oopx)

##     DUPERSID            OOPX12X         OOPPREMX       OOPPREM        OOPELIG 
##  Min.   :2.683e+09   Min.   :36000   Min.   :3000   Min.   :  -8   Min.   :1  
##  1st Qu.:2.683e+09   1st Qu.:36000   1st Qu.:3000   1st Qu.:  -8   1st Qu.:1  
##  Median :2.683e+09   Median :36000   Median :3000   Median :3000   Median :1  
##  Mean   :2.700e+09   Mean   :41429   Mean   :3452   Mean   :1937   Mean   :1  
##  3rd Qu.:2.684e+09   3rd Qu.:50250   3rd Qu.:4187   3rd Qu.:3000   3rd Qu.:1  
##  Max.   :2.796e+09   Max.   :55000   Max.   :4583   Max.   :4583   Max.   :1  
##     OOPFLAG          HSAACCT     
##  Min.   :0.0000   Min.   :1.000  
##  1st Qu.:0.0000   1st Qu.:1.000  
##  Median :0.0000   Median :1.000  
##  Mean   :0.4286   Mean   :1.143  
##  3rd Qu.:1.0000   3rd Qu.:1.000  
##  Max.   :1.0000   Max.   :2.000

Additional information can be found online here.

References

Agency for Healthcare Research and Quality. (n.d.). Medical Expenditure Panel Survey (MEPS). Retrieved from https://www.ahrq.gov/data/meps.html

Medical Expenditure Panel Survey (MEPS)

Nathan Alexander

Center for Applied Data Science and Analytics (CADSA)

Howard University

2024-11-16