This file is provided as a preliminary resource until this data is
added to the critstats
package. You may also use this code
to gather data related to your class project, thesis, or other academic
tasks beyond what is provided below. Content in this file comes from a
host of different sources, which you should be familiar with prior to
accessing and analyzing any data.
An important first step is to read the codebook for the data. More information can be viewed at the bottom of the file in the references section. This file will be updated periodically.
Open up a new .Rmd file.
Use {r setup, include=FALSE}
in your first code chunk.
# Set the default chunk option so that the code of each chunk is echoed
# in the knitted document.
knitr::opts_chunk$set(echo = TRUE)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(dplyr)
# Access the data files from the MEPS site:
# https://meps.ahrq.gov/mepsweb/data_stats/download_data_files.jsp
# The script that is source()'d below lives at:
# https://meps.ahrq.gov/mepsweb/data_stats/download_data/pufs/h242/h242ru.txt
# Place the h242.dat file in the same folder as this .Rmd file,
# then run the code below.
# Relative path to the data file, so it must sit next to this document.
meps_path <- "h242.dat"
# Execute the remote script in this session.
# NOTE(review): presumably the script reads `meps_path` and creates the
# `h242` object that head() uses below -- confirm against the script.
source(
"https://meps.ahrq.gov/mepsweb/data_stats/download_data/pufs/h242/h242ru.txt")
# View the first rows of the data
head(h242)
## # A tibble: 6 × 56
## EPCPIDX DUPERSID PHLDRIDX ESTBIDX EPRSIDX InsurPrivIDEX PANEL RN JOBSIDX
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 246001001… 2460010… 2460010… 246001… 246001… 246001001061… 24 7 246001…
## 2 246001001… 2460010… 2460010… 246001… 246001… 246001001061… 24 8 246001…
## 3 246001001… 2460010… 2460010… 246001… 246001… 246001001061… 24 9 246001…
## 4 246001801… 2460018… 2460018… 246001… 246001… 246001801041… 24 7 246001…
## 5 246001801… 2460018… 2460018… 246001… 246001… 246001801041… 24 8 246001…
## 6 246001801… 2460018… 2460018… 246001… 246001… 246001801041… 24 9 246001…
## # ℹ 47 more variables: JOBSINFR <dbl>, JOBSFILE <dbl>, FYFLG <dbl>,
## # CMJINS <dbl>, EMPLSTAT <dbl>, PHOLDER <dbl>, DEPNDNT <dbl>, EVALCOVR <dbl>,
## # STAT1 <dbl>, STAT2 <dbl>, STAT3 <dbl>, STAT4 <dbl>, STAT5 <dbl>,
## # STAT6 <dbl>, STAT7 <dbl>, STAT8 <dbl>, STAT9 <dbl>, STAT10 <dbl>,
## # STAT11 <dbl>, STAT12 <dbl>, DECPHLDR <dbl>, OUTPHLDR <dbl>, NOPUFLG <dbl>,
## # COVROUT_M18 <dbl>, TYPEFLAG_M23 <dbl>, STEXCH <dbl>, PrivateCat_M23 <dbl>,
## # HOSPINSX <dbl>, MSUPINSX <dbl>, DENTLINS <dbl>, DENTLINX <dbl>, …
# View the last rows of the data
tail(h242)
## # A tibble: 6 × 56
## EPCPIDX DUPERSID PHLDRIDX ESTBIDX EPRSIDX InsurPrivIDEX PANEL RN JOBSIDX
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <chr>
## 1 279969401… 2799694… 2799694… 279969… 279969… 279969401031… 27 1 279969…
## 2 279969401… 2799694… 2799694… 279969… 279969… 279969401031… 27 1 279969…
## 3 279969401… 2799694… 2799694… 279969… 279969… 279969401031… 27 2 279969…
## 4 279969401… 2799694… 2799694… 279969… 279969… 279969401031… 27 2 279969…
## 5 279969401… 2799694… 2799694… 279969… 279969… 279969401031… 27 3 279969…
## 6 279969401… 2799694… 2799694… 279969… 279969… 279969401031… 27 3 279969…
## # ℹ 47 more variables: JOBSINFR <dbl>, JOBSFILE <dbl>, FYFLG <dbl>,
## # CMJINS <dbl>, EMPLSTAT <dbl>, PHOLDER <dbl>, DEPNDNT <dbl>, EVALCOVR <dbl>,
## # STAT1 <dbl>, STAT2 <dbl>, STAT3 <dbl>, STAT4 <dbl>, STAT5 <dbl>,
## # STAT6 <dbl>, STAT7 <dbl>, STAT8 <dbl>, STAT9 <dbl>, STAT10 <dbl>,
## # STAT11 <dbl>, STAT12 <dbl>, DECPHLDR <dbl>, OUTPHLDR <dbl>, NOPUFLG <dbl>,
## # COVROUT_M18 <dbl>, TYPEFLAG_M23 <dbl>, STEXCH <dbl>, PrivateCat_M23 <dbl>,
## # HOSPINSX <dbl>, MSUPINSX <dbl>, DENTLINS <dbl>, DENTLINX <dbl>, …
Running the code from the source file.
# Layout of the fixed-width file: 56 fields, described by four parallel
# vectors (first column, last column, variable name, column type).

# First column of each field
pos_start <- c(
    1,  36,  46,  56,  67,  92, 106, 108, 109, 123,
  125, 128, 129, 131, 134, 135, 136, 138, 140, 142,
  144, 146, 148, 150, 152, 154, 156, 158, 160, 162,
  163, 164, 165, 167, 169, 171, 173, 175, 177, 179,
  181, 183, 185, 188, 190, 191, 192, 200, 207, 215,
  217, 220, 222, 224, 226, 229
)

# Last column of each field
pos_end <- c(
   35,  45,  55,  66,  91, 105, 107, 108, 122, 124,
  127, 128, 130, 133, 134, 135, 137, 139, 141, 143,
  145, 147, 149, 151, 153, 155, 157, 159, 161, 162,
  163, 164, 166, 168, 170, 172, 174, 176, 178, 180,
  182, 184, 187, 189, 190, 191, 199, 206, 214, 216,
  219, 221, 223, 225, 228, 230
)

# Variable names, in file order
var_names <- c(
  "EPCPIDX", "DUPERSID", "PHLDRIDX", "ESTBIDX", "EPRSIDX",
  "InsurPrivIDEX", "PANEL", "RN", "JOBSIDX", "JOBSINFR",
  "JOBSFILE", "FYFLG", "CMJINS", "EMPLSTAT", "PHOLDER",
  "DEPNDNT", "EVALCOVR", "STAT1", "STAT2", "STAT3",
  "STAT4", "STAT5", "STAT6", "STAT7", "STAT8",
  "STAT9", "STAT10", "STAT11", "STAT12", "DECPHLDR",
  "OUTPHLDR", "NOPUFLG", "COVROUT_M18", "TYPEFLAG_M23", "STEXCH",
  "PrivateCat_M23", "HOSPINSX", "MSUPINSX", "DENTLINS", "DENTLINX",
  "VISIONIN", "PMEDINS", "COBRA", "PLANMETL", "COVTYPIN",
  "OOPELIG", "OOPPREM", "OOPPREMX", "OOPX12X", "OOPFLAG",
  "PREMLEVX", "PREMSUBZ", "ANNDEDCTP", "HSAACCT", "UPRHMO_M23",
  "NAMECHNG"
)

# Column types: "c" = character, "n" = number.
# Fields 1-6 and field 9 (JOBSIDX) are character; every other field is numeric.
var_types <- c(rep("c", 6), "n", "n", "c", rep("n", 47))
names(var_types) <- var_names

# IMPORT ASCII file -----------------------
h242 <- read_fwf(
  file = meps_path,
  col_positions = fwf_positions(pos_start, pos_end, var_names),
  col_types = var_types
)
Notice that the output = FALSE
parameter is added to the
code chunk to prevent printing of the R output values. To work with the
data locally, be sure to run the code chunk manually.
We will then save the data file. Uncomment to run a manual save.
# save(h242, file ="h242.Rdata")
# Work on a copy of the imported data under a shorter name.
# as_tibble() replaces as_data_frame(), which was deprecated in tibble 2.0.0.
meps <- as_tibble(h242)
# Convert character variables to numeric.
# NOTE: the character columns are the ID fields. Some are far wider than a
# double can represent exactly (EPCPIDX is 35 characters, while a double
# holds roughly 15-16 significant digits), so their converted values are
# approximate and may no longer be unique. Use the character IDs in `h242`
# when you need exact identifiers, for example when joining files.
meps <- meps %>%
  mutate(across(where(is.character), as.numeric))
# Check the structure of the modified data frame
str(meps)
## tibble [41,617 × 56] (S3: tbl_df/tbl/data.frame)
## $ EPCPIDX : num [1:41617] 2.46e+34 2.46e+34 2.46e+34 2.46e+34 2.46e+34 ...
## $ DUPERSID : num [1:41617] 2.46e+09 2.46e+09 2.46e+09 2.46e+09 2.46e+09 ...
## $ PHLDRIDX : num [1:41617] 2.46e+09 2.46e+09 2.46e+09 2.46e+09 2.46e+09 ...
## $ ESTBIDX : num [1:41617] 2.46e+10 2.46e+10 2.46e+10 2.46e+10 2.46e+10 ...
## $ EPRSIDX : num [1:41617] 2.46e+24 2.46e+24 2.46e+24 2.46e+24 2.46e+24 ...
## $ InsurPrivIDEX : num [1:41617] 2.46e+13 2.46e+13 2.46e+13 2.46e+13 2.46e+13 ...
## $ PANEL : num [1:41617] 24 24 24 24 24 24 24 24 24 24 ...
## $ RN : num [1:41617] 7 8 9 7 8 9 7 7 8 8 ...
## $ JOBSIDX : num [1:41617] 2.46e+13 2.46e+13 2.46e+13 2.46e+13 2.46e+13 ...
## $ JOBSINFR : num [1:41617] 0 0 0 0 0 0 0 0 0 0 ...
## $ JOBSFILE : num [1:41617] 237 237 237 237 237 237 237 237 237 237 ...
## $ FYFLG : num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
## $ CMJINS : num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
## $ EMPLSTAT : num [1:41617] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ PHOLDER : num [1:41617] 1 1 1 1 1 1 1 0 1 0 ...
## $ DEPNDNT : num [1:41617] 0 0 0 0 0 0 0 1 0 1 ...
## $ EVALCOVR : num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
## $ STAT1 : num [1:41617] 1 -1 -1 1 1 -1 1 1 1 1 ...
## $ STAT2 : num [1:41617] 1 -1 -1 -1 1 -1 -1 -1 1 1 ...
## $ STAT3 : num [1:41617] 1 1 -1 -1 1 -1 -1 -1 1 1 ...
## $ STAT4 : num [1:41617] -1 1 -1 -1 1 -1 -1 -1 1 1 ...
## $ STAT5 : num [1:41617] -1 1 -1 -1 1 -1 -1 -1 1 1 ...
## $ STAT6 : num [1:41617] -1 1 -1 -1 1 -1 -1 -1 1 1 ...
## $ STAT7 : num [1:41617] -1 1 -1 -1 1 1 -1 -1 1 1 ...
## $ STAT8 : num [1:41617] -1 1 1 -1 -1 1 -1 -1 1 1 ...
## $ STAT9 : num [1:41617] -1 -1 1 -1 -1 1 -1 -1 1 1 ...
## $ STAT10 : num [1:41617] -1 -1 1 -1 -1 1 -1 -1 1 1 ...
## $ STAT11 : num [1:41617] -1 -1 1 -1 -1 1 -1 -1 -1 -1 ...
## $ STAT12 : num [1:41617] -1 -1 1 -1 -1 1 -1 -1 -1 -1 ...
## $ DECPHLDR : num [1:41617] 2 2 2 2 2 2 2 2 2 2 ...
## $ OUTPHLDR : num [1:41617] 2 2 2 2 2 2 2 2 2 2 ...
## $ NOPUFLG : num [1:41617] 2 2 2 2 2 2 2 2 2 2 ...
## $ COVROUT_M18 : num [1:41617] 2 2 2 2 2 2 -1 -1 -1 -1 ...
## $ TYPEFLAG_M23 : num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
## $ STEXCH : num [1:41617] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ PrivateCat_M23: num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
## $ HOSPINSX : num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
## $ MSUPINSX : num [1:41617] 2 2 2 2 2 2 2 2 2 2 ...
## $ DENTLINS : num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
## $ DENTLINX : num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
## $ VISIONIN : num [1:41617] 1 1 1 1 1 1 1 1 1 1 ...
## $ PMEDINS : num [1:41617] 2 2 2 1 1 1 1 1 1 1 ...
## $ COBRA : num [1:41617] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ PLANMETL : num [1:41617] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ COVTYPIN : num [1:41617] 1 1 1 1 1 1 2 2 2 2 ...
## $ OOPELIG : num [1:41617] 1 2 2 1 2 2 1 1 2 2 ...
## $ OOPPREM : num [1:41617] 358 -1 -1 -8 -1 ...
## $ OOPPREMX : num [1:41617] 358 -1 -1 216 -1 ...
## $ OOPX12X : num [1:41617] 4290 -1 -1 2591 -1 ...
## $ OOPFLAG : num [1:41617] 0 -1 -1 1 -1 -1 0 0 -1 -1 ...
## $ PREMLEVX : num [1:41617] 2 -1 -1 2 -1 -1 4 4 -1 -1 ...
## $ PREMSUBZ : num [1:41617] -1 -1 -1 -1 -1 -1 -1 -1 -1 -1 ...
## $ ANNDEDCTP : num [1:41617] 3 -1 -1 4 -1 -1 5 5 -1 -1 ...
## $ HSAACCT : num [1:41617] 2 -1 -1 2 -1 -1 -1 -1 -1 -1 ...
## $ UPRHMO_M23 : num [1:41617] 2 2 2 2 2 2 1 1 1 1 ...
## $ NAMECHNG : num [1:41617] -1 2 2 2 2 2 2 2 2 2 ...
Subset data for only those variables you would like to use.
# Keep only the eight variables used in the rest of this walkthrough.
meps_subset <- meps %>%
  dplyr::select(
    DUPERSID, PANEL,
    OOPX12X, OOPPREMX, OOPPREM,
    OOPELIG, OOPFLAG, HSAACCT
  )
meps_subset # will print first few rows of the new dataframe
## # A tibble: 41,617 × 8
## DUPERSID PANEL OOPX12X OOPPREMX OOPPREM OOPELIG OOPFLAG HSAACCT
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2460010101 24 4290 358. 358. 1 0 2
## 2 2460010101 24 -1 -1 -1 2 -1 -1
## 3 2460010101 24 -1 -1 -1 2 -1 -1
## 4 2460018101 24 2591. 216. -8 1 1 2
## 5 2460018101 24 -1 -1 -1 2 -1 -1
## 6 2460018101 24 -1 -1 -1 2 -1 -1
## 7 2460026101 24 0 0 0 1 0 -1
## 8 2460026103 24 0 0 0 1 0 -1
## 9 2460026101 24 -1 -1 -1 2 -1 -1
## 10 2460026103 24 -1 -1 -1 2 -1 -1
## # ℹ 41,607 more rows
We then rearrange the data by key variables.
# Drop PANEL, then order the rows from the largest to the smallest OOPX12X.
meps_subset <- meps_subset %>%
  select(!PANEL) %>%
  arrange(desc(OOPX12X))
meps_subset
## # A tibble: 41,617 × 7
## DUPERSID OOPX12X OOPPREMX OOPPREM OOPELIG OOPFLAG HSAACCT
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2683204101 55000. 4583. -8 1 1 2
## 2 2683204105 55000. 4583. -8 1 1 2
## 3 2795834101 55000. 4583. 4583. 1 0 1
## 4 2795834102 55000. 4583. 4583. 1 0 1
## 5 2683096101 36000 3000 3000 1 0 1
## 6 2683096102 36000 3000 3000 1 0 1
## 7 2683096103 36000 3000 3000 1 0 1
## 8 2683096104 36000 3000 3000 1 0 1
## 9 2683096105 36000 3000 3000 1 0 1
## 10 2683096106 36000 3000 3000 1 0 1
## # ℹ 41,607 more rows
Subset specific inquiries. Here I subset high out-of-pocket expenses.
# Keep the rows whose OOPX12X is at least 35,999, then summarise that column.
high_oopx <- dplyr::filter(meps_subset, OOPX12X >= 35999)
summary(high_oopx[["OOPX12X"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 36000 36000 36000 41429 50250 55000
# Summary statistics for every column of the filtered subset
summary(high_oopx)
## DUPERSID OOPX12X OOPPREMX OOPPREM OOPELIG
## Min. :2.683e+09 Min. :36000 Min. :3000 Min. : -8 Min. :1
## 1st Qu.:2.683e+09 1st Qu.:36000 1st Qu.:3000 1st Qu.: -8 1st Qu.:1
## Median :2.683e+09 Median :36000 Median :3000 Median :3000 Median :1
## Mean :2.700e+09 Mean :41429 Mean :3452 Mean :1937 Mean :1
## 3rd Qu.:2.684e+09 3rd Qu.:50250 3rd Qu.:4187 3rd Qu.:3000 3rd Qu.:1
## Max. :2.796e+09 Max. :55000 Max. :4583 Max. :4583 Max. :1
## OOPFLAG HSAACCT
## Min. :0.0000 Min. :1.000
## 1st Qu.:0.0000 1st Qu.:1.000
## Median :0.0000 Median :1.000
## Mean :0.4286 Mean :1.143
## 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1.0000 Max. :2.000
Additional information can be found online here.
Agency for Healthcare Research and Quality. (n.d.). Medical Expenditure Panel Survey (MEPS). Retrieved from https://www.ahrq.gov/data/meps.html