library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
medicare <- read_csv("Medicare_Physician_Other_Practitioners_by_Geography_and_Service_2022.csv")
## Rows: 270673 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): Rndrng_Prvdr_Geo_Lvl, Rndrng_Prvdr_Geo_Cd, Rndrng_Prvdr_Geo_Desc, H...
## dbl (8): Tot_Rndrng_Prvdrs, Tot_Benes, Tot_Srvcs, Tot_Bene_Day_Srvcs, Avg_Sb...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Answer: 270,673 rows × 15 columns
str(medicare)
## spc_tbl_ [270,673 × 15] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Rndrng_Prvdr_Geo_Lvl : chr [1:270673] "State" "State" "State" "State" ...
## $ Rndrng_Prvdr_Geo_Cd : chr [1:270673] "9E" "9E" "9E" "9E" ...
## $ Rndrng_Prvdr_Geo_Desc: chr [1:270673] "Foreign Country" "Foreign Country" "Foreign Country" "Foreign Country" ...
## $ HCPCS_Cd : chr [1:270673] "J1885" "G0439" "G0416" "G0283" ...
## $ HCPCS_Desc : chr [1:270673] "Injection, ketorolac tromethamine, per 15 mg" "Annual wellness visit, includes a personalized prevention plan of service (pps), subsequent visit" "Surgical pathology, gross and microscopic examinations, for prostate needle biopsy, any method" "Electrical stimulation (unattended), to one or more areas for indication(s) other than wound care, as part of a"| __truncated__ ...
## $ HCPCS_Drug_Ind : chr [1:270673] "Y" "N" "N" "N" ...
## $ Place_Of_Srvc : chr [1:270673] "O" "O" "F" "O" ...
## $ Tot_Rndrng_Prvdrs : num [1:270673] 6 3 3 2 3 3 3 3 2 1 ...
## $ Tot_Benes : num [1:270673] 21 37 89 19 13 14 38 51 50 19 ...
## $ Tot_Srvcs : num [1:270673] 29 37 89 93 13 14 38 54 347 41 ...
## $ Tot_Bene_Day_Srvcs : num [1:270673] 23 37 89 93 13 14 38 54 347 36 ...
## $ Avg_Sbmtd_Chrg : num [1:270673] 39.8 226.4 821.9 41.7 947.5 ...
## $ Avg_Mdcr_Alowd_Amt : num [1:270673] 0.564 119.932 178.407 9.294 178.261 ...
## $ Avg_Mdcr_Pymt_Amt : num [1:270673] 0.375 119.932 142.263 7.025 178.261 ...
## $ Avg_Mdcr_Stdzd_Amt : num [1:270673] 0.373 128.837 139.189 7.065 183.48 ...
## - attr(*, "spec")=
## .. cols(
## .. Rndrng_Prvdr_Geo_Lvl = col_character(),
## .. Rndrng_Prvdr_Geo_Cd = col_character(),
## .. Rndrng_Prvdr_Geo_Desc = col_character(),
## .. HCPCS_Cd = col_character(),
## .. HCPCS_Desc = col_character(),
## .. HCPCS_Drug_Ind = col_character(),
## .. Place_Of_Srvc = col_character(),
## .. Tot_Rndrng_Prvdrs = col_double(),
## .. Tot_Benes = col_double(),
## .. Tot_Srvcs = col_double(),
## .. Tot_Bene_Day_Srvcs = col_double(),
## .. Avg_Sbmtd_Chrg = col_double(),
## .. Avg_Mdcr_Alowd_Amt = col_double(),
## .. Avg_Mdcr_Pymt_Amt = col_double(),
## .. Avg_Mdcr_Stdzd_Amt = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
summary(medicare)
## Rndrng_Prvdr_Geo_Lvl Rndrng_Prvdr_Geo_Cd Rndrng_Prvdr_Geo_Desc
## Length:270673 Length:270673 Length:270673
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
## HCPCS_Cd HCPCS_Desc HCPCS_Drug_Ind Place_Of_Srvc
## Length:270673 Length:270673 Length:270673 Length:270673
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Tot_Rndrng_Prvdrs Tot_Benes Tot_Srvcs Tot_Bene_Day_Srvcs
## Min. : 1.0 Min. : 11 Min. : 11 Min. : 11
## 1st Qu.: 11.0 1st Qu.: 30 1st Qu.: 40 1st Qu.: 38
## Median : 29.0 Median : 106 Median : 162 Median : 143
## Mean : 266.8 Mean : 5343 Mean : 23595 Mean : 10396
## 3rd Qu.: 95.0 3rd Qu.: 586 3rd Qu.: 1102 3rd Qu.: 831
## Max. :601911.0 Max. :21459588 Max. :103325664 Max. :90436622
## Avg_Sbmtd_Chrg Avg_Mdcr_Alowd_Amt Avg_Mdcr_Pymt_Amt Avg_Mdcr_Stdzd_Amt
## Min. : 0.0 Min. : 0.00 Min. : 0.00 Min. : 0.00
## 1st Qu.: 127.0 1st Qu.: 35.13 1st Qu.: 27.78 1st Qu.: 27.60
## Median : 440.6 Median : 110.84 Median : 85.52 Median : 85.03
## Mean : 1309.7 Mean : 291.80 Mean : 232.25 Mean : 230.06
## 3rd Qu.: 1606.7 3rd Qu.: 315.65 3rd Qu.: 250.72 3rd Qu.: 249.34
## Max. :99509.8 Max. :58494.73 Max. :46612.60 Max. :46577.95
The following table represents the variable classification in the dataset. The Rndrng_Prvdr_Geo_Lvl, Rndrng_Prvdr_Geo_Cd, Rndrng_Prvdr_Geo_Desc, HCPCS_Drug_Ind, Place_Of_Srvc, and HCPCS_Cd are either binary or nominal variables. The Tot_Rndrng_Prvdrs, Tot_Benes, Avg_Sbmtd_Chrg, Avg_Mdcr_Alowd_Amt, and Avg_Mdcr_Pymt_Amt are either discrete or continuous variables. The details are provided below:
Name | Data Type in R | Variable Classification | Reasoning |
---|---|---|---|
Rndrng_Prvdr_Geo_Lvl |
character | Binary | Geographic levels (“State” or “Foreign Country”) without inherent ordering |
Rndrng_Prvdr_Geo_Cd |
character | Nominal | Represents FIPS code identifiers without a meaningful order |
Rndrng_Prvdr_Geo_Desc |
character | Nominal | Geographic regions (e.g., “California”, “Oregon”, etc.) without inherent ordering |
HCPCS_Drug_Ind |
character | Binary | Indicates whether HCPCS code represents a drug listed on Medicare Part B Drug ASP File (‘Y’ or ‘N’) |
Place_Of_Srvc |
character | Binary | Indicates if service was in a facility (‘F’) or non-facility (‘O’) |
Tot_Rndrng_Prvdrs |
numeric | Discrete | Represents the number of rendering providers (can only be discrete numbers) |
Tot_Benes |
numeric | Discrete | Represents the number of beneficiaries (can only be discrete numbers) |
Avg_Sbmtd_Chrg |
numeric | Continuous | Represents the average submitted charge (this is a continuous amount) |
Avg_Mdcr_Alowd_Amt |
numeric | Continuous | Represents the average allowed Medicare pay-back amount (this is a continuous amount) |
Avg_Mdcr_Pymt_Amt |
numeric | Continuous | Represents the average Medicare pay-back amount (this is a continuous amount) |
Avg_Mdcr_Stdzd_Amt |
numeric | Continuous | Represents the average Medicare standardized pay-back amount (this is a continuous amount) |
HCPCS_Cd |
character | Nominal | Represents Healthcare Common Procedure Coding System codes (does not have numeric values or ordering) |
The following questions could be answered/explored from this dataset:
ggplot(medicare, aes(Tot_Rndrng_Prvdrs, Tot_Benes, color = Rndrng_Prvdr_Geo_Lvl)) +
geom_point() +
scale_x_continuous(trans="log2") +
scale_y_continuous(trans="log2")
ggplot(medicare, aes(Tot_Benes, Tot_Srvcs, color = Rndrng_Prvdr_Geo_Lvl)) +
geom_point() +
scale_x_continuous(trans="log2") +
scale_y_continuous(trans="log2")
ggplot(medicare, aes(Avg_Sbmtd_Chrg, Avg_Mdcr_Pymt_Amt, color = Rndrng_Prvdr_Geo_Lvl)) +
geom_point()
ggplot(medicare, aes(x = Rndrng_Prvdr_Geo_Lvl, y = Tot_Rndrng_Prvdrs)) +
geom_boxplot() +
scale_y_continuous(trans="log2")
ggplot(medicare, aes(x = Place_Of_Srvc, y = Avg_Mdcr_Pymt_Amt, fill = Rndrng_Prvdr_Geo_Lvl)) +
geom_boxplot() +
scale_y_continuous(trans="log2")
## Warning in scale_y_continuous(trans = "log2"): log-2 transformation introduced
## infinite values.
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_boxplot()`).
# Difference between submitted amount and allowed aid
medicare <- mutate(medicare, Avg_Sbmtd_Alowd_Dif = Avg_Sbmtd_Chrg - Avg_Mdcr_Alowd_Amt)
head(medicare)
## # A tibble: 6 × 16
## Rndrng_Prvdr_Geo_Lvl Rndrng_Prvdr_Geo_Cd Rndrng_Prvdr_Geo_Desc HCPCS_Cd
## <chr> <chr> <chr> <chr>
## 1 State 9E Foreign Country J1885
## 2 State 9E Foreign Country G0439
## 3 State 9E Foreign Country G0416
## 4 State 9E Foreign Country G0283
## 5 State 9E Foreign Country G0121
## 6 State 9E Foreign Country G0105
## # ℹ 12 more variables: HCPCS_Desc <chr>, HCPCS_Drug_Ind <chr>,
## # Place_Of_Srvc <chr>, Tot_Rndrng_Prvdrs <dbl>, Tot_Benes <dbl>,
## # Tot_Srvcs <dbl>, Tot_Bene_Day_Srvcs <dbl>, Avg_Sbmtd_Chrg <dbl>,
## # Avg_Mdcr_Alowd_Amt <dbl>, Avg_Mdcr_Pymt_Amt <dbl>,
## # Avg_Mdcr_Stdzd_Amt <dbl>, Avg_Sbmtd_Alowd_Dif <dbl>
The following chart demonstrates that the difference between the Medicare aid allowed versus the actual adjusted bill from the allowed amount and submitted amount. The plot demonstrates that Medicare does not often supply enough coverage to patients as there is a large difference between the aid received and the actual bill.
filtered_medicare <- medicare %>%
filter(Avg_Mdcr_Pymt_Amt > 0, Avg_Sbmtd_Alowd_Dif > 0) %>%
arrange(desc(Avg_Mdcr_Pymt_Amt))
ggplot(filtered_medicare, aes(x = Avg_Mdcr_Pymt_Amt, y = Avg_Sbmtd_Alowd_Dif, color = Rndrng_Prvdr_Geo_Lvl)) +
geom_point(alpha = 0.7) +
scale_x_continuous(trans = "log2", labels = scales::comma_format()) +
scale_y_continuous(trans = "log2", labels = scales::comma_format()) +
labs(
title = "Scatter Plot of Medicare Aid vs. Total Bill Difference",
x = "Average Medicare Aid Amount (log2 scale)",
y = "Average Submitted Bill Difference (log2 scale)",
color = "Provider Geography Level"
)
***