You will be looking at a subset of a United States medical expenditures dataset with information on costs for different medical conditions and different areas of the country.
You should do the following:
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# Read the inpatient charges CSV (column-type message suppressed) and
# standardise its column names with janitor in one step.
Inpatient_Charges <- janitor::clean_names(
  read_csv("Inpatient_Charges.csv", show_col_types = FALSE)
)
# Draw a 10% random sample within every state / DRG combination, restricted
# to five states (TX, CA, NY, FL, WA).
#
# A fixed seed makes the sample -- and therefore every table and plot built
# from it -- reproducible from one run to the next.
set.seed(2024)
Inpatient_Charges_Sample <- Inpatient_Charges %>%
  filter(provider_state %in% c("TX", "CA", "NY", "FL", "WA")) %>%
  group_by(provider_state, drg_definition) %>%
  sample_frac(0.10) %>%
  # Grouping is only needed for the stratified sampling; drop it so later
  # steps operate on a plain, ungrouped tibble.
  ungroup()
# Frequency table of DRG definitions in the sample: keep the ten with the
# highest counts and render them as a table.
Inpatient_Charges_Sample %>%
  tabyl(drg_definition) %>%
  arrange(desc(n)) %>%
  head(10) %>%
  kable()
drg_definition | n | percent |
---|---|---|
871 - SEPTICEMIA OR SEVERE SEPSIS W/O MV 96+ HOURS W MCC | 84 | 0.0144878 |
292 - HEART FAILURE & SHOCK W CC | 80 | 0.0137979 |
194 - SIMPLE PNEUMONIA & PLEURISY W CC | 78 | 0.0134529 |
470 - MAJOR JOINT REPLACEMENT OR REATTACHMENT OF LOWER EXTREMITY W/O MCC | 78 | 0.0134529 |
872 - SEPTICEMIA OR SEVERE SEPSIS W/O MV >96 HOURS W/O MCC | 78 | 0.0134529 |
291 - HEART FAILURE & SHOCK W MCC | 77 | 0.0132804 |
392 - ESOPHAGITIS, GASTROENT & MISC DIGEST DISORDERS W/O MCC | 77 | 0.0132804 |
690 - KIDNEY & URINARY TRACT INFECTIONS W/O MCC | 77 | 0.0132804 |
603 - CELLULITIS W/O MCC | 75 | 0.0129355 |
190 - CHRONIC OBSTRUCTIVE PULMONARY DISEASE W MCC | 74 | 0.0127630 |
# Preview the first sampled record, transposed so that each field is shown
# on its own row.
Inpatient_Charges_Sample %>%
  head(1) %>%
  t() %>%
  kable()
drg_definition | 001 - HEART TRANSPLANT OR IMPLANT OF HEART ASSIST SYSTEM W MCC |
provider_id | 50454 |
provider_name | UCSF MEDICAL CENTER |
provider_street_address | 505 PARNASSUS AVE, BOX 0296 |
provider_city | SAN FRANCISCO |
provider_state | CA |
provider_zip_code | 94143 |
hospital_referral_region_hrr_description | CA - San Francisco |
total_discharges | 19 |
average_covered_charges | 1578240 |
average_total_payments | 394118.9 |
average_medicare_payments | 355075.8 |
# g1: covered charges against total payments for the whole sample, with a
# single smoothed trend line over all points.
g1 <- ggplot(
  data = Inpatient_Charges_Sample,
  mapping = aes(x = average_total_payments, y = average_covered_charges)
) +
  geom_point() +
  stat_smooth()
g1
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# g2: same scatter as before, but the data are grouped by state and each
# state gets its own smoother, coloured and filled by provider_state.
g2 <- ggplot(
  data = Inpatient_Charges_Sample,
  mapping = aes(
    x = average_total_payments,
    y = average_covered_charges,
    group = provider_state
  )
) +
  geom_point() +
  stat_smooth(aes(colour = provider_state, fill = provider_state))
g2
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# g3: scatter of covered charges against total payments with one smoother
# per state (line colour only); the legend is placed below the plot.
g3 <- ggplot(
  data = Inpatient_Charges_Sample,
  mapping = aes(x = average_total_payments, y = average_covered_charges)
) +
  geom_point() +
  stat_smooth(aes(colour = provider_state))
g3 +
  theme(legend.position = "bottom")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
# g4: points coloured by DRG definition, one facet column per state.
g4 <- ggplot(
  data = Inpatient_Charges_Sample,
  mapping = aes(
    x = average_total_payments,
    y = average_covered_charges,
    colour = drg_definition
  )
) +
  geom_point() +
  facet_grid(. ~ provider_state)
# The legend is hidden and the x-axis labels are rotated by 90 degrees.
g4 +
  theme(
    legend.position = "none",
    axis.text.x = element_text(angle = 90, hjust = 1)
  )