We imported 2 csv files from Milestone 3 i.e., 1. medicare_modified, which contains state-level medicare enrolment, utilisation, total medicare spending, and a spending per capita comparable. 2. mortality_rate that contains state-level merged data on total deaths and total population for age groups 65 plus with a mortality rate per 1000 comparale.

library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
medicare_modified <- read_csv('./data_sets_for_milestone_4/medicare_modified.csv')
## Rows: 51 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): residence_state
## dbl (4): total_traditional_medicare_part_a_and_or_part_b_enrollees, total_me...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
mortality_rate <- read_csv('./data_sets_for_milestone_4/mortality_rate.csv')
## Rows: 51 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): residence_state
## dbl (3): total_deaths_65_plus, total_population_65_plus, mortality_rate_per_...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
joined_data <- medicare_modified %>%
  left_join(mortality_rate, by = "residence_state")

str(joined_data)
## spc_tbl_ [51 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ residence_state                                             : chr [1:51] "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ total_traditional_medicare_part_a_and_or_part_b_enrollees   : num [1:51] 528983 105831 767513 430724 3436742 ...
##  $ total_medicare_part_a_and_or_part_b_persons_with_utilization: num [1:51] 512627 90717 720762 409441 3009051 ...
##  $ total_medicare_part_a_and_or_part_b_program_payments        : num [1:51] 5.65e+09 1.05e+09 7.75e+09 4.27e+09 4.33e+10 ...
##  $ medicare_spending_per_person                                : num [1:51] 10688 9939 10092 9919 12586 ...
##  $ total_deaths_65_plus                                        : num [1:51] 46667 3365 57512 27283 238984 ...
##  $ total_population_65_plus                                    : num [1:51] 888817 97663 1333046 528101 5957092 ...
##  $ mortality_rate_per_1000                                     : num [1:51] 52.5 34.5 43.1 51.7 40.1 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   residence_state = col_character(),
##   ..   total_traditional_medicare_part_a_and_or_part_b_enrollees = col_double(),
##   ..   total_medicare_part_a_and_or_part_b_persons_with_utilization = col_double(),
##   ..   total_medicare_part_a_and_or_part_b_program_payments = col_double(),
##   ..   medicare_spending_per_person = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
head(joined_data) 
## # A tibble: 6 × 8
##   residence_state total_traditional_medicare_part_a_and…¹ total_medicare_part_…²
##   <chr>                                             <dbl>                  <dbl>
## 1 Alabama                                          528983                 512627
## 2 Alaska                                           105831                  90717
## 3 Arizona                                          767513                 720762
## 4 Arkansas                                         430724                 409441
## 5 California                                      3436742                3009051
## 6 Colorado                                         518796                 467294
## # ℹ abbreviated names:
## #   ¹​total_traditional_medicare_part_a_and_or_part_b_enrollees,
## #   ²​total_medicare_part_a_and_or_part_b_persons_with_utilization
## # ℹ 5 more variables:
## #   total_medicare_part_a_and_or_part_b_program_payments <dbl>,
## #   medicare_spending_per_person <dbl>, total_deaths_65_plus <dbl>,
## #   total_population_65_plus <dbl>, mortality_rate_per_1000 <dbl>

Visualization 1: Boxplot of Mortality Rates by Expenditure Quartiles

This visualization divides states into four quartiles based on their per-person Medicare spending and shows the distribution of mortality rates within each quartile. It allows us to identify whether higher spending correlates with lower mortality rates by comparing the median and range of mortality rates across expenditure levels.

library(knitr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(stringr)

# We Need to create quartiles for boxplot
joined_data_quartiles <- joined_data %>% 
  mutate(Expenditure_Quartile = ntile(medicare_spending_per_person, 4),
         mortality_rate_per_1000 = round(mortality_rate_per_1000, 2)) %>% 
  mutate(Expenditure_Quartile = case_when(
    Expenditure_Quartile == 1 ~ "Q1",
    Expenditure_Quartile == 2 ~ "Q2", 
    Expenditure_Quartile == 3 ~ "Q3",
    Expenditure_Quartile == 4 ~ "Q4"
    ))

head(joined_data_quartiles$Expenditure_Quartile)
## [1] "Q3" "Q2" "Q2" "Q2" "Q4" "Q1"
ggplot(joined_data_quartiles, aes(x = Expenditure_Quartile, y = mortality_rate_per_1000)) + 
  geom_boxplot(fill = "lightblue", alpha = 0.7, outlier.color = "black") + 
  labs(title = "Mortality Rates by Medicare Expenditure Quartiles", 
       x = "Expenditure Quartiles",
       y = "Mortality Rate (per 1,000)",
       caption = "Data Source: Medicare and Mortality Datasets"
       ) + 
  theme_minimal()

### Additional interactive plot 

interactive_boxplot <- plot_ly(data = joined_data_quartiles, y = ~mortality_rate_per_1000, color = ~Expenditure_Quartile, type = "box") %>% 
  layout(title = 'Mortality Rates by Medicare Expenditure Quartiles', 
         xaxis = list(title = 'Expenditure Quartiles'), yaxis = list(title = 'Mortality Rate'))
interactive_boxplot
## Warning: `arrange_()` was deprecated in dplyr 0.7.0.
## ℹ Please use `arrange()` instead.
## ℹ See vignette('programming') for more help
## ℹ The deprecated feature was likely used in the plotly package.
##   Please report the issue at <https://github.com/ropensci/plotly/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Visualization 2: Table 1

This table lists the top 10 states with the highest mortality rates per 1000 people, alongside their Medicare spending and enrollment. It highlights states where mortality remains high despite significant Medicare expenditures, suggesting potential inefficiencies in healthcare spending or disparities in healthcare access.

# Let us fix and edit column names for table 
joined_data_clean <- joined_data %>% 
  rename(
    State = residence_state,
    Medicare_Enrollees = total_traditional_medicare_part_a_and_or_part_b_enrollees,
    Medicare_Utilization = total_medicare_part_a_and_or_part_b_persons_with_utilization,
    Medicare_Spending = total_medicare_part_a_and_or_part_b_program_payments,
    Spending_Per_Person = medicare_spending_per_person,
    Mortality_Rate = mortality_rate_per_1000,
    Total_Deaths_65Plus = total_deaths_65_plus,
    Population_65Plus = total_population_65_plus
  )


data_table <- joined_data_clean %>% 
  select(State, Medicare_Enrollees, Medicare_Spending, Mortality_Rate) %>% 
  mutate(Mortality_Rate = round(Mortality_Rate, 2)) %>% 
  arrange(desc(Mortality_Rate)) %>% 
  head(10)


medicare_data_table <- data_table %>% 
   kable("html", col.names = c("State", "Medicare Enrollees", "Medicare Spending ($)", "Mortality Rate (per 1,000)"),
        caption = "Top 10 States with Highest Mortality Rates and Medicare Spending") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE) %>%
  footnote(general = "Data Source: Medicare and Mortality datasets")

medicare_data_table
Top 10 States with Highest Mortality Rates and Medicare Spending
State Medicare Enrollees Medicare Spending ($) Mortality Rate (per 1,000)
West Virginia 254195 2671150063 54.80
Mississippi 431483 4931158545 54.26
Oklahoma 528377 5988040719 54.10
Kentucky 536961 5434363793 52.87
Alabama 528983 5653598984 52.50
Arkansas 430724 4272207750 51.66
Tennessee 766844 7583702280 51.63
Ohio 1227265 13045771320 50.87
Indiana 775334 8340501710 49.70
Louisiana 473575 5517151179 49.40
Note:
Data Source: Medicare and Mortality datasets

Visualisation 3: Table 2: US Medicare Spending Per Capita and Mortality Rates; All states

Visualization 4; Scatter Plot comparing Medicare spending and mortality rates

The scatter plot highlights the relationship between Medicare spending per person and mortality rates per 1000. It includes a red trendline to indicate the overall correlation, revealing whether higher spending results in better health outcomes (lower mortality rates) or if other factors influence this relationship.

library(plotly)

scatter_plot <- plot_ly(
  data = joined_data_clean,
  x = ~`Spending Per Person ($)`,               
  y = ~`Mortality Rate per 1000`,               
  type = 'scatter',
  mode = 'markers',
  marker = list(size = 10, color = 'blue', opacity = 0.7),  
  text = ~paste(
    "State: ", State, "<br>",
    "Spending: $", `Spending Per Person ($)`, "<br>",
    "Mortality Rate: ", `Mortality Rate per 1000`, " per 1000"
  ),                                           
  hoverinfo = 'text'                           
) %>%
  add_lines(
    x = ~`Spending Per Person ($)`,
    y = fitted(lm(`Mortality Rate per 1000` ~ `Spending Per Person ($)`, data = joined_data_clean)),
    line = list(color = 'red', dash = 'dash'),
    name = "Trendline"
  ) %>%
  layout(
    title = "Relationship Between Medicare Spending and Mortality Rates",
    xaxis = list(title = "Medicare Spending Per Person ($)"),
    yaxis = list(title = "Mortality Rate (per 1000)"),
    showlegend = FALSE
  )

scatter_plot
## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...