We imported 2 csv files from Milestone 3 i.e., 1. medicare_modified, which contains state-level medicare enrolment, utilisation, total medicare spending, and a spending per capita comparable. 2. mortality_rate that contains state-level merged data on total deaths and total population for age groups 65 plus with a mortality rate per 1000 comparale.

library(readr)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

medicare_modified <- read_csv('./data_sets_for_milestone_4/medicare_modified.csv')

## Rows: 51 Columns: 5

## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): residence_state
## dbl (4): total_traditional_medicare_part_a_and_or_part_b_enrollees, total_me...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

mortality_rate <- read_csv('./data_sets_for_milestone_4/mortality_rate.csv')

## Rows: 51 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): residence_state
## dbl (3): total_deaths_65_plus, total_population_65_plus, mortality_rate_per_...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

joined_data <- medicare_modified %>%
  left_join(mortality_rate, by = "residence_state")

str(joined_data)

## spc_tbl_ [51 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ residence_state                                             : chr [1:51] "Alabama" "Alaska" "Arizona" "Arkansas" ...
##  $ total_traditional_medicare_part_a_and_or_part_b_enrollees   : num [1:51] 528983 105831 767513 430724 3436742 ...
##  $ total_medicare_part_a_and_or_part_b_persons_with_utilization: num [1:51] 512627 90717 720762 409441 3009051 ...
##  $ total_medicare_part_a_and_or_part_b_program_payments        : num [1:51] 5.65e+09 1.05e+09 7.75e+09 4.27e+09 4.33e+10 ...
##  $ medicare_spending_per_person                                : num [1:51] 10688 9939 10092 9919 12586 ...
##  $ total_deaths_65_plus                                        : num [1:51] 46667 3365 57512 27283 238984 ...
##  $ total_population_65_plus                                    : num [1:51] 888817 97663 1333046 528101 5957092 ...
##  $ mortality_rate_per_1000                                     : num [1:51] 52.5 34.5 43.1 51.7 40.1 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   residence_state = col_character(),
##   ..   total_traditional_medicare_part_a_and_or_part_b_enrollees = col_double(),
##   ..   total_medicare_part_a_and_or_part_b_persons_with_utilization = col_double(),
##   ..   total_medicare_part_a_and_or_part_b_program_payments = col_double(),
##   ..   medicare_spending_per_person = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>

head(joined_data)

## # A tibble: 6 × 8
##   residence_state total_traditional_medicare_part_a_and…¹ total_medicare_part_…²
##   <chr>                                             <dbl>                  <dbl>
## 1 Alabama                                          528983                 512627
## 2 Alaska                                           105831                  90717
## 3 Arizona                                          767513                 720762
## 4 Arkansas                                         430724                 409441
## 5 California                                      3436742                3009051
## 6 Colorado                                         518796                 467294
## # ℹ abbreviated names:
## #   ¹total_traditional_medicare_part_a_and_or_part_b_enrollees,
## #   ²total_medicare_part_a_and_or_part_b_persons_with_utilization
## # ℹ 5 more variables:
## #   total_medicare_part_a_and_or_part_b_program_payments <dbl>,
## #   medicare_spending_per_person <dbl>, total_deaths_65_plus <dbl>,
## #   total_population_65_plus <dbl>, mortality_rate_per_1000 <dbl>

Visualization 1: Boxplot of Mortality Rates by Expenditure Quartiles

This visualization divides states into four quartiles based on their per-person Medicare spending and shows the distribution of mortality rates within each quartile. It allows us to identify whether higher spending correlates with lower mortality rates by comparing the median and range of mortality rates across expenditure levels.

library(knitr)
library(kableExtra)

## 
## Attaching package: 'kableExtra'

## The following object is masked from 'package:dplyr':
## 
##     group_rows

library(ggplot2)
library(plotly)

## 
## Attaching package: 'plotly'

## The following object is masked from 'package:ggplot2':
## 
##     last_plot

## The following object is masked from 'package:stats':
## 
##     filter

## The following object is masked from 'package:graphics':
## 
##     layout

library(stringr)

# We Need to create quartiles for boxplot
joined_data_quartiles <- joined_data %>% 
  mutate(Expenditure_Quartile = ntile(medicare_spending_per_person, 4),
         mortality_rate_per_1000 = round(mortality_rate_per_1000, 2)) %>% 
  mutate(Expenditure_Quartile = case_when(
    Expenditure_Quartile == 1 ~ "Q1",
    Expenditure_Quartile == 2 ~ "Q2", 
    Expenditure_Quartile == 3 ~ "Q3",
    Expenditure_Quartile == 4 ~ "Q4"
    ))

head(joined_data_quartiles$Expenditure_Quartile)

## [1] "Q3" "Q2" "Q2" "Q2" "Q4" "Q1"

ggplot(joined_data_quartiles, aes(x = Expenditure_Quartile, y = mortality_rate_per_1000)) + 
  geom_boxplot(fill = "lightblue", alpha = 0.7, outlier.color = "black") + 
  labs(title = "Mortality Rates by Medicare Expenditure Quartiles", 
       x = "Expenditure Quartiles",
       y = "Mortality Rate (per 1,000)",
       caption = "Data Source: Medicare and Mortality Datasets"
       ) + 
  theme_minimal()

### Additional interactive plot 

interactive_boxplot <- plot_ly(data = joined_data_quartiles, y = ~mortality_rate_per_1000, color = ~Expenditure_Quartile, type = "box") %>% 
  layout(title = 'Mortality Rates by Medicare Expenditure Quartiles', 
         xaxis = list(title = 'Expenditure Quartiles'), yaxis = list(title = 'Mortality Rate'))
interactive_boxplot

## Warning: `arrange_()` was deprecated in dplyr 0.7.0.
## ℹ Please use `arrange()` instead.
## ℹ See vignette('programming') for more help
## ℹ The deprecated feature was likely used in the plotly package.
##   Please report the issue at <https://github.com/ropensci/plotly/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Visualization 2: Table 1

This table lists the top 10 states with the highest mortality rates per 1000 people, alongside their Medicare spending and enrollment. It highlights states where mortality remains high despite significant Medicare expenditures, suggesting potential inefficiencies in healthcare spending or disparities in healthcare access.

# Let us fix and edit column names for table 
joined_data_clean <- joined_data %>% 
  rename(
    State = residence_state,
    Medicare_Enrollees = total_traditional_medicare_part_a_and_or_part_b_enrollees,
    Medicare_Utilization = total_medicare_part_a_and_or_part_b_persons_with_utilization,
    Medicare_Spending = total_medicare_part_a_and_or_part_b_program_payments,
    Spending_Per_Person = medicare_spending_per_person,
    Mortality_Rate = mortality_rate_per_1000,
    Total_Deaths_65Plus = total_deaths_65_plus,
    Population_65Plus = total_population_65_plus
  )


data_table <- joined_data_clean %>% 
  select(State, Medicare_Enrollees, Medicare_Spending, Mortality_Rate) %>% 
  mutate(Mortality_Rate = round(Mortality_Rate, 2)) %>% 
  arrange(desc(Mortality_Rate)) %>% 
  head(10)


medicare_data_table <- data_table %>% 
   kable("html", col.names = c("State", "Medicare Enrollees", "Medicare Spending ($)", "Mortality Rate (per 1,000)"),
        caption = "Top 10 States with Highest Mortality Rates and Medicare Spending") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE) %>%
  footnote(general = "Data Source: Medicare and Mortality datasets")

medicare_data_table

Top 10 States with Highest Mortality Rates and Medicare Spending
State	Medicare Enrollees	Medicare Spending ($)	Mortality Rate (per 1,000)
West Virginia	254195	2671150063	54.80
Mississippi	431483	4931158545	54.26
Oklahoma	528377	5988040719	54.10
Kentucky	536961	5434363793	52.87
Alabama	528983	5653598984	52.50
Arkansas	430724	4272207750	51.66
Tennessee	766844	7583702280	51.63
Ohio	1227265	13045771320	50.87
Indiana	775334	8340501710	49.70
Louisiana	473575	5517151179	49.40
Note:
Data Source: Medicare and Mortality datasets

Visualisation 3: Table 2: US Medicare Spending Per Capita and Mortality Rates; All states

This table provides a comprehensive overview of all states, displaying Medicare enrollees, Medicare utilization, spending per person, and mortality rate per 1000 people. It allows for state-by-state comparisons to identify patterns, outliers, or trends in spending and health outcomes.

library(DT)
library(dplyr)

joined_data_clean <- joined_data_clean %>%
  select(-Medicare_Spending, -Total_Deaths_65Plus, -Population_65Plus) %>%
  rename(
    `Spending Per Person ($)` = Spending_Per_Person,
    `Mortality Rate per 1000` = Mortality_Rate,
    `Medicare Enrollees` = Medicare_Enrollees,
    `Medicare Utilization` = Medicare_Utilization
  )

joined_data_clean$'Spending Per Person ($)' <- round(joined_data_clean$'Spending Per Person ($)', 0)
joined_data_clean$'Mortality Rate per 1000' <- round(joined_data_clean$'Mortality Rate per 1000', 0)

datatable(
  joined_data_clean,
  rownames = FALSE, 
  options = list(
    dom = 'Bfrtip',                    
    buttons = c('csv', 'excel', 'pdf'),
    pageLength = 10,                
    autoWidth = TRUE,                  
    scrollX = TRUE,                    
    columnDefs = list(
      list(className = 'dt-center', targets = 1:4)
    )
  ),
  extensions = c('Buttons'),           
  class = "cell-border stripe",   
  caption = "US Medicare Spending Per Capita and Mortality Rates"
)

Visualization 4; Scatter Plot comparing Medicare spending and mortality rates

The scatter plot highlights the relationship between Medicare spending per person and mortality rates per 1000. It includes a red trendline to indicate the overall correlation, revealing whether higher spending results in better health outcomes (lower mortality rates) or if other factors influence this relationship.

library(plotly)

scatter_plot <- plot_ly(
  data = joined_data_clean,
  x = ~`Spending Per Person ($)`,               
  y = ~`Mortality Rate per 1000`,               
  type = 'scatter',
  mode = 'markers',
  marker = list(size = 10, color = 'blue', opacity = 0.7),  
  text = ~paste(
    "State: ", State, "<br>",
    "Spending: $", `Spending Per Person ($)`, "<br>",
    "Mortality Rate: ", `Mortality Rate per 1000`, " per 1000"
  ),                                           
  hoverinfo = 'text'                           
) %>%
  add_lines(
    x = ~`Spending Per Person ($)`,
    y = fitted(lm(`Mortality Rate per 1000` ~ `Spending Per Person ($)`, data = joined_data_clean)),
    line = list(color = 'red', dash = 'dash'),
    name = "Trendline"
  ) %>%
  layout(
    title = "Relationship Between Medicare Spending and Mortality Rates",
    xaxis = list(title = "Medicare Spending Per Person ($)"),
    yaxis = list(title = "Mortality Rate (per 1000)"),
    showlegend = FALSE
  )

scatter_plot

## A marker object has been specified, but markers is not in the mode
## Adding markers to the mode...

Milestone 4 Group 10

2024-11-25

Visualization 1: Boxplot of Mortality Rates by Expenditure Quartiles

Visualization 2: Table 1

Visualisation 3: Table 2: US Medicare Spending Per Capita and Mortality Rates; All states

This table provides a comprehensive overview of all states, displaying Medicare enrollees, Medicare utilization, spending per person, and mortality rate per 1000 people. It allows for state-by-state comparisons to identify patterns, outliers, or trends in spending and health outcomes.

Visualization 4; Scatter Plot comparing Medicare spending and mortality rates