libraries
library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(patchwork)
library(ggplot2)
library(maps)
library(shiny)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ plotly::filter() masks dplyr::filter(), stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::map() masks maps::map()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(gganimate)
data cleaning
state_phys <- read_csv("Desktop/stat 3280/state_phys.csv")
## Rows: 52 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): state
## dbl (6): Employer, Non-Group, Medicaid, Medicare, Military, Uninsured
## num (1): phys
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
state_phys
## # A tibble: 52 × 8
## state phys Employer `Non-Group` Medicaid Medicare Military Uninsured
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Alabama 13702 0.463 0.062 0.207 0.163 0.021 0.084
## 2 Alaska 2137 0.438 0.054 0.23 0.115 0.059 0.105
## 3 Arizona 19640 0.466 0.053 0.204 0.165 0.014 0.098
## 4 Arkansas 8007 0.424 0.057 0.253 0.16 0.015 0.092
## 5 California 125566 0.47 0.066 0.272 0.12 0.008 0.064
## 6 Colorado 17387 0.522 0.068 0.187 0.136 0.02 0.067
## 7 Connecticut 16304 0.513 0.05 0.227 0.148 0.006 0.056
## 8 Delaware 3554 0.479 0.042 0.216 0.187 0.008 0.068
## 9 District of… 6105 0.589 0.043 0.252 0.08 0.008 0.027
## 10 Florida 65555 0.405 0.111 0.177 0.181 0.019 0.107
## # ℹ 42 more rows
population2023 <- read_csv("Desktop/stat 3280/population2023.csv")
## New names:
## Rows: 51 Columns: 2
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): table with row headers in column A and column headers in rows 3 thr... num
## (1): ...2
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...2`
pop2023 <- population2023 %>% rename(state = "table with row headers in column A and column headers in rows 3 through 4. (leading dots indicate sub-parts)") %>% rename(population = ...2)
state_phys <- state_phys[-52, ]
state_phys$state <- pop2023$state
state_phys <- left_join(state_phys, pop2023, by ="state")
state_phys$percent <- NULL
state_phys$percent_phys <- state_phys$phys / state_phys$population
state_phys$insured <- 1 - state_phys$Uninsured
state_phys$percent_un <- state_phys$Uninsured * 100
state_phys$state <- gsub("\\.", "", state_phys$state)
state_phys$percent_un <- state_phys$Uninsured * 100
states_data <- state_phys[-52, ]
states_data <- states_data %>%
mutate(phys_per_100k = (phys / population) * 100000)
#states_data<-states_data %>% rename(state_code = "State Code")
firstPlotData <- states_data %>%
arrange(desc(phys_per_100k)) %>%
mutate(Rank = row_number(),
Group = ifelse(Rank <= n() / 2, "Top Half", "Bottom Half"))
#plot3
us_census_bureau_regions_and_divisions <- read_csv("Desktop/stat 3280/us census bureau regions and divisions.csv")
## Rows: 51 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): State, State Code, Region, Division
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
us_census_bureau_regions_and_divisions <- us_census_bureau_regions_and_divisions %>% rename("state"="State")
median_household_income_by_state_2024 <- read_csv("Desktop/stat 3280/median-household-income-by-state-2024.csv")
## Rows: 52 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): state
## dbl (1): MedianHouseholdIncome2021
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
median_income <- median_household_income_by_state_2024[-52, ]
states_data <- left_join(states_data, median_income, by ="state")
states_data <- left_join(states_data, us_census_bureau_regions_and_divisions, by = "state")
#plot4
states_map <- map_data("state")
states_data$state <- trimws(tolower(states_data$state))
states_data <- states_data %>%
mutate(phys_per_100k = (phys / population) * 100000)
states_data <- states_data %>%
rename("state_code" = "State Code")
map_data <- left_join(states_map, states_data, by = c("region" = "state"))
bottom_10 <- states_data %>%
arrange(phys_per_100k) %>%
slice_head(n = 10)
top_10 <- states_data %>%
arrange(desc(phys_per_100k)) %>%
slice_head(n = 10)
final2 <- ggplot(top_10, aes(x = reorder(state_code, phys_per_100k), y = phys_per_100k, fill = phys_per_100k)) +
scale_fill_gradient(low = "#DA70D6", high = "#2c7bb6") +
geom_bar(stat = "identity", width = 0.7) +
labs(title = "Most Physicians",
x = "State",
y = "Physicians",
fill = "Physicians per 100k") +
theme_minimal() +
coord_flip() +
theme(legend.position = "right",
axis.text.y = element_text(size = 8), plot.title = element_text(hjust = 0.5))
final3 <- ggplot(bottom_10, aes(x = reorder(state_code, phys_per_100k), y = phys_per_100k, fill = phys_per_100k)) +
scale_fill_gradient(low = "#d7191c", high = "#DA70D6") +
geom_bar(stat = "identity", width = 0.7) +
labs(title = "Least Physicians",
x = "State",
y = "Physicians",
fill = "Physicians per 100k") +
theme_minimal() +
coord_flip() +
theme(legend.position = "right",
axis.text.y = element_text(size = 8), plot.title = element_text(hjust = 0.5))
final_plot <- final2+ final3+plot_layout(widths = c(1, 0.75), heights = c(0.75, 1))+
plot_annotation(title = "Amount of Physicians per 100,000 Individuals for each State")
final_plot
hoverMedian <- states_data %>%
mutate(hover_text = paste(
"State: ", state_code,
"<br>Median Income: $", format(MedianHouseholdIncome2021, big.mark = ","),
"<br>Uninsured: ", percent_un, "%",
"<br>Population: ", format(population, big.mark = ",")
))
median_plot <- ggplot(hoverMedian, aes(x = MedianHouseholdIncome2021, y = percent_un, size = population, color = Region, text = hover_text)) +
geom_point(alpha = 0.6) + # Adjust transparency of points
scale_size_continuous(range = c(4, 20)) +
scale_color_manual(values = c("Northeast" = "blue", "South" = "red", "Midwest" = "darkgreen", "West" = "purple")) +
scale_y_continuous(
limits = c(0, 30), # Ensure the range is from 0 to 30
breaks = seq(0, 30, by = 2) # Set breaks every 2 units
)+
labs(
title = "Median Household Income vs Percentage Uninsured",
x = "Median Household Income (USD)",
y = "Percentage Uninsured (%)",
size = "Population Size",
color = "Region"
) +
theme_minimal() +
theme(
plot.title = element_text(size = 16, face = "bold"),
legend.position = "top"
) +
guides(
size = "none"
)
plotly_plot <- ggplotly(median_plot, tooltip = "text")%>%
layout( yaxis = list(range = c(0, 18)))
plotly_plot
hoverMap <- map_data %>%
mutate(hover_text2 = paste(
"State: ", `state_code`,
"<br>Uninsured: ", format(percent_un, digits = 2), "%",
"<br>Population: ", format(population, big.mark = ",")
))
uninsured_map <- ggplot(hoverMap, aes(long, lat, group = group, fill = percent_un, text = hover_text2)) +
geom_polygon(color = "white") + # Draw state borders in white
scale_fill_gradient(low = "orange", high = "navy") +
labs(
title = "Percentage of Uninsured Population by State",
fill = "Uninsured %",
xlab = "Longitude",
ylab = "Latitiude"
) +
theme_minimal() +
theme(
plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
legend.position = "right"
)
un_plotly <- ggplotly(uninsured_map, tooltip = "text")
un_plotly
#states_data <- states_data %>% rename(state_code = "State Code")
insurance_long_data <- states_data %>%
gather(key = "Coverage_Type", value = "Percentage", Employer, "Non-Group", Medicaid, Medicare, Uninsured)
northeast_data <- insurance_long_data[insurance_long_data$Region == "Northeast", ]
northeast_plot<- ggplot(northeast_data, aes(x = state_code, y = Percentage, fill = Coverage_Type)) +
geom_bar(stat = "identity", position = "dodge") + # position = "dodge" makes bars side by side
labs(
title = "Northeast States",
x = "State",
y = "Percentage",
fill = "Coverage:"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45,hjust = 1), # Rotate x-axis labels for better readability
legend.position = "bottom"
)
south <- insurance_long_data[insurance_long_data$Region == "South", ]
south_plot<- ggplot(south, aes(x = state_code, y = Percentage, fill = Coverage_Type)) +
geom_bar(stat = "identity", position = "dodge") + # position = "dodge" makes bars side by side
labs(
title = "Southern States",
x = "State",
y = "Percentage",
fill = "Coverage:"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45,hjust = 1), # Rotate x-axis labels for better readability
legend.position = "bottom"
)
midwest <- insurance_long_data[insurance_long_data$Region == "Midwest", ]
midwest_plot<- ggplot(midwest, aes(x = state_code, y = Percentage, fill = Coverage_Type)) +
geom_bar(stat = "identity", position = "dodge") + # position = "dodge" makes bars side by side
labs(
title = "Midwest States",
x = "State",
y = "Percentage",
fill = "Coverage:"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45,hjust = 1), # Rotate x-axis labels for better readability
legend.position = "bottom"
)
west <- insurance_long_data[insurance_long_data$Region == "West", ]
west_plot<-ggplot(west, aes(x = state_code, y = Percentage, fill = Coverage_Type)) +
geom_bar(stat = "identity", position = "dodge") + # position = "dodge" makes bars side by side
labs(
title = "Western States",
x = "State",
y = "Percentage",
fill = "Coverage:"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45,hjust = 1), # Rotate x-axis labels for better readability
legend.position = "bottom"
)
all <- northeast_plot + south_plot+west_plot+midwest_plot
all
combined_plot <- northeast_plot + south_plot+west_plot+midwest_plot +
plot_annotation(title = "Comparison of Insurance Types Across States") &
theme(
legend.title = element_text(size = 6), # Smaller title
legend.text = element_text(size = 5.5), # Smaller legend text
legend.key.size = unit(0.2, "cm") # Reduce key size
)
combined_plot
health equity
library(tidyr)
health_equity <- read_csv("Desktop/stat 3280/Health_Equity_State.csv")
## Rows: 336 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): State, HCHE Measure ID, HCHE Description, Score, Start Date, End Date
## dbl (1): Footnote
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
health_equity <- left_join(health_equity, us_census_bureau_regions_and_divisions, by = c("State" = "State Code"))
health_equity <- health_equity %>%
filter(!State %in% c("VI", "GU", "MP", "AS", "PR"))
colnames(health_equity)
## [1] "State" "HCHE Measure ID" "HCHE Description" "Score"
## [5] "Footnote" "Start Date" "End Date" "state"
## [9] "Region" "Division"
health_equity <- health_equity %>%
rename("HCHE" = "HCHE Description")
health_equity <- health_equity %>%
mutate(HCHE = case_when(
HCHE == "Percentage of facilities, in the state, reporting HCHE that scored 0 out of 5 points" ~ "0",
HCHE == "Percentage of facilities, in the state, reporting HCHE that scored 1 out of 5 points" ~ "1",
HCHE == "Percentage of facilities, in the state, reporting HCHE that scored 2 out of 5 points" ~ "2",
HCHE == "Percentage of facilities, in the state, reporting HCHE that scored 3 out of 5 points" ~ "3",
HCHE == "Percentage of facilities, in the state, reporting HCHE that scored 4 out of 5 points" ~ "4",
HCHE == "Percentage of facilities, in the state, reporting HCHE that scored 5 out of 5 points" ~ "5",
TRUE ~ HCHE
))
cleaning code for shiny app- health equity
library(RColorBrewer)
health_equity <- health_equity %>%
mutate(Score_Range = case_when(
HCHE %in% c(0, 1) ~ "0-1",
HCHE %in% c(2, 3) ~ "2-3",
HCHE %in% c(4, 5) ~ "4-5",
TRUE ~ as.character(HCHE) # If there are other values, keep them as they are
))
health_equity <- health_equity %>%
mutate(Score = as.numeric(Score))
health_equity_aggregated <- health_equity %>%
group_by(State, Region, Score_Range) %>%
summarise(Score = sum(Score), .groups = "drop")
library(shiny)
ui <- fluidPage(
titlePanel("Hospital Score Distribution by State"),
sidebarLayout(
sidebarPanel(
checkboxGroupInput(
inputId = "scores",
label = "Select Score Ranges to Display:",
choices = c("0-1", "2-3", "4-5"),
selected = c("0-1", "2-3", "4-5")
),
checkboxGroupInput(
inputId = "region",
label = "Select Region:",
choices = c(unique(health_equity$Region)),
selected = "West"
)
),
mainPanel(
plotlyOutput("interactivePlot", height = "400px", width = "100%")
)
)
)
server <- function(input, output, session) {
# Reactive data based on inputs
filtered_data <- reactive({
data <- health_equity_aggregated
if ("All" %in% input$region) {
data <- data %>%
filter(Score_Range %in% input$scores)
} else {
data <- data %>%
filter(Region %in% input$region, Score_Range %in% input$scores)
}
return(data)
})
dynamic_title <- reactive({
region_text <- if ("All" %in% input$region) {
"All Regions"
} else {
paste(input$region, collapse = ", ")
}
paste("Hospital Scores in", region_text)
})
max_score <- max(health_equity$Score, na.rm = TRUE)
# Stacked Bar Plot
output$interactivePlot <- renderPlotly({
plot<- filtered_data() %>%
ggplot(aes(x = State, y = Score, fill = as.factor(Score_Range), text = paste(
"State: ", State, "<br>",
"Score: ", Score_Range, "<br>",
"Percent: ", Score, "%"
))) +
scale_fill_manual(values = c(
"0-1" = "#008080",
"2-3" = "#DA70D6",
"4-5" ="#fc8d59"
))+
geom_bar(stat = "identity", position = "stack") +
labs(
title = dynamic_title(),
x = "State",
y = "Percentage",
fill = "Score Range"
) +
theme_minimal() +
theme(
axis.text.x = element_text(angle = 45, hjust = 1),
plot.title = element_text(size = 16, face = "bold", hjust = 0.5)
)
ggplotly(plot, tooltip = "text")
})
}
shinyApp(ui, server)
-fixes axis names and labels and colors
library(readr)
time_series <- read_csv("Desktop/stat 3280/statistic_id1425936_cumulative-increase-in-ppi-of-health-insurance-services-in-the-us-2014-2024.csv")
## New names:
## Rows: 119 Columns: 6
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (6): Cumulative percent change in producer price index (PPI) for health ...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
time_series <- time_series[-1, ] %>% rename(time = "Cumulative percent change in producer price index (PPI) for health insurance services in the U.S. from June 2014 to March 2024") %>%
rename(overall = "...2", medicare = "...3" , medicaid = "...4" , private = "...5")
time_series$time <- as.Date(time_series$time, format = "%m/%d/%Y")
time_series <- time_series %>% rename(Medicaid = medicaid)
long_data <- time_series %>%
pivot_longer(cols = starts_with("Overall"):starts_with("Private"),
names_to = "Variable", values_to = "Percentage")
long_data$time <- as.factor(long_data$time)
long_data$Percentage <- as.numeric(long_data$Percentage)
#long_data <- long_data %>% rename(medicaid = Medicaid)
# Create the animated plot with play and pause buttons
plot_ly(long_data,
x = ~Variable,
y = ~Percentage,
frame = ~time,
color = ~Variable,
type = 'bar') %>%
layout(
title = 'Change Over Time',
yaxis = list(range = c(-max(abs(long_data$Percentage)), max(abs(long_data$Percentage))), title = "Percentage", dtick=5),
xaxis = list(title = "Insurance Type"),
updatemenus = list(
list(
type = "buttons",
showactive = TRUE,
x = -0.1, # Adjust the position of the buttons
xanchor = "left",
y = -.4,
yanchor = "top",
buttons = list(
list(
method = "animate",
args = list(NULL, list(frame = list(duration = 50, redraw = TRUE), fromcurrent = TRUE))
)
)
)
)
)
unemployed <- read_csv("Desktop/stat 3280/unemployed.csv")
## New names:
## Rows: 51 Columns: 3
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (2): state, ...3 dbl (1): rate
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...3`
poverty_rate_by_state_2024 <- read_csv("Desktop/stat 3280/poverty-rate-by-state-2024.csv")
## Rows: 51 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): state
## dbl (2): PovertyRatesPopulationBelowPovertyLevel, PovertyRatesPercentOfPopul...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pov_and_unemployed <- left_join(poverty_rate_by_state_2024, unemployed, by ="state") %>%
rename(pov_rate = PovertyRatesPercentOfPopulationBelowPovertyLevel)
pov_and_unemployed$state <- tolower(pov_and_unemployed$state)
states_data <- left_join(states_data, pov_and_unemployed, by = "state")
region_colors <- c(
"Northeast" = "navyblue", "South" = "red", "Midwest" = "forestgreen", "West" = "purple"
)
ui <- fluidPage(
titlePanel("Variable Comparison Line Graph with Regression Line Option"),
sidebarLayout(
sidebarPanel(
selectInput(
inputId = "x_var",
label = "Select X-axis Variable:",
choices = c(
"Unemployed Rate" = "rate",
"Poverty Rate" = "pov_rate"
),
selected = "rate"
),
selectInput(
inputId = "y_var",
label = "Select Y-axis Variable:",
choices = c(
"Number of Physicians" = "phys",
"Percent Uninsured" = "percent_un"
),
selected = "phys"
),
checkboxGroupInput(
inputId = "region",
label = "Select Region:",
choices = c("All", unique(states_data$Region)), # Add "All"
selected = "All" # Default selection
),
checkboxInput(
inputId = "add_regression",
label = "Add Regression Line",
value = FALSE
)
),
mainPanel(
plotOutput("lineGraph")
)
)
)
# Server
server <- function(input, output, session) {
# Reactive data filtering
filtered_data <- reactive({
if ("All" %in% input$region) {
states_data
} else {
states_data %>%
filter(Region %in% input$region)
}
})
# Render plot
output$lineGraph <- renderPlot({
req(input$x_var, input$y_var) # Ensure inputs are available
x_label <- ifelse(input$y_var == "rate", "Unemployment Rate",
ifelse(input$y_var == "pov_rate", "Poverty rate", input$x_var))
y_label <- ifelse(input$y_var == "phys", "Number of Physicians",
ifelse(input$y_var == "percent_un", "Percent Uninsured", input$y_var))
plot <- filtered_data() %>%
ggplot(aes_string(x = input$x_var, y = input$y_var, color = "Region")) +
geom_point(size=3) +
scale_color_manual(values = region_colors) + # Apply fixed colors
labs(
title = paste("Comparison of", x_label, "and", y_label, "by Region"),
x = x_label,
y = y_label,
color = "Region"
) +
theme_minimal()
if (input$add_regression) {
plot <- plot + geom_smooth(method = "lm", se = F, color = "black", linetype = "dotted")
}
plot
})
}
# Run the app
shinyApp(ui, server)
funding <- read_csv("Desktop/stat 3280/Per person state public health funding.csv")
## Rows: 255 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Location, Data
## dbl (1): TimeFrame
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
funding_filterd <- funding %>%
filter(!(TimeFrame %in% c(2019,2020,2021, 2022)))
funding_filterd$Data <- as.numeric(funding_filterd$Data)
## Warning: NAs introduced by coercion
funding_filterd <- funding_filterd %>%
arrange(desc(Data))
ggplot(funding_filterd, aes(x = reorder(Location, -Data), y = Data, fill=Data)) +
geom_bar(stat = "identity") +
scale_fill_gradient(
low = "#d7191c",
high = "#2c7bb6"
)+
labs(
title = "State Spending Per Person",
x = "State",
y = "Spending Rate ($)"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Rotate labels for readability
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_bar()`).