This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.
# Load required libraries
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(plotly) # for interactivity
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Copy the us_rent_income example dataset into the workspace so it can be
# modified below (it is found via the packages attached above).
data(list = "us_rent_income")
# Fill colour lookup: a named character vector mapping each NAME value (the 50
# states plus District of Columbia and Puerto Rico) to a hex colour. It is
# passed to scale_fill_manual(), which matches colours to bars by name. Several
# states deliberately share the same pastel colour.
state_colors <- c(
  Alabama                = "#9CBA7F",
  Alaska                 = "#77BFC7",
  Arizona                = "#DBD5B5",
  Arkansas               = "#F2A490",
  California             = "#FFD966",
  Colorado               = "#F5E1A4",
  Connecticut            = "#C7CEEA",
  Delaware               = "#A2D2FF",
  Florida                = "#97D8A3",
  Georgia                = "#FF9AA2",
  Hawaii                 = "#D7B9FF",
  Idaho                  = "#F7A6A6",
  Illinois               = "#FFAF65",
  Indiana                = "#FFD76E",
  Iowa                   = "#B6E2D4",
  Kansas                 = "#9CD6A4",
  Kentucky               = "#F4D35E",
  Louisiana              = "#EE6C4D",
  Maine                  = "#70ABAF",
  Maryland               = "#D2FDFF",
  Massachusetts          = "#FFDDD2",
  Michigan               = "#A5CAD2",
  Minnesota              = "#C2FFD9",
  Mississippi            = "#FF9A8B",
  Missouri               = "#FFD65E",
  Montana                = "#FFB167",
  Nebraska               = "#D7EEF7",
  Nevada                 = "#FFD966",
  `New Hampshire`        = "#FFAEAE",
  `New Jersey`           = "#A2D2FF",
  `New Mexico`           = "#FFB167",
  `New York`             = "#FFBE86",
  `North Carolina`       = "#FFE8D2",
  `North Dakota`         = "#C2FFD9",
  Ohio                   = "#F5E1A4",
  Oklahoma               = "#F4D35E",
  Oregon                 = "#9CD6A4",
  Pennsylvania           = "#FFD76E",
  `Rhode Island`         = "#FFDDD2",
  `South Carolina`       = "#FFBE86",
  `South Dakota`         = "#B6E2D4",
  Tennessee              = "#FFD65E",
  Texas                  = "#F2A490",
  Utah                   = "#D7B9FF",
  Vermont                = "#A5CAD2",
  Virginia               = "#C7CEEA",
  Washington             = "#9CBA7F",
  `West Virginia`        = "#FFAF65",
  Wisconsin              = "#70ABAF",
  Wyoming                = "#EE6C4D",
  `District of Columbia` = "#D2FDFF",
  `Puerto Rico`          = "#F7A6A6"
)
# Prepare the data for the income chart.
#
# us_rent_income is in long format: each state has one "income" row and one
# "rent" row. Only the income rows are kept, otherwise the bar chart would
# stack rent on top of income for every state. Rows with a missing estimate are
# dropped as well, because a single NA makes sum() -- and therefore every
# cumulative percentage -- NA.
us_rent_income <- us_rent_income %>%
  # dplyr:: is spelled out because several attached packages export filter()
  dplyr::filter(variable == "income", !is.na(estimate)) %>%
  # Sort by descending estimate so the running total follows the bar order
  arrange(desc(estimate)) %>%
  # Running share of the overall total, in percent (0-100)
  mutate(cum_percent = cumsum(estimate) / sum(estimate) * 100)
# Plotting: one bar per state (highest estimate first), overlaid with a red
# line and labels showing the cumulative share of the total.
#
# cum_percent is on a 0-100 scale, so it is divided by 100 before being
# multiplied by the tallest bar. That maps 100% onto the top of the tallest bar
# and keeps the line on the same y-scale as the bars instead of 100x above them.
p <- ggplot(
  us_rent_income,
  aes(
    x = reorder(NAME, -estimate),
    y = estimate,
    fill = NAME,
    # Hover text shown by ggplotly(); "<br>" is a line break in the tooltip
    text = paste("State: ", NAME, "<br>Median Household Income: ", estimate)
  )
) +
  # Bars, each filled according to its state
  geom_bar(stat = "identity") +
  theme_minimal() +
  labs(
    title = "Median Household Income by State",
    x = "State",
    y = "Median Household Income per Year "
  ) +
  theme(
    # Rotate the state names and shrink them so all of them fit
    axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1, size = 6),
    axis.title = element_text(size = 12, face = "bold"),
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5)
  ) +
  # Assign the predefined state colours to the bars
  scale_fill_manual(values = state_colors) +
  # Cumulative percentage line; group = 1 joins all states into a single path
  # (x is inherited from the plot-level mapping)
  geom_line(
    aes(y = cum_percent / 100 * max(estimate, na.rm = TRUE), group = 1),
    color = "red"
  ) +
  # Cumulative percentage labels, rounded to one decimal place
  geom_text(
    aes(
      label = sprintf("%.1f%%", cum_percent),
      y = cum_percent / 100 * max(estimate, na.rm = TRUE),
      group = 1
    ),
    size = 3,
    color = "red",
    hjust = -0.2,
    vjust = -0.5,
    angle = 0,
    show.legend = FALSE
  )
# Convert ggplot to plotly for interactivity; only the custom text is shown
# in the hover tooltip
ggplotly(p, tooltip = c("text"))