library(tidyverse)
library(lubridate)
library(dplyr)
library(DT)
library(plotly)

# Stat220 Project 4s
#
# This project reads and combines multiple yearly CSV files containing U.S.
# billion-dollar disaster data into a single cleaned dataset. It standardizes
# variable names and types, parses dates, and extracts the year from each file
# name. It then creates a summary that counts the number of disasters by type
# each year for a stacked bar chart, and a summary that calculates the total
# CPI-adjusted damages per year. The second summary computes annual total
# damages, confidence interval bounds, and a 5-year moving average, which are
# visualized in a time-series plot showing overall damage trends over time.
#' Extract the calendar year encoded in a data file's name.
#'
#' Looks for a standalone four-digit group in the base name of `path`
#' (e.g. "events-1980.csv" -> 1980). Parsing the name with
#' `readr::parse_number()` alone takes the *first* number it finds and reads a
#' preceding hyphen as a minus sign, so "events-1980.csv" would become -1980
#' and "events_v2_1980.csv" would become 2.
#'
#' @param path Path to a single file (length-one character vector).
#' @return The year as a double. If the name holds no four-digit group, the
#'   result of `readr::parse_number()` on the base name (the previous
#'   behaviour) is returned instead.
extract_file_year <- function(path) {
  file_name <- basename(path)
  year_match <- regmatches(
    file_name,
    regexpr("(?<![0-9])[0-9]{4}(?![0-9])", file_name, perl = TRUE)
  )
  if (length(year_match) == 1L) {
    return(as.numeric(year_match))
  }
  # No four-digit group: keep the original first-number fallback.
  readr::parse_number(file_name)
}

#' Read one yearly disaster CSV into a tidy tibble.
#'
#' @param path Path to a single CSV file. The year is taken from the file
#'   name, not from the file contents.
#' @return A tibble with the seven columns named in `col_names` below, plus
#'   `duration` (inclusive day count, integer) and `year` (double).
read_disaster_data <- function(path) {
  readr::read_csv(
    file = path,
    # The first three lines are dropped and replaced by the explicit names
    # below (presumably title/header rows -- confirm against the raw files).
    skip = 3,
    col_names = c(
      "name",
      "disaster_type",
      "start_date",
      "end_date",
      "damages_adjusted",
      "damages_unadjusted",
      "deaths"
    ),
    col_types = readr::cols(
      name = readr::col_character(),
      disaster_type = readr::col_factor(),
      # Dates are parsed as compact YYYYMMDD values.
      start_date = readr::col_date(format = "%Y%m%d"),
      end_date = readr::col_date(format = "%Y%m%d"),
      damages_adjusted = readr::col_double(),
      damages_unadjusted = readr::col_double(),
      deaths = readr::col_integer()
    ),
    show_col_types = FALSE
  ) |>
    dplyr::mutate(
      # Strip a trailing parenthetical from the event name.
      name = stringr::str_remove(name, "\\s*\\(.*\\)$"),
      # Inclusive duration: an event that starts and ends on the same day
      # lasts one day. `1L` keeps the column integer.
      duration = as.integer(end_date - start_date) + 1L,
      year = extract_file_year(path)
    )
}

# Every CSV file directly inside the "data" directory, with its relative path.
files <- list.files("data", pattern = "\\.csv$", full.names = TRUE)
# Fail early with a clear message: with no input files the combined table
# would have no columns and the select() below would fail obscurely.
if (length(files) == 0L) {
  stop("No CSV files found in the 'data' directory.", call. = FALSE)
}

# Read every yearly file and stack the results into one table.
# map() + bind_rows() replaces the superseded purrr::map_dfr().
all_disasters <- files |>
  purrr::map(read_disaster_data) |>
  dplyr::bind_rows()

# Keep a copy of the combined data set in the working directory.
readr::write_csv(all_disasters, "all_disasters.csv")

# Interactive, filterable table of every event, newest year first.
all_disasters |>
  select(
    year, name, disaster_type, start_date, end_date,
    damages_adjusted, deaths, duration
  ) |>
  arrange(desc(year), desc(damages_adjusted)) |>
  # Display damages as e.g. "$12.3B". The column becomes text here, so the
  # widget sorts and filters it as text rather than as a number.
  mutate(
    damages_adjusted = scales::dollar(
      damages_adjusted,
      scale = 1 / 1000,
      suffix = "B",
      accuracy = 0.1
    )
  ) |>
  datatable(
    # Without this, DT shows row names as column 0 and the `order` option
    # below would sort by row number instead of by Year.
    rownames = FALSE,
    filter = "top", # per-column filters
    extensions = "Buttons",
    options = list(
      dom = "Bfrtip",
      buttons = c("csv", "excel"), # download buttons
      pageLength = 15,
      order = list(list(0, "desc")) # column 0 = Year (0-based index)
    ),
    colnames = c(
      "Year", "Event Name", "Type", "Start", "End",
      "Damages (Adj.)", "Deaths", "Duration (days)"
    )
  )

# Number of events per year and disaster type (input to the stacked bars).
disaster_counts <- all_disasters |>
  count(year, disaster_type, name = "n_disasters")
# Stacked bar chart: number of disasters per year, split by type.
# The `text` aesthetic is not drawn by ggplot2; it carries the hover label
# that ggplotly() is told to show via `tooltip = "text"`.
p1 <- ggplot(
  disaster_counts,
  aes(
    x = year,
    y = n_disasters,
    fill = disaster_type,
    text = paste0(
      "Year: ", year, "<br>Type: ", disaster_type,
      "<br>Count: ", n_disasters
    )
  )
) +
  geom_col(width = 0.85, color = "grey20", linewidth = 0.1) +
  scale_x_continuous(
    breaks = seq(min(disaster_counts$year), max(disaster_counts$year), by = 4)
  ) +
  labs(
    title = "Billion-Dollar Disasters by Type and Year",
    x = "Year",
    y = "Number of Disasters",
    fill = "Disaster Type"
  ) +
  theme_minimal(base_size = 11) +
  theme(panel.grid.minor = element_blank(), legend.position = "bottom")

# Interactive version of the bar chart with the custom hover text.
ggplotly(p1, tooltip = "text")

# Yearly totals of adjusted damages with an approximate 95% interval and a
# trailing five-year moving average.
yearly_damages <- all_disasters |>
  group_by(year) |>
  summarise(
    total_damages_adjusted = sum(damages_adjusted, na.rm = TRUE),
    # Standard error of the yearly *total*: the interval below is drawn around
    # a sum, so the per-event sd is scaled by sqrt(n), not divided by it
    # (sd / sqrt(n) is the standard error of the mean). Only non-missing
    # events are counted, matching what sd(na.rm = TRUE) uses.
    se = sd(damages_adjusted, na.rm = TRUE) *
      sqrt(sum(!is.na(damages_adjusted))),
    .groups = "drop"
  ) |>
  # The moving average below depends on row order, so make it explicit.
  arrange(year) |>
  mutate(
    # Divide by 1000 to express values in billions (presumably the source
    # values are in millions of dollars -- confirm against the raw files).
    total_damages_bill = total_damages_adjusted / 1000,
    # Damages cannot be negative, so the lower bound is floored at zero.
    ci_low_bill = pmax((total_damages_adjusted - 1.96 * se) / 1000, 0),
    ci_high_bill = (total_damages_adjusted + 1.96 * se) / 1000,
    # Trailing mean over the current and four previous rows; the first four
    # rows are NA. stats:: is spelled out because dplyr also exports filter().
    # NOTE(review): this is a five-row window, which equals five years only
    # if no year is missing from the data.
    avg_5yr_bill = as.numeric(
      stats::filter(total_damages_bill, rep(1 / 5, 5), sides = 1)
    )
  )
# Time series of total adjusted damages per year (navy), with the interval
# bounds (dashed grey) and the five-year moving average (red).
ggplot(yearly_damages, aes(x = year)) +
  geom_line(aes(y = total_damages_bill), color = "navy", linewidth = 1) +
  geom_point(aes(y = total_damages_bill), color = "navy", size = 2) +
  # na.rm = TRUE drops rows with missing bounds or averages quietly instead
  # of emitting a warning for each layer.
  geom_line(
    aes(y = ci_low_bill),
    color = "gray45", linetype = "dashed", na.rm = TRUE
  ) +
  geom_line(
    aes(y = ci_high_bill),
    color = "gray45", linetype = "dashed", na.rm = TRUE
  ) +
  geom_line(
    aes(y = avg_5yr_bill),
    color = "tomato", linewidth = 1, na.rm = TRUE
  ) +
  scale_x_continuous(
    breaks = seq(min(yearly_damages$year), max(yearly_damages$year), by = 4)
  ) +
  scale_y_continuous(
    labels = scales::label_number(accuracy = 0.1, suffix = "B")
  ) +
  labs(
    title = "Total CPI-Adjusted Disaster Damages by Year",
    x = "Year",
    y = "Total Damages (Billions USD)"
  ) +
  # A single theme call is enough; a second theme_minimal() added nothing.
  theme_minimal(base_size = 11)

# I chose to use plotly to make my bar plot interactive so that the viewer
# could receive more information when going through my report. I thought it
# would be nice for the viewer to be able to hover over a given bar and
# understand the specifics of what it's showing without having to connect the
# legend to the y and x axes. Also, because the y axis doesn't show the
# specific count, plotly lets the user see the exact count of disasters. I
# also chose to create a data table. I worked with a lot of long and
# complicated data sets for this project, so I thought it would also be
# helpful to the viewer if it was easy for them to navigate the data set for
# any additional information they were looking for.