library(tidyverse)
library(lubridate)
library(ggplot2)
# Translate storm-data damage magnitude codes into numeric multipliers.
#
# PROPDMG / CROPDMG hold only the leading figure of each damage estimate; the
# companion PROPDMGEXP / CROPDMGEXP column carries its magnitude:
# "H" = hundreds, "K" = thousands, "M" = millions, "B" = billions.
# Codes are matched case-insensitively. Missing or unrecognised codes fall
# back to a multiplier of 1, so the raw figure is kept rather than dropped.
#
# exp_code: character vector of magnitude codes.
# Returns:  numeric vector of multipliers, same length as exp_code.
damage_multiplier <- function(exp_code) {
  code <- str_to_upper(exp_code)
  case_when(
    code == "H" ~ 1e2,
    code == "K" ~ 1e3,
    code == "M" ~ 1e6,
    code == "B" ~ 1e9,
    TRUE ~ 1
  )
}

# Load the raw storm events. The magnitude-code columns are read as text
# explicitly so that column-type guessing cannot misclassify a mostly blank
# column and lose the letter codes.
repdata <- read_csv(
  "repdata_data_StormData 2.csv",
  col_types = cols(
    PROPDMGEXP = col_character(),
    CROPDMGEXP = col_character()
  )
)

# Derive the columns used by the plots below.
repdata <- repdata %>%
  mutate(
    BGN_DATE = mdy_hms(BGN_DATE),
    YEAR = year(BGN_DATE),
    # Scale each figure by its magnitude code before summing; adding the raw
    # PROPDMG / CROPDMG values would mix hundreds, thousands, millions and
    # billions as if they were the same unit. Missing amounts count as 0.
    DAMAGE_TOTAL =
      coalesce(PROPDMG, 0) * damage_multiplier(PROPDMGEXP) +
      coalesce(CROPDMG, 0) * damage_multiplier(CROPDMGEXP),
    EVENT_TYPE = EVTYPE
  )
# Line chart of yearly event counts, one line per event type.
# The colour legend is switched off on the line layer.
repdata %>%
  count(YEAR, EVENT_TYPE, name = "count") %>%
  ggplot(mapping = aes(x = YEAR, y = count, colour = EVENT_TYPE)) +
  geom_line(
    linewidth = 0.8,
    alpha = 0.6,
    show.legend = FALSE
  ) +
  labs(
    title = "Events per year and type",
    x = "Year",
    y = "Number of events"
  )

# Horizontal bar chart of the ten event types with the largest summed
# DAMAGE_TOTAL, with bars ordered by that total.
# NOTE(review): the axis label assumes DAMAGE_TOTAL is expressed in USD, so
# that dividing by 1e6 yields millions of USD; confirm where it is derived.
repdata %>%
  group_by(EVENT_TYPE) %>%
  summarise(
    total_damage = sum(DAMAGE_TOTAL, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  slice_max(order_by = total_damage, n = 10) %>%
  mutate(
    EVENT_TYPE = reorder(EVENT_TYPE, total_damage),
    damage_millions = total_damage / 1e6
  ) %>%
  ggplot(aes(x = EVENT_TYPE, y = damage_millions)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    title = "Top 10 events by total damage",
    x = "event type",
    y = "Damage (in millions of USD)"
  )

# Histogram of the MAG column, in unit-wide bins, restricted to rows whose
# event type is exactly "TORNADO".
# NOTE(review): in the NOAA storm data, tornado intensity may be recorded in
# a separate Fujita-scale column (F) rather than MAG; verify that MAG is the
# intended variable here.
ggplot(
  data = filter(repdata, EVENT_TYPE == "TORNADO"),
  mapping = aes(x = MAG)
) +
  geom_histogram(
    binwidth = 1,
    colour = "white",
    fill = "tomato"
  ) +
  labs(
    title = "Distribution of tornado magnitude",
    x = "magnitude",
    y = "frequency"
  )

# Horizontal bar chart of the ten states with the most recorded events.
# count() tallies rows per STATE. The per-state damage sum that used to be
# computed alongside it was never referenced by the plot, so it is dropped.
repdata %>%
  count(STATE, name = "events") %>%
  slice_max(events, n = 10) %>%
  ggplot(aes(x = reorder(STATE, events), y = events)) +
  geom_col(fill = "darkgreen") +
  coord_flip() + # swap axes so the bars run horizontally
  labs(
    title = "Top 10 states by number of incidents",
    x = "state",
    y = "events"
  )
