library(tidyverse)
library(lubridate)
library(ggplot2)
# Load the storm data and derive analysis columns ----

# The damage exponent columns are read as character explicitly. readr guesses
# column types from the leading rows only, so a column that starts out empty
# can be guessed as logical, turning later codes such as "K" or "M" into NA.
repdata <- read_csv(
  "repdata_data_StormData 2.csv",
  col_types = cols(
    PROPDMGEXP = col_character(),
    CROPDMGEXP = col_character()
  )
)

# Translate a damage exponent code into a numeric multiplier.
#
# exp_code: character vector of magnitude codes (case-insensitive).
# Returns a numeric vector of the same length: 1e2, 1e3, 1e6 or 1e9 for
# H, K, M, B respectively, and 1 for anything else (including NA).
# NOTE(review): codes other than H/K/M/B (digits, "+", "-", "?") are assumed
# to carry no scaling and leave the raw amount unchanged -- confirm against
# the dataset documentation.
damage_multiplier <- function(exp_code) {
  known <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  unname(coalesce(known[toupper(exp_code)], 1))
}

repdata <- repdata %>%
  mutate(
    BGN_DATE = mdy_hms(BGN_DATE),
    YEAR = year(BGN_DATE),
    # Scale each raw amount by its exponent code before summing; adding the
    # raw PROPDMG/CROPDMG figures directly would mix units.
    DAMAGE_TOTAL =
      coalesce(PROPDMG, 0) * damage_multiplier(PROPDMGEXP) +
      coalesce(CROPDMG, 0) * damage_multiplier(CROPDMGEXP),
    # Normalise case and whitespace so variants of the same event type
    # (e.g. "Tornado" vs "TORNADO ") fall into one group.
    EVENT_TYPE = str_squish(str_to_upper(EVTYPE))
  )
# Events per year, one line per event type ----
# count() tallies the rows for every (YEAR, EVENT_TYPE) pair and returns an
# ungrouped table; the colour legend is switched off on the line layer.
repdata %>%
  count(YEAR, EVENT_TYPE, name = "count") %>%
  ggplot(mapping = aes(x = YEAR, y = count, colour = EVENT_TYPE)) +
  geom_line(show.legend = FALSE, linewidth = 0.8, alpha = 0.6) +
  ggtitle("Events per year and type") +
  xlab("Year") +
  ylab("Number of events")

# Ten event types with the largest summed DAMAGE_TOTAL ----
# The bar order and the value shown on the axis are prepared as columns
# before plotting: event types are ordered by their total, and totals are
# divided by 1e6 to match the axis label.
repdata %>%
  group_by(EVENT_TYPE) %>%
  summarise(total_damage = sum(DAMAGE_TOTAL, na.rm = TRUE)) %>%
  slice_max(order_by = total_damage, n = 10) %>%
  mutate(
    event_label = reorder(EVENT_TYPE, total_damage),
    damage_millions = total_damage / 1e6
  ) %>%
  ggplot(mapping = aes(x = event_label, y = damage_millions)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  ggtitle("Top 10 events by total damage") +
  xlab("event type") +
  ylab("Damage (in millions of USD)")

# Distribution of tornado intensity ----
# Tornado strength is taken from the F (Fujita scale) column rather than MAG.
# NOTE(review): in the NOAA Storm Data layout MAG holds hail size / wind
# speed and is not a tornado rating -- confirm against the dataset
# documentation. This also assumes F is parsed as a numeric column.
# The column is accessed through .data[["F"]] so it cannot be confused with
# R's built-in F (FALSE) shorthand. Rows without a rating are dropped up
# front instead of being discarded by the histogram layer.
repdata %>%
  filter(EVENT_TYPE == "TORNADO", !is.na(.data[["F"]])) %>%
  ggplot(aes(x = .data[["F"]])) +
  geom_histogram(binwidth = 1, fill = "tomato", color = "white") +
  labs(title = "Distribution of tornado magnitude",
       x = "magnitude (Fujita scale)", y = "frequency")

# Ten states with the most recorded events ----
# Per-state summary: number of rows and summed DAMAGE_TOTAL. Only `events`
# feeds the chart; `damage` is computed but not plotted.
repdata %>%
  group_by(STATE) %>%
  summarise(
    events = n(),
    damage = sum(DAMAGE_TOTAL, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  slice_max(order_by = events, n = 10) %>%
  # Order the bars by event count.
  mutate(state_label = reorder(STATE, events)) %>%
  ggplot(mapping = aes(x = state_label, y = events)) +
  geom_col(fill = "darkgreen") +
  coord_flip() +
  ggtitle("Top 10 states by number of incidents") +
  xlab("state") +
  ylab("events")