Synopsis

Data Processing

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the raw storm records.
storm_data <- read.csv("data/repdata_data_StormData.csv")

# Per event type: total fatalities, total injuries (NAs ignored) and their
# sum, ordered from the largest combined total to the smallest.
health_impact <- storm_data |>
    group_by(EVTYPE) |>
    summarise(
        fatalities = sum(FATALITIES, na.rm = TRUE),
        injuries = sum(INJURIES, na.rm = TRUE),
        # Later summary expressions can use the columns computed above.
        total_health = fatalities + injuries
    ) |>
    arrange(desc(total_health))

# Preview the ten highest-ranked event types.
head(health_impact, n = 10)
## # A tibble: 10 × 4
##    EVTYPE            fatalities injuries total_health
##    <chr>                  <dbl>    <dbl>        <dbl>
##  1 TORNADO                 5633    91346        96979
##  2 EXCESSIVE HEAT          1903     6525         8428
##  3 TSTM WIND                504     6957         7461
##  4 FLOOD                    470     6789         7259
##  5 LIGHTNING                816     5230         6046
##  6 HEAT                     937     2100         3037
##  7 FLASH FLOOD              978     1777         2755
##  8 ICE STORM                 89     1975         2064
##  9 THUNDERSTORM WIND        133     1488         1621
## 10 WINTER STORM             206     1321         1527

Results

# Keep exactly the ten event types with the largest combined casualty count.
# with_ties = FALSE guarantees 10 rows even if several events tie at rank 10
# (the default would return every tied row and contradict the "Top 10" title).
top_health <- health_impact |>
    slice_max(total_health, n = 10, with_ties = FALSE)

# las = 2 draws the event names perpendicular to the axis; the default bottom
# margin is too small for the longer names, so enlarge it for this plot and
# restore the previous graphics settings afterwards.
old_par <- par(mar = c(9, 4, 4, 2) + 0.1)

barplot(
    top_health$total_health,
    names.arg = top_health$EVTYPE,
    las = 2,
    cex.names = 0.7,
    main = "Top 10 Weather Events by Health Impact",
    ylab = "Fatalities + Injuries"
)

par(old_par)