Synopsis

This report explores the NOAA Storm Database (1950–2011) to identify which event types are most harmful to population health and which have the greatest economic consequences in the United States. Health impact is measured as the sum of fatalities and injuries. Economic impact is measured as the sum of property and crop damages after converting NOAA damage exponent codes (K/M/B) into numeric multipliers.

Data Processing

The dataset was loaded from the original raw CSV file provided for the assignment. To improve performance, only the variables required for the analysis were imported.

library(data.table)

# Location of the raw Storm Data CSV.
# NOTE(review): absolute, machine-specific path -- adjust before re-running elsewhere.
file <- "/home/rstudio/Reproducible Research/week2/repdata_data_StormData1.csv"

# Keep only needed columns
cols <- c("EVTYPE","FATALITIES","INJURIES",
          "PROPDMG","PROPDMGEXP","CROPDMG","CROPDMGEXP")

# Read only the selected columns; the result is a data.table (see str() output below).
dat <- fread(file, select = cols, showProgress = TRUE)

dim(dat)
## [1] 902297      7
str(dat)
## Classes 'data.table' and 'data.frame':   902297 obs. of  7 variables:
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  - attr(*, ".internal.selfref")=<externalptr>

3) Convert damage exponents to dollar amounts

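NOAA stores damages as a number (e.g., 2.5) plus an exponent code (K/M/B). We convert the codes into multipliers: K = 1,000; M = 1,000,000; B = 1,000,000,000. Codes are matched case-insensitively after trimming whitespace, and a single-digit code d is read as 10^d. Blank or unrecognised codes are treated as multiplier 1, i.e. the recorded number is taken at face value (conservative).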

# Translate damage-exponent codes into numeric multipliers.
#
# x: vector of exponent codes; surrounding whitespace and letter case
#    are ignored.
# Returns a numeric vector with one multiplier per element of x:
#   "K" -> 1e3, "M" -> 1e6, "B" -> 1e9, a single digit d -> 10^d,
#   anything else (blank, unrecognised, NA) -> 1.
exp_to_mult <- function(x){
  code <- toupper(trimws(x))

  # Letter codes go through a lookup table; unmatched entries come back as NA
  letter_mult <- c(K = 1e3, M = 1e6, B = 1e9)
  out <- unname(letter_mult[match(code, names(letter_mult))])

  # A lone digit is read as a power of ten
  digit_pos <- which(grepl("^[0-9]$", code))
  out[digit_pos] <- 10^as.numeric(code[digit_pos])

  # Everything still unresolved falls back to the neutral multiplier
  out[is.na(out)] <- 1

  out
}

# Per-row multipliers decoded from the property and crop exponent codes
dat$PROP_MULT <- with(dat, exp_to_mult(PROPDMGEXP))
dat$CROP_MULT <- with(dat, exp_to_mult(CROPDMGEXP))

# Dollar amounts: the reported magnitude scaled by its multiplier
dat$PROP_USD <- with(dat, PROPDMG * PROP_MULT)
dat$CROP_USD <- with(dat, CROPDMG * CROP_MULT)

# Per-record totals: combined damage and combined casualties
dat$ECON_USD <- with(dat, PROP_USD + CROP_USD)
dat$HEALTH   <- with(dat, FATALITIES + INJURIES)

summary(dat$ECON_USD)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.00e+00 0.00e+00 0.00e+00 5.29e+05 1.00e+03 1.15e+11
summary(dat$HEALTH)
##      Min.   1st Qu.    Median      Mean   3rd Qu.      Max. 
##    0.0000    0.0000    0.0000    0.1725    0.0000 1742.0000

4) Aggregate by event type

library(dplyr)

# Hand dplyr a plain data.frame rather than the data.table
df <- as.data.frame(dat)

# Casualty totals per event type, largest combined total first
health_by_event <- group_by(df, EVTYPE)
health_by_event <- summarise(
  health_by_event,
  fatalities = sum(FATALITIES, na.rm = TRUE),
  injuries   = sum(INJURIES,   na.rm = TRUE),
  health     = sum(HEALTH,     na.rm = TRUE),
  .groups = "drop"
)
health_by_event <- arrange(health_by_event, desc(health))

# Damage totals per event type, largest combined total first
econ_by_event <- group_by(df, EVTYPE)
econ_by_event <- summarise(
  econ_by_event,
  prop_usd = sum(PROP_USD, na.rm = TRUE),
  crop_usd = sum(CROP_USD, na.rm = TRUE),
  econ_usd = sum(ECON_USD, na.rm = TRUE),
  .groups = "drop"
)
econ_by_event <- arrange(econ_by_event, desc(econ_usd))

head(health_by_event, 10)
## # A tibble: 10 x 4
##    EVTYPE            fatalities injuries health
##    <chr>                  <dbl>    <dbl>  <dbl>
##  1 TORNADO                 5633    91346  96979
##  2 EXCESSIVE HEAT          1903     6525   8428
##  3 TSTM WIND                504     6957   7461
##  4 FLOOD                    470     6789   7259
##  5 LIGHTNING                816     5230   6046
##  6 HEAT                     937     2100   3037
##  7 FLASH FLOOD              978     1777   2755
##  8 ICE STORM                 89     1975   2064
##  9 THUNDERSTORM WIND        133     1488   1621
## 10 WINTER STORM             206     1321   1527
head(econ_by_event, 10)
## # A tibble: 10 x 4
##    EVTYPE                 prop_usd    crop_usd      econ_usd
##    <chr>                     <dbl>       <dbl>         <dbl>
##  1 FLOOD             144657709807   5661968450 150319678257 
##  2 HURRICANE/TYPHOON  69305840000   2607872800  71913712800 
##  3 TORNADO            56947380676.   414953270  57362333946.
##  4 STORM SURGE        43323536000         5000  43323541000 
##  5 HAIL               15735267018.  3025954473  18761221491.
##  6 FLASH FLOOD        16822673978.  1421317100  18243991078.
##  7 DROUGHT             1046106000  13972566000  15018672000 
##  8 HURRICANE          11868319010   2741910000  14610229010 
##  9 RIVER FLOOD         5118945500   5029459000  10148404500 
## 10 ICE STORM           3944927860   5022113500   8967041360

Results

Question 1: Most harmful events for population health

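We rank event types by total (fatalities + injuries) and show the top 10. Tornadoes are by far the most harmful, with 96,979 combined fatalities and injuries, followed by excessive heat (8,428). Event type labels are used exactly as recorded, so related labels (e.g., TSTM WIND and THUNDERSTORM WIND, or HEAT and EXCESSIVE HEAT) are ranked separately.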

library(dplyr)
library(ggplot2)

# Ten highest-ranked rows; EVTYPE becomes a factor ordered by `health`
# so the bars are drawn in ranked order
topH <- slice_head(health_by_event, n = 10)
topH <- mutate(topH, EVTYPE = reorder(as.character(EVTYPE), health))

# Horizontal bar chart of combined casualties per event type
ggplot(data = topH, mapping = aes(x = EVTYPE, y = health)) +
  geom_col(fill = "#2b8cbe") +
  coord_flip() +
  ggtitle("Top 10 event types by population health impact") +
  xlab("Event type") +
  ylab("Fatalities + Injuries")

Figure 1. Bar chart of the 10 event types with the largest combined number of fatalities and injuries.

Question 2: Events with greatest economic consequences

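We rank event types by total damage (property + crop) and show the top 10. Floods have the greatest economic consequences (about 150 billion USD), followed by hurricanes/typhoons (about 72 billion USD) and tornadoes (about 57 billion USD). As above, event type labels are used as recorded, so HURRICANE and HURRICANE/TYPHOON are ranked separately.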

library(dplyr)
library(ggplot2)

# Ten highest-ranked rows, with damage rescaled to billions for readability;
# EVTYPE becomes a factor ordered by `econ_usd` so the bars are drawn in
# ranked order
topE <- slice_head(econ_by_event, n = 10)
topE <- mutate(topE, EVTYPE = reorder(as.character(EVTYPE), econ_usd))
topE <- mutate(topE, econ_bil = econ_usd / 1e9)

# Horizontal bar chart of total damage per event type
ggplot(data = topE, mapping = aes(x = EVTYPE, y = econ_bil)) +
  geom_col(fill = "#31a354") +
  coord_flip() +
  ggtitle("Top 10 event types by economic impact") +
  xlab("Event type") +
  ylab("Total damage (billion USD)")

Figure 2. Bar chart of the 10 event types with the largest total economic damage (property + crop).

Quick tables (optional, no extra figures)

library(dplyr)

as.data.frame(health_by_event) %>% slice_head(n = 10)
##               EVTYPE fatalities injuries health
## 1            TORNADO       5633    91346  96979
## 2     EXCESSIVE HEAT       1903     6525   8428
## 3          TSTM WIND        504     6957   7461
## 4              FLOOD        470     6789   7259
## 5          LIGHTNING        816     5230   6046
## 6               HEAT        937     2100   3037
## 7        FLASH FLOOD        978     1777   2755
## 8          ICE STORM         89     1975   2064
## 9  THUNDERSTORM WIND        133     1488   1621
## 10      WINTER STORM        206     1321   1527
as.data.frame(econ_by_event)   %>% slice_head(n = 10)
##               EVTYPE     prop_usd    crop_usd     econ_usd
## 1              FLOOD 144657709807  5661968450 150319678257
## 2  HURRICANE/TYPHOON  69305840000  2607872800  71913712800
## 3            TORNADO  56947380676   414953270  57362333946
## 4        STORM SURGE  43323536000        5000  43323541000
## 5               HAIL  15735267018  3025954473  18761221491
## 6        FLASH FLOOD  16822673978  1421317100  18243991078
## 7            DROUGHT   1046106000 13972566000  15018672000
## 8          HURRICANE  11868319010  2741910000  14610229010
## 9        RIVER FLOOD   5118945500  5029459000  10148404500
## 10         ICE STORM   3944927860  5022113500   8967041360