This analysis explores the NOAA Storm Database to identify the types of severe weather events in the United States that are most harmful to population health and those with the greatest economic consequences. The analysis involves loading and processing the raw data, summarizing key statistics, and presenting the results through tables and visualizations. Key findings indicate that tornadoes are the most harmful to population health, while floods have the greatest economic impact.

Data Loading and Preprocessing

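In this section, we load the storm dataset into R with the read.csv function. We then preprocess it for analysis: we keep only the columns describing event type, fatalities, injuries, and damage, and convert the damage exponent codes (PROPDMGEXP and CROPDMGEXP) into numeric multipliers so that total property and crop damage can be computed.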

# Load the required libraries
suppressPackageStartupMessages({
  library(dplyr)
  library(ggplot2)
  library(knitr)
})

# Load the raw storm data from the CSV file and print a compact
# overview of its columns
storm_data <- read.csv(file = "repdata_data_StormData.csv")
str(object = storm_data)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Preview the first six rows of the raw data
head(storm_data, n = 6L)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6
# Keep only the columns used below: event type, casualties, and the
# damage amounts with their exponent codes
subset_data <- dplyr::select(
    storm_data,
    EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
)

# Translate damage-exponent codes into numeric multipliers.
#
# exp: vector of exponent codes (anything as.character() accepts), such as
#      the PROPDMGEXP / CROPDMGEXP columns.
# Returns a numeric vector of the same length:
#   "H"/"h" -> 100, "K"/"k" -> 1000, "M"/"m" -> 1e6, "B"/"b" -> 1e9,
#   "", "-", "?", "+" -> 1.
# Any other code goes through as.numeric() unchanged, so digit strings keep
# their literal value and non-numeric text becomes NA (with a coercion
# warning).
convert_exp <- function(exp) {
    codes <- as.character(exp)
    known_codes <- c("H", "h", "K", "k", "M", "m", "B", "b", "", "-", "?", "+")
    known_values <- c(100, 100, 1000, 1000, 1e6, 1e6, 1e9, 1e9, 1, 1, 1, 1)

    slot <- match(codes, known_codes)
    recognised <- !is.na(slot)

    multipliers <- rep(NA_real_, length(codes))
    multipliers[recognised] <- known_values[slot[recognised]]
    # Codes outside the table fall back to plain numeric coercion
    multipliers[!recognised] <- as.numeric(codes[!recognised])
    multipliers
}

# Replace the exponent codes with numeric multipliers, then scale the raw
# damage figures by them. mutate() evaluates its arguments in order, so the
# totals are computed from the already-converted multiplier columns.
subset_data <- subset_data %>%
    mutate(
        PROPDMGEXP = convert_exp(PROPDMGEXP),
        CROPDMGEXP = convert_exp(CROPDMGEXP),
        PROPDMGTOTAL = PROPDMG * PROPDMGEXP,
        CROPDMGTOTAL = CROPDMG * CROPDMGEXP
    )

Results

Most Harmful Events to Population Health

This section identifies the weather events that have caused the most fatalities and injuries.

# Total fatalities and injuries per event type, ordered by fatalities
# (descending) with injuries as the secondary sort key
health_impact <- subset_data %>%
    group_by(EVTYPE) %>%
    summarise(
        Total_Fatalities = sum(FATALITIES),
        Total_Injuries = sum(INJURIES)
    ) %>%
    arrange(desc(Total_Fatalities), desc(Total_Injuries))

# Render the first ten rows of the ordered summary as a table
health_impact %>%
    head(10) %>%
    kable()
EVTYPE Total_Fatalities Total_Injuries
TORNADO 5633 91346
EXCESSIVE HEAT 1903 6525
FLASH FLOOD 978 1777
HEAT 937 2100
LIGHTNING 816 5230
TSTM WIND 504 6957
FLOOD 470 6789
RIP CURRENT 368 232
HIGH WIND 248 1137
AVALANCHE 224 170

Figures 1 & 2: Top Events by Health Impact

# Plot top 10 events by fatalities
# reorder() with the negated total puts the tallest bar first on the x axis.
top_health_impact <- health_impact %>% top_n(10, Total_Fatalities)
ggplot(top_health_impact, aes(x = reorder(EVTYPE, -Total_Fatalities), y = Total_Fatalities)) +
    geom_bar(stat = "identity") +
    labs(title = "Top 10 Weather Events by Fatalities", x = "Event Type", y = "Total Fatalities") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Plot top 10 events by injuries
# The subset gets its own name so the full health_impact summary is not
# overwritten with just ten rows.
top_injury_impact <- health_impact %>% top_n(10, Total_Injuries)
ggplot(top_injury_impact, aes(x = reorder(EVTYPE, -Total_Injuries), y = Total_Injuries)) +
    geom_bar(stat = "identity") +
    labs(title = "Top 10 Weather Events by Injuries", x = "Event Type", y = "Total Injuries") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

Events with the Greatest Economic Consequences

This section identifies the weather events that have caused the most economic damage.

# Property, crop, and combined damage per event type, largest combined
# damage first
economic_impact <- subset_data %>%
    group_by(EVTYPE) %>%
    summarise(
        Total_Property_Damage = sum(PROPDMGTOTAL),
        Total_Crop_Damage = sum(CROPDMGTOTAL),
        Total_Economic_Damage = sum(PROPDMGTOTAL + CROPDMGTOTAL)
    ) %>%
    arrange(desc(Total_Economic_Damage))

# Render the ten costliest event types as a table
economic_impact %>%
    head(10) %>%
    kable()
EVTYPE Total_Property_Damage Total_Crop_Damage Total_Economic_Damage
FLOOD 144657709807 5661968450 150319678257
HURRICANE/TYPHOON 69305840000 2607872800 71913712800
TORNADO 56937161054 414953110 57352114164
STORM SURGE 43323536000 5000 43323541000
HAIL 15732267427 3025954453 18758221880
FLASH FLOOD 16140812294 1421317100 17562129394
DROUGHT 1046106000 13972566000 15018672000
HURRICANE 11868319010 2741910000 14610229010
RIVER FLOOD 5118945500 5029459000 10148404500
ICE STORM 3944927810 5022113500 8967041310

Figure 3: Top Events by Economic Impact

# Bar chart of the ten event types with the largest combined damage;
# bars are ordered from highest to lowest total
top_economic_impact <- top_n(economic_impact, 10, Total_Economic_Damage)
ggplot(
    data = top_economic_impact,
    mapping = aes(
        x = reorder(EVTYPE, -Total_Economic_Damage),
        y = Total_Economic_Damage
    )
) +
    geom_bar(stat = "identity") +
    labs(
        title = "Top 10 Weather Events by Economic Damage",
        x = "Event Type",
        y = "Total Economic Damage"
    ) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))