Synopsis

This analysis examines severe weather events recorded in the NOAA Storm Database to determine which types of events are most harmful to population health and which have the greatest economic consequences. Health impact is measured by the number of fatalities and injuries; economic impact is measured by property and crop damage. Data processing consists of loading the raw dataset, converting the damage exponent codes into numeric multipliers, and aggregating fatalities, injuries, and damage by event type. The results show that tornadoes cause the most fatalities and injuries, while floods cause the greatest economic damage; these findings can inform resource allocation for emergency management.

Data Processing

# Load necessary libraries
library(dplyr)
library(ggplot2)

# Load the data.
# read.csv() is used instead of read.table() because its defaults are the
# CSV-safe ones: only double quotes delimit fields (quote = "\"") and no
# character starts a comment (comment.char = ""), so apostrophes or '#'
# inside free-text columns cannot truncate or merge records.
# bzfile() decompresses the archive on the fly; read.csv() opens and closes
# the connection itself.
storm_data_path <- "repdata_data_StormData.csv.bz2"
if (!file.exists(storm_data_path)) {
  stop("Storm data file not found: ", storm_data_path, call. = FALSE)
}
data <- read.csv(bzfile(storm_data_path), header = TRUE)

# Inspect the structure of the data
str(data)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Tabulate the distinct PROPDMGEXP (Property Damage Exponent) codes,
# most frequent first
count(data, PROPDMGEXP, sort = TRUE)
##    PROPDMGEXP      n
## 1             465934
## 2           K 424665
## 3           M  11330
## 4           0    216
## 5           B     40
## 6           5     28
## 7           1     25
## 8           2     13
## 9           ?      8
## 10          m      7
## 11          H      6
## 12          +      5
## 13          7      5
## 14          3      4
## 15          4      4
## 16          6      4
## 17          -      1
## 18          8      1
## 19          h      1
# Tabulate the distinct CROPDMGEXP (Crop Damage Exponent) codes,
# most frequent first
count(data, CROPDMGEXP, sort = TRUE)
##   CROPDMGEXP      n
## 1            618413
## 2          K 281832
## 3          M   1994
## 4          k     21
## 5          0     19
## 6          B      9
## 7          ?      7
## 8          2      1
## 9          m      1
#' Translate damage exponent codes into numeric multipliers
#'
#' Codes are matched case-insensitively: H = 1e2, K = 1e3, M = 1e6, B = 1e9.
#' Every other value (blank, digits, "?", "+", "-", NA) receives `default`.
#'
#' @param code Vector of exponent codes (character, or coercible to it).
#' @param default Multiplier used for unrecognised codes. The default of 0
#'   means damage reported with such a code does not contribute to totals.
#' @return A numeric vector the same length as `code`.
exponent_multiplier <- function(code, default = 0) {
  multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  # Character indexing is exact; unmatched names (including "" and NA)
  # yield NA, which is then replaced by `default`.
  out <- unname(multipliers[toupper(as.character(code))])
  out[is.na(out)] <- default
  out
}

# Replace the exponent codes with their numeric multipliers.
# A single lookup is used for both columns so the two mappings cannot drift
# apart, and no numbers are ever stored in (and re-parsed from) a character
# vector.
# NOTE(review): digit codes (0-8) are treated as unrecognised here and get a
# multiplier of 0; confirm against the NOAA documentation whether they should
# carry a multiplier instead.
data$PROPDMGEXP <- exponent_multiplier(data$PROPDMGEXP)
data$CROPDMGEXP <- exponent_multiplier(data$CROPDMGEXP)

# Calculate total property and crop damage in dollars
data$TOTAL_PROPDMG <- data$PROPDMG * data$PROPDMGEXP
data$TOTAL_CROPDMG <- data$CROPDMG * data$CROPDMGEXP

Results

Most Harmful Events with Respect to Population Health

# Total fatalities and injuries per event type, ranked by fatalities
# (injuries break ties)
health_impact <- data %>%
  group_by(EVTYPE) %>%
  summarise(
    FATALITIES = sum(FATALITIES, na.rm = TRUE),
    INJURIES = sum(INJURIES, na.rm = TRUE)
  ) %>%
  arrange(desc(FATALITIES), desc(INJURIES))

# Keep the ten highest-ranked event types
top_health_events <- health_impact %>% head(10)

# Horizontal bar chart: injuries and fatalities are drawn as two
# semi-transparent bars that both start at zero; event types are ordered
# by injury count
ggplot(
  data = top_health_events,
  mapping = aes(x = reorder(EVTYPE, INJURIES))
) +
  geom_col(aes(y = INJURIES, fill = "Injuries"), alpha = 0.5) +
  geom_col(aes(y = FATALITIES, fill = "Fatalities"), alpha = 0.5) +
  scale_fill_manual(values = c("Injuries" = "blue", "Fatalities" = "red")) +
  ggtitle("Top 10 Most Harmful Weather Events (Fatalities and Injuries)") +
  xlab("Event Type") +
  ylab("Count") +
  labs(fill = "Health Impact") +
  coord_flip() +
  theme(plot.title = element_text(hjust = 0.5))

Economic Consequences of Severe Weather Events

# Summarize economic damages by event type.
# Events are ranked by TOTAL_DMG (property + crop) rather than by property
# damage alone, so event types whose losses are mostly agricultural are not
# excluded from the "economic damage" top 10.
economic_impact <- data %>%
  group_by(EVTYPE) %>%
  summarize(TOTAL_PROPDMG = sum(TOTAL_PROPDMG, na.rm = TRUE),
            TOTAL_CROPDMG = sum(TOTAL_CROPDMG, na.rm = TRUE)) %>%
  mutate(TOTAL_DMG = TOTAL_PROPDMG + TOTAL_CROPDMG) %>%
  arrange(desc(TOTAL_DMG))

# Select top 10 events for economic damage
top_economic_events <- head(economic_impact, 10)

# Divide total damages by 1e6 to convert to millions
top_economic_events$TOTAL_PROPDMG <- top_economic_events$TOTAL_PROPDMG / 1e6
top_economic_events$TOTAL_CROPDMG <- top_economic_events$TOTAL_CROPDMG / 1e6
top_economic_events$TOTAL_DMG <- top_economic_events$TOTAL_DMG / 1e6

# One row per (event type, damage type) so the two components can be stacked
# into a single bar whose length equals the total damage.
damage_by_type <- bind_rows(
  transmute(top_economic_events, EVTYPE, TOTAL_DMG,
            DAMAGE_TYPE = "Property Damage", DAMAGE = TOTAL_PROPDMG),
  transmute(top_economic_events, EVTYPE, TOTAL_DMG,
            DAMAGE_TYPE = "Crop Damage", DAMAGE = TOTAL_CROPDMG)
)

# Create a plot for economic damage (stacked bars, ordered by total damage)
ggplot(damage_by_type,
       aes(x = reorder(EVTYPE, TOTAL_DMG), y = DAMAGE, fill = DAMAGE_TYPE)) +
  geom_col() +
  labs(title = "Top 10 Weather Events by Economic Damage",
       x = "Event Type",
       y = "Total Damage (Millions USD)",
       fill = "Damage Type") +
  scale_fill_manual(values = c("Property Damage" = "green",
                               "Crop Damage" = "orange")) +
  coord_flip() +
  theme(plot.title = element_text(hjust = 0.5))

Conclusion

This analysis identifies tornadoes as the most harmful severe weather events in terms of public health, resulting in the highest number of fatalities and injuries. In contrast, floods emerge as the severe weather events with the greatest economic consequences, causing significant property and crop damage.