Synopsis

This analysis focuses on identifying and comparing the most impactful natural disasters in the U.S. based on two main criteria: their consequences on human health and their economic damages. Using data from the NOAA Storm Database, we explore event types associated with the highest fatalities, injuries, and property damage. By examining both average and total figures, this report provides insights into the leading contributors to societal harm and economic loss.

Data Processing

Data Description: The data set being used is derived from the NOAA Storm Database ranging from the 1950s to 2011; for the purposes of this project the version uploaded to the course platform is used to ensure consistency. Download Storm Data

Loading the data

# Read the raw storm records from the CSV located in the working directory.
storm_data <- read.csv(file = "./repdata-data-StormData.csv")

# List the available columns so the ones needed for the analysis can be chosen.
colnames(storm_data)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

From the NOAA documentation, the following description pertinent to the analysis is available: “Damage. Property damage estimates should be entered as actual dollar amounts”. The variables I have identified that correspond to damage are:

PROPDMG (amount) and PROPDMGEXP (multiplier; alphabetical characters used to signify magnitude, including “K” for thousands, “M” for millions, and “B” for billions); CROPDMG (amount) and CROPDMGEXP (multiplier, using the same magnitude codes as PROPDMGEXP).

variables relating to the demographic health:

FATALITIES indicates fatalities attributed to the event

INJURIES indicates injuries attributed to the event

and the category of the event EVTYPE

simplifying the data frame to only include data relevant to population health and economic consequences:

# Keep only the event type, the two health counts, and the damage amounts
# together with their magnitude codes.
relevant_df <- storm_data[
  c(
    "EVTYPE", "INJURIES", "FATALITIES",
    "CROPDMG", "CROPDMGEXP", "PROPDMG", "PROPDMGEXP"
  )
]
colnames(relevant_df)
## [1] "EVTYPE"     "INJURIES"   "FATALITIES" "CROPDMG"    "CROPDMGEXP"
## [6] "PROPDMG"    "PROPDMGEXP"

Looking for the largest offenders for fatalities and injuries

Creating a table that summarizes the average and cumulative mortality for each event

# Mean fatalities and mean injuries per event type
avg_fatalities <- aggregate(
  FATALITIES ~ EVTYPE, relevant_df,
  FUN = mean, na.rm = TRUE
)
avg_injuries <- aggregate(
  INJURIES ~ EVTYPE, relevant_df,
  FUN = mean, na.rm = TRUE
)

# Summed fatalities and summed injuries per event type
total_fatalities <- aggregate(
  FATALITIES ~ EVTYPE, relevant_df,
  FUN = sum, na.rm = TRUE
)
total_injuries <- aggregate(
  INJURIES ~ EVTYPE, relevant_df,
  FUN = sum, na.rm = TRUE
)

# Join the four summaries on EVTYPE, left to right. The mean and sum tables
# share column names, so merge() disambiguates them with its default suffixes:
# FATALITIES.x / INJURIES.x hold the means, FATALITIES.y / INJURIES.y the sums.
population_df <- Reduce(
  function(left, right) merge(left, right, by = "EVTYPE"),
  list(avg_fatalities, avg_injuries, total_fatalities, total_injuries)
)

sorting the population data frame by highest A) average fatality rate B) average injury rate C) total fatalities D) total injuries

# Load necessary libraries
library(tidyr)

# Order the event types from largest to smallest on each of the four measures
# (.x columns are the per-event means, .y columns are the totals).
sorted_by_avg_fatalities <- population_df[
  order(population_df[["FATALITIES.x"]], decreasing = TRUE),
]
sorted_by_avg_injuries <- population_df[
  order(population_df[["INJURIES.x"]], decreasing = TRUE),
]
sorted_by_total_fatalities <- population_df[
  order(population_df[["FATALITIES.y"]], decreasing = TRUE),
]
sorted_by_total_injuries <- population_df[
  order(population_df[["INJURIES.y"]], decreasing = TRUE),
]

# Keep the first five rows of each ranking, with the matching pair of
# fatality/injury columns alongside the event type.
top5_avg_fatalities <- sorted_by_avg_fatalities[
  seq_len(5), c("EVTYPE", "FATALITIES.x", "INJURIES.x")
]
top5_avg_injuries <- sorted_by_avg_injuries[
  seq_len(5), c("EVTYPE", "FATALITIES.x", "INJURIES.x")
]
top5_total_fatalities <- sorted_by_total_fatalities[
  seq_len(5), c("EVTYPE", "FATALITIES.y", "INJURIES.y")
]
top5_total_injuries <- sorted_by_total_injuries[
  seq_len(5), c("EVTYPE", "FATALITIES.y", "INJURIES.y")
]

# Stack the two measure columns of each top-5 table into long form
# (one row per event type and measure), then relabel the measure as a factor
# with readable level names.
# Note: tidyr marks gather() as superseded by pivot_longer(); it is kept here
# so the row order and the data.frame class of the results stay as they were.
top5_avg_fatalities_long <- gather(
  top5_avg_fatalities,
  key = "Type", value = "Value",
  FATALITIES.x, INJURIES.x
)
top5_avg_fatalities_long[["Type"]] <- factor(
  top5_avg_fatalities_long[["Type"]],
  levels = c("FATALITIES.x", "INJURIES.x"),
  labels = c("Fatalities", "Injuries")
)

top5_avg_injuries_long <- gather(
  top5_avg_injuries,
  key = "Type", value = "Value",
  FATALITIES.x, INJURIES.x
)
top5_avg_injuries_long[["Type"]] <- factor(
  top5_avg_injuries_long[["Type"]],
  levels = c("FATALITIES.x", "INJURIES.x"),
  labels = c("Fatalities", "Injuries")
)

top5_total_fatalities_long <- gather(
  top5_total_fatalities,
  key = "Type", value = "Value",
  FATALITIES.y, INJURIES.y
)
top5_total_fatalities_long[["Type"]] <- factor(
  top5_total_fatalities_long[["Type"]],
  levels = c("FATALITIES.y", "INJURIES.y"),
  labels = c("Fatalities", "Injuries")
)

top5_total_injuries_long <- gather(
  top5_total_injuries,
  key = "Type", value = "Value",
  FATALITIES.y, INJURIES.y
)
top5_total_injuries_long[["Type"]] <- factor(
  top5_total_injuries_long[["Type"]],
  levels = c("FATALITIES.y", "INJURIES.y"),
  labels = c("Fatalities", "Injuries")
)

Calculating the average and total economic cost of each event type

Converting Damage Amounts Based on Multipliers

The Storm Database records property (PROPDMGEXP) and crop (CROPDMGEXP) damage with magnitude multipliers such as “K” for thousands, “M” for millions, and “B” for billions. The following function converts these multipliers into actual numeric values.

# Convert recorded damage amounts to dollars using their magnitude codes.
#
# Arguments:
#   damage   numeric vector of recorded damage amounts.
#   exponent magnitude codes (character or factor), matched case-insensitively:
#            "K" = thousands, "M" = millions, "B" = billions.
#
# Returns a numeric vector of damage * multiplier. Any other code (including
# "" and NA) leaves the amount unscaled, which matches the previous fallback.
#
# The function is vectorised, so it works both on whole columns and on the
# single (damage, exponent) pairs that mapply() passes in.
convert_damage <- function(damage, exponent) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)

  # Upper-case the codes so that "k"/"m"/"b" scale like "K"/"M"/"B".
  # as.character() also makes factor input look up by label, not by level id.
  code <- toupper(as.character(exponent))

  # Name lookup yields NA for unknown, empty or missing codes; treat those
  # as "no multiplier" instead of letting an NA reach a condition and error.
  scale <- unname(multipliers[code])
  scale[is.na(scale)] <- 1

  damage * scale
}

This function will be applied to each row in the dataset to ensure property and crop damage values are converted into their actual amounts.

Applying the Conversion to Damage Columns

Now, we will apply the convert_damage function to the PROPDMG and CROPDMG columns using the respective multipliers in PROPDMGEXP and CROPDMGEXP. We will create new columns to hold the actual damage values.

# Scale every record's property and crop damage by its magnitude code.
# mapply() walks the amount and code columns in parallel and calls
# convert_damage() once per (amount, code) pair.
PROPDMG_ACTUAL <- with(
  relevant_df,
  mapply(convert_damage, PROPDMG, PROPDMGEXP)
)
CROPDMG_ACTUAL <- with(
  relevant_df,
  mapply(convert_damage, CROPDMG, CROPDMGEXP)
)

# Store the converted amounts as new columns next to the raw ones
relevant_df[["PROPDMG_ACTUAL"]] <- PROPDMG_ACTUAL
relevant_df[["CROPDMG_ACTUAL"]] <- CROPDMG_ACTUAL

Now, the relevant_df dataset will contain two new columns: PROPDMG_ACTUAL for property damage and CROPDMG_ACTUAL for crop damage in actual dollar amounts.

Calculating Total Damage for Each Event Type

Next, we will calculate the total damage for each recorded event by summing its property and crop damage amounts. Once this is aggregated by event type, it will let us compare the overall economic consequences of each event type.

# Combined economic damage of each record: converted property damage plus
# converted crop damage (row-wise sum, one value per record).
TOTAL_DAMAGE <- with(relevant_df, PROPDMG_ACTUAL + CROPDMG_ACTUAL)

# Attach the row-wise total to the data frame as its own column
relevant_df[["TOTAL_DAMAGE"]] <- TOTAL_DAMAGE

This creates a new column TOTAL_DAMAGE that contains the combined property and crop damage for each recorded event.

Summarizing the Damage by Event Type

We will now summarize the total damage by event type (EVTYPE). This will allow us to identify which events caused the most economic damage overall, including both property and crop damage.

# Sum property, crop and combined damage within each event type
damage_summary <- aggregate(
  cbind(PROPDMG_ACTUAL, CROPDMG_ACTUAL, TOTAL_DAMAGE) ~ EVTYPE,
  relevant_df,
  FUN = sum, na.rm = TRUE
)

# Rank the event types from the largest to the smallest combined damage
damage_summary_sorted <- damage_summary[
  order(damage_summary[["TOTAL_DAMAGE"]], decreasing = TRUE),
]

# Show the ten event types with the greatest total economic consequences
head(damage_summary_sorted, n = 10)
##                EVTYPE PROPDMG_ACTUAL CROPDMG_ACTUAL TOTAL_DAMAGE
## 170             FLOOD   144657709807     5661968450 150319678257
## 411 HURRICANE/TYPHOON    69305840000     2607872800  71913712800
## 834           TORNADO    56925660790      414953270  57340614060
## 670       STORM SURGE    43323536000           5000  43323541000
## 244              HAIL    15727367053     3025537890  18752904943
## 153       FLASH FLOOD    16140812067     1421317100  17562129167
## 95            DROUGHT     1046106000    13972566000  15018672000
## 402         HURRICANE    11868319010     2741910000  14610229010
## 590       RIVER FLOOD     5118945500     5029459000  10148404500
## 427         ICE STORM     3944927860     5022113500   8967041360
# Average property, crop and combined damage per record within each event type
damage_summary_avg <- aggregate(
  cbind(PROPDMG_ACTUAL, CROPDMG_ACTUAL, TOTAL_DAMAGE) ~ EVTYPE,
  relevant_df,
  FUN = mean, na.rm = TRUE
)

# Rank the event types from the largest to the smallest average combined damage
damage_summary_sorted_avg <- damage_summary_avg[
  order(damage_summary_avg[["TOTAL_DAMAGE"]], decreasing = TRUE),
]

# Show the ten event types with the greatest average economic consequences
head(damage_summary_sorted_avg, n = 10)
##                         EVTYPE PROPDMG_ACTUAL CROPDMG_ACTUAL TOTAL_DAMAGE
## 842 TORNADOES, TSTM WIND, HAIL     1600000000   2.500000e+06   1602500000
## 299  HEAVY RAIN/SEVERE WEATHER     1250000000   0.000000e+00   1250000000
## 411          HURRICANE/TYPHOON      787566364   2.963492e+07    817201282
## 408             HURRICANE OPAL      350316224   1.000001e+06    351316226
## 670                STORM SURGE      165990559   1.915709e+01    165990579
## 954                 WILD FIRES      156025000   0.000000e+00    156025000
## 136          EXCESSIVE WETNESS              0   1.420000e+08    142000000
## 409  HURRICANE OPAL/HIGH WINDS      100000000   1.000000e+07    110000000
## 604        SEVERE THUNDERSTORM       92720000   1.538462e+04     92735385
## 402                  HURRICANE       68208730   1.575810e+07     83966833

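The damage_summary_sorted data frame shows the total damage for each event type, sorted from the highest to the lowest total damage, and damage_summary_sorted_avg does the same for the average damage per recorded event. Together they help us identify which event types have the greatest economic impact.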

Results

Across the United States, which types of events are most harmful with respect to population health?

Creating a plot for the top contributors to population health disturbances:

# Load necessary libraries
library(gridExtra)
library(grid)

# Lay the four panels out in a 2x2 grid (base graphics); the large bottom
# margin leaves room for the rotated event-type labels.
par(mfrow = c(2, 2), mar = c(11, 6, 3, 2))

# Each panel is described by its top-5 table, the column drawn as bar heights,
# the bar colour and the title. Panels are drawn in order A, B, C, D.
for (panel in list(
  list(
    data = top5_avg_fatalities, measure = "FATALITIES.x",
    fill = "steelblue", title = "A) Top 5 Events by Average Fatalities"
  ),
  list(
    data = top5_avg_injuries, measure = "INJURIES.x",
    fill = "darkorange", title = "B) Top 5 Events by Average Injuries"
  ),
  list(
    data = top5_total_fatalities, measure = "FATALITIES.y",
    fill = "darkred", title = "C) Top 5 Events by Total Fatalities"
  ),
  list(
    data = top5_total_injuries, measure = "INJURIES.y",
    fill = "green", title = "D) Top 5 Events by Total Injuries"
  )
)) {
  barplot(
    panel$data[[panel$measure]],
    names.arg = panel$data[["EVTYPE"]],
    col = panel$fill,
    main = panel$title,
    xlab = "",
    ylab = "Count",
    las = 2, # draw the event-type labels perpendicular to the axis
    cex.names = 0.8 # shrink the event-type labels slightly
  )
}

Figure 1 Comparing the averages and totals of fatalities and injuries for the top 5 event types

From the data, we can see that while the highest average rates for both fatalities and injuries tend to have varied causes, including hurricanes, heat/cold waves and tornadoes, the Tornado category clearly dominates the cumulative fatality and injury counts.

Looking into the data collection standards, one weather pattern can cause many tornado touchdowns, which means that a single weather event may be recorded as many separate tornado entries. This dilutes the average fatality and injury rates for tornadoes, even though tornadoes are clearly the largest contributors to the totals.

Nonetheless, tornadoes are still one of the contributing factors in the highest average fatality rate, so we can safely conclude that tornadoes have historically caused the most deaths in the US.

Looking into other events such as excessive heat, I expect the share of fatalities and injuries to increase for this type of event as climate change causes an increase in sudden extreme temperature swings.

Across the United States, which types of events have the greatest economic consequences?

# Bar heights for the total-damage panel: the ten highest-ranked event types,
# transposed so that each column is an event type and each row a damage series.
top10_data_total <- head(damage_summary_sorted, n = 10)
damage_matrix_total <- t(
  as.matrix(
    top10_data_total[c("TOTAL_DAMAGE", "CROPDMG_ACTUAL", "PROPDMG_ACTUAL")]
  )
)

# Same layout for the average-damage panel
top10_data_avg <- head(damage_summary_sorted_avg, n = 10)
damage_matrix_avg <- t(
  as.matrix(
    top10_data_avg[c("TOTAL_DAMAGE", "CROPDMG_ACTUAL", "PROPDMG_ACTUAL")]
  )
)

# Stack the two panels vertically (total damage on top, average damage below)
par(mfrow = c(2, 1), mar = c(7, 6, 3, 2))

# Both panels share the same look; only the bar heights, the event-type labels
# and the title differ.
for (panel in list(
  list(
    heights = damage_matrix_total,
    events = top10_data_total$EVTYPE,
    title = "A) Total Damage by Event Type"
  ),
  list(
    heights = damage_matrix_avg,
    events = top10_data_avg$EVTYPE,
    title = "B) Average Damage by Event Type"
  )
)) {
  # Grouped bars per event type; the default axes are suppressed so a custom
  # y-axis can be drawn afterwards.
  barplot(
    panel$heights,
    beside = TRUE,
    col = c("darkred", "darkgreen", "steelblue"),
    names.arg = panel$events,
    main = panel$title,
    xlab = "",
    ylab = "Damage (in billions)",
    las = 2,
    cex.names = 0.6,
    legend.text = c("Total Damage", "Crop Damage", "Property Damage"),
    args.legend = list(x = "topright", bty = "n"),
    axes = FALSE
  )

  # Label the y-axis ticks in billions, rounded to one decimal, with a "B"
  # suffix.
  yticks <- axTicks(2)
  yticks_labels <- paste0(round(yticks / 1e9, 1), "B")
  axis(2, at = yticks, labels = yticks_labels)
}

Figure 2 Comparing the averages and totals of damages for the top 10 event types

When evaluating the economic costs of natural disasters, tornadoes and inclement/excessive rain/extreme weather emerge as the leading contributors to both average property and crop damage, while, broken down by category, droughts and excessive wetness do the most damage to agriculture, as one might expect.

However, the totals decisively indicate that flooding is the single largest source of inclement weather damage, followed by a cluster of hurricane, tornado and storm surge damages, which have similar totals. Rising sea levels will only increase the totals for flooding and storm surge.