Introduction

This project explores the impact of severe weather events in the United States on both population health and the economy. The analysis identifies which event types have the greatest impact in each area. As a data source, we use the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database.

Synopsis

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

This project explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database to answer two questions: which types of weather events are most harmful to population health, and which have the greatest economic consequences?

The project starts with initial data loading and reading, proceeds to data preprocessing, and finally presents results through plots.

Data Processing

# Load and preprocess data
# Both reads below go through the file path rather than one shared, already
# open connection. An open connection keeps its read position, so a second
# read.table() on it would resume after the sampled rows: those rows would be
# missing from the extract and a data row would be consumed as the header.
# Given a path, read.table() opens the file itself (compressed files are
# detected automatically when reading) and closes it when it is done.
storm_file <- "repdata_data_StormData.csv.bz2"

# Sample the first 10,000 rows to inspect the classes read.table() infers.
StormData10000 <- read.table(
  storm_file,
  header = TRUE,
  sep = ",",
  quote = "\"",
  nrows = 10000
)
classes <- sapply(StormData10000, class)

# Keep only the nine columns the analysis needs out of the 37 in the file;
# a colClasses entry of "NULL" makes read.table() skip that column.
col_classes <- rep("NULL", 37)
col_classes[c(2, 7, 8, 26, 28)] <- "factor"  # date, state, event, exponents
col_classes[c(23, 24, 25, 27)] <- "numeric"  # fatalities, injuries, damages

StormDataExtract <- read.table(
  storm_file,
  header = TRUE,
  sep = ",",
  quote = "\"",
  colClasses = col_classes,
  blank.lines.skip = FALSE
)

colnames(StormDataExtract) <- c(
  "BGN_DATE", "STATE", "EVTYPE", "FATALITIES", "INJURIES", "PROPDMG",
  "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)

# Calculating property and crop damage

# Convert damage amounts and their magnitude codes into plain dollar values.
#
# amount:         numeric vector of damage figures.
# magnitude_code: vector (factor or character) of the same length holding the
#                 magnitude code that accompanies each amount.
# Returns a numeric vector of the same length. Codes K/k, M/m and B/b scale
# the amount by 1e3, 1e6 and 1e9; any other code leaves the amount unscaled,
# and a missing amount counts as zero damage.
damage_in_dollars <- function(amount, magnitude_code) {
  multipliers <- c(
    K = 1e3, k = 1e3,
    M = 1e6, m = 1e6,
    B = 1e9, b = 1e9
  )
  # Lookup by name yields NA for every code that is not listed above.
  scale <- unname(multipliers[as.character(magnitude_code)])
  scale[is.na(scale)] <- 1
  dollars <- amount * scale
  dollars[is.na(amount)] <- 0
  dollars
}

nbrrows <- nrow(StormDataExtract)

# Whole-column computation: no per-row loop, and an empty table simply gives
# empty damage vectors.
PropDamage <- damage_in_dollars(
  StormDataExtract$PROPDMG,
  StormDataExtract$PROPDMGEXP
)
CropDamage <- damage_in_dollars(
  StormDataExtract$CROPDMG,
  StormDataExtract$CROPDMGEXP
)

StormData <- cbind(StormDataExtract, PropDamage, CropDamage)

library(lattice)

# Totals per event type, ranked from largest to smallest

# Fatalities summed over every record of each event type
FatalitiesByEvent <- sort(
  tapply(StormData$FATALITIES, StormData$EVTYPE, sum),
  decreasing = TRUE
)

# Property damage summed per event type, expressed in billions of dollars
PropertyDamageByEvent <- sort(
  tapply(StormData$PropDamage, StormData$EVTYPE, sum) / 1e9,
  decreasing = TRUE
)

# Bar charts of the twelve highest-ranked event types for each measure
top_twelve <- seq_len(12)

barchart(
  FatalitiesByEvent[top_twelve],
  main = "Twelve Most Destructive Weather Events Based on Fatalities",
  xlab = "Total Fatalities",
  col = palette()
)

barchart(
  PropertyDamageByEvent[top_twelve],
  main = "Twelve Most Destructive Weather Events Based on Property Damage",
  xlab = "Total Damage in Billions",
  col = palette()
)

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
# Summarized Data
# Per-record year, fatalities and combined property + crop damage (billions).
TotalDamage <- StormData$PropDamage + StormData$CropDamage
SummarizedData <- data.frame(
  # BGN_DATE is parsed as month/day/year and only the year is kept
  Year = as.numeric(
    format(as.Date(StormData$BGN_DATE, format = "%m/%d/%Y"), "%Y")
  ),
  Fatalities = StormData$FATALITIES,
  TotalDamage = TotalDamage / 1e9
)

# Collapse to one row per year by summing both measures
SummarizedData <- aggregate(
  cbind(Fatalities, TotalDamage) ~ Year,
  data = SummarizedData,
  sum
)

# Plot
# Bars show yearly damage on the primary axis. Fatalities are drawn at one
# tenth of their value so they share that axis, and the secondary axis
# multiplies by ten again so its labels show the fatality counts.
ggplot(SummarizedData, aes(x = Year)) +
  geom_col(aes(y = TotalDamage, fill = "Total Damage"), color = "blue") +
  # `linewidth` replaces `size` for lines, deprecated since ggplot2 3.4.0
  geom_line(aes(y = Fatalities / 10, color = "Fatalities"), linewidth = 1) +
  scale_y_continuous(
    name = "Total Damage (USD Billions)",
    sec.axis = sec_axis(~ . * 10, name = "Fatalities (scaled)")
  ) +
  labs(
    title = "Total Damage and Fatalities by Year (1950-2011)",
    x = "Year Event Began"
  ) +
  theme_minimal() +
  scale_fill_manual(values = c("blue")) +
  scale_color_manual(values = c("red"))