Synopsis

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

The U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The events in the database start in the year 1950 and end in November 2011.

This project identifies the types of weather events that are most harmful with respect to population health, measured by injuries and fatalities, and the types that have the greatest economic consequences, measured by the economic loss due to property damage and crop damage, in the United States.

Data Processing

library(dplyr)
library(ggplot2)
# Read the raw storm records from the CSV file into a data frame.
weatherdata <- read.csv(file = "repdata-data-StormData.csv")

This chunk of code will process the data and assess total fatality and injury counts in each event type.

# Total injuries per event type: drop event types with no recorded
# injuries and list the remainder from most to fewest injuries.
eventVSinjury <- weatherdata |>
  group_by(EVTYPE) |>
  summarise(injury = sum(INJURIES, na.rm = TRUE)) |>
  filter(injury > 0) |>
  arrange(desc(injury))

# Total fatalities per event type, built the same way as the injury table.
eventVSfatality <- weatherdata |>
  group_by(EVTYPE) |>
  summarise(fatality = sum(FATALITIES, na.rm = TRUE)) |>
  filter(fatality > 0) |>
  arrange(desc(fatality))

This chunk of code filters the dataset to remove all observations with zero property damage or zero crop damage, respectively, and converts the damage exponent (DMGEXP) column to integers.

For the sake of simplicity and relevance, only the multipliers (DMGEXP) B/b (billion), M/m (million), K/k (thousand), and H/h (hundred) are taken into consideration; observations with any other exponent code are dropped.

# Exponent codes that are kept; rows carrying any other code are dropped.
exponents <- c("K", "k", "M", "m", "B", "b", "H", "h")

# Integer multiplier for each kept code, keyed by the upper-case letter.
# Every code admitted by `exponents` has an entry here, so the lookup below
# cannot leave an NA multiplier in either cleaned table.
exponent_multiplier <- c(
  H = 100L,
  K = 1000L,
  M = 1000000L,
  B = 1000000000L
)

# CLEANING THE PROPERTY DAMAGE DATA
# Keep rows with positive property damage and a recognised exponent code.
cleanpropdmg <- weatherdata |>
  filter(PROPDMG > 0) |>
  filter(PROPDMGEXP %in% exponents)

# Replace the exponent code with its integer multiplier. as.character()
# guards against the column having been read as a factor.
cleanpropdmg$PROPDMGEXP <- unname(
  exponent_multiplier[toupper(as.character(cleanpropdmg$PROPDMGEXP))]
)


# CLEANING THE CROP DAMAGE DATA
# Keep rows with positive crop damage and a recognised exponent code.
cleancropdmg <- weatherdata |>
  filter(CROPDMG > 0) |>
  filter(CROPDMGEXP %in% exponents)

# Same lookup as for property damage, so "H"/"h" is converted here as well.
cleancropdmg$CROPDMGEXP <- unname(
  exponent_multiplier[toupper(as.character(cleancropdmg$CROPDMGEXP))]
)

This chunk of code will process the data and assess total property damage and crop damage in each event type.

# Total property loss in USD per event type (damage figure times its
# multiplier), largest first.
eventVSproperty <- cleanpropdmg |>
  group_by(EVTYPE) |>
  summarise(propertyloss = sum(PROPDMG * PROPDMGEXP)) |>
  arrange(desc(propertyloss))

# Total crop loss in USD per event type, largest first.
eventVScrop <- cleancropdmg |>
  group_by(EVTYPE) |>
  summarise(croploss = sum(CROPDMG * CROPDMGEXP)) |>
  arrange(desc(croploss))

# MERGING THE CROP AND PROPERTY DATAFRAME TO GET A NET LOSS DUE TO AN EVENT
# all = TRUE keeps event types that appear in only one of the two tables;
# the default inner join would silently drop them from the totals.
eventvseconomic <- merge(
  eventVScrop,
  eventVSproperty,
  by = "EVTYPE",
  all = TRUE
)

# An event type absent from one table comes back as NA on that side of the
# join; count it as zero loss so the total is still defined.
eventvseconomic$croploss[is.na(eventvseconomic$croploss)] <- 0
eventvseconomic$propertyloss[is.na(eventvseconomic$propertyloss)] <- 0

eventvseconomic$totalloss <- eventvseconomic$propertyloss +
  eventvseconomic$croploss
eventvseconomic <- eventvseconomic |>
  arrange(desc(totalloss))

Results

# Bar chart of the first five rows of the injury table, which is sorted by
# descending injury count.
injurygraph <- ggplot(data = eventVSinjury[1:5, ])
injurygraph +
  geom_col(mapping = aes(x = EVTYPE, y = injury)) +
  labs(
    title = "Top 5 Severe Weather events having the most cases of injuries",
    x = "Event type",
    y = "Injuries"
  )

# Bar chart of the first five rows of the fatality table, which is sorted by
# descending fatality count.
fatalitygraph <- ggplot(data = eventVSfatality[1:5, ])
fatalitygraph +
  geom_col(mapping = aes(x = EVTYPE, y = fatality)) +
  labs(
    title = "Top 5 Severe Weather events having the most cases of fatalities",
    x = "Event type",
    y = "Fatalities"
  )

# Bar chart of the first five rows of the combined loss table, which is
# sorted by descending total loss.
economicgraph <- ggplot(eventvseconomic[1:5, ])
economicgraph +
  geom_col(aes(x = EVTYPE, y = totalloss)) +
  labs(
    title = "Top 5 Severe Weather events having the most economic consequence",
    x = "Event type",
    y = "Amount (In Billion USD)"
  ) +
  scale_y_continuous(
    breaks = c(0, 5e+10, 1e+11, 1.5e+11),
    # totalloss is in USD; dividing each break by 1e9 yields tick labels in
    # billions (0, 50, 100, 150), matching the axis title.
    labels = function(usd) usd / 1e9
  )