This analysis explores the NOAA Storm Database to determine which weather events are most harmful to population health and which have the greatest economic consequences in the United States between 1950 and 2011.
# Read the raw storm CSV from its path under the user's home directory
stormData <- read.csv(
  file = "~/Reproducible Research/week2/repdata_data_StormData1.csv"
)
# Report the size of the raw table (rows, then columns)
dim(stormData)
## [1] 902297 37
# Keep only the event type plus the health and damage fields used later
stormData <- stormData[c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]
# Count NA cells across the retained columns
sum(is.na(stormData))
## [1] 0
library(ggplot2)
# Sum deaths and injuries separately for every event type
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = stormData, FUN = sum)
injuries <- aggregate(INJURIES ~ EVTYPE, data = stormData, FUN = sum)
# Sort each summary from largest to smallest total and keep the first ten rows
top10fatalities <- head(
  fatalities[order(-fatalities[["FATALITIES"]]), ],
  n = 10
)
top10injuries <- head(
  injuries[order(-injuries[["INJURIES"]]), ],
  n = 10
)
# Horizontal bar chart of the ten event types with the most fatalities;
# reorder() arranges the bars by their total rather than alphabetically
print(
  ggplot(
    data = top10fatalities,
    mapping = aes(x = reorder(EVTYPE, FATALITIES), y = FATALITIES)
  ) +
    geom_col(fill = "tomato") +
    coord_flip() +
    labs(
      x = "Event Type",
      y = "Total Fatalities",
      title = "Top 10 Weather Events by Fatalities (1950-2011)"
    ) +
    theme_bw()
)
# Same chart layout for the ten event types with the most injuries
print(
  ggplot(
    data = top10injuries,
    mapping = aes(x = reorder(EVTYPE, INJURIES), y = INJURIES)
  ) +
    geom_col(fill = "steelblue") +
    coord_flip() +
    labs(
      x = "Event Type",
      y = "Total Injuries",
      title = "Top 10 Weather Events by Injuries (1950-2011)"
    ) +
    theme_bw()
)
### Question 2: Which Events Have the Greatest Economic Consequences?
# Function to convert damage exponent to numeric multiplier
#
# Translates the magnitude codes stored in PROPDMGEXP / CROPDMGEXP into the
# number the matching damage figure must be multiplied by.
#
# Args:
#   exp: vector (character or factor) of exponent codes; case is ignored.
#
# Returns:
#   An unnamed numeric vector the same length as `exp`: 1e2 for "H", 1e3 for
#   "K", 1e6 for "M", 1e9 for "B", and 1 for every other value (blank, NA,
#   symbols, digits), so an unrecognised code leaves the damage figure
#   unscaled instead of turning it into NA.
#
# NOTE(review): "H" is read as hundreds by analogy with K/M/B - confirm
# against the Storm Data documentation. Numeric codes (e.g. "5") are still
# left at 1; confirm the intended interpretation before relying on them.
convertExp <- function(exp) {
  multipliers <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  code <- toupper(as.character(exp))
  # match() yields NA for any code missing from the table (including NA
  # itself); those positions fall back to a multiplier of 1 below.
  value <- unname(multipliers[match(code, names(multipliers))])
  value[is.na(value)] <- 1
  value
}
# Scale the recorded property and crop figures by their exponent multipliers,
# then add the two to get one damage total per record
stormData[["PROPDMGVAL"]] <-
  stormData[["PROPDMG"]] * convertExp(stormData[["PROPDMGEXP"]])
stormData[["CROPDMGVAL"]] <-
  stormData[["CROPDMG"]] * convertExp(stormData[["CROPDMGEXP"]])
stormData[["TOTALDMG"]] <-
  stormData[["PROPDMGVAL"]] + stormData[["CROPDMGVAL"]]
# Sum the combined damage for every event type
totalDamage <- aggregate(TOTALDMG ~ EVTYPE, data = stormData, FUN = sum)
# Sort from largest to smallest total and keep the first ten rows
top10damage <- head(
  totalDamage[order(-totalDamage[["TOTALDMG"]]), ],
  n = 10
)
# Horizontal bar chart of the ten costliest event types; totals are divided
# by 1e9 so the axis reads in billions
print(
  ggplot(
    data = top10damage,
    mapping = aes(x = reorder(EVTYPE, TOTALDMG), y = TOTALDMG / 1e9)
  ) +
    geom_col(fill = "darkgreen") +
    coord_flip() +
    labs(
      x = "Event Type",
      y = "Total Damage (Billions USD)",
      title = "Top 10 Weather Events by Economic Damage (1950-2011)"
    ) +
    theme_bw()
)