Using the NOAA Storm Database for the US, this analysis examines the human and economic costs of severe weather events.
The data is downloaded from the following URL: https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2
library(ggplot2)
# Load the storm data. The compressed CSV is downloaded only when it is not
# already present in the working directory, so the script runs from a clean
# checkout; read.csv() reads the .bz2 archive directly.
storm_file <- "repdata_data_StormData.csv.bz2"
if (!file.exists(storm_file)) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = storm_file,
    mode = "wb" # binary mode keeps the archive intact on Windows
  )
}
storm <- read.csv(storm_file)
The plots below show that tornadoes are the most harmful events, causing the most injuries and deaths.
# Human cost: total the injuries and fatalities recorded for each event type
# and chart the eight event types with the highest totals.

# Sum `counts` within each event type and sort the totals from largest to
# smallest. Returns a data frame with columns EVTYPE and x (the total).
rank_event_totals <- function(counts, event_type) {
  totals <- aggregate(counts, by = list(EVTYPE = event_type), FUN = sum)
  totals[order(totals$x, decreasing = TRUE), ]
}

# Bar chart of the first `n` rows of a ranked totals table. Bars are drawn in
# ranking order (largest first) instead of alphabetically by event name.
plot_top_events <- function(ranked, y_label, title, n = 8) {
  # head() returns fewer rows when fewer than n event types exist, whereas
  # ranked[1:n, ] would pad the table with NA rows.
  top <- head(ranked, n)
  ggplot(top, aes(reorder(EVTYPE, -x), y = x)) +
    geom_bar(stat = "identity", fill = "skyblue") +
    xlab("Event") +
    ylab(y_label) +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    ggtitle(title)
}

injuries <- rank_event_totals(storm$INJURIES, storm$EVTYPE)
fatalities <- rank_event_totals(storm$FATALITIES, storm$EVTYPE)

plot_top_events(
  injuries,
  y_label = "Number of injuries",
  title = "Injuries from different events"
)
plot_top_events(
  fatalities,
  y_label = "Number of fatalities",
  title = "Fatalities from different events"
)
The plot below shows that floods have the greatest economic consequences.
# Economic cost: convert the property and crop damage figures to dollars,
# total them per event type, and chart the eight costliest event types.
economic_data <- storm[
  ,
  c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
]

# Missing damage amounts are counted as no damage.
economic_data$PROPDMG[is.na(economic_data$PROPDMG)] <- 0
economic_data$CROPDMG[is.na(economic_data$CROPDMG)] <- 0

# Translate damage exponent codes into powers of ten.
#   H/K/M/B (any case) -> 2/3/6/9
#   digit codes        -> that digit is used as the power itself
#   anything else ("", "+", "-", "?", NA, unknown symbols) -> 0
# Works on character or factor input and never returns NA, so one stray code
# cannot turn an event type's total into NA.
damage_exponent <- function(code) {
  code <- toupper(as.character(code))
  letter_powers <- c(H = 2, K = 3, M = 6, B = 9)

  power <- numeric(length(code))
  is_digit <- grepl("^[0-9]+$", code)
  power[is_digit] <- as.numeric(code[is_digit])
  is_letter <- code %in% names(letter_powers)
  power[is_letter] <- letter_powers[code[is_letter]]
  power
}

prop_power <- damage_exponent(economic_data$PROPDMGEXP)
crop_power <- damage_exponent(economic_data$CROPDMGEXP)

# Keep the cleaned exponents on the data frame as character powers of ten,
# which is the form the columns previously ended up in.
economic_data$PROPDMGEXP <- as.character(prop_power)
economic_data$CROPDMGEXP <- as.character(crop_power)

# Total damage per record = property damage + crop damage, both in dollars.
economic_data$TOTALDMG <-
  economic_data$PROPDMG * 10^prop_power +
  economic_data$CROPDMG * 10^crop_power

damage <- aggregate(
  economic_data$TOTALDMG,
  by = list(EVTYPE = economic_data$EVTYPE),
  FUN = sum
)
damage <- damage[order(damage$x, decreasing = TRUE), ]

# head() avoids the NA padding rows that damage[1:8, ] produces when fewer
# than eight event types exist.
DamageSubset <- head(damage, 8)

# Bars are drawn from the costliest event type down, not alphabetically.
ggplot(DamageSubset, aes(reorder(EVTYPE, -x), y = x)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  xlab("Event Type") +
  ylab("Damage (in Dollars ($))") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  ggtitle("Economic cost per event type")