Setting up the defaults:
Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
This section shows the R code used to load and pre-process the raw data for analysis. Pre-processing is needed because some event types in the dataset are misspelled or inconsistently labelled and would otherwise be counted as separate event types.
# Load the raw storm data from the compressed CSV (path is relative to the
# working directory) and record its dimensions as c(rows, columns).
stormData <- read.csv(file = "repdata-data-StormData.csv.bz2")
stormDataDim <- c(nrow(stormData), ncol(stormData))
The raw data contains 902297 rows and 37 columns with 985 unique event types.
library(stringr)

# Keep only the event label and the four outcome columns used in this analysis.
processedData <- stormData[, c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "CROPDMG")]

# Normalise case and strip surrounding whitespace first, so that the anchored
# patterns below (^...$) match regardless of how the label was typed.
processedData$EVTYPE <- str_trim(toupper(processedData$EVTYPE), side = "both")

# Ordered (pattern, replacement) rules for consolidating event labels.
# Each rule is applied to the output of the previous one, so the order matters.
evtypeRules <- list(
  # Thunderstorm spelling variants and abbreviations.
  # NOTE(review): ".*TSTM" rewrites ANY label containing TSTM (a label such as
  # "NON-TSTM WIND", if present, would also end up as THUNDERSTORM WINDS) --
  # confirm this is intended.
  c("^THUNDERSTORMW", "THUNDERSTORMS"),
  c(".*TSTM", "THUNDERSTORM"),
  c("^THUNERSTORM", "THUNDERSTORM"),
  c("^TUNDERSTORM", "THUNDERSTORM"),
  c("^THUNDESTORM", "THUNDERSTORM"),
  c("^THUDERSTORM WINDS$", "THUNDERSTORM WINDS"),
  c("^THUNDEERSTORM WINDS$", "THUNDERSTORM WINDS"),
  c("^THUNDERESTORM WINDS$", "THUNDERSTORM WINDS"),
  c("^THUNDERSTROM WIND.*", "THUNDERSTORM WINDS"),
  c("^THUNDERTORM.*", "THUNDERSTORM WINDS"),
  c("^THUNDERTSORM.*", "THUNDERSTORM WINDS"),
  # Catch-all: every label that now contains THUNDERSTORM becomes one category.
  c(".*THUNDERSTORM.*", "THUNDERSTORM WINDS"),
  c("LIGHTNING.*", "LIGHTNING"),
  # "AVALANCE*" also matches the "AVALANC" prefix of a correctly spelled
  # AVALANCHE (turning it into AVALANCHEHE); the second rule undoes that.
  c("AVALANCE*", "AVALANCHE"),
  c("AVALANCHEHE*", "AVALANCHE"),
  c(".*HURRICANE.*", "HURRICANE"),
  # FLASH must be handled before FLOOD so that labels mentioning both are
  # classified as FLASH FLOOD.
  c(".*FLASH.*", "FLASH FLOOD"),
  c("^FLOOD.*", "FLOOD")
)
for (rule in evtypeRules) {
  processedData$EVTYPE <- gsub(rule[1], rule[2], processedData$EVTYPE)
}

processedDataDim <- dim(processedData)

# unique() on a vector returns a vector, for which nrow() is NULL; length()
# gives the actual number of distinct event types.
uniqueProcessedData <- length(unique(processedData$EVTYPE))
The new/processed data set contains 902297 rows and 5 columns with 723 unique event types.
This section shows the R code used to compute the total fatalities, injuries, property damage, and crop damage for every event type.
Step:
1.1. Convert the Event Type (EVTYPE) column from string to factor.
# Store the cleaned event labels as a factor so they can be used for grouping.
processedData[["EVTYPE"]] <- as.factor(processedData[["EVTYPE"]])
1.2. Count the number of fatalities per Event Type
# Sum FATALITIES within each EVTYPE; the result has one row per event type.
fatalitiesByEventType <- aggregate(
  FATALITIES ~ EVTYPE,
  data = processedData,
  FUN = sum
)
1.3. Get the Top 10 Events that cause fatality
# Sort event types by total fatalities (largest first) and keep the first ten.
top10FatalEvent <- fatalitiesByEventType[order(-fatalitiesByEventType[["FATALITIES"]]), ]
top10FatalEvent <- head(top10FatalEvent, n = 10)

# Rebuild the factor with its levels in ranked order, so the level order
# follows the ranking instead of the alphabetical default.
top10FatalEvent[["EVTYPE"]] <- factor(
  top10FatalEvent[["EVTYPE"]],
  levels = top10FatalEvent[["EVTYPE"]]
)
1.4. Count the number of injuries per Event Type
# Sum INJURIES within each EVTYPE; the result has one row per event type.
injuriesByEventType <- aggregate(
  INJURIES ~ EVTYPE,
  data = processedData,
  FUN = sum
)
1.5. Get the Top 10 Events that cause injuries
# Sort event types by total injuries (largest first) and keep the first ten.
top10InjuryEvent <- injuriesByEventType[order(-injuriesByEventType[["INJURIES"]]), ]
top10InjuryEvent <- head(top10InjuryEvent, n = 10)

# Rebuild the factor with its levels in ranked order, so the level order
# follows the ranking instead of the alphabetical default.
top10InjuryEvent[["EVTYPE"]] <- factor(
  top10InjuryEvent[["EVTYPE"]],
  levels = top10InjuryEvent[["EVTYPE"]]
)
2.1. Get the total number of damages with relation to properties per event type.
# Sum the PROPDMG column per event type, then keep the ten largest totals.
# NOTE(review): PROPDMG is summed as-is; if the raw data carries a separate
# magnitude/exponent column for damage, these totals are not comparable
# amounts -- confirm against the dataset documentation.
propertyDamage <- aggregate(PROPDMG ~ EVTYPE, data = processedData, FUN = sum)
top10PropertyDamage <- propertyDamage[order(-propertyDamage[["PROPDMG"]]), ]
top10PropertyDamage <- head(top10PropertyDamage, n = 10)
2.2. Get the total number of damages with relation to crops per event type.
# Sum the CROPDMG column per event type, then keep the ten largest totals.
# NOTE(review): CROPDMG is summed as-is; if the raw data carries a separate
# magnitude/exponent column for damage, these totals are not comparable
# amounts -- confirm against the dataset documentation.
cropDamage <- aggregate(CROPDMG ~ EVTYPE, data = processedData, FUN = sum)
top10CropDamage <- cropDamage[order(-cropDamage[["CROPDMG"]]), ]
top10CropDamage <- head(top10CropDamage, n = 10)
2.3. Merge Crop and Property Damage
library(tidyr)
library(ggplot2)

# Inner join on EVTYPE: only event types present in BOTH top-10 lists survive.
economicDamage <- merge(top10PropertyDamage, top10CropDamage, by = "EVTYPE")

# Reshape to long form (one row per event type and damage type) so the two
# damage measures can be drawn as facets. pivot_longer() replaces the
# superseded gather(); as.data.frame() keeps the plain data.frame class that
# gather() returned.
economicDamage <- as.data.frame(
  pivot_longer(
    economicDamage,
    cols = -EVTYPE,
    names_to = "DamageType",
    values_to = "Damage"
  )
)

# Bar chart of the ten event types with the most fatalities. The fill legend
# duplicates the x axis, so it is hidden; "none" is used because passing FALSE
# to guides() is deprecated in current ggplot2.
ggplot(top10FatalEvent, aes(EVTYPE, FATALITIES, fill = EVTYPE)) +
  geom_bar(stat = "identity") +
  guides(fill = "none") +
  xlab("EVENT TYPE") +
  ylab("# OF FATALITIES") +
  ggtitle("Top 10 Events that Cause Fatality Across U.S.") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
The figure above shows the top 10 severe weather events that cause fatalities across the United States. TORNADO accounts for the most fatalities, with 5633 reported.
# Bar chart of the ten event types with the most injuries. EVTYPE is used
# directly (as in the fatalities chart). The fill legend duplicates the x axis,
# so it is hidden; "none" is used because passing FALSE to guides() is
# deprecated in current ggplot2.
ggplot(top10InjuryEvent, aes(x = EVTYPE, y = INJURIES, fill = EVTYPE)) +
  geom_bar(stat = "identity") +
  guides(fill = "none") +
  xlab("EVENT TYPE") +
  ylab("# OF Injuries") +
  ggtitle("Top 10 Events that Cause Injuries Across U.S.") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
The figure above illustrates the top 10 severe weather events that cause injuries across the United States. TORNADO accounts for the most injuries, with 91346 reported.
# Bias number formatting toward fixed (non-scientific) notation.
options(scipen = 6)

# One facet per damage type, stacked vertically, with one bar per event type.
# The fill legend duplicates the x axis, so it is hidden; "none" is used
# because passing FALSE to guides() is deprecated in current ggplot2.
ggplot(economicDamage, aes(EVTYPE, y = Damage, fill = EVTYPE)) +
  geom_bar(stat = "identity") +
  guides(fill = "none") +
  xlab("EVENT TYPE") +
  ylab("Damage") +
  ggtitle("Top 6 Events that have the Greatest Economic Consequences Across U.S.") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  facet_wrap(~DamageType, ncol = 1)
The figure above shows the top 6 severe weather events that have the greatest economic consequences across the United States. HAIL has the highest crop damage, at 579596, while TORNADO has the highest property damage, at 3212258.