Assignment 2 : Storm Analysis

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

Reading the csv file

# Plotting library used for the bar charts at the end of the report.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.3
# Load the raw storm records; the path is relative to the working directory.
storm.data <- read.csv(file = "stormData.csv")

Selecting only the required columns and normalizing the event type labels

# Keep only the event type and the three outcome measures analysed below.
wanted.columns <- c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG")
reduced.storm.data <- storm.data[, wanted.columns]

# Whole-label rewrites, applied strictly in this order: "TSTM WIND" first
# becomes "THUNDERSTORM WIND", which the last rule then turns into
# "THUNDERSTORM WINDS". The patterns are anchored, so only labels that match
# exactly are rewritten.
label.rewrites <- list(
  c("^HEAT$", "EXCESSIVE HEAT"),
  c("^TSTM WIND$", "THUNDERSTORM WIND"),
  c("^THUNDERSTORM WIND$", "THUNDERSTORM WINDS")
)
for (rewrite in label.rewrites) {
  reduced.storm.data$EVTYPE <-
    gsub(rewrite[1], rewrite[2], reduced.storm.data$EVTYPE)
}

For FATALITIES we aggregate and find the top 10 among them.

# Total fatalities per event type (NA counts are ignored by sum), then the
# ten event types with the highest totals.
agg.fatalities.data <- aggregate(
  reduced.storm.data$FATALITIES,
  by = list(reduced.storm.data$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
colnames(agg.fatalities.data) <- c("event.type", "fatality.total")

# Largest totals first.
fatalities.sorted <-
  agg.fatalities.data[order(-agg.fatalities.data$fatality.total), ]

# head() returns at most 10 rows, so a data set with fewer than ten event
# types does not get padded with all-NA rows the way [1:10, ] would.
top.fatalities <- head(fatalities.sorted, 10)

# Pin the factor levels to the ranking order so the bars in the plot follow
# the totals instead of alphabetical order.
top.fatalities$event.type <- factor(
  top.fatalities$event.type,
  levels = top.fatalities$event.type,
  ordered = TRUE
)

We repeat the same for INJURIES

# Total injuries per event type (NA counts are ignored by sum), then the ten
# event types with the highest totals.
agg.injuries.data <- aggregate(
  reduced.storm.data$INJURIES,
  by = list(reduced.storm.data$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
colnames(agg.injuries.data) <- c("event.type", "injury.total")

# Largest totals first.
injuries.sorted <-
  agg.injuries.data[order(-agg.injuries.data$injury.total), ]

# head() returns at most 10 rows, so a data set with fewer than ten event
# types does not get padded with all-NA rows the way [1:10, ] would.
top.injuries <- head(injuries.sorted, 10)

# Pin the factor levels to the ranking order so the bars in the plot follow
# the totals instead of alphabetical order.
top.injuries$event.type <- factor(
  top.injuries$event.type,
  levels = top.injuries$event.type,
  ordered = TRUE
)

We also repeat the same for property damage (PROPDMG)

# Total property damage per event type (NA values are ignored by sum), then
# the ten event types with the highest totals.
# NOTE(review): PROPDMG is summed as-is. If the source data carries a separate
# magnitude/exponent column for this field (e.g. PROPDMGEXP in the NOAA storm
# data), these totals mix units and the ranking may change -- TODO confirm.
agg.prop.dmg.data <- aggregate(
  reduced.storm.data$PROPDMG,
  by = list(reduced.storm.data$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
colnames(agg.prop.dmg.data) <- c("event.type", "prop.dmg.total")

# Largest totals first.
prop.dmg.sorted <-
  agg.prop.dmg.data[order(-agg.prop.dmg.data$prop.dmg.total), ]

# head() returns at most 10 rows, so a data set with fewer than ten event
# types does not get padded with all-NA rows the way [1:10, ] would.
top.prop.dmg <- head(prop.dmg.sorted, 10)

# Pin the factor levels to the ranking order so the bars in the plot follow
# the totals instead of alphabetical order.
top.prop.dmg$event.type <- factor(
  top.prop.dmg$event.type,
  levels = top.prop.dmg$event.type,
  ordered = TRUE
)

Pictorial representation of the same

# Bar chart of the top event types by total fatalities; x-axis labels are
# slanted 45 degrees so long event names stay readable.
ggplot(top.fatalities, aes(x = event.type, y = fatality.total)) +
  geom_bar(stat = "identity") +
  labs(
    x = "Event type",
    y = "Total fatalities",
    title = "Fatalities By Event Type"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Bar chart of the top event types by total injuries; x-axis labels are
# slanted 45 degrees so long event names stay readable.
ggplot(top.injuries, aes(x = event.type, y = injury.total)) +
  geom_bar(stat = "identity") +
  labs(
    x = "Event type",
    y = "Total injuries",
    title = "Injuries By Event Type"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Bar chart of the top event types by total property damage; x-axis labels
# are slanted 45 degrees so long event names stay readable.
ggplot(top.prop.dmg, aes(x = event.type, y = prop.dmg.total)) +
  geom_bar(stat = "identity") +
  labs(
    x = "Event type",
    y = "Total property damage",
    title = "Property Damage By Event Type"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Tornadoes caused the maximum damage.