This analysis uses the storm data from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database and analyzes the impact of storms. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The data covers the impact of storms from 1952 onward. The impact is categorized into two parts:
The data is loaded and cleansed so it can be used in the later steps. Analysis-specific cleansing is handled in the results section. The date may be important later on because it is used for filtering, so it is converted from a factor to a proper date.
# Load the raw storm data from the bzip2-compressed CSV.
# Requires ./repdata-data-StormData.csv.bz2 in the current working directory.
stromdf <- read.csv(bzfile("repdata-data-StormData.csv.bz2"))

# Parse the event start timestamp into a Date so it can be filtered by year
# later on; the time-of-day part of the text is read by the format string
# but is not kept in the resulting Date.
stromdf[["BGN_DATE"]] <- as.Date(
  stromdf[["BGN_DATE"]],
  format = "%m/%d/%Y %H:%M:%S"
)
There are really two levels of data cleaning specific to this analysis:
require("dplyr")
## Loading required package: dplyr
## Warning: package 'dplyr' was built under R version 3.1.2
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Health impact: keep only events that caused at least one injury or
# fatality, and only those that began after 1995.
keystromEvents <- stromdf %>%
  filter(INJURIES != 0 | FATALITIES != 0) %>%
  mutate(BGN_DATE_YEAR = as.integer(format(BGN_DATE, "%Y"))) %>%
  filter(BGN_DATE_YEAR > 1995)
#str(keystromEvents)

# Total injuries and fatalities per event type, sorted by fatalities, keeping
# the six event types with the most fatalities. The ranking column is passed
# to top_n() explicitly: without it top_n() ranks by whichever column happens
# to be last, which silently breaks if the summarize() columns are reordered.
fatalEventsGroup <- keystromEvents %>%
  group_by(EVTYPE) %>%
  summarize(injTotal = sum(INJURIES), fatalTotal = sum(FATALITIES)) %>%
  arrange(desc(fatalTotal), desc(injTotal)) %>%
  top_n(6, fatalTotal)

# make EVTYPE an ordered factor (levels follow the sorted row order) so the
# plot keeps this ranking instead of sorting event names alphabetically
fatalEventsGroup$EVTYPE <- factor(
  fatalEventsGroup$EVTYPE,
  levels = fatalEventsGroup$EVTYPE,
  ordered = TRUE
)
# Total injuries and fatalities per event type, sorted by injuries, keeping
# the six event types with the most injuries.
# Fix: top_n(6) without a weight ranks by the last column (fatalTotal), so
# the "top 6 by injuries" table was really the top 6 by fatalities re-sorted.
# Ranking explicitly by injTotal selects the intended rows.
injuriesEventsGroup <- keystromEvents %>%
  group_by(EVTYPE) %>%
  summarize(injTotal = sum(INJURIES), fatalTotal = sum(FATALITIES)) %>%
  arrange(desc(injTotal), desc(fatalTotal)) %>%
  top_n(6, injTotal)

# make EVTYPE an ordered factor (levels follow the sorted row order) so the
# plot keeps this ranking instead of sorting event names alphabetically
injuriesEventsGroup$EVTYPE <- factor(
  injuriesEventsGroup$EVTYPE,
  levels = injuriesEventsGroup$EVTYPE,
  ordered = TRUE
)
require("ggplot2")
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.1.2
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.1.3
## Loading required package: grid
# Horizontal bar charts of the per-event-type totals computed above.
# Built with ggplot() + geom_bar(stat = "identity") rather than
# qplot(..., stat = "identity"), so the plots do not depend on qplot()'s
# stat argument, and columns are referenced by name inside aes() rather than
# as df$col alongside data =.
fatalPlot <- ggplot(fatalEventsGroup, aes(x = EVTYPE, y = fatalTotal)) +
  geom_bar(stat = "identity", fill = "red") +
  labs(
    title = "Top 6 Events By Fatalities",
    x = "Event Types",
    y = "Fatalities Total"
  ) +
  coord_flip()

injuriesPlot <- ggplot(injuriesEventsGroup, aes(x = EVTYPE, y = injTotal)) +
  geom_bar(stat = "identity", fill = "blue") +
  labs(
    title = "Top 6 Events By Injuries",
    x = "",
    y = "Injuries Total"
  ) +
  coord_flip()

# Show both charts side by side.
grid.arrange(fatalPlot, injuriesPlot, ncol = 2)
As seen above, Excessive Heat and Tornado are the top storm types that cause fatalities and injuries.
library("xtable")
## Warning: package 'xtable' was built under R version 3.1.3
# Emit the fatality ranking as an HTML table.
fatalEventsGroup %>%
  xtable() %>%
  print(type = "html")
| | EVTYPE | injTotal | fatalTotal |
|---|---|---|---|
| 1 | EXCESSIVE HEAT | 6391.00 | 1797.00 |
| 2 | TORNADO | 20667.00 | 1511.00 |
| 3 | FLASH FLOOD | 1674.00 | 887.00 |
| 4 | LIGHTNING | 4141.00 | 651.00 |
| 5 | FLOOD | 6758.00 | 414.00 |
| 6 | RIP CURRENT | 209.00 | 340.00 |
There are really two levels of data cleaning specific to this analysis:
# Economic impact: keep only events with non-zero property or crop damage,
# and only those that began after 1995.
economic.events <- stromdf %>%
  filter(PROPDMG != 0 | CROPDMG != 0) %>%
  mutate(BGN_DATE_YEAR = as.integer(format(BGN_DATE, "%Y"))) %>%
  filter(BGN_DATE_YEAR > 1995)
#str(economic.events)

# NOTE(review): the NOAA storm data is commonly documented as storing the
# damage magnitude in separate PROPDMGEXP / CROPDMGEXP columns (e.g. K/M/B).
# If that holds for this file, summing raw PROPDMG / CROPDMG mixes units and
# the values should be scaled before ranking -- confirm against the dataset.

# Total property and crop damage per event type, sorted by property damage,
# keeping the six event types with the most property damage.
# Fix: top_n(6) without a weight ranks by the last column (cropTotal), so the
# "top 6 by property damage" table was really selected by crop damage.
economic.byEvents.propdmg <- economic.events %>%
  group_by(EVTYPE) %>%
  summarize(propTotal = sum(PROPDMG), cropTotal = sum(CROPDMG)) %>%
  arrange(desc(propTotal), desc(cropTotal)) %>%
  top_n(6, propTotal)

# make EVTYPE an ordered factor (levels follow the sorted row order) so the
# plot keeps this ranking instead of sorting event names alphabetically
economic.byEvents.propdmg$EVTYPE <- factor(
  economic.byEvents.propdmg$EVTYPE,
  levels = economic.byEvents.propdmg$EVTYPE,
  ordered = TRUE
)
# Total property and crop damage per event type, sorted by crop damage,
# keeping the six event types with the most crop damage. The ranking column
# is passed to top_n() explicitly: the implicit default is the last column,
# which is only correct here because cropTotal happens to be last.
economic.byEvents.cropdmg <- economic.events %>%
  group_by(EVTYPE) %>%
  summarize(propTotal = sum(PROPDMG), cropTotal = sum(CROPDMG)) %>%
  arrange(desc(cropTotal), desc(propTotal)) %>%
  top_n(6, cropTotal)

# make EVTYPE an ordered factor (levels follow the sorted row order) so the
# plot keeps this ranking instead of sorting event names alphabetically
economic.byEvents.cropdmg$EVTYPE <- factor(
  economic.byEvents.cropdmg$EVTYPE,
  levels = economic.byEvents.cropdmg$EVTYPE,
  ordered = TRUE
)
require("ggplot2")
library(gridExtra)
# Horizontal bar charts of the per-event-type damage totals computed above.
# Built with ggplot() + geom_bar(stat = "identity") rather than
# qplot(..., stat = "identity"), so the plots do not depend on qplot()'s
# stat argument, and columns are referenced by name inside aes() rather than
# as df$col alongside data =.
propPlot <- ggplot(
  economic.byEvents.propdmg,
  aes(x = EVTYPE, y = propTotal)
) +
  geom_bar(stat = "identity", fill = "green") +
  labs(
    title = "Top 6 Events By Property Damage",
    x = "Event Types",
    y = "Property Damage Total"
  ) +
  coord_flip()

cropPlot <- ggplot(
  economic.byEvents.cropdmg,
  aes(x = EVTYPE, y = cropTotal)
) +
  geom_bar(stat = "identity", fill = "red") +
  labs(
    title = "Top 6 Events By Crop Damage",
    x = "",
    y = "Crop Damage Total"
  ) +
  coord_flip()

# Show both charts side by side.
grid.arrange(propPlot, cropPlot, ncol = 2)
As seen above, Wind and Hail are the top storm types that cause property damage and crop damage.