This project explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The analysis suggests that excessive heat and tornadoes have the greatest impact on public health (most fatalities and most injuries, respectively), while floods have the greatest impact on the economy.
The data for this assignment come in the form of a comma-separated-value file compressed via the bzip2 algorithm to reduce its size.
## Read the CSV file previously downloaded from https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2
## bzfile() wraps the compressed file in a connection, so read.csv() decompresses it while reading.
## TRUE is spelled out because T is an ordinary variable that can be reassigned.
myWeather <- read.csv(bzfile("./repdatadataStormData.csv.bz2"), sep = ",", header = TRUE)
Although the NOAA database contains data from 1950 through 2011, we will use only the data from 1998 and later. Prior to 1996, only tornado, thunderstorm, wind, and hail events were recorded. Beginning in 1998, 48 weather event types are intended to be reported.
Additionally, property values increased rapidly after the mid-1990s, and the population grew rapidly and became increasingly concentrated in flood- and storm-prone coastal areas.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Parse BGN_DATE as month/day/year into a Date column.
myWeather$Date <- as.Date(myWeather$BGN_DATE, format = "%m/%d/%Y")
## Keep only events that began on or after 1998-01-01.
## The column is referenced by name (data masking) instead of through myWeather$Date,
## and the cutoff is an explicit Date rather than a string that is coerced implicitly.
## dplyr::filter is named explicitly so stats::filter can never be picked up by mistake.
myWeather_post_1998 <- dplyr::filter(myWeather, Date >= as.Date("1998-01-01"))
## Total fatalities per event type, with readable column names
Fatalities <- setNames(
  aggregate(FATALITIES ~ EVTYPE, data = myWeather_post_1998, FUN = sum),
  c("Event", "Fatalities")
)
## Total injuries per event type, with readable column names
Injuries <- setNames(
  aggregate(INJURIES ~ EVTYPE, data = myWeather_post_1998, FUN = sum),
  c("Event", "Injuries")
)
## Keep the 10 event types with the largest totals (largest first)
fatality_rank <- order(-Fatalities$Fatalities)
injury_rank <- order(-Injuries$Injuries)
FatalitiesOrdered <- Fatalities[fatality_rank[1:10], ]
InjuriesOrdered <- Injuries[injury_rank[1:10], ]
library(ggplot2)
## Bar charts of the ten largest fatality and injury totals, one bar per event type.
## Both charts rotate the x-axis labels by 90 degrees so the event names stay legible.
rotated_x_labels <- theme(axis.text.x = element_text(angle = 90, hjust = 1))

## Fatalities: bar height and fill colour both encode the fatality count
fat_plot <- ggplot(FatalitiesOrdered, aes(x = Event, y = Fatalities)) +
  geom_bar(aes(fill = Fatalities), stat = "identity") +
  rotated_x_labels +
  labs(
    title = "10 Most harmful events by Fatalities (1998 - 2011)",
    x = "Type of event",
    y = "Number of Fatalities"
  )

## Injuries: bar height and fill colour both encode the injury count
inj_plot <- ggplot(InjuriesOrdered, aes(x = Event, y = Injuries)) +
  geom_bar(aes(fill = Injuries), stat = "identity") +
  rotated_x_labels +
  labs(
    title = "10 Most harmful events by Injuries (1998 - 2011)",
    x = "Type of event",
    y = "Number of Injuries"
  )
## Print plots
library(grid)
## Two-row, one-column layout: fatalities plot on top, injuries plot underneath.
## seq_len(2) replaces seq(, 2), which left an argument blank and relied on
## seq()'s default `from` to produce 1:2.
layout <- matrix(seq_len(2), ncol = 1, nrow = 2)
## Start a fresh page and install a grid layout with the matrix's dimensions.
grid.newpage()
pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
## Draw each plot into its own cell of the layout.
print(fat_plot, vp = viewport(layout.pos.row = 1, layout.pos.col = 1))
print(inj_plot, vp = viewport(layout.pos.row = 2, layout.pos.col = 1))
## result
## Only the two counts are computed from the data; the event names in the
## sentence are literal text.
most_fatalities <- max(FatalitiesOrdered$Fatalities)
most_injuries <- max(InjuriesOrdered$Injuries)
paste(
  "We can see that Excessive Heat cause most deaths with",
  most_fatalities,
  "fatalities and Tornado with",
  most_injuries,
  "injuries between 1998 - 2011"
)
## [1] "We can see that Excessive Heat cause most deaths with 1681 fatalities and Tornado with 18929 injuries between 1998 - 2011"
## Keep rows with positive property damage whose exponent code is K, M or B,
## and only the three columns needed for the cost computation.
has_damage <- myWeather_post_1998$PROPDMG > 0
known_unit <- myWeather_post_1998$PROPDMGEXP %in% c("K", "M", "B")
money <- myWeather_post_1998[has_damage & known_unit, c("EVTYPE", "PROPDMG", "PROPDMGEXP")]
## Translate the K / M / B codes into numeric factors and express the damage in billions
multiplier <- data.frame(
  PROPDMGEXP = c("K", "M", "B"),
  multiplier = c(1e3, 1e6, 1e9)
)
money <- merge(money, multiplier, by = "PROPDMGEXP")
money$REAL_MONEY <- with(money, PROPDMG * multiplier / 1e9)
## Total damage per event type, then the 10 costliest event types (largest first)
money_damage <- aggregate(REAL_MONEY ~ EVTYPE, data = money, FUN = sum)
damage_rank <- order(-money_damage$REAL_MONEY)
money_damage_ordered <- money_damage[damage_rank[1:10], ]
## Bar chart of total property damage (in $ billion) for the ten costliest event types.
## Bar height and fill colour both encode the total; x labels are rotated by 90 degrees.
ggplot(money_damage_ordered, aes(x = EVTYPE, y = REAL_MONEY)) +
  geom_bar(aes(fill = REAL_MONEY), stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Total Cost($ billion) by Event Type",
    x = "",
    y = "Total"
  )
## Only the amount is computed from the data; the event names in the sentence
## are literal text.
largest_damage <- max(money_damage_ordered$REAL_MONEY)
paste(
  "FLOOD has the bigest impact on economy with",
  largest_damage,
  "billions dollars followed by HURRICANE / TYPHON and STORM SURGE respectively"
)
## [1] "FLOOD has the bigest impact on economy with 136.85001025 billions dollars followed by HURRICANE / TYPHON and STORM SURGE respectively"