In this report, the goal is to analyze the impact of different weather events on public health and the economy, based on the storm database collected by the U.S. National Oceanic and Atmospheric Administration (NOAA) from 1950 to 2011. Estimates of fatalities, injuries, property damage, and crop damage are used to decide which types of events are most harmful to population health and the economy. From these data, we found that high temperatures and tornadoes are most harmful with respect to population health, while floods, droughts, and hurricanes/typhoons have the greatest economic impact.
Step 1: Loading the R packages required for the analysis and loading the storm data
library(ggplot2)
library(gridExtra)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the raw storm data; the relative path is resolved against the
# current working directory.
storm <- read.csv(file = "repdata-data-StormData.csv", header = TRUE)
Step 2: Data cleaning
# Translate damage-exponent codes into powers of ten.
#
# Letter codes are matched case-insensitively (H = 2, K = 3, M = 6, B = 9),
# all-digit codes are used as the exponent itself, and the placeholder codes
# "", "-", "?" and "+" map to 0 so the damage figure is left unscaled.
# Any other code produces NA and triggers a warning listing the offenders.
#
# code: vector of exponent codes (character or factor).
# Returns a numeric vector of exponents, the same length as `code`.
exp_to_power <- function(code) {
  code <- toupper(as.character(code))
  letter_power <- c(H = 2, K = 3, M = 6, B = 9)

  power <- rep(NA_real_, length(code))

  is_digit <- grepl("^[0-9]+$", code)
  power[is_digit] <- as.numeric(code[is_digit])

  is_letter <- code %in% names(letter_power)
  power[is_letter] <- unname(letter_power[code[is_letter]])

  power[code %in% c("", "-", "?", "+")] <- 0

  # Surface unexpected codes instead of letting them turn into NA silently.
  unknown <- is.na(power) & !is.na(code)
  if (any(unknown)) {
    warning(
      "Unrecognised damage exponent code(s): ",
      paste(unique(code[unknown]), collapse = ", "),
      call. = FALSE
    )
  }
  power
}

# Both exponent columns go through the same mapping so they cannot drift
# apart; the columns are overwritten with their numeric exponents.
storm$PROPDMGEXP <- exp_to_power(storm$PROPDMGEXP)
storm$CROPDMGEXP <- exp_to_power(storm$CROPDMGEXP)

# Scale the damage figures by their exponent: value * 10^exponent.
storm$PROPDMG <- storm$PROPDMG * 10^storm$PROPDMGEXP
storm$CROPDMG <- storm$CROPDMG * 10^storm$CROPDMGEXP
Step 3: Data processing to summarise the data per weather event type
# Aggregate the health and damage figures per event type (EVTYPE).
storm1grp <- group_by(storm, EVTYPE)
storm1 <- summarise(
  storm1grp,
  tot_fat = sum(FATALITIES, na.rm = TRUE),
  tot_inj = sum(INJURIES, na.rm = TRUE),
  tot_PROPDMG = sum(PROPDMG, na.rm = TRUE),
  tot_CROPDMG = sum(CROPDMG, na.rm = TRUE)
)

# One ranking table per measure, sorted from largest to smallest total.
# Columns are picked by name so the tables do not depend on the column
# order produced by summarise().
storm_fat <- storm1[order(-storm1$tot_fat), c("EVTYPE", "tot_fat")]
storm_inj <- storm1[order(-storm1$tot_inj), c("EVTYPE", "tot_inj")]
storm_PROPDMG <- storm1[order(-storm1$tot_PROPDMG), c("EVTYPE", "tot_PROPDMG")]
storm_CROPDMG <- storm1[order(-storm1$tot_CROPDMG), c("EVTYPE", "tot_CROPDMG")]
Step 4: Plotting data and Results
Top 10 Weather Events Responsible for Fatalities
# Show the ten event types with the highest fatality totals.
storm_fat[seq_len(10), ]
## Source: local data frame [10 x 2]
##
## EVTYPE tot_fat
## (fctr) (dbl)
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816
## 6 TSTM WIND 504
## 7 FLOOD 470
## 8 RIP CURRENT 368
## 9 HIGH WIND 248
## 10 AVALANCHE 224
Top 10 Weather Events Responsible for Injuries
# Show the ten event types with the highest injury totals.
storm_inj[seq_len(10), ]
## Source: local data frame [10 x 2]
##
## EVTYPE tot_inj
## (fctr) (dbl)
## 1 TORNADO 91346
## 2 TSTM WIND 6957
## 3 FLOOD 6789
## 4 EXCESSIVE HEAT 6525
## 5 LIGHTNING 5230
## 6 HEAT 2100
## 7 ICE STORM 1975
## 8 FLASH FLOOD 1777
## 9 THUNDERSTORM WIND 1488
## 10 HAIL 1361
# Side-by-side bar charts of the ten event types with the most fatalities
# and the most injuries. Bars are ordered by descending total so the chart
# reads as a ranking rather than in alphabetical factor order. No date
# filter is applied to the data, so the titles give the full 1950 - 2011
# period stated in the report.
plot1 <- ggplot(
  data = storm_fat[1:10, ],
  aes(x = reorder(EVTYPE, -tot_fat), y = tot_fat)
) +
  geom_bar(stat = "identity", fill = "steelblue") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Weather Type") +
  ylab("Number Of Fatalities") +
  ggtitle("Top 10 Calamities in Terms of Fatalities\n Events in the U.S.\n from 1950 - 2011")

plot2 <- ggplot(
  data = storm_inj[1:10, ],
  aes(x = reorder(EVTYPE, -tot_inj), y = tot_inj)
) +
  geom_bar(stat = "identity", fill = "steelblue") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Weather Type") +
  ylab("Number Of Injuries") +
  ggtitle("Top 10 Calamities in Terms of Injuries\n Events in the U.S.\n from 1950 - 2011")

# Draw both charts in a single row.
grid.arrange(plot1, plot2, ncol = 2)
Top 10 Weather Events Responsible for Property Damage
# Show the ten event types with the highest property damage totals.
storm_PROPDMG[seq_len(10), ]
## Source: local data frame [10 x 2]
##
## EVTYPE tot_PROPDMG
## (fctr) (dbl)
## 1 FLOOD 144657709807
## 2 HURRICANE/TYPHOON 69305840000
## 3 TORNADO 56947380677
## 4 STORM SURGE 43323536000
## 5 FLASH FLOOD 16822673979
## 6 HAIL 15735267513
## 7 HURRICANE 11868319010
## 8 TROPICAL STORM 7703890550
## 9 WINTER STORM 6688497251
## 10 HIGH WIND 5270046295
Top 10 Weather Events Responsible for Crop Damage
# Show the ten event types with the highest crop damage totals.
storm_CROPDMG[seq_len(10), ]
## Source: local data frame [10 x 2]
##
## EVTYPE tot_CROPDMG
## (fctr) (dbl)
## 1 DROUGHT 13972566000
## 2 FLOOD 5661968450
## 3 RIVER FLOOD 5029459000
## 4 ICE STORM 5022113500
## 5 HAIL 3025954473
## 6 HURRICANE 2741910000
## 7 HURRICANE/TYPHOON 2607872800
## 8 FLASH FLOOD 1421317100
## 9 EXTREME COLD 1292973000
## 10 FROST/FREEZE 1094086000
# Side-by-side bar charts of the ten event types with the largest property
# damage and crop damage totals. Bars are ordered by descending total so the
# chart reads as a ranking rather than in alphabetical factor order. No date
# filter is applied to the data, so the titles give the full 1950 - 2011
# period stated in the report.
plot3 <- ggplot(
  data = storm_PROPDMG[1:10, ],
  aes(x = reorder(EVTYPE, -tot_PROPDMG), y = tot_PROPDMG)
) +
  geom_bar(stat = "identity", fill = "steelblue") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Weather Type") +
  ylab("Total Property Damage") +
  ggtitle("Top 10 Calamities in Terms of Property Damage\n Events in the U.S.\n from 1950 - 2011")

plot4 <- ggplot(
  data = storm_CROPDMG[1:10, ],
  aes(x = reorder(EVTYPE, -tot_CROPDMG), y = tot_CROPDMG)
) +
  geom_bar(stat = "identity", fill = "steelblue") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Weather Type") +
  ylab("Total Crop Damage") +
  ggtitle("Top 10 Calamities in Terms of Crop Damage\n Events in the U.S.\n from 1950 - 2011")

# Draw both charts in a single row.
grid.arrange(plot3, plot4, ncol = 2)