Assignment 2 : Storm Analysis

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

Reading the CSV file

# Read the bzip2-compressed storm data CSV; text columns are kept as
# character vectors rather than factors.
rawdata <- read.csv(
  bzfile("repdata-data-StormData.csv.bz2"),
  header = TRUE,
  stringsAsFactors = FALSE
)

# Show the column names of the loaded data.
names(rawdata)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

For our purposes, we only require seven of these variables:

# Columns used by the rest of the analysis: event type, casualty counts,
# and the damage amounts together with their exponent codes.
variables <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
data <- rawdata[, variables]

Selecting the ten event types with the most fatalities and injuries.

# Total fatalities and total injuries per event type.
fatal <- aggregate(FATALITIES ~ EVTYPE, data = data, FUN = sum)
injury <- aggregate(INJURIES ~ EVTYPE, data = data, FUN = sum)

# Rank event types from largest to smallest total and keep the first ten.
fatal10 <- fatal[order(-fatal$FATALITIES)[1:10], ]
injury10 <- injury[order(-injury$INJURIES)[1:10], ]

Preparing the data: converting the damage exponent codes into numeric multipliers

unique(data$PROPDMGEXP)
##  [1] "K" "M" ""  "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-"
## [18] "1" "8"
# Lookup table: one multiplier for each PROPDMGEXP code listed above.
# Letter codes scale by hundred/thousand/million/billion, digit codes are
# treated as powers of ten, and "+", "-", "?" zero out the amount.
# The i-th multiplier belongs to the i-th code.
prop_exp_lookup <- data.frame(
  code = c(
    "K", "M", "", "B", "m", "+", "0", "5", "6", "?",
    "4", "2", "3", "h", "7", "H", "-", "1", "8"
  ),
  multiplier = c(
    1000, 10^6, 1, 10^9, 10^6, 0, 1, 10^5, 10^6, 0,
    10000, 100, 1000, 100, 10^7, 100, 0, 10, 10^8
  ),
  stringsAsFactors = FALSE
)

# match() (rather than name-based indexing) is used because the empty
# string is one of the codes; codes missing from the table yield NA.
data$PROPEXP <- prop_exp_lookup$multiplier[
  match(data$PROPDMGEXP, prop_exp_lookup$code)
]

# Property damage in dollars.
data$PROPDMGVAL <- data$PROPDMG * data$PROPEXP

unique(data$CROPDMGEXP)
## [1] ""  "M" "K" "m" "B" "?" "0" "k" "2"
# Lookup table: one multiplier for each CROPDMGEXP code listed above.
# "m"/"M" mean millions (10^6) and "B" means billions (10^9), the same
# values the property-damage mapping uses. The earlier version had the
# "m" and "B" multipliers swapped, which understated billion-dollar crop
# losses and overstated "m" entries by a factor of 1000.
# The i-th multiplier belongs to the i-th code.
crop_exp_lookup <- data.frame(
  code = c("", "M", "K", "m", "B", "?", "0", "k", "2"),
  multiplier = c(1, 10^6, 1000, 10^6, 10^9, 0, 1, 1000, 100),
  stringsAsFactors = FALSE
)

# match() (rather than name-based indexing) is used because the empty
# string is one of the codes; codes missing from the table yield NA.
data$CROPEXP <- crop_exp_lookup$multiplier[
  match(data$CROPDMGEXP, crop_exp_lookup$code)
]

# Crop damage in dollars.
data$CROPDMGVAL <- data$CROPDMG * data$CROPEXP

Selecting the ten event types with the highest property damage and crop damage.

# Total property damage and total crop damage per event type.
propdmg <- aggregate(PROPDMGVAL ~ EVTYPE, data = data, FUN = sum)
cropdmg <- aggregate(CROPDMGVAL ~ EVTYPE, data = data, FUN = sum)

# Rank event types from largest to smallest total and keep the first ten.
propdmg10 <- propdmg[order(-propdmg$PROPDMGVAL)[1:10], ]
cropdmg10 <- cropdmg[order(-cropdmg$CROPDMGVAL)[1:10], ]

Plots displaying the results

# Two panels side by side. The large bottom margin and las = 3 leave room
# for the event-type labels, which are drawn perpendicular to the axis.
# mfrow is set before cex because changing the layout resets cex.
par(
  mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0),
  las = 3, cex = 0.8
)

# The y-axis keeps the previous fixed range as a minimum but is widened to
# the largest total when needed, so that no bar is drawn past the axis.
barplot(
  fatal10$FATALITIES,
  names.arg = fatal10$EVTYPE,
  ylim = c(0, max(7000, fatal10$FATALITIES, na.rm = TRUE)),
  col = heat.colors(10),
  ylab = "Number of Fatalities",
  main = " Top 10 Events with Highest Fatalities"
)
barplot(
  injury10$INJURIES,
  names.arg = injury10$EVTYPE,
  ylim = c(0, max(10000, injury10$INJURIES, na.rm = TRUE)),
  col = terrain.colors(10),
  ylab = "Number of Injuries",
  main = " Top 10 Events with Highest Injuries"
)

# Two panels side by side with room below for the event-type labels;
# panel titles are drawn slightly smaller than in the default setting.
# mfrow is set before cex because changing the layout resets cex.
par(
  mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0),
  las = 3, cex = 0.8, cex.main = 0.9
)

# Damage totals are in dollars; divide by 1e9 to plot billions of dollars.
barplot(
  propdmg10$PROPDMGVAL / 1e9,
  names.arg = propdmg10$EVTYPE,
  col = heat.colors(10, alpha = 1),
  ylab = " Cost of Property Damage($ billions)",
  main = "Top 10 Events Causing Highest Property Damage"
)
barplot(
  cropdmg10$CROPDMGVAL / 1e9,
  names.arg = cropdmg10$EVTYPE,
  col = terrain.colors(10, alpha = 1),
  ylab = " Cost of Crop Damage($ billions)",
  main = "Top 10 Events Causing Highest Crop Damage"
)

According to the results, floods caused the most property damage and droughts caused the most crop damage.