Introduction

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

Synopsis:

1. Loaded the data and processed it into machine-readable formats. 2. Calculated totals and averages per year for each of three categories: fatalities, injuries, and economic damage. 3. Identified the top 5 weather events most harmful to the population and the top 5 most economically harmful weather events, as averaged per year. 4. Plotted the data.

Data Processing:

Reading in data

# Download the compressed storm database to a throw-away file, parse it as
# CSV (read.csv decompresses the bz2 stream transparently), then delete the
# temporary copy. Character columns are kept as plain strings.
x <- tempfile()
download.file(
  url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
  destfile = x
)
rawData <- read.csv(x, stringsAsFactors = FALSE)
file.remove(x)

The events in the database start in the year 1950 and end in November 2011. In the earlier years of the database there are generally fewer events recorded, most likely due to a lack of good records. More recent years should be considered more complete.

Processing data

Correcting and converting dates and time of day into workable format

# Parse the event start date (month/day/four-digit-year) into a Date, and make
# sure the start time is held as character text.
rawData[["BGN_DATE"]] <- as.Date(rawData[["BGN_DATE"]], format = "%m/%d/%Y")
rawData[["BGN_TIME"]] <- as.character(rawData[["BGN_TIME"]])

library(dplyr)

# Treat the event type as a categorical variable; its levels are used later
# to label the per-event summaries.
rawData$EVTYPE <- factor(rawData$EVTYPE)

Converting property and crop damage amounts into standard amounts

# Multipliers for the damage-exponent codes: thousands, millions, billions.
damageMultiplier <- c(K = 1e3, M = 1e6, B = 1e9)

# Scale raw damage figures by their exponent code.
#   amount   numeric vector of damage figures (PROPDMG / CROPDMG)
#   exponent character vector of exponent codes, same length as `amount`
# Returns `amount` multiplied by the factor for its code. Matching is
# case-insensitive so lower-case codes such as "m" are scaled as well; any
# other code (blank, missing, or unrecognised) leaves the amount unchanged.
scaleDamage <- function(amount, exponent) {
  multiplier <- unname(damageMultiplier[toupper(exponent)])
  multiplier[is.na(multiplier)] <- 1
  amount * multiplier
}

rawData$PROPDMG <- scaleDamage(rawData$PROPDMG, rawData$PROPDMGEXP)
rawData$CROPDMG <- scaleDamage(rawData$CROPDMG, rawData$CROPDMGEXP)

# Making a total for overall economic consequence: property plus crop damage
rawData$totalEcoDmg <- rawData$PROPDMG + rawData$CROPDMG

Results

##Average fatalities, injuries, and economic damage per recorded event type per year


# Sorting data into years (as factors).
# The calendar year is extracted with base format(), so this step does not
# depend on a date-handling package being attached (only dplyr is loaded
# before this point in the document).
rawData$YEAR <- factor(as.integer(format(rawData$BGN_DATE, "%Y")))

# Grouped view of the same data by year (dplyr).
refinedData <- group_by(rawData, YEAR)

# Yearly totals per event type: one row per year (levels of YEAR) and one
# column per event type (levels of EVTYPE). A cell is NA when an event type
# has no records in that year, so the column means taken afterwards average
# only over years in which the event was actually recorded.
# Despite the "Means" suffix, these tables hold per-year sums at this stage.
#
# A single two-way tapply() per measure builds the whole table at once,
# rather than appending one row per year with rbind() inside a loop (which
# re-copies the accumulated table on every iteration).
#   values  numeric vector aligned with the rows of rawData
sumByYearAndEvent <- function(values) {
  as.data.frame(tapply(values, list(rawData$YEAR, rawData$EVTYPE), sum))
}

fatalMeans <- sumByYearAndEvent(rawData$FATALITIES)
injuryMeans <- sumByYearAndEvent(rawData$INJURIES)
ecodmgMeans <- sumByYearAndEvent(rawData$totalEcoDmg)


# Collapse a year-by-event table of totals into a one-row data frame holding,
# for each event type, the mean over the years in which it has a value
# (NA cells are ignored). Columns are labelled with the EVTYPE levels.
averageOverYears <- function(yearlyTotals) {
  averages <- rbind(yearlyTotals[0, ], colMeans(yearlyTotals, na.rm = TRUE))
  colnames(averages) <- levels(rawData$EVTYPE)
  averages
}

# Weeding out uninteresting values: keep only event types whose average is
# strictly positive. which() skips any NA/NaN average, and drop = FALSE keeps
# the result a one-row data frame even if a single column survives.
keepPositive <- function(averages) {
  averages[, which(unlist(averages[1, ]) > 0), drop = FALSE]
}

# Reorder the columns of a one-row data frame from largest to smallest value.
# The columns are ordered explicitly via order() on the row's values instead
# of calling sort() on the data frame itself, which base R does not support
# reliably for data frames.
sortDescending <- function(averages) {
  averages[, order(unlist(averages[1, ]), decreasing = TRUE), drop = FALSE]
}

fatalMeans1 <- averageOverYears(fatalMeans)
injuryMeans1 <- averageOverYears(injuryMeans)
ecodmgMeans1 <- averageOverYears(ecodmgMeans)

fatalMeans2 <- keepPositive(fatalMeans1)
injuryMeans2 <- keepPositive(injuryMeans1)
ecodmgMeans2 <- keepPositive(ecodmgMeans1)

## Getting the event types ranked by average, largest first, for each of the
## three categories (the top 5 are the first five columns)
top5fatal <- sortDescending(fatalMeans2)
top5injury <- sortDescending(injuryMeans2)
top5ecodmg <- sortDescending(ecodmgMeans2)

The top 5 weather events for fatalities (average deaths per year), injuries (average injuries per year), and economic damage (average cost in dollars per year), along with their impact, are shown below.

# Each top5* object was sorted in decreasing order above, so its first five
# entries are the five highest-ranked event types. The "##" lines are the
# recorded console output of the statement that precedes them.

# Highest average fatalities per year
top5fatal[1:5]
##   EXCESSIVE HEAT  TORNADO     HEAT HEAT WAVE FLASH FLOOD
## 1       105.7222 90.85484 72.07692  57.33333    51.47368
# Highest average injuries per year
top5injury[1:5]
##    TORNADO EXCESSIVE HEAT    FLOOD HURRICANE/TYPHOON THUNDERSTORM WINDS
## 1 1473.323          362.5 357.3158            318.75           302.6667
# Highest average economic damage (property + crop) per year
top5ecodmg[1:5]
##   HURRICANE/TYPHOON      FLOOD STORM SURGE HURRICANE OPAL
## 1       17978428200 7911562014  3610295083     3161846030
##   HEAVY RAIN/SEVERE WEATHER
## 1                   2.5e+09

Graphs showing public health costs and economic consequences by event type.

library(ggplot2)
# Each *Means2 object is a one-row data frame (row name "1") with one column
# per event type. Transposing it gives one row per event type and a single
# value column named `1`, which is plotted as a point per event type.
# The plots are built with ggplot() + geom_point() rather than qplot(), which
# is deprecated in current ggplot2 releases.

fM2 <- as.data.frame(t(fatalMeans2))
ggplot(fM2, aes(x = rownames(fM2), y = `1`)) +
  geom_point() +
  labs(
    x = "Weather Events",
    y = "Average fatalities per year",
    title = "Average fatalities per year by weather event"
  )

iM2 <- as.data.frame(t(injuryMeans2))
ggplot(iM2, aes(x = rownames(iM2), y = `1`)) +
  geom_point() +
  labs(
    x = "Weather Events",
    y = "Average injuries per year",
    title = "Average injuries per year by weather event"
  )

eM2 <- as.data.frame(t(ecodmgMeans2))
ggplot(eM2, aes(x = rownames(eM2), y = `1`)) +
  geom_point() +
  labs(
    x = "Weather Events",
    y = "Average economic damage per year",
    # Title previously read "Average economic per year ..." (word missing).
    title = "Average economic damage per year by weather event"
  )