Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
1. Loaded and processed the data into machine-readable formats. 2. Calculated totals and averages for each of three categories per year: fatalities, injuries, and economic damage. 3. Calculated the top 5 weather events most harmful to the population and the top 5 most economically harmful weather events, as averaged per year. 4. Plotted the data.
# Download the compressed NOAA storm data to a temporary file, load it into
# rawData, then delete the temporary file.
stormUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
x <- tempfile(fileext = ".csv.bz2")
# The archive is binary: mode = "wb" prevents the download from being
# translated as text (which corrupts the file on Windows).
download.file(stormUrl, x, mode = "wb")
# read.csv() reads the bzip2-compressed file directly.
rawData <- read.csv(file = x, sep = ",", stringsAsFactors = FALSE)
file.remove(x)
The events in the database start in the year 1950 and end in November 2011. In the earlier years of the database there are generally fewer events recorded, most likely due to a lack of good records. More recent years should be considered more complete.
library(dplyr)
# Put the columns used later into their working types in one pass:
#   BGN_DATE -> Date (parsed as month/day/year)
#   BGN_TIME -> character
#   EVTYPE   -> factor
rawData <- mutate(
  rawData,
  BGN_DATE = as.Date(BGN_DATE, format = "%m/%d/%Y"),
  BGN_TIME = as.character(BGN_TIME),
  EVTYPE = factor(EVTYPE)
)
# Convert the recorded damage figures to plain amounts.
# PROPDMGEXP / CROPDMGEXP carry a magnitude code for the matching *DMG column:
# K = thousands, M = millions, B = billions. Codes are matched
# case-insensitively, so any entries recorded as "k", "m" or "b" are scaled
# too; every other code (blank, NA, or anything else) leaves the figure as
# recorded.
# NOTE: PROPDMG and CROPDMG are overwritten in place, so running this section
# twice on the same rawData scales the figures twice. Results computed before
# lowercase codes were recognised may differ; re-run the report to refresh any
# pasted output.
dmgMultiplier <- c(K = 1e3, M = 1e6, B = 1e9)
scaleDamage <- function(amount, exponent) {
  # Look up the multiplier for each row; unmatched codes come back as NA.
  multiplier <- unname(dmgMultiplier[toupper(exponent)])
  multiplier[is.na(multiplier)] <- 1
  amount * multiplier
}
rawData$PROPDMG <- scaleDamage(rawData$PROPDMG, rawData$PROPDMGEXP)
rawData$CROPDMG <- scaleDamage(rawData$CROPDMG, rawData$CROPDMGEXP)
# Making a total for overall economic consequence (property + crop damage)
rawData$totalEcoDmg <- rawData$PROPDMG + rawData$CROPDMG
##Average fatalities,injuries, and eco dmg per recorded event type per year
# Sorting data into years (as factors).
# The year is taken from BGN_DATE with base format(); this avoids relying on a
# year() helper, which dplyr does not provide.
rawData <- mutate(rawData, YEAR = factor(format(BGN_DATE, "%Y")))
refinedData <- group_by(rawData, YEAR)
# Build a data frame with one row per YEAR level and one column per EVTYPE
# level. Each cell is the sum of `measure` for that year/event pair, or NA when
# the pair has no records. Despite the *Means names, these tables hold yearly
# totals; the averaging over years happens afterwards.
sumByYearAndEvent <- function(measure) {
  totals <- tapply(measure, list(rawData$YEAR, rawData$EVTYPE), sum)
  totals <- as.data.frame(totals)
  # Use plain 1..n row names rather than the year labels.
  rownames(totals) <- NULL
  totals
}
fatalMeans <- sumByYearAndEvent(rawData$FATALITIES)
injuryMeans <- sumByYearAndEvent(rawData$INJURIES)
ecodmgMeans <- sumByYearAndEvent(rawData$totalEcoDmg)
# Collapse each per-year table into a single row of per-event averages, then
# weed out uninteresting values by keeping only the events whose average is
# above zero.
eventLabels <- levels(rawData$EVTYPE)
averageAcrossYears <- function(yearlyTable) {
  # The zero-row slice keeps the column layout; the appended row holds the
  # column means with NA cells ignored.
  averaged <- rbind(yearlyTable[0, ], colMeans(yearlyTable, na.rm = TRUE))
  colnames(averaged) <- eventLabels
  averaged
}
fatalMeans1 <- averageAcrossYears(fatalMeans)
injuryMeans1 <- averageAcrossYears(injuryMeans)
ecodmgMeans1 <- averageAcrossYears(ecodmgMeans)
fatalMeans2 <- fatalMeans1[, fatalMeans1 > 0]
injuryMeans2 <- injuryMeans1[, injuryMeans1 > 0]
ecodmgMeans2 <- ecodmgMeans1[, ecodmgMeans1 > 0]
##Getting top 5 max values for each of the three categories
# Reorder the columns of a one-row average table from largest to smallest.
# The ordering is computed on the numeric values of the row rather than by
# calling sort() on the data frame itself, which newer R versions warn about
# ("cannot xtfrm data frames").
sortColumnsDescending <- function(avgTable) {
  firstRow <- avgTable[1, ]
  firstRow[order(unlist(firstRow), decreasing = TRUE)]
}
# Note: the top5* objects hold every event in descending order; the top five
# are picked out when printing below.
top5fatal <- sortColumnsDescending(fatalMeans2)
top5injury <- sortColumnsDescending(injuryMeans2)
top5ecodmg <- sortColumnsDescending(ecodmgMeans2)
top5fatal[1:5]
## EXCESSIVE HEAT TORNADO HEAT HEAT WAVE FLASH FLOOD
## 1 105.7222 90.85484 72.07692 57.33333 51.47368
top5injury[1:5]
## TORNADO EXCESSIVE HEAT FLOOD HURRICANE/TYPHOON THUNDERSTORM WINDS
## 1 1473.323 362.5 357.3158 318.75 302.6667
top5ecodmg[1:5]
## HURRICANE/TYPHOON FLOOD STORM SURGE HURRICANE OPAL
## 1 17978428200 7911562014 3610295083 3161846030
## HEAVY RAIN/SEVERE WEATHER
## 1 2.5e+09
library(ggplot2)
# Scatter plot of one averaged measure against weather event type.
# avgTable is a transposed average table: one row per event (row names are the
# event names) and a single column named "1" holding the averages.
# Built with ggplot() + geom_point() because qplot() is deprecated in ggplot2.
plotAverages <- function(avgTable, yLabel, plotTitle) {
  ggplot(avgTable, aes(x = rownames(avgTable), y = `1`)) +
    geom_point() +
    labs(x = "Weather Events", y = yLabel, title = plotTitle)
}
fM2 <- as.data.frame(t(fatalMeans2))
plotAverages(fM2, "Average fatalities per year",
             "Average fatalities per year by weather event")
iM2 <- as.data.frame(t(injuryMeans2))
plotAverages(iM2, "Average injuries per year",
             "Average injuries per year by weather event")
eM2 <- as.data.frame(t(ecodmgMeans2))
# Title corrected: the original read "Average economic per year ...".
plotAverages(eM2, "Average economic damage per year",
             "Average economic damage per year by weather event")