Synopsis

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

This document explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, which tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. In the results below, tornadoes account for the most fatalities and injuries, while floods account for the largest combined property and crop damage.

Data Processing (Downloading, Loading and Cleaning Data)

## Installs (if not yet installed) and loads the required packages
if(!require(png)){
  install.packages("png")
  library(png)
}
## Loading required package: png
if(!require(plyr)){
  install.packages("plyr")
  library(plyr)
}
## Loading required package: plyr
## Warning: package 'plyr' was built under R version 3.6.3
if(!require(dplyr)){
  install.packages("dplyr")
  library(dplyr)
}
## Loading required package: dplyr
## Warning: package 'dplyr' was built under R version 3.6.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
if(!require(knitr)){
  install.packages("knitr")
  library(knitr)
}
## Loading required package: knitr
## Warning: package 'knitr' was built under R version 3.6.3
library(png)
library(plyr)
library(dplyr)
library(knitr) 

Reads and cleans data

# Load the raw storm data; the CSV is expected in the working directory.
stormx <- read.csv("./repdata_data_StormData.csv")

# Location codes kept for this analysis: the 50 state codes plus DC and the
# non-state codes GU, MH, PR, PW and VI.
# Stored as a character vector (not a list) so `%in%` compares like with like
# instead of relying on implicit list-to-character coercion.
Statelist <- c(
  "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DC", "DE", "FL", "GA", "GU",
  "HI", "IA", "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME", "MH",
  "MI", "MN", "MO", "MS", "MT", "NC", "ND", "NE", "NH", "NJ", "NM", "NV",
  "NY", "OH", "OK", "OR", "PA", "PR", "PW", "RI", "SC", "SD", "TN", "TX",
  "UT", "VA", "VI", "VT", "WA", "WI", "WV", "WY"
)

# Remove unrelated rows: keep only those whose STATE code is in Statelist.
storm <- stormx[stormx$STATE %in% Statelist, ]

# Remove columns not needed for this analysis. The call is namespace-qualified
# because plyr and dplyr are both attached and mask each other's verbs.
storm <- dplyr::select(
  storm,
  EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
)

The next section replaces the exponent codes in the PROPDMGEXP and CROPDMGEXP columns with numeric multipliers.

The code-to-multiplier mapping is taken from https://rpubs.com/flyingdisc/PROPDMGEXP

# Creates a backup CSV file, then re-reads it with strings as characters
# (stringsAsFactors = FALSE) so the exponent codes are plain text.
# row.names = FALSE stops write.csv from emitting the row index as an extra
# leading column, which would otherwise come back as a spurious "X" column.
write.csv(storm, "storm.csv", row.names = FALSE)
storm <- read.csv("storm.csv", row.names = NULL, stringsAsFactors = FALSE)


## Lookup vectors for the exponent codes: element i of PROPDMGEXP is the code
## and element i of PROPREPVAL is its multiplier (kept as text, as before).
PROPDMGEXP <- c(
  "H", "h", "K", "k", "M", "m", "B", "b",
  "+", "-", "?",
  "1", "2", "3", "4", "5", "6", "7", "8", "0",
  ""
)
PROPREPVAL <- c(
  rep(c("100", "1000", "1000000", "1000000000"), each = 2),
  "1", "0", "0",
  rep("10", 9),
  "0"
)

## Crop damage uses exactly the same codes and multipliers.
CROPDMGEXP <- PROPDMGEXP
CROPREPVAL <- PROPREPVAL


# Attach the multiplier columns to the storm data with two left joins:
# all.x = TRUE keeps every storm row, whether or not its code has a match.
propexpdf <- data.frame(PROPDMGEXP = PROPDMGEXP, PROPREPVAL = PROPREPVAL)
storm2 <- merge(storm, propexpdf, by = "PROPDMGEXP", all.x = TRUE)

cropexpdf <- data.frame(CROPDMGEXP = CROPDMGEXP, CROPREPVAL = CROPREPVAL)
storm3 <- merge(storm2, cropexpdf, by = "CROPDMGEXP", all.x = TRUE)

# Creates another backup prior to plotting, then re-reads it with text
# columns as factors (stringsAsFactors = TRUE).
# row.names = FALSE stops write.csv from emitting the row index as an extra
# leading column, which would otherwise come back as a spurious "X" column.
write.csv(storm3, "storm3.csv", row.names = FALSE)
storm3 <- read.csv("storm3.csv", row.names = NULL, stringsAsFactors = TRUE)

Results

Aggregates fatalities by event type and shows the five event types with the most fatalities

  # Total fatalities per event type, sorted from most to fewest.
  deaths <- aggregate(FATALITIES ~ EVTYPE, data = storm3, FUN = sum)
  deaths <- deaths[order(-deaths$FATALITIES), ]
  # head() returns fewer rows when fewer than five event types exist, whereas
  # indexing with 1:5 would pad the table with all-NA rows.
  deaths <- head(deaths, 5)
  print(deaths)
##             EVTYPE FATALITIES
## 826        TORNADO       5633
## 129 EXCESSIVE HEAT       1903
## 152    FLASH FLOOD        974
## 274           HEAT        937
## 461      LIGHTNING        815
  # Draw the bar chart into plot1.png, then read the file back and display it.
  png("plot1.png", width = 800, height = 600)
  # names.arg is spelled out in full instead of relying on partial matching
  # of the abbreviated `names`.
  plot1 <- barplot(
    deaths$FATALITIES,
    names.arg = deaths$EVTYPE,
    xlab = "Events",
    ylab = "Fatalities",
    main = "Deaths by Event Type"
  )
  dev.off()
## png 
##   2
  img1 <- readPNG("./plot1.png")
  grid::grid.raster(img1)

Aggregates injuries by event type and shows the five event types with the most injuries

  # Total injuries per event type, sorted from most to fewest.
  injur <- aggregate(INJURIES ~ EVTYPE, data = storm3, FUN = sum)
  injur <- injur[order(-injur$INJURIES), ]
  # head() returns fewer rows when fewer than five event types exist, whereas
  # indexing with 1:5 would pad the table with all-NA rows.
  injur <- head(injur, 5)
  print(injur)
##             EVTYPE INJURIES
## 826        TORNADO    91346
## 848      TSTM WIND     6957
## 169          FLOOD     6789
## 129 EXCESSIVE HEAT     6525
## 461      LIGHTNING     5229
  # Draw the bar chart into plot2.png, then read the file back and display it.
  png("plot2.png", width = 800, height = 600)
  # names.arg is spelled out in full instead of relying on partial matching
  # of the abbreviated `names`.
  plot2 <- barplot(
    injur$INJURIES,
    names.arg = injur$EVTYPE,
    xlab = "Events",
    ylab = "Injuries",
    main = "Injuries by Event Type"
  )
  dev.off()
## png 
##   2
  img2 <- readPNG("./plot2.png")
  grid::grid.raster(img2)

# Set factors as numeric  
# Coerce the four damage/multiplier columns to numeric. Going through
# as.character() first means a factor column yields its labels, not its
# internal integer codes.
storm3[c("CROPDMG", "CROPREPVAL", "PROPDMG", "PROPREPVAL")] <- lapply(
  storm3[c("CROPDMG", "CROPREPVAL", "PROPDMG", "PROPREPVAL")],
  function(column) as.numeric(as.character(column))
)

Multiplies each crop and property damage figure by its exponent multiplier to obtain the damage amounts, then adds the two amounts to get the total economic cost.

# Each step copies the previous data frame and appends one derived column:
# crop damage amount, property damage amount, then their sum.
storm4 <- storm3
storm4[["CROPTOTAL"]] <- storm4[["CROPREPVAL"]] * storm4[["CROPDMG"]]

storm5 <- storm4
storm5[["PROPTOTAL"]] <- storm5[["PROPREPVAL"]] * storm5[["PROPDMG"]]

storm6 <- storm5
storm6[["COSTTOTAL"]] <- storm6[["CROPTOTAL"]] + storm6[["PROPTOTAL"]]

# Total economic cost per event type, sorted from largest to smallest.
cost <- aggregate(COSTTOTAL ~ EVTYPE, data = storm6, FUN = sum)
cost <- cost[order(-cost$COSTTOTAL), ]
# head() returns fewer rows when fewer than five event types exist, whereas
# indexing with 1:5 would pad the table with all-NA rows.
cost <- head(cost, 5)
print(cost)
##                EVTYPE    COSTTOTAL
## 169             FLOOD 150319131250
## 408 HURRICANE/TYPHOON  71853560800
## 826           TORNADO  57352117607
## 662       STORM SURGE  43323541000
## 243              HAIL  18758224527
# Draw the bar chart into plot3.png, then read the file back and display it.
png("plot3.png", width = 800, height = 600)
# names.arg is spelled out in full instead of relying on partial matching
# of the abbreviated `names`.
plot3 <- barplot(
  cost$COSTTOTAL,
  names.arg = cost$EVTYPE,
  xlab = "Type of Event",
  ylab = "Total Economic Damage",
  main = "Total Damage Costs by Event Type"
)
dev.off()
## png 
##   2
img3 <- readPNG("./plot3.png")
grid::grid.raster(img3)