US Storm Economic and Population Information

Synopsis

Data was imported from the file repdata_data_StormData.csv and transformed to identify the types of storms across the US that are most harmful to the human population and to the economy. The process for this data analysis is detailed below.

Data Processing

Read the file into R to create a raw data table, then load the packages used in the analysis.

# Load the raw storm records; the relative path means the CSV is looked up
# in the current working directory.
initialStormData <- read.csv(file = "repdata_data_StormData.csv")
library(ggplot2)
library(magrittr)
library(stringr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Part I: Identify the most harmful events by injuries and fatalities

# Build a casualty table: event type plus fatality and injury counts.
# The column names are spelled out (they match what data.frame() would
# derive automatically) because later steps refer to initialStormData.EVTYPE.
fiData <- data.frame(
    initialStormData.EVTYPE = initialStormData$EVTYPE,
    initialStormData.FATALITIES = initialStormData$FATALITIES,
    initialStormData.INJURIES = initialStormData$INJURIES
)
# Total casualties per record = fatalities + injuries
fiData$Total <- fiData$initialStormData.FATALITIES +
    fiData$initialStormData.INJURIES

# Sum casualties per event type and rank from most to least harmful
fiEVData <- fiData %>%
    group_by(initialStormData.EVTYPE) %>%
    summarise(Total = sum(Total, na.rm = TRUE)) %>%
    arrange(desc(Total))
# Keep the ten most harmful event types; head() returns fewer rows instead
# of NA padding if there are fewer than ten event types.
fiTop <- head(fiEVData, 10)
summary(fiTop)
##  initialStormData.EVTYPE     Total      
##  Length:10               Min.   : 1527  
##  Class :character        1st Qu.: 2237  
##  Mode  :character        Median : 4542  
##                          Mean   :13718  
##                          3rd Qu.: 7410  
##                          Max.   :96979
# Bar chart of total casualties for each of the top ten event types.
# The plot object is stored in fiplot; assigning it does not draw it.
fiplot <- ggplot(fiTop, aes(x = initialStormData.EVTYPE, y = Total)) +
    geom_col() +
    theme_minimal() +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    labs(x = "", y = "Population", title = "Fatalities and Injuries by Storm Type")

Part II: Identify the events causing the greatest economic damage

# Translate a damage-exponent code into its dollar multiplier.
# K, M and B (matched in either case) mean thousand, million and billion.
# Any other code (blank, digits, symbols) yields NA, so the row contributes
# no damage below.
# NOTE(review): confirm how blank/numeric exponent codes should be valued.
dmgMultiplier <- function(code) {
    units <- c(K = 1e3, M = 1e6, B = 1e9)
    unname(units[toupper(as.character(code))])
}

# Economic columns, selected by name rather than by position so the step
# does not silently break if the column order of the input changes
ecoData <- select(initialStormData, EVTYPE, PROPDMG, PROPDMGEXP,
                  CROPDMG, CROPDMGEXP)

# Convert the recorded magnitudes into actual dollar amounts
ecoData$PROPDMG <- ecoData$PROPDMG * dmgMultiplier(ecoData$PROPDMGEXP)
ecoData$CROPDMG <- ecoData$CROPDMG * dmgMultiplier(ecoData$CROPDMGEXP)

# Total economic damage = property + crop damage, with NA counted as 0.
# Each term is coalesced separately: coalesce(a, b, 0) would pick the first
# non-missing value instead of adding the two together.
ecoData$ecoDMG <- coalesce(ecoData$PROPDMG, 0) + coalesce(ecoData$CROPDMG, 0)

# Sum damage per event type and rank from most to least costly
ecoEVData <- ecoData %>%
    group_by(EVTYPE) %>%
    summarise(ecoDMG = sum(ecoDMG, na.rm = TRUE)) %>%
    arrange(desc(ecoDMG))
# Keep the ten costliest event types (fewer if fewer exist)
ecoTop <- head(ecoEVData, 10)

Results

# Figure 1: total fatalities plus injuries for the top ten event types.
# reorder() sorts the bars from most to fewest casualties; without it
# ggplot2 lays the event types out alphabetically, which hides the ranking.
ggplot(fiTop, aes(reorder(initialStormData.EVTYPE, -Total), Total)) +
    geom_col() +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
    xlab("Event Type") + ylab("Population") +
    ggtitle("Fatalities and Injuries by Storm Type")

Figure 1: The top 10 casualty-inducing event types.

# Figure 2: total economic damage for the top ten event types.
# reorder() sorts the bars from most to least costly; without it ggplot2
# lays the event types out alphabetically, which hides the ranking.
ggplot(ecoTop, aes(reorder(EVTYPE, -ecoDMG), ecoDMG)) +
    geom_col() +
    theme_classic() +
    theme(axis.text.x = element_text(angle = 30, hjust = 1)) +
    xlab("Event Type") + ylab("Cost (USD)") +
    ggtitle("Economic Damage by Storm Type")

Figure 2: The top 10 most economically damaging event types.