Synopsis

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

Data Processing

The data for this assignment can be downloaded from the course web site: [Storm Data](https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2) [47 MB].

The 37 variables included in this dataset are: "STATE__", "BGN_DATE", "BGN_TIME", "TIME_ZONE", "COUNTY", "COUNTYNAME", "STATE", "EVTYPE", "BGN_RANGE", "BGN_AZI", "BGN_LOCATI", "END_DATE", "END_TIME", "COUNTY_END", "COUNTYENDN", "END_RANGE", "END_AZI", "END_LOCATI", "LENGTH", "WIDTH", "F", "MAG", "FATALITIES", "INJURIES", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP", "WFO", "STATEOFFIC", "ZONENAMES", "LATITUDE", "LONGITUDE", "LATITUDE_E", "LONGITUDE_", "REMARKS", "REFNUM".

The dataset is stored in a comma-separated-value (CSV) file and there are a total of 902,297 observations in this dataset.

# Setup: load packages, fetch the storm archive once, and read it into `Data`.
# NOTE: the hard-coded setwd() to a personal directory was removed so the
# analysis runs on any machine; all paths below are relative to the current
# working directory.
library(tidyverse)
library(lubridate)
library(ggplot2)
library(gridExtra)

# Read data
# Download the compressed archive only when it is not already on disk. The
# existence check must name the same file that download.file() writes;
# checking a different name would re-download the archive on every run.
storm_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "stormData.csv.bz2"
if (!file.exists(storm_file)) {
  # The archive is binary, so request binary write mode.
  download.file(url = storm_url, destfile = storm_file,
                method = "curl", mode = "wb")
}

# read.csv() reads the CSV through the bzfile() decompression connection.
Data <- read.csv(bzfile(storm_file), header = TRUE, stringsAsFactors = FALSE)

# Clean up event type preliminarily for letter cases, so that event names
# differing only in capitalisation fall into the same group later on.
Data$EVTYPE <- tolower(Data$EVTYPE)

Results

# Question 1: Across the United States, which types of events (as indicated in
# the EVTYPE variable) are most harmful with respect to population health?
# Harm is measured with the FATALITIES and INJURIES columns: both are totalled
# per event type, then event types are ranked by fatalities first and by
# injuries second (both descending).
events_grouped <- group_by(Data, EVTYPE)
health_totals <- summarize(
    events_grouped,
    FATALITIES = sum(FATALITIES, na.rm = TRUE),
    INJURIES = sum(INJURIES, na.rm = TRUE)
)
Data_new1 <- arrange(health_totals, desc(FATALITIES), desc(INJURIES))
head(Data_new1)
## # A tibble: 6 x 3
##   EVTYPE         FATALITIES INJURIES
##   <chr>               <dbl>    <dbl>
## 1 tornado              5633    91346
## 2 excessive heat       1903     6525
## 3 flash flood           978     1777
## 4 heat                  937     2100
## 5 lightning             816     5230
## 6 tstm wind             504     6957
# Plots
# Build the two top-10 tables used by the bar charts.
# Each table is sorted explicitly and the ranking column is passed to top_n()
# by name, instead of relying on the upstream row order of Data_new1 and on
# top_n()'s default of ranking by the last column (which also prints a
# "Selecting by ..." message). top_n() keeps ties, so a table may hold more
# than 10 rows.
plot_FAT <- Data_new1 %>%
    select(EVTYPE, FATALITIES) %>%
    arrange(desc(FATALITIES)) %>%
    top_n(10, FATALITIES)
plot_INJ <- Data_new1 %>%
    select(EVTYPE, INJURIES) %>%
    arrange(desc(INJURIES)) %>%
    top_n(10, INJURIES)
# Bar charts of the two top-10 tables, drawn side by side.
# fct_inorder() fixes the bar order to the row order of each table, so the
# bars are not re-sorted alphabetically by event name.
tilted_x_labels <- theme(axis.text.x = element_text(angle = 45, hjust = 1))

p1 <- ggplot(plot_FAT, aes(x = fct_inorder(EVTYPE), y = FATALITIES)) +
    geom_col() +
    labs(
        title = "Top 10 Fatalities",
        x = "Event Type",
        y = "Number of Fatalities"
    ) +
    tilted_x_labels

p2 <- ggplot(plot_INJ, aes(x = fct_inorder(EVTYPE), y = INJURIES)) +
    geom_col() +
    labs(
        title = "Top 10 Injuries",
        x = "Event Type",
        y = "Number of Injuries"
    ) +
    tilted_x_labels

# One row, two panels: fatalities on the left, injuries on the right.
grid.arrange(p1, p2, nrow = 1)

As the results show, tornadoes are the most harmful event type for population health, causing 91,346 injuries and 5,633 fatalities. Ranked by fatalities, excessive heat, flash floods, and heat are the next most harmful events.

# Question 2: Across the United States, which types of events have the
# greatest economic consequences?
# Economic harm is measured with the PROPDMG and CROPDMG columns: both are
# totalled per event type, added together as DMG, and event types are ranked
# by DMG (descending).
# NOTE(review): the PROPDMGEXP / CROPDMGEXP columns are not used here, so DMG
# is a sum of the raw PROPDMG and CROPDMG figures. Presumably those columns
# carry magnitude codes for the damage values -- confirm against the NOAA
# documentation before reading DMG as a dollar amount.
damage_grouped <- group_by(Data, EVTYPE)
damage_totals <- summarize(
    damage_grouped,
    PROPDMG = sum(PROPDMG, na.rm = TRUE),
    CROPDMG = sum(CROPDMG, na.rm = TRUE)
)
damage_totals <- mutate(damage_totals, DMG = PROPDMG + CROPDMG)
damage_totals <- select(damage_totals, EVTYPE, DMG, PROPDMG, CROPDMG)
Data_new2 <- arrange(damage_totals, desc(DMG))
head(Data_new2)
## # A tibble: 6 x 4
##   EVTYPE                 DMG  PROPDMG CROPDMG
##   <chr>                <dbl>    <dbl>   <dbl>
## 1 tornado           3312277. 3212258. 100019.
## 2 flash flood       1599325. 1420125. 179200.
## 3 tstm wind         1445198. 1335996. 109203.
## 4 hail              1268290.  688693. 579596.
## 5 flood             1067976.  899938. 168038.
## 6 thunderstorm wind  943636.  876844.  66791.
# Plots
# Top-10 event types by total damage. The table is sorted explicitly and the
# ranking column is passed to top_n() by name, instead of relying on the
# upstream row order of Data_new2 and on top_n()'s default of ranking by the
# last column (which also prints a "Selecting by ..." message). top_n() keeps
# ties, so the table may hold more than 10 rows.
plot_data <- Data_new2 %>%
    select(EVTYPE, DMG) %>%
    arrange(desc(DMG)) %>%
    top_n(10, DMG)

# fct_inorder() keeps the bars in table row order (largest total first).
ggplot(plot_data, aes(x = fct_inorder(EVTYPE), y = DMG)) +
    geom_bar(stat = "identity") +
    labs(x = "Event Type", y = "Amount of Total Damage") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    ggtitle("Top 10 Total Damage (Property Damage + Crop Damage)")

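As the results show, tornadoes have the greatest economic consequences, with a total damage figure of 3,312,277 (about 3.31 × 10^{6}). Flash floods, thunderstorm winds ("tstm wind"), and hail are the next most damaging events. Note that these totals are sums of the raw PROPDMG and CROPDMG values; the PROPDMGEXP and CROPDMGEXP columns are not applied, so the figures should not be read as dollar amounts.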