Synopsis

Based on an analysis of the data available in the NOAA Storm Database, tornadoes are, by far, the weather phenomenon responsible for the most economic damage in the United States. They are also responsible for the largest number of injuries and human fatalities.

Introduction

The primary objective of this study is to examine the NOAA Storm Database and provide basic responses to inquiries concerning severe weather incidents.

Materials and Methods

Loading R packages needed for the study.

library(tidyverse)
library(lubridate)
library(gridExtra)

Data Processing

Downloading the data in R

# Location of the compressed NOAA storm data set and the local file name
# it is stored under.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
storm_file <- "StormData.csv.bz2"

# Download only when the archive is not already present, so re-running the
# analysis does not fetch the file again. The default download method is used
# (no dependency on an external `curl` binary) and mode = "wb" keeps the
# compressed archive byte-exact on every platform.
if (!file.exists(storm_file)) {
  download.file(fileUrl, storm_file, mode = "wb")
}

# read.csv() reads the bz2-compressed file directly.
meteo_data <- read.csv(storm_file, header = TRUE, sep = ",")

Taking a first look at the dataset.

# Print a compact overview of the data frame: dimensions, column names,
# column types and the first few values of each column.
utils::str(object = meteo_data)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...

Parsing the dates with the lubridate package.

# BGN_DATE arrives as text such as "4/18/1950 0:00:00" (month/day/year plus
# a time), so it is converted to date-times with lubridate's mdy_hms().
# EVTYPE is coerced to character so it is plain text regardless of how
# read.csv() imported it.
meteo_data <- meteo_data %>%
  mutate(
    BGN_DATE = mdy_hms(BGN_DATE),
    EVTYPE = as.character(EVTYPE)
  )

# The parsed dates are also kept as a standalone vector.
BGN_DATE <- meteo_data$BGN_DATE

Results

Using aggregate() to sum the fatalities and injuries caused by each type of weather phenomenon.

# Total fatalities per event type, printed from the deadliest event down.
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = meteo_data, FUN = sum)
fatalities %>%
  arrange(desc(FATALITIES))

# Total injuries per event type, printed from the most injurious event down.
injuries <- aggregate(INJURIES ~ EVTYPE, data = meteo_data, FUN = sum)
injuries %>%
  arrange(desc(INJURIES))

Plotting the results. Tornadoes rank first for both fatalities and injuries.

# Bar chart of the ten event types with the most fatalities, bars ordered
# from the largest total to the smallest and x labels rotated to stay legible.
g1 <- fatalities %>%
  arrange(desc(FATALITIES)) %>%
  top_n(10, FATALITIES) %>%
  ggplot(aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)) +
  geom_col() +
  scale_x_discrete(guide = guide_axis(angle = 90)) +
  labs(title = "Top 10 Weather Events for fatalities", x = NULL)

# Same chart for injuries.
g2 <- injuries %>%
  arrange(desc(INJURIES)) %>%
  top_n(10, INJURIES) %>%
  ggplot(aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)) +
  geom_col() +
  scale_x_discrete(guide = guide_axis(angle = 90)) +
  labs(title = "Top 10 Weather Events for injuries", x = NULL)

# Show both charts side by side.
grid.arrange(g1, g2, ncol = 2)

Calculating the economic burden.

# Translate a damage-magnitude code into a numeric multiplier.
#
# PROPDMG / CROPDMG hold only the significant figures of a damage estimate;
# the companion columns PROPDMGEXP / CROPDMGEXP carry the magnitude
# (e.g. "K"). Summing the raw figures without this scaling mixes thousands,
# millions and billions of dollars.
#
# exp_code: vector of magnitude codes (any case, may be empty or NA).
# Returns a numeric vector of the same length: 1e3 for "K", 1e6 for "M",
# 1e9 for "B" (the codes described in the NOAA Storm Data documentation).
# "H" is assumed to mean hundreds -- TODO confirm. Blank, NA or any other
# undocumented code is left unscaled (multiplier 1).
damage_multiplier <- function(exp_code) {
  code <- toupper(trimws(as.character(exp_code)))
  magnitude <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(magnitude[code])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Damage per record expressed in dollars. The original PROPDMG / CROPDMG
# columns of meteo_data are left untouched; the scaled values live in this
# helper table under the same column names so the summaries below keep
# their usual shape.
damage_usd <- meteo_data %>%
  transmute(
    EVTYPE,
    PROPDMG = PROPDMG * damage_multiplier(PROPDMGEXP),
    CROPDMG = CROPDMG * damage_multiplier(CROPDMGEXP)
  )

# Total crop damage (USD) per event type, printed from largest to smallest.
crop_damage <- aggregate(CROPDMG ~ EVTYPE, data = damage_usd, FUN = sum)
arrange(crop_damage, desc(CROPDMG))

# Total property damage (USD) per event type, printed from largest to smallest.
prop_damage <- aggregate(PROPDMG ~ EVTYPE, data = damage_usd, FUN = sum)
arrange(prop_damage, desc(PROPDMG))

# NOTE(review): with the magnitudes applied, the ranking of event types can
# differ from a ranking of the raw, unscaled figures -- re-check the narrative
# conclusions against these plots.
g3 <- prop_damage %>%
  arrange(desc(PROPDMG)) %>%
  top_n(10, PROPDMG) %>%
  ggplot(aes(x = reorder(EVTYPE, -PROPDMG), y = PROPDMG)) +
  geom_bar(stat = "identity") +
  scale_x_discrete(guide = guide_axis(angle = 90)) +
  labs(x = NULL, y = "Property damage (USD)",
       title = "Top 10 Events for property damage")

g4 <- crop_damage %>%
  arrange(desc(CROPDMG)) %>%
  top_n(10, CROPDMG) %>%
  ggplot(aes(x = reorder(EVTYPE, -CROPDMG), y = CROPDMG)) +
  geom_bar(stat = "identity") +
  scale_x_discrete(guide = guide_axis(angle = 90)) +
  labs(x = NULL, y = "Crop damage (USD)",
       title = "Top 10 Events for crop damage")

# Show both charts side by side.
grid.arrange(g3, g4, ncol = 2)

# Total economic damage per record in dollars: property plus crop damage,
# each scaled by its own magnitude code.
meteo_data <- mutate(
  meteo_data,
  TOTDMG = PROPDMG * damage_multiplier(PROPDMGEXP) +
    CROPDMG * damage_multiplier(CROPDMGEXP)
)

Calculating the total economic consequences of the most severe weather conditions.

# Sum the combined damage column (TOTDMG) per event type and print the
# result from the most damaging event type down.
tot_damage <- aggregate(TOTDMG ~ EVTYPE, data = meteo_data, FUN = sum)
tot_damage %>%
  arrange(desc(TOTDMG))

# Bar chart of the ten event types with the highest combined damage,
# bars ordered from largest to smallest.
tot_damage %>%
  arrange(desc(TOTDMG)) %>%
  top_n(10, TOTDMG) %>%
  ggplot(aes(x = reorder(EVTYPE, -TOTDMG), y = TOTDMG)) +
  geom_col() +
  scale_x_discrete(guide = guide_axis(angle = 90)) +
  labs(title = "Top 10 Events for economic damage", x = NULL)

Conclusion

Tornadoes are, by far, the weather phenomenon responsible for the most economic damage in the United States. They are also responsible for the largest number of injuries and human fatalities.