Synopsis

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

This project using the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. To unswer the follwing questions:

  1. Across the United States, which types of sever weather events are most harmful with respect to population health?
    Top 10 fatal events: tornado, excessive heat, flash flood, heat, lightning, tstm wind, flood, rip current, high wind, and avalanche
    Top 10 injury events: tornado, tstm wind, flood, excessive heat, lightning, heat, ice storm, flash flood, thunderstorm wind, and hail
  2. Across the United States, which types of sever weather events have the greatest economic consequences?
    Top 10 property damage events: flash flood, thunderstorm winds, tornado, hail, lightning, flood, hurricane typhoon, flooding, storm surge and heavy snow
    Top 10 crop damage events: drought, flood, river flood, ice storm, hail, hurricane, hurricane typhoon, flash flood, extreme cold, and frost freeze

Data resource

The U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

The NOAA webset contains more detailed information about this database.

Data Processing

Download dataset and store data in local folder

setwd("C:/Users/may/Desktop/DataScience/Reproducible_Research/Final_project")
data_raw<-read.csv(bzfile("StormData.csv.bz2"),na.strings = "NA")

Data cleaning

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
data<-select(data_raw,REFNUM, BGN_DATE, STATE, EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP,CROPDMG,CROPDMGEXP)
#clean the EVTYPE(Events type) variable: made all the char to lower case and replace all punct. to space
EVTYPE2<-tolower(data$EVTYPE)
EVTYPE2<-gsub("[[:blank:][:punct:]+]", " ", EVTYPE2)
data$EVTYPE<-as.factor(EVTYPE2)
#clean the PROPDMGEXP and CROPDMGEXP variable
change <- function(i) {
        if (i %in% c('h', 'H'))
                return(2)
        else if (i %in% c('k', 'K'))
                return(3)
        else if (i %in% c('m', 'M'))
                return(6)
        else if (i %in% c('b', 'B'))
                return(9)
        else if (!is.na(as.numeric(i))) # if a digit
                return(as.numeric(i))
        else if (i %in% c('', '-', '?', '+'))
                return(0)
        else {
                stop("Invalid")
        }
}
pr_exp <- sapply(data$PROPDMGEXP, FUN=change)
cr_exp<-sapply(data$CROPDMGEXP, FUN=change)
#Calculate the property damage and crop damage based on PROPDMG/PROPDMGEXP and CROPDMG/CROPDMGEXP
data$PROPDMG2 <- data$PROPDMG * (10 ** pr_exp)
data$CROPDMG2 <- data$CROPDMG * (10 ** cr_exp)

Results

  1. Across the United States, which types of sever weather events are most harmful with respect to population health? In this part the variable FATALITIES and INJURIES are used to evaluate the events
harm<-  data%>%
        select(EVTYPE,FATALITIES,INJURIES)%>%
        group_by(EVTYPE)%>%
        summarise(fatal=sum(FATALITIES),injur=sum(INJURIES))%>%
        mutate(fatInj=fatal+injur)
fatal<- head(harm[order(harm$fatal, decreasing = T), ], 10)
injury <- head(harm[order(harm$injur, decreasing = T), ], 10)
fetal_injury<-head(harm[order(harm$fatInj, decreasing = T), ], 10)

Table1. Top 10 fatal events

fatal[,c("EVTYPE","fatal")]
## # A tibble: 10 x 2
##    EVTYPE         fatal
##    <fct>          <dbl>
##  1 tornado         5633
##  2 excessive heat  1903
##  3 flash flood      978
##  4 heat             937
##  5 lightning        816
##  6 tstm wind        504
##  7 flood            470
##  8 rip current      368
##  9 high wind        248
## 10 avalanche        224

Table2. Top 10 injury evnets

injury[,c("EVTYPE","injur")]
## # A tibble: 10 x 2
##    EVTYPE            injur
##    <fct>             <dbl>
##  1 tornado           91346
##  2 tstm wind          6957
##  3 flood              6789
##  4 excessive heat     6525
##  5 lightning          5230
##  6 heat               2100
##  7 ice storm          1975
##  8 flash flood        1777
##  9 thunderstorm wind  1488
## 10 hail               1361

Table3. Top 10 fatal and injury evnets

fetal_injury[,c("EVTYPE","fatInj")]
## # A tibble: 10 x 2
##    EVTYPE            fatInj
##    <fct>              <dbl>
##  1 tornado            96979
##  2 excessive heat      8428
##  3 tstm wind           7461
##  4 flood               7259
##  5 lightning           6046
##  6 heat                3037
##  7 flash flood         2755
##  8 ice storm           2064
##  9 thunderstorm wind   1621
## 10 winter storm        1527

Plot for the top events

library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
q1<-ggplot(fatal,aes(x=reorder(EVTYPE,-fatal),y=fatal))+
        geom_bar(stat = "identity")+
        xlab("Event type")+
        ylab("Total number of fatalities")+
        theme(axis.text.x = element_text(angle = 45, hjust = 1))+
        ggtitle("Top 10 fatal events")
q2<-ggplot(injury,aes(x=reorder(EVTYPE,-injur),y=injur))+
        geom_bar(stat = "identity")+
        xlab("Event type")+
        ylab("Total number of injuries")+
        theme(axis.text.x = element_text(angle = 45, hjust = 1))+
        ggtitle("Top 10 injury events")   
q3<-ggplot(fetal_injury,aes(x=reorder(EVTYPE,-fatInj),y=fatInj))+
        geom_bar(stat = "identity")+
        xlab("Event type")+
        ylab("Total number of fatalities and injuries")+
        theme(axis.text.x = element_text(angle = 45, hjust = 1))+
        ggtitle("Top 10 fatal&injury events")
grid.arrange(q1,q2,q3,nrow=1)

2.Across the United States, which types of sever weather events have the greatest economic consequences? In this part the variable PROPDMG2(property damage) and CROPDMG2(crop damage) are used to evaluate the events

eco<-  data %>%
        select(EVTYPE,PROPDMG2,CROPDMG2)%>%
        group_by(EVTYPE)%>%
        summarise(pro_sum=sum(PROPDMG2),crop_sum=sum(CROPDMG2))
eco<-eco[(eco$pro_sum>0|eco$crop_sum>0),]
Property<-head(eco[order(eco$pro_sum,decreasing=T),],10)
Crop<-head(eco[order(eco$crop_sum,decreasing=T),],10)

Table4. Top 10 property damage events

Property[,c("EVTYPE","pro_sum")]
## # A tibble: 10 x 2
##    EVTYPE             pro_sum
##    <fct>                <dbl>
##  1 flash flood        6.82e13
##  2 thunderstorm winds 2.09e13
##  3 tornado            1.08e12
##  4 hail               3.16e11
##  5 lightning          1.73e11
##  6 flood              1.45e11
##  7 hurricane typhoon  6.93e10
##  8 flooding           5.92e10
##  9 storm surge        4.33e10
## 10 heavy snow         1.79e10

Table5. Top 10 crop damage events

Crop[,c("EVTYPE","crop_sum")]
## # A tibble: 10 x 2
##    EVTYPE               crop_sum
##    <fct>                   <dbl>
##  1 drought           13972566000
##  2 flood              5661968450
##  3 river flood        5029459000
##  4 ice storm          5022113500
##  5 hail               3025974480
##  6 hurricane          2741910000
##  7 hurricane typhoon  2607872800
##  8 flash flood        1421317100
##  9 extreme cold       1312973000
## 10 frost freeze       1094186000

Plot for the top events

g1<-ggplot(Property,aes(x=reorder(EVTYPE,-pro_sum),y=pro_sum))+
        geom_bar(stat="identity")+
        xlab("Event type")+
        ylab("Property damage in US dollar")+
        theme(axis.text.x = element_text(angle = 45, hjust = 1))+
        ggtitle("Top 10 property damage events")
g2<-ggplot(Crop,aes(x=reorder(EVTYPE,-crop_sum),y=crop_sum))+
        geom_bar(stat="identity")+
        xlab("Event type")+
        ylab("Crop damage in US dollar")+
        theme(axis.text.x = element_text(angle = 45, hjust = 1))+
        ggtitle("Top 10 Crop damage events")
grid.arrange(g1,g2,nrow=1)