Synopsis

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern. By exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, this project aims to identify the types of weather events that are most harmful to population health and those that have the greatest economic consequences across the United States.

Data Processing

# Fetch the NOAA storm database archive and load it into `data`.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
destfile <- "NOAAStromData.csv.bz2"

# Download only when no local copy exists, so re-running the analysis does
# not fetch the large archive again.
if (!file.exists(destfile)) {
  status <- download.file(url, destfile, method = "curl")
  if (status != 0) {
    # Remove a partial file so the next run retries instead of reading
    # a truncated archive.
    unlink(destfile)
    stop("Download of NOAA storm data failed (status ", status, ")",
         call. = FALSE)
  }
}

# The .bz2 archive is read directly; no manual decompression step is needed.
data <- read.csv(destfile)

# Quick look at the first rows and the available columns.
head(data)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6

Data Transformation

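To group similar events together, event names are converted to upper case, slashes are replaced with spaces, periods are removed, and a trailing "S" or " MIX" is stripped. Names with three or more words are then shortened to their first two words.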

# Build the analysis table: one row per harmful event, with a normalised
# event-type label, grouped by that label for the summaries that follow.
dataC <- data %>%
  select(EVTYPE, FATALITIES, INJURIES, PROPDMG) %>%
  # Keep every event that caused casualties OR property damage. Filtering on
  # casualties alone would drop damage-only events and understate the
  # PROPDMG totals; the extra rows add zero to the casualty sums.
  filter(FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0) %>%
  mutate(
    EVTYPE = toupper(EVTYPE),
    # "A/B" -> "A B"
    EVTYPE = gsub("/", " ", EVTYPE),
    # Collapse repeated blanks and trim the ends so that spacing variants
    # of the same label fall into the same group.
    EVTYPE = trimws(gsub(" +", " ", EVTYPE)),
    # Drop a trailing plural "S", a trailing " MIX", and any periods.
    EVTYPE = gsub("S$| MIX$|\\.", "", EVTYPE),
    # Keep only the first two words. [A-Z] (not [A-z], which also matches
    # "[", "\", "]", "^", "_" and "`") and a single-space replacement so
    # the shortened label equals the plain two-word label.
    EVTYPE = gsub("([A-Z]+) ([A-Z]+) .*", "\\1 \\2", EVTYPE)
  ) %>%
  group_by(EVTYPE)

dataC
## # A tibble: 21,929 x 4
## # Groups:   EVTYPE [174]
##    EVTYPE  FATALITIES INJURIES PROPDMG
##    <chr>        <dbl>    <dbl>   <dbl>
##  1 TORNADO          0       15    25  
##  2 TORNADO          0        2    25  
##  3 TORNADO          0        2     2.5
##  4 TORNADO          0        2     2.5
##  5 TORNADO          0        6     2.5
##  6 TORNADO          0        1     2.5
##  7 TORNADO          1       14    25  
##  8 TORNADO          0        3     2.5
##  9 TORNADO          0        3     2.5
## 10 TORNADO          1       26   250  
## # … with 21,919 more rows

The types of events that are most harmful to population health.

# Total casualties (fatalities plus injuries) per event type,
# ranked from most to least harmful.
datahealth <- summarise(dataC, total = sum(FATALITIES, INJURIES))
datahealth <- arrange(datahealth, desc(total))

# Event type in the top-ranked row (kept as a 1x1 tibble).
maxH <- datahealth[1, "EVTYPE"]
datahealth
## # A tibble: 174 x 2
##    EVTYPE            total
##    <chr>             <dbl>
##  1 TORNADO           96979
##  2 EXCESSIVE HEAT     8428
##  3 TSTM WIND          7461
##  4 FLOOD              7259
##  5 LIGHTNING          6047
##  6 HEAT               3037
##  7 FLASH FLOOD        2757
##  8 THUNDERSTORM WIND  2593
##  9 ICE STORM          2064
## 10 HIGH WIND          1722
## # … with 164 more rows

Bar plot of the top 5 weather events that caused fatalities and injuries

# Per-event-type totals of fatalities and injuries, kept as separate
# columns so they can be stacked in the plot.
datahealthTotal <- dataC %>%
  summarise(FATALITIES = sum(FATALITIES), INJURIES = sum(INJURIES))
datahealthTotal <- as.data.frame(datahealthTotal)

# Long format: one row per (event type, casualty category).
datahealthMelt <- melt(datahealthTotal, id = c("EVTYPE"))
names(datahealthMelt) <- c("EVTYPE", "category", "total")

# Subset to the five most harmful event types BEFORE plotting. Using
# xlim() to hide the other bars makes ggplot2 discard those rows with a
# "Removed ... rows containing missing values" warning.
top_health_events <- head(datahealth$EVTYPE, 5)
datahealthTop <- datahealthMelt[datahealthMelt$EVTYPE %in% top_health_events, ]
# Factor levels fix the bar order to the ranking in `datahealth`.
datahealthTop$EVTYPE <- factor(datahealthTop$EVTYPE, levels = top_health_events)

# Stacked bar plot: fatalities and injuries stacked per event type
ggplot(data = datahealthTop, aes(x = EVTYPE, y = total, fill = category)) +
  geom_bar(stat = "identity") +
  labs(x = "Weather event", y = "Fatalities + injuries",
       title = "The top 5 weather events which caused fatalities and injuries") +
  theme(plot.title = element_text(hjust = 0.5))
## Warning: Removed 338 rows containing missing values (position_stack).

The types of events that have the greatest economic consequences.

# Summed PROPDMG per event type (truncated to integer), ranked from
# largest to smallest.
# NOTE(review): PROPDMG is summed as recorded. The raw data also carries a
# PROPDMGEXP column (e.g. "K") and a CROPDMG column, neither of which is used
# here -- confirm whether these totals are meant to be dollar amounts.
dataEconomic <- summarise(dataC, total = as.integer(sum(PROPDMG)))
dataEconomic <- arrange(dataEconomic, desc(total))

# Event type in the top-ranked row (kept as a 1x1 tibble).
maxE <- dataEconomic[1, "EVTYPE"]

dataEconomic
## # A tibble: 174 x 2
##    EVTYPE             total
##    <chr>              <int>
##  1 TORNADO           877436
##  2 TSTM WIND         107858
##  3 THUNDERSTORM WIND  70765
##  4 FLASH FLOOD        61665
##  5 HIGH WIND          48131
##  6 FLOOD              29231
##  7 LIGHTNING          20320
##  8 WILDFIRE           19527
##  9 WINTER STORM       15463
## 10 HEAVY SNOW         11036
## # … with 164 more rows

Bar plot of the top 5 weather events that have the greatest economic consequences

# Summed PROPDMG per event type (not truncated), used for plotting.
dataEconTotal <- dataC %>%
  summarise(total = sum(PROPDMG))

# Subset to the five costliest event types BEFORE plotting. Using xlim()
# to hide the other bars makes ggplot2 discard those rows with a
# "Removed ... rows containing missing values" warning.
top_econ_events <- head(dataEconomic$EVTYPE, 5)
dataEconTop <- dataEconTotal %>%
  filter(EVTYPE %in% top_econ_events) %>%
  # Factor levels fix the bar order to the ranking in `dataEconomic`.
  mutate(EVTYPE = factor(EVTYPE, levels = top_econ_events))

# Simple bar plot: one bar per event type
ggplot(data = dataEconTop, aes(x = EVTYPE, y = total)) +
  geom_bar(stat = "identity") +
  labs(x = "Weather event", y = "Sum of PROPDMG",
       title = "The top 5 weather events which have the greatest economic consequences") +
  theme(plot.title = element_text(hjust = 0.5))
## Warning: Removed 169 rows containing missing values (position_stack).

Results

Across the United States, TORNADO events are the most harmful with respect to population health.
Across the United States, TORNADO events have the greatest economic consequences.