1.Synopsis

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

2.Load the Package Needed

library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine

3.Reading Dataset

The following code is to load the dataset:

# Read the comma-separated storm data file (first row = column names)
# from the working directory, then preview its first rows.
storm <- read.csv(
  file = "repdata_data_StormData.csv",
  header = TRUE,
  sep = ","
)
head(x = storm)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
## 4 TORNADO         0                                               0
## 5 TORNADO         0                                               0
## 6 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
## 4         NA         0                       0.0   100 2   0          0
## 5         NA         0                       0.0   150 2   0          0
## 6         NA         0                       1.5   177 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
## 4        2     2.5          K       0                                    
## 5        2     2.5          K       0                                    
## 6        6     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3
## 4     3458      8626          0          0              4
## 5     3412      8642          0          0              5
## 6     3450      8748          0          0              6

4.Data Extraction

The following code extracts only the variables that are relevant to this project:

# Keep only the event type, the fatality/injury counts and the
# damage amount/exponent columns, then print the structure of the result.
data <- storm[c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]
str(data)
## 'data.frame':    902297 obs. of  7 variables:
##  $ EVTYPE    : Factor w/ 985 levels "   HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
# Preview the first six rows of the reduced data set
head(data, n = 6L)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO          0       15    25.0          K       0           
## 2 TORNADO          0        0     2.5          K       0           
## 3 TORNADO          0        2    25.0          K       0           
## 4 TORNADO          0        2     2.5          K       0           
## 5 TORNADO          0        2     2.5          K       0           
## 6 TORNADO          0        6     2.5          K       0

5.Data Processing

5.1 Data Processing Based on Total Injury (Included Fatalities)

The total harm to population health for each record is computed as the sum of fatalities and injuries:

# Per-record casualty count: fatalities plus injuries
data$TTL_INJURIES <- data$FATALITIES + data$INJURIES

5.2 Data Processing Based on Property Damage

Check which distinct values occur in the variable PROPDMGEXP:

# List the distinct exponent codes recorded for property damage
unique(data$PROPDMGEXP)
##  [1] K M   B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels:  - ? + 0 1 2 3 4 5 6 7 8 B h H K m M

Inspect the records that carry an invalid exponent value:

# Show up to 15 records whose property-damage exponent is "+"
head(
  data[data$PROPDMGEXP == "+", ],
  n = 15L
)
##                     EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG
## 188780    BREAKUP FLOODING          0        0      20          +       0
## 189001           HIGH WIND          0        0      20          +       0
## 192262 FLOODING/HEAVY RAIN          0        0       2          +       0
## 216755          HIGH WINDS          0        0      15          +       0
## 216802             TORNADO          0        0      60          +       0
##        CROPDMGEXP TTL_INJURIES
## 188780                       0
## 189001                       0
## 192262                       0
## 216755                       0
## 216802                       0
# Show up to 15 records whose property-damage exponent is empty
head(
  data[data$PROPDMGEXP == "", ],
  n = 15L
)
##       EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 54 TSTM WIND          0        0       0                  0           
## 55      HAIL          0        0       0                  0           
## 56      HAIL          0        0       0                  0           
## 57 TSTM WIND          0        0       0                  0           
## 58      HAIL          0        0       0                  0           
## 59 TSTM WIND          0        0       0                  0           
## 60 TSTM WIND          0        0       0                  0           
## 61      HAIL          0        0       0                  0           
## 62      HAIL          0        0       0                  0           
## 63      HAIL          0        0       0                  0           
## 64 TSTM WIND          0        0       0                  0           
## 65 TSTM WIND          0        0       0                  0           
## 66 TSTM WIND          0        0       0                  0           
## 67      HAIL          0        0       0                  0           
## 69 TSTM WIND          0        0       0                  0           
##    TTL_INJURIES
## 54            0
## 55            0
## 56            0
## 57            0
## 58            0
## 59            0
## 60            0
## 61            0
## 62            0
## 63            0
## 64            0
## 65            0
## 66            0
## 67            0
## 69            0
# Show up to 15 records whose property-damage exponent is "?"
head(
  data[data$PROPDMGEXP == "?", ],
  n = 15L
)
##                    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG
## 198689 THUNDERSTORM WINDS          0        0       0          ?       0
## 225254        FLASH FLOOD          0        0       0          ?       0
## 227409        FLASH FLOOD          0        0       0          ?       0
## 232016  THUNDERSTORM WIND          0        0       0          ?       0
## 233746               HAIL          0        0       0          ?       0
## 233747               HAIL          0        0       0          ?       0
## 233748               HAIL          0        0       0          ?       0
## 247617 THUNDERSTORM WINDS          0        0       0          ?       0
##        CROPDMGEXP TTL_INJURIES
## 198689                       0
## 225254                       0
## 227409                       0
## 232016                       0
## 233746                       0
## 233747                       0
## 233748                       0
## 247617                       0
# Show up to 15 records whose property-damage exponent is "-"
head(
  data[data$PROPDMGEXP == "-", ],
  n = 15L
)
##           EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 229327 HIGH WIND          2        0      15          -       0           
##        TTL_INJURIES
## 229327            2

The data transformation below follows the approach described at https://rpubs.com/gcctang1/271126

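Invalid or empty exponent values (“?”, “-”, "") and the exponent “0” are assigned a multiplier of 0, so the property damage of those records is counted as zero.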

# Translate every PROPDMGEXP code into its numeric multiplier with a single
# table lookup. Codes that are not in the table end up as NA.
data$PROPDMGEXP_1 <- local({
  codes <- c(
    "K", "3",
    "M", "m", "6",
    "B",
    "+",
    "5",
    "4",
    "2", "h", "H",
    "7",
    "1",
    "8",
    "0", "?", "-", ""
  )
  multipliers <- c(
    1e+3, 1e+3,
    1e+6, 1e+6, 1e+6,
    1e+9,
    1,
    1e+5,
    1e+4,
    1e+2, 1e+2, 1e+2,
    1e+7,
    1e+1,
    1e+8,
    0, 0, 0, 0
  )
  # match() compares factor values by their labels, so this works whether
  # PROPDMGEXP is a factor or a character vector.
  multipliers[match(data$PROPDMGEXP, codes)]
})

# Scale the reported property damage by its multiplier
data <- data %>%
  mutate(PROPDMG_1 = PROPDMG * PROPDMGEXP_1)

5.3 Data Processing Based on Crop Damage

Check which distinct values occur in the variable CROPDMGEXP:

# List the distinct exponent codes recorded for crop damage
unique(data$CROPDMGEXP)
## [1]   M K m B ? 0 k 2
## Levels:  ? 0 2 B k K m M

Inspect the records that carry an invalid exponent value:

# Show up to 15 records whose crop-damage exponent is "?"
head(
  data[data$CROPDMGEXP == "?", ],
  n = 15L
)
##                    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG
## 192467  FLASH FLOOD WINDS          0        0    0.41                  0
## 197066 THUNDERSTORM WINDS          0        0    0.50          K       0
## 197331 THUNDERSTORM WINDS          0        0    0.50          K       0
## 220300 THUNDERSTORM WINDS          0        0    0.00                  0
## 220877  FLOOD/FLASH FLOOD          0        0  400.00          K       0
## 232901  FLOOD/FLASH FLOOD          0        0    0.50          M       0
## 242953 THUNDERSTORM WINDS          0        0   80.00          K       0
##        CROPDMGEXP TTL_INJURIES PROPDMGEXP_1 PROPDMG_1
## 192467          ?            0        0e+00     0e+00
## 197066          ?            0        1e+03     5e+02
## 197331          ?            0        1e+03     5e+02
## 220300          ?            0        0e+00     0e+00
## 220877          ?            0        1e+03     4e+05
## 232901          ?            0        1e+06     5e+05
## 242953          ?            0        1e+03     8e+04

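Invalid or empty exponent values (“?”, “-”, "") and the exponent “0” are assigned a multiplier of 0, so the crop damage of those records is counted as zero.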

# Translate every CROPDMGEXP code into its numeric multiplier with a single
# table lookup. Codes that are not in the table end up as NA.
data$CROPDMGEXP_1 <- local({
  codes <- c(
    "M", "m", "6",
    "K", "k", "3",
    "B",
    "0", "?", "-", "",
    "2", "h", "H"
  )
  multipliers <- c(
    1e+6, 1e+6, 1e+6,
    1e+3, 1e+3, 1e+3,
    1e+9,
    0, 0, 0, 0,
    1e+2, 1e+2, 1e+2
  )
  # match() compares factor values by their labels, so this works whether
  # CROPDMGEXP is a factor or a character vector.
  multipliers[match(data$CROPDMGEXP, codes)]
})

# Scale the reported crop damage by its multiplier
data <- data %>%
  mutate(CROPDMG_1 = CROPDMG * CROPDMGEXP_1)

Add a variable for total damage (the sum of property damage and crop damage):

# Total damage per record: scaled property damage plus scaled crop damage
data <- data %>%
  mutate(TTL_DMG = PROPDMG_1 + CROPDMG_1)

6. Results

Impact of types of events with respect to population health

The charts below show the seven event types with the highest total fatalities and the seven with the highest total injuries.

# Total fatalities and total injuries per event type, each sorted from the
# largest total to the smallest.
fatal_agg <- arrange(
  aggregate(FATALITIES ~ EVTYPE, data = data, FUN = sum),
  desc(FATALITIES)
)
Injury_agg <- arrange(
  aggregate(INJURIES ~ EVTYPE, data = data, FUN = sum),
  desc(INJURIES)
)

# Bar chart of the seven event types with the most fatalities
fatal_plot <- ggplot(
  fatal_agg[seq_len(7), ],
  aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity", fill = "#FF6666") +
  labs(
    x = "Event Type",
    y = "Total Fatalities",
    title = "Top Seven Fatalities Event"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm")
  )

# Bar chart of the seven event types with the most injuries
injury_plot <- ggplot(
  Injury_agg[seq_len(7), ],
  aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)
) +
  geom_bar(stat = "identity", fill = "SKY BLUE") +
  labs(
    x = "Event Type",
    y = "Total Injuries",
    title = "Top Seven Injuries Event"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm")
  )

# Draw both charts side by side
grid.arrange(fatal_plot, injury_plot, ncol = 2)

The table below summarizes the seven event types that cause the most casualties (fatalities + injuries). PERCENTAGE is each event type's share of the total casualties across all event types in the study.

# Give both per-event summaries a common CASUALTY column and tag each row
# with its source ("fatal" or "injury") so they can be stacked.
fatal_agg_1 <- fatal_agg %>%
  mutate(type = "fatal") %>%
  rename(CASUALTY = FATALITIES)
Injury_agg_1 <- Injury_agg %>%
  mutate(type = "injury") %>%
  rename(CASUALTY = INJURIES)
Total_casualty <- rbind(fatal_agg_1, Injury_agg_1)

# Combined casualties per event type, sorted in descending order, with each
# event type's percentage share of all casualties.
Total_casualty_1 <- aggregate(CASUALTY ~ EVTYPE, data = Total_casualty, FUN = sum)
Total_casualty_1 <- arrange(Total_casualty_1, desc(CASUALTY))
Total_casualty_1 <- mutate(
  Total_casualty_1,
  PERCENTAGE = round(CASUALTY / sum(CASUALTY) * 100, digits = 2)
)
head(Total_casualty_1, n = 7)
##           EVTYPE CASUALTY PERCENTAGE
## 1        TORNADO    96979      62.30
## 2 EXCESSIVE HEAT     8428       5.41
## 3      TSTM WIND     7461       4.79
## 4          FLOOD     7259       4.66
## 5      LIGHTNING     6046       3.88
## 6           HEAT     3037       1.95
## 7    FLASH FLOOD     2755       1.77

The chart below shows the top casualty events, with fatalities and injuries stacked for each event type:

The top casualty events will be using the events in above charts for comparison

# Stacked bar chart (fatalities + injuries) of the seven event types with the
# most casualties.
#
# The event list is taken from the ranked summary Total_casualty_1 instead of
# being typed out by hand, so the chart always shows exactly seven events and
# stays in step with the table and with its "Top Seven" title.
top_casualty_events <- head(Total_casualty_1$EVTYPE, n = 7)

# The plot is stored as casualty_plot rather than `c`, so the base function
# c() is not shadowed by a plot object.
casualty_plot <- ggplot(
  Total_casualty[Total_casualty$EVTYPE %in% top_casualty_events, ],
  aes(x = reorder(EVTYPE, -CASUALTY), y = CASUALTY, fill = type)
) +
  geom_bar(stat = "identity") +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Event Type") +
  ylab("Total Casualty") +
  ggtitle("Top Seven Casualty Event")
print(casualty_plot)

In conclusion, TORNADO causes by far the highest total casualties (96,979, i.e. 62.30% of all casualties) compared with every other event type.

Impact of types of events with respect to the economy

The charts below show the top five event types by total economic damage, by property damage, and by crop damage.

# Total damage per event type, sorted in descending order, with each event
# type's percentage share of the overall damage.
TTL_dmg_agg <- aggregate(TTL_DMG ~ EVTYPE, data = data, FUN = sum)
TTL_dmg_agg <- arrange(TTL_dmg_agg, desc(TTL_DMG))
TTL_dmg_agg <- mutate(
  TTL_dmg_agg,
  PERCENTAGE = round(TTL_DMG / sum(TTL_DMG) * 100, digits = 2)
)
head(TTL_dmg_agg)
##              EVTYPE      TTL_DMG PERCENTAGE
## 1             FLOOD 150319678250      31.49
## 2 HURRICANE/TYPHOON  71913712800      15.07
## 3           TORNADO  57362333650      12.02
## 4       STORM SURGE  43323541000       9.08
## 5              HAIL  18761221670       3.93
## 6       FLASH FLOOD  18243990610       3.82
# Bar chart of the five event types with the highest total damage
# (property + crop), ordered from largest to smallest.
g6 <- ggplot(
  TTL_dmg_agg[1:5, ],
  aes(x = reorder(EVTYPE, -TTL_DMG), y = TTL_DMG)
)
f <- g6 +
  geom_bar(stat = "identity", fill = "LIGHT GREEN") +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Event Type") +
  # Axis label spelling corrected ("Damage")
  ylab("Total Economic Damage") +
  ggtitle("Top Total DMG")


# Total scaled property damage per event type, largest first
Prop_agg <- arrange(
  aggregate(PROPDMG_1 ~ EVTYPE, data = data, FUN = sum),
  desc(PROPDMG_1)
)

# Bar chart of the five event types with the highest property damage
d <- ggplot(
  Prop_agg[seq_len(5), ],
  aes(x = reorder(EVTYPE, -PROPDMG_1), y = PROPDMG_1)
) +
  geom_bar(stat = "identity", fill = "#FF6666") +
  labs(
    x = "Event Type",
    y = "Total Property Damage",
    title = "Top Property DMG"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm")
  )

# Total scaled crop damage per event type, largest first
Crop_agg <- aggregate(CROPDMG_1 ~ EVTYPE, data, FUN = sum) %>%
  arrange(desc(CROPDMG_1))

# Bar chart of the five event types with the highest crop damage
g5 <- ggplot(
  Crop_agg[1:5, ],
  aes(x = reorder(EVTYPE, -CROPDMG_1), y = CROPDMG_1)
)
e <- g5 +
  geom_bar(stat = "identity", fill = "SKY BLUE") +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 90, hjust = 1),
    plot.margin = unit(c(0.5, 0.5, 0.5, 0.5), "cm")
  ) +
  xlab("Event Type") +
  # Axis label spelling corrected ("Damage")
  ylab("Total Crop Damage") +
  ggtitle("Top Crop DMG")

# Draw the total, property and crop damage charts in a single row
grid.arrange(f, d, e, nrow = 1, ncol = 3)

The charts show that FLOOD causes the highest property damage, while DROUGHT causes the highest crop damage.

When comparing total damage, FLOOD causes the highest damage, followed by HURRICANE/TYPHOON and TORNADO.

Thus, the top three events that contribute to property damage (FLOOD, HURRICANE/TYPHOON and TORNADO) also have the highest impact on total economic damage.