library(knitr)
library(markdown)
# Set the knitr chunk options `echo` and `autodep` globally for the document.
knitr::opts_chunk$set(echo = TRUE, autodep = TRUE)

Synopsis

This project analyzes storm data from the National Oceanic and Atmospheric Administration (NOAA). It aims to determine the impact of weather events on population health and on the economy. The report shows the 6 event types with the highest average fatalities and the 6 with the highest average injuries. It also shows the 6 event types that cause the largest total economic loss.

Data Processing

Download data

# Fetch the compressed NOAA storm data set into RepProject2/.
# - The target directory is created first, because download.file() does not
#   create missing directories.
# - The download is skipped when the file is already present, so re-running
#   the analysis does not fetch the archive again.
# - mode = "wb" writes the .bz2 archive as binary; the default text mode can
#   corrupt binary files on Windows.
data_dir <- "RepProject2"
data_file <- file.path(data_dir, "StormHealth.csv.bz2")
if (!dir.exists(data_dir)) {
        dir.create(data_dir, recursive = TRUE)
}
if (!file.exists(data_file)) {
        download.file(
                "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
                data_file,
                mode = "wb"
        )
}

Read Data into R and preview it

# Load the storm data straight from the bz2-compressed CSV and preview it.
# stringsAsFactors is set explicitly so text columns such as EVTYPE,
# PROPDMGEXP and CROPDMGEXP are factors on every R version; the default
# changed to FALSE in R 4.0.0, and summary()/unique() on those columns only
# report level counts / levels when they are factors.
weatherdata <- read.csv(
        "RepProject2/StormHealth.csv.bz2",
        header = TRUE,
        sep = ",",
        stringsAsFactors = TRUE
)
head(weatherdata)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
## 4 TORNADO         0                                               0
## 5 TORNADO         0                                               0
## 6 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
## 4         NA         0                       0.0   100 2   0          0
## 5         NA         0                       0.0   150 2   0          0
## 6         NA         0                       1.5   177 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
## 4        2     2.5          K       0                                    
## 5        2     2.5          K       0                                    
## 6        6     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3
## 4     3458      8626          0          0              4
## 5     3412      8642          0          0              5
## 6     3450      8748          0          0              6
# Frequency of each event type (EVTYPE), as reported by summary().
summary(weatherdata[["EVTYPE"]])
##                     HAIL                TSTM WIND        THUNDERSTORM WIND 
##                   288661                   219940                    82563 
##                  TORNADO              FLASH FLOOD                    FLOOD 
##                    60652                    54277                    25326 
##       THUNDERSTORM WINDS                HIGH WIND                LIGHTNING 
##                    20843                    20212                    15754 
##               HEAVY SNOW               HEAVY RAIN             WINTER STORM 
##                    15708                    11723                    11433 
##           WINTER WEATHER             FUNNEL CLOUD         MARINE TSTM WIND 
##                     7026                     6839                     6175 
## MARINE THUNDERSTORM WIND               WATERSPOUT              STRONG WIND 
##                     5812                     3796                     3566 
##     URBAN/SML STREAM FLD                 WILDFIRE                 BLIZZARD 
##                     3392                     2761                     2719 
##                  DROUGHT                ICE STORM           EXCESSIVE HEAT 
##                     2488                     2006                     1678 
##               HIGH WINDS         WILD/FOREST FIRE             FROST/FREEZE 
##                     1533                     1457                     1342 
##                DENSE FOG       WINTER WEATHER/MIX           TSTM WIND/HAIL 
##                     1293                     1104                     1028 
##  EXTREME COLD/WIND CHILL                     HEAT                HIGH SURF 
##                     1002                      767                      725 
##           TROPICAL STORM           FLASH FLOODING             EXTREME COLD 
##                      690                      682                      655 
##            COASTAL FLOOD         LAKE-EFFECT SNOW        FLOOD/FLASH FLOOD 
##                      650                      636                      624 
##                LANDSLIDE                     SNOW          COLD/WIND CHILL 
##                      600                      587                      539 
##                      FOG              RIP CURRENT              MARINE HAIL 
##                      538                      470                      442 
##               DUST STORM                AVALANCHE                     WIND 
##                      427                      386                      340 
##             RIP CURRENTS              STORM SURGE            FREEZING RAIN 
##                      304                      261                      250 
##              URBAN FLOOD     HEAVY SURF/HIGH SURF        EXTREME WINDCHILL 
##                      249                      228                      204 
##             STRONG WINDS           DRY MICROBURST    ASTRONOMICAL LOW TIDE 
##                      196                      186                      174 
##                HURRICANE              RIVER FLOOD               LIGHT SNOW 
##                      174                      173                      154 
##         STORM SURGE/TIDE            RECORD WARMTH         COASTAL FLOODING 
##                      148                      146                      143 
##               DUST DEVIL         MARINE HIGH WIND        UNSEASONABLY WARM 
##                      141                      135                      126 
##                 FLOODING   ASTRONOMICAL HIGH TIDE        MODERATE SNOWFALL 
##                      120                      103                      101 
##           URBAN FLOODING               WINTRY MIX        HURRICANE/TYPHOON 
##                       98                       90                       88 
##            FUNNEL CLOUDS               HEAVY SURF              RECORD HEAT 
##                       87                       84                       81 
##                   FREEZE                HEAT WAVE                     COLD 
##                       74                       74                       72 
##              RECORD COLD                      ICE  THUNDERSTORM WINDS HAIL 
##                       64                       61                       61 
##      TROPICAL DEPRESSION                    SLEET         UNSEASONABLY DRY 
##                       60                       59                       56 
##                    FROST              GUSTY WINDS      THUNDERSTORM WINDSS 
##                       53                       53                       51 
##       MARINE STRONG WIND                    OTHER               SMALL HAIL 
##                       48                       48                       47 
##                   FUNNEL             FREEZING FOG             THUNDERSTORM 
##                       46                       45                       45 
##       Temperature record          TSTM WIND (G45)         Coastal Flooding 
##                       43                       39                       38 
##              WATERSPOUTS    MONTHLY PRECIPITATION                    WINDS 
##                       37                       36                       36 
##                  (Other) 
##                     2940

Analysis

Question 1: which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

Method: The average number of fatalities and the average number of injuries are used to measure the harm each type of weather event causes to population health. The event types with the highest average fatalities and the highest average injuries are selected separately, and plots are generated to show them.

  1. Group the data by event type, calculate the average fatalities and the average injuries for each type, and select separately the 6 event types with the highest average fatalities and the 6 with the highest average injuries.
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Mean fatalities per event type, keeping the 6 types with the largest mean.
# The ranking column is passed to top_n() explicitly rather than relying on
# its default (the last column of the table), so the selection cannot change
# silently if another summary column is added, and no "Selecting by ..."
# message is emitted.
top_fatalities <- weatherdata %>%
        group_by(EVTYPE) %>%
        summarise(avg_fatalities = mean(FATALITIES)) %>%
        top_n(6, avg_fatalities)
# Mean injuries per event type, keeping the 6 types with the largest mean.
# The ranking column is passed to top_n() explicitly rather than relying on
# its default (the last column of the table), so the selection cannot change
# silently if another summary column is added, and no "Selecting by ..."
# message is emitted.
top_injuries <- weatherdata %>%
        group_by(EVTYPE) %>%
        summarise(avg_injuries = mean(INJURIES)) %>%
        top_n(6, avg_injuries)
  2. Generate one plot showing the events with the highest average fatalities and another showing the events with the highest average injuries.
library(ggplot2)
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
# Horizontal bar chart of the top-6 average fatalities by event type.
p1 <- ggplot(top_fatalities, aes(EVTYPE, avg_fatalities)) +
        geom_bar(stat = "identity", fill = "olivedrab") +
        coord_flip() +
        ggtitle("Top 6 Events with Highest Average Fatalities") +
        xlab("Event Types") +
        ylab("Average Fatalities")

# Horizontal bar chart of the top-6 average injuries by event type.
p2 <- ggplot(top_injuries, aes(EVTYPE, avg_injuries)) +
        geom_bar(stat = "identity", fill = "olivedrab") +
        coord_flip() +
        ggtitle("Top 6 Events with Highest Average Injuries") +
        xlab("Event Types") +
        ylab("Average Injuries")

# Stack both charts in a single figure, fatalities on top.
grid.arrange(p1, p2, nrow = 2)

Question 2: which types of events have the greatest economic consequences?

Method: The economic loss is measured as the sum of the property damage and the crop damage.

  1. The variable PROPDMG is multiplied by the multiplier encoded in the variable PROPDMGEXP to get the actual value of the property damage. Then, the total property damage for each event is calculated to reflect the economic consequences of each event.
# List the distinct exponent codes that occur in PROPDMGEXP.
unique(weatherdata[["PROPDMGEXP"]])
##  [1] K M   B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels:  - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
# Translate every PROPDMGEXP code into the numeric multiplier for PROPDMG.
# Letters are magnitude suffixes (h/H = 100, K = 1000, m/M = 1e6, B = 1e9),
# a digit d stands for 10^d, and the codes "-", "?", "+" and the empty code
# get a multiplier of 0. A code missing from the table yields NA.
weatherdata$EXP <- local({
        codes <- c("K", "M", "m", "h", "H", "B",
                   "8", "7", "6", "5", "4", "3", "2", "1", "0",
                   "-", "?", "+", "")
        multipliers <- c(1000, 1e+06, 1e+06, 100, 100, 1e+09,
                         1e+08, 1e+07, 1e+06, 1e+05, 10000, 1000, 100, 10, 1,
                         0, 0, 0, 0)
        # match() gives the table position of each row's code (NA if absent).
        multipliers[match(as.character(weatherdata$PROPDMGEXP), codes)]
})
  2. The variable CROPDMG is multiplied by the multiplier encoded in the variable CROPDMGEXP to get the actual value of the crop damage. Then, the total crop damage for each event is calculated to reflect the economic consequences of each event.
# List the distinct exponent codes that occur in CROPDMGEXP.
unique(weatherdata[["CROPDMGEXP"]])
## [1]   M K m B ? 0 k 2
## Levels:  ? 0 2 B k K m M
# Translate every CROPDMGEXP code into the numeric multiplier for CROPDMG.
# k/K = 1000, m/M = 1e6, B = 1e9, "2" = 100, "0" = 1; "?" and the empty code
# get a multiplier of 0. A code missing from the table yields NA. Assigning
# the whole column at once also discards any earlier CROPEXP values.
weatherdata$CROPEXP <- local({
        codes <- c("m", "M", "k", "K", "B", "2", "0", "?", "")
        multipliers <- c(1e+06, 1e+06, 1000, 1000, 1e+09, 100, 1, 0, 0)
        # match() gives the table position of each row's code (NA if absent).
        multipliers[match(as.character(weatherdata$CROPDMGEXP), codes)]
})
  3. Create a new variable that reflects the total economic damage by adding the property damage and the crop damage.
# Per-record economic loss: scaled property damage plus scaled crop damage.
weatherdata$EconomicLoss <- with(
        weatherdata,
        (PROPDMG * EXP) + (CROPDMG * CROPEXP)
)
  4. Summarize the economic loss by event type.
# Total economic loss for each event type (one row per EVTYPE).
Weather_EcoDMG <- summarise(
        group_by(weatherdata, EVTYPE),
        total_DMG = sum(EconomicLoss)
)
  5. Select the event types that have the most economic damage. Generate a plot of these event types with the worst economic damage.
# Keep the 6 event types with the largest total damage, sorted from the
# most damaging to the least damaging.
top_EcoLoss <- local({
        worst <- top_n(Weather_EcoDMG, n = 6, wt = total_DMG)
        worst[order(worst$total_DMG, decreasing = TRUE), ]
})
  6. Generate a barplot to present the data.
# Draw a single-panel bar chart of total damage for the top-6 event types.
par(mfrow = c(1, 1))
barplot(
        height = top_EcoLoss$total_DMG,
        names.arg = top_EcoLoss$EVTYPE,
        main = "Top 6 Events with Highest Economic Damage",
        col = "lightblue",
        las = 2  # axis labels perpendicular to the axes
)

Result

The first two figures show that the events of tornadoes, TSTM wind, and hail have the highest average fatalities and the events of heat wave have the highest average injuries. The third figure illustrates that floods cause the largest economic damage, at over 140 billion.