Synopsis

The U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The current work analyzes the data to answer the following questions:

  1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
  2. Across the United States, which types of events have the greatest economic consequences?

Data Loading

## Warning: package 'dplyr' was built under R version 3.5.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Data cleansing and exploration

## Check the column names for location of relevant data
names(stormData)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"
## Create new dataframe of health injury / fatality data.
## Columns are selected by name (not by position) so the subset stays correct
## even if the column order of the raw data changes.
healthInjuryData <- stormData[, c("EVTYPE", "FATALITIES", "INJURIES")]
## Create new dataframe of property and crop damage data (amounts plus their
## exponent codes)
propertyDamageData <- stormData[, c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]
## remove stormData to free up memory
rm(stormData)

## Total the fatalities and injuries per event type, then keep the five event
## types with the largest combined count.
## na.action = na.pass keeps rows where only one of the two counts is missing,
## so that na.rm = TRUE can drop the missing value column by column; the
## formula default (na.omit) would discard the whole row before summing.
sortedHealthData <- aggregate(cbind(FATALITIES, INJURIES) ~ EVTYPE, data = healthInjuryData,
                              FUN = sum, na.rm = TRUE, na.action = na.pass)
sortedHealthData <- arrange(sortedHealthData, desc(FATALITIES + INJURIES))
## head() returns at most five rows; indexing with 1:5 would pad the result
## with NA rows if fewer than five event types were present.
sortedHealthData <- head(sortedHealthData, 5)
sortedHealthData
##           EVTYPE FATALITIES INJURIES
## 1        TORNADO       5633    91346
## 2 EXCESSIVE HEAT       1903     6525
## 3      TSTM WIND        504     6957
## 4          FLOOD        470     6789
## 5      LIGHTNING        816     5230
## Count how often each property damage exponent code occurs

table(propertyDamageData[["PROPDMGEXP"]])
## 
##             -      ?      +      0      1      2      3      4      5 
## 465934      1      8      5    216     25     13      4      4     28 
##      6      7      8      B      h      H      K      m      M 
##      4      5      1     40      1      6 424665      7  11330
## Count how often each crop damage exponent code occurs

table(propertyDamageData[["CROPDMGEXP"]])
## 
##             ?      0      2      B      k      K      m      M 
## 618413      7     19      1      9     21 281832      1   1994
## Build factor columns holding the recognised letter exponent codes.
## Any code outside the listed letters becomes NA when the factor is built and
## is then re-labelled with the catch-all level "O" (capital letter O).

propertyDamageData$propMFlit <- factor(
  propertyDamageData$PROPDMGEXP,
  levels = c("H","K","M","B","h","m","O")
)
propertyDamageData$propMFlit <- replace(
  propertyDamageData$propMFlit,
  is.na(propertyDamageData$propMFlit),
  "O"
)
table(propertyDamageData[["propMFlit"]])
## 
##      H      K      M      B      h      m      O 
##      6 424665  11330     40      1      7 466248
propertyDamageData$cropMFlit <- factor(
  propertyDamageData$CROPDMGEXP,
  levels = c("K","M","B","k","m","O")
)
propertyDamageData$cropMFlit <- replace(
  propertyDamageData$cropMFlit,
  is.na(propertyDamageData$cropMFlit),
  "O"
)
table(propertyDamageData[["cropMFlit"]])
## 
##      K      M      B      k      m      O 
## 281832   1994      9     21      1 618440
## Translate the letter exponent codes into numeric multipliers:
## H/h = 100, K/k = 1000, M/m = 1e6, B = 1e9, catch-all "O" = 1.
## Anything not matched by a rule falls through to 0.

propertyDamageData <- mutate(
  propertyDamageData,
  PROPMF = case_when(
    propMFlit %in% c("H", "h") ~ 100,
    propMFlit %in% "K"         ~ 1000,
    propMFlit %in% c("M", "m") ~ 1e6,
    propMFlit %in% "B"         ~ 1e9,
    propMFlit %in% "O"         ~ 1,
    TRUE                       ~ 0
  ),
  CROPMF = case_when(
    cropMFlit %in% c("K", "k") ~ 1000,
    cropMFlit %in% c("M", "m") ~ 1e6,
    cropMFlit %in% "B"         ~ 1e9,
    cropMFlit %in% "O"         ~ 1,
    TRUE                       ~ 0
  )
)


## Convert property and crop damage to millions of dollars, total them per
## event type, and keep the five event types with the largest combined damage.

propertyDamageData <- mutate(propertyDamageData, PDValue = PROPDMG * PROPMF / 1e6, CDValue = CROPDMG * CROPMF / 1e6)
## One grouped summary is enough: each sum() drops its own missing values, so
## an event keeps its property total even if a crop value is missing (and
## vice versa). The result is a tibble with one row per event type.
propertyDamageDataAggregated <- propertyDamageData %>%
                                group_by(EVTYPE) %>%
                                summarize(PDValue = sum(PDValue, na.rm = TRUE), CDValue = sum(CDValue, na.rm = TRUE))
propertyDamageDataAggregated <- arrange(propertyDamageDataAggregated, desc(PDValue + CDValue))
## head() returns at most five rows; indexing with 1:5 would pad the result
## with NA rows if fewer than five event types were present.
propertyDamageDataAggregated <- head(propertyDamageDataAggregated, 5)
propertyDamageDataAggregated
## # A tibble: 5 x 3
##   EVTYPE            PDValue  CDValue
##   <fct>               <dbl>    <dbl>
## 1 FLOOD             144658. 5662.   
## 2 HURRICANE/TYPHOON  69306. 2608.   
## 3 TORNADO            56937.  415.   
## 4 STORM SURGE        43324.    0.005
## 5 HAIL               15732. 3026.

Question 1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

## Figure 1: stacked bars of fatalities and injuries for the top event types.
x <- sortedHealthData$EVTYPE
## barplot() stacks the rows of a matrix within each bar, so the two measures
## go in rows and the event types in columns.
healthData <- t(as.matrix(sortedHealthData[, c("FATALITIES", "INJURIES")]))
## EVTYPE may be a factor; convert explicitly so the bar labels are its text.
colnames(healthData) <- as.character(x)
barplot(healthData, col=c("red","yellow"),main="Figure 1. Storm Damage to Human Health or Fatalities from Storm",cex.names  = 0.6,cex.axis = 0.6, ylim=c(0,100000))
## bty = "o" draws a box around the legend ("o" and "n" are the documented values).
legend("topright",c("Fatalities","Injuries"),fill=c("red","yellow"),bty = "o")

Figure 1 shows that tornadoes cause by far the greatest harm to population health, with the highest number of both fatalities and injuries. Ranked by combined fatalities and injuries, they are followed by excessive heat, thunderstorm wind (TSTM WIND), flood, and lightning, in that order.

Question 2. Across the United States, which types of events have the greatest economic consequences?

## Figure 2: stacked bars of property and crop damage (million $) for the top
## event types.
x <- propertyDamageDataAggregated$EVTYPE
## barplot() stacks the rows of a matrix within each bar, so the two damage
## measures go in rows and the event types in columns.
propertyData <- t(as.matrix(propertyDamageDataAggregated[, c("PDValue", "CDValue")]))
## EVTYPE may be a factor; convert explicitly so the bar labels are its text.
colnames(propertyData) <- as.character(x)
barplot(propertyData, col=c("cadetblue1","chartreuse"),main="Figure 2. Storm Related Economic Consequences on Property and / or\n Crop",cex.names  = 0.6,cex.axis = 0.6, ylim=c(0,170000))
## bty = "o" draws a box around the legend ("o" and "n" are the documented values).
legend("topright",c("Property Damage, Million $","Crop Damage, Million $"),fill=c("cadetblue1","chartreuse"),bty = "o")

Figure 2 shows that floods have the greatest combined economic impact on property and crops, followed by hurricanes/typhoons, tornadoes, storm surges, and hail, in that order.