The U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The current work analyzes the data to answer two questions: (1) across the United States, which types of events are most harmful with respect to population health, and (2) which types of events have the greatest economic consequences?
Data Loading
## Warning: package 'dplyr' was built under R version 3.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
Data cleansing and exploration
## List every column name so the fields needed below can be located
colnames(stormData)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
## Keep only the columns each analysis needs. Columns are selected by name
## rather than by position so the subsets stay correct even if the column
## order of the input changes.
## Health impact: event type with fatality and injury counts
healthInjuryData <- stormData[, c("EVTYPE", "FATALITIES", "INJURIES")]
## Economic impact: event type with property / crop damage amounts and the
## multiplier codes that accompany them
propertyDamageData <- stormData[
  , c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
]
## Drop the full dataset to free up memory
rm(stormData)
## Total fatalities and injuries per event type, ranked by their combined
## count; the first five rows are the most harmful event types
sortedHealthData <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = healthInjuryData,
  FUN = sum,
  na.rm = TRUE
)
sortedHealthData <- sortedHealthData %>%
  arrange(desc(FATALITIES + INJURIES))
sortedHealthData <- sortedHealthData[1:5, ]
sortedHealthData
## EVTYPE FATALITIES INJURIES
## 1 TORNADO 5633 91346
## 2 EXCESSIVE HEAT 1903 6525
## 3 TSTM WIND 504 6957
## 4 FLOOD 470 6789
## 5 LIGHTNING 816 5230
## Tabulate the raw multiplier codes recorded alongside property damage
table(propertyDamageData[["PROPDMGEXP"]])
##
## - ? + 0 1 2 3 4 5
## 465934 1 8 5 216 25 13 4 4 28
## 6 7 8 B h H K m M
## 4 5 1 40 1 6 424665 7 11330
## Tabulate the raw multiplier codes recorded alongside crop damage
table(propertyDamageData[["CROPDMGEXP"]])
##
## ? 0 2 B k K m M
## 618413 7 19 1 9 21 281832 1 1994
## Literal multiplier code for property damage: the recognised letter codes
## are kept as factor levels, and every other value (blank, symbol, digit or
## missing) is folded into the catch-all level "O"
propertyDamageData[["propMFlit"]] <- with(propertyDamageData, {
  letterCodes <- c("H", "K", "M", "B", "h", "m", "O")
  codes <- as.character(PROPDMGEXP)
  codes[!(codes %in% letterCodes)] <- "O"
  factor(codes, levels = letterCodes)
})
table(propertyDamageData[["propMFlit"]])
##
## H K M B h m O
## 6 424665 11330 40 1 7 466248
## Literal multiplier code for crop damage: recognised letter codes are kept
## as factor levels, anything else is folded into the catch-all level "O"
propertyDamageData[["cropMFlit"]] <- with(propertyDamageData, {
  letterCodes <- c("K", "M", "B", "k", "m", "O")
  codes <- as.character(CROPDMGEXP)
  codes[!(codes %in% letterCodes)] <- "O"
  factor(codes, levels = letterCodes)
})
table(propertyDamageData[["cropMFlit"]])
##
## K M B k m O
## 281832 1994 9 21 1 618440
## Numeric multiplying factors, looked up from the literal codes:
## H/h = hundreds, K/k = thousands, M/m = millions, B = billions, and the
## catch-all level "O" leaves the recorded amount unchanged (factor of 1).
propertyDamageData$PROPMF <- unname(
  c(H = 100, h = 100, K = 1000, M = 1e6, m = 1e6, B = 1e9, O = 1)[
    as.character(propertyDamageData$propMFlit)
  ]
)
propertyDamageData$CROPMF <- unname(
  c(K = 1000, k = 1000, M = 1e6, m = 1e6, B = 1e9, O = 1)[
    as.character(propertyDamageData$cropMFlit)
  ]
)
## Identify the five event types causing the most property and crop damage.
## PDValue / CDValue are the recorded damage amounts scaled by their numeric
## multiplying factors and expressed in millions of dollars (divided by 1e6).
propertyDamageData <- mutate(
  propertyDamageData,
  PDValue = PROPDMG * PROPMF / 1e6,
  CDValue = CROPDMG * CROPMF / 1e6
)
## One grouped summary per event type is sufficient. na.rm = TRUE skips a
## missing value in one damage column without discarding the other column's
## value for the same record. Events are then ranked by combined damage.
propertyDamageDataAggregated <- propertyDamageData %>%
  group_by(EVTYPE) %>%
  summarize(
    PDValue = sum(PDValue, na.rm = TRUE),
    CDValue = sum(CDValue, na.rm = TRUE)
  ) %>%
  arrange(desc(PDValue + CDValue)) %>%
  head(5)
propertyDamageDataAggregated
## # A tibble: 5 x 3
## EVTYPE PDValue CDValue
## <fct> <dbl> <dbl>
## 1 FLOOD 144658. 5662.
## 2 HURRICANE/TYPHOON 69306. 2608.
## 3 TORNADO 56937. 415.
## 4 STORM SURGE 43324. 0.005
## 5 HAIL 15732. 3026.
Question 1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
## Figure 1: stacked bar chart of fatalities and injuries for the five most
## harmful event types (one bar per event type, one segment per outcome).
## barplot() needs a matrix with one column per bar, so the two count columns
## are transposed and the columns labelled with the event type.
healthData <- t(as.matrix(sortedHealthData[, -1]))
colnames(healthData) <- as.character(sortedHealthData$EVTYPE)
barplot(
  healthData,
  col = c("red", "yellow"),
  main = "Figure 1. Storm Damage to Human Health or Fatalities from Storm",
  cex.names = 0.6,
  cex.axis = 0.6,
  ylim = c(0, 100000)
)
## bty accepts only "o" (draw a box) or "n" (no box); "o" keeps the boxed legend
legend(
  "topright",
  c("Fatalities", "Injuries"),
  fill = c("red", "yellow"),
  bty = "o"
)
Figure 1 shows that tornadoes cause the greatest harm to population health and also result in the highest number of fatalities. They are followed by excessive heat, thunderstorm wind, flood, and lightning, in that order, when events are ranked by combined fatalities and injuries.
Question 2. Across the United States, which types of events have the greatest economic consequences?
## Figure 2: stacked bar chart of property and crop damage (in millions of
## dollars) for the five costliest event types. barplot() needs a matrix with
## one column per bar, so the two damage columns are transposed and the
## columns labelled with the event type.
propertyData <- t(as.matrix(propertyDamageDataAggregated[, -1]))
colnames(propertyData) <- as.character(propertyDamageDataAggregated$EVTYPE)
barplot(
  propertyData,
  col = c("cadetblue1", "chartreuse"),
  main = "Figure 2. Storm Related Economic Consequences on Property and / or\n Crop",
  cex.names = 0.6,
  cex.axis = 0.6,
  ylim = c(0, 170000)
)
## bty accepts only "o" (draw a box) or "n" (no box); "o" keeps the boxed legend
legend(
  "topright",
  c("Property Damage, Million $", "Crop Damage, Million $"),
  fill = c("cadetblue1", "chartreuse"),
  bty = "o"
)
Figure 2 shows that floods have the greatest adverse economic impact on property and crops, followed by hurricanes/typhoons, tornadoes, storm surges, and hail, in that order, when events are ranked by combined property and crop damage.