The goal of this report is to explain the analysis performed on the NOAA Storm Database and to describe which event types were most harmful with respect to population health and which had the greatest economic consequences. The database contains data from the year 1950 through November 2011. The findings show that Tornado is the most harmful event type, with the highest number of fatalities and injuries, and that Flood caused the highest amount of damage (property plus crop) in dollars.
Loading and Processing the Raw Data
# Read the complete raw dataset from the bzip2-compressed CSV file.
# A header row, comma separators and "NA" as the missing-value marker are
# read.csv's defaults, so they do not need to be spelled out.
stormdataFull <- read.csv("repdata-data-StormData.csv.bz2")
The dataset has 902297 rows and 37 columns.
# Dimensions of the raw data frame: number of rows, then number of columns
dim(stormdataFull)
## [1] 902297 37
# Column names of the raw data frame
names(stormdataFull)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
To analyze which event types (“EVTYPE”) were most harmful with respect to population health, create a subset with fatalities (“FATALITIES”) and injuries (“INJURIES”), making sure that the data is free of missing values.
# Keep only the event type and the two population-health measures
stormdata <- stormdataFull[c("EVTYPE", "FATALITIES", "INJURIES")]
# Discard every row that has a missing value in any of the kept columns
stormdata <- stormdata[complete.cases(stormdata), ]
Aggregate the sum of Fatalities and Injuries by Event Type.
# Total fatalities and injuries for each event type.
# The two measure columns are renamed up front so the result carries the
# columns EventType, Fatalities and Injuries.
totalF <- aggregate(
  setNames(
    stormdata[c("FATALITIES", "INJURIES")],
    c("Fatalities", "Injuries")
  ),
  by = list(EventType = stormdata$EVTYPE),
  FUN = sum
)
Since the goal is a list of the most harmful event types, find the top 10 event types by number of fatalities and by number of injuries, and merge them into a single list.
# Ten event types with the highest fatality totals
topF10 <- head(
  totalF$EventType[order(totalF$Fatalities, decreasing = TRUE)],
  10
)
# Ten event types with the highest injury totals
topI10 <- head(
  totalF$EventType[order(totalF$Injuries, decreasing = TRUE)],
  10
)
# The most harmful event types for population health are those that appear
# in either top-10 list; keep their aggregated rows.
topHarmful <- totalF[is.element(totalF$EventType, union(topF10, topI10)), ]
To analyze which event types (“EVTYPE”) caused the greatest economic consequences, create a subset with property damage (“PROPDMG”) and crop damage (“CROPDMG”), together with the alphabetical characters that signify their magnitude (“PROPDMGEXP”, “CROPDMGEXP”), making sure that the data is free of missing values.
# Keep the event type plus the damage amounts and their magnitude codes
stormdata <- stormdataFull[
  c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
]
# Discard every row that has a missing value in any of the kept columns
stormdata <- stormdata[complete.cases(stormdata), ]
Calculate the dollar value of property and crop damage using the alphabetical characters that signify magnitude (“H” for hundreds, “K” for thousands, “M” for millions and “B” for billions). Values with any other magnitude code are left unscaled.
# Distinct magnitude codes found in the property-damage exponent column.
# As the printed values show, these are not only letters: digits and the
# symbols "+", "-" and "?" occur as well.
unique(stormdata$PROPDMGEXP)
## [1] K M B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels: - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
# Distinct magnitude codes found in the crop-damage exponent column
unique(stormdata$CROPDMGEXP)
## [1] M K m B ? 0 k 2
## Levels: ? 0 2 B k K m M
# Normalise the magnitude codes to upper case so that "k"/"K", "m"/"M" and
# "h"/"H" are treated alike (toupper also converts factors to character).
stormdata$PROPDMGEXP <- toupper(stormdata$PROPDMGEXP)
stormdata$CROPDMGEXP <- toupper(stormdata$CROPDMGEXP)

# Dollar multiplier for each recognised magnitude code
dmgMagnitude <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)

# Translate a vector of magnitude codes into numeric multipliers.
# Any code other than H, K, M or B (blank, digit, "+", "-", "?", NA) maps to
# 1, so the corresponding damage amount is kept as recorded.
dmgMultiplier <- function(exp) {
  mult <- unname(dmgMagnitude[match(exp, names(dmgMagnitude))])
  mult[is.na(mult)] <- 1
  mult
}

# Property damage scaled by its magnitude code.
# One vectorised multiplication replaces the per-code data frame
# subset-and-assign statements, which copied the data frame on every step.
stormdata$PROPDMG_M <- stormdata$PROPDMG * dmgMultiplier(stormdata$PROPDMGEXP)
# Crop damage scaled by its magnitude code
stormdata$CROPDMG_M <- stormdata$CROPDMG * dmgMultiplier(stormdata$CROPDMGEXP)
Sum the property and crop damage to calculate the total damage of each recorded event.
# Total damage per record: scaled property damage plus scaled crop damage
stormdata[["TOTALDMG"]] <- with(stormdata, PROPDMG_M + CROPDMG_M)
Aggregate the sum of total damage (property and crop) by event type and find a top 10 list of the event types that have the greatest economic consequences.
# Keep only the columns needed for the economic analysis
stormDamage <- stormdata[, c("EVTYPE", "TOTALDMG")]
# Total damage (property plus crop) for each event type, computed from the
# reduced data frame selected above (previously it was built but never used).
totalDamage <- aggregate(
  list(SumDMG = stormDamage$TOTALDMG),
  by = list(EventType = stormDamage$EVTYPE),
  FUN = sum
)
# Ten event types with the greatest total damage, largest first
topDamage <- head(
  totalDamage[order(totalDamage$SumDMG, decreasing = TRUE), ],
  10
)
The most harmful event types with respect to population health are ranked by their combined total of fatalities and injuries. Tornado is the most harmful event type, as shown in the ordered table below.
# Show the harmful event types, largest combined casualty count first
with(topHarmful, topHarmful[order(Fatalities + Injuries, decreasing = TRUE), ])
## EventType Fatalities Injuries
## 834 TORNADO 5633 91346
## 130 EXCESSIVE HEAT 1903 6525
## 856 TSTM WIND 504 6957
## 170 FLOOD 470 6789
## 464 LIGHTNING 816 5230
## 275 HEAT 937 2100
## 153 FLASH FLOOD 978 1777
## 427 ICE STORM 89 1975
## 760 THUNDERSTORM WIND 133 1488
## 359 HIGH WIND 248 1137
## 244 HAIL 15 1361
## 585 RIP CURRENT 368 232
## 19 AVALANCHE 224 170
The bar chart shows the most harmful event types and their respective numbers of fatalities and injuries.
# Load necessary library
library(lattice)
# Bar chart of fatalities and injuries for each harmful event type.
# The variables are looked up through `data =` rather than written as
# `topHarmful$...` inside the formula, and the y axis gets an explicit label
# instead of defaulting to the raw R expression.
barchart(EventType ~ Fatalities + Injuries,
  data = topHarmful,
  xlab = "Number of Health Problems",
  ylab = "Event Type",
  main = "Most Harmful Event types with respect to population health",
  # black bars for the first measure (fatalities), red for the second (injuries)
  par.settings = list(superpose.polygon = list(col = c("black", "red"))),
  auto.key = list(corner = c(1, 0.1), text = c("Fatalities", "Injuries"))
)
The event types that have the greatest economic consequences can be found by adding the cost of property and crop damage. Flood has the highest amount of damage (property plus crop) in dollars, as shown in the ordered table below.
# Show the ten costliest event types, largest total damage first
with(topDamage, topDamage[order(SumDMG, decreasing = TRUE), ])
## EventType SumDMG
## 170 FLOOD 150319678257
## 411 HURRICANE/TYPHOON 71913712800
## 834 TORNADO 57352114049
## 670 STORM SURGE 43323541000
## 244 HAIL 18758222016
## 153 FLASH FLOOD 17562129167
## 95 DROUGHT 15018672000
## 402 HURRICANE 14610229010
## 590 RIVER FLOOD 10148404500
## 427 ICE STORM 8967041360
The bar chart shows the event types that have the greatest economic consequences, adding the cost of property and crop damage, in billions of dollars.
# Load necessary library
library(lattice)
# Bar chart of total damage per event type, expressed in billions of dollars.
# The variables are looked up through `data =` rather than written as
# `topDamage$...` inside the formula, and the y axis gets an explicit label
# instead of defaulting to the raw R expression.
barchart(EventType ~ (SumDMG / 1e9),
  data = topDamage,
  xlab = "Property and Crop Damage (in Billion US$)",
  ylab = "Event Type",
  main = "Top 10 Event types that have the greatest economic consequences"
)