This project uses the NOAA Storm Database (1950 - 2011) to analyze severe weather events and answer the following questions:
1. Across the United States, which types of events are most harmful with respect to population health?
2. Across the United States, which types of events have the greatest economic consequences?
# Load the raw NOAA storm records straight from the bzip2-compressed CSV.
storm_data <- read.csv("repdata_data_StormData.csv.bz2")

# One working table per question, keeping only the rows that report a
# positive value and only the columns that question needs. which() discards
# rows where the comparison is NA.
fatality_data_subset <- storm_data[
  which(storm_data$FATALITIES > 0),
  c("EVTYPE", "FATALITIES")
]
injury_data_subset <- storm_data[
  which(storm_data$INJURIES > 0),
  c("EVTYPE", "INJURIES")
]
pdamages_data_subset <- storm_data[
  which(storm_data$PROPDMG > 0),
  c("EVTYPE", "PROPDMG", "PROPDMGEXP")
]
cdamages_data_subset <- storm_data[
  which(storm_data$CROPDMG > 0),
  c("EVTYPE", "CROPDMG", "CROPDMGEXP")
]

# List the distinct magnitude codes that the damage conversion has to handle.
unique(storm_data[["PROPDMGEXP"]])
## [1] K M B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels: - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
unique(storm_data[["CROPDMGEXP"]])
## [1] M K m B ? 0 k 2
## Levels: ? 0 2 B k K m M
# Convert a recorded damage figure and its magnitude code into a dollar amount.
#
# Arguments:
#   damage_amt  Numeric damage value(s).
#   mag         Magnitude code(s), character or factor. Recognised codes:
#                 "-", "?", "+"      -> amount is treated as 0
#                 "" or "0"          -> amount is used as-is
#                 "1" .. "8"         -> amount * 10^digit
#                 "H"/"h", "K"/"k",
#                 "M"/"m", "B"       -> amount * 1e2, 1e3, 1e6, 1e9
#
# Returns:
#   Numeric vector of scaled amounts, one per input element.
#
# Errors:
#   Stops with "Invalid Data" if any code is not in the list above
#   (including NA).
#
# The function is vectorised: it accepts whole columns at once, and it still
# works element-by-element when driven through mapply().
Damage_calc_fun <- function(damage_amt, mag) {
  # Multiplier for every recognised, non-blank magnitude code.
  multipliers <- c(
    "-" = 0, "?" = 0, "+" = 0,
    "0" = 1,
    "1" = 1e1,
    "2" = 1e2, "H" = 1e2, "h" = 1e2,
    "3" = 1e3, "K" = 1e3, "k" = 1e3,
    "4" = 1e4,
    "5" = 1e5,
    "6" = 1e6, "M" = 1e6, "m" = 1e6,
    "7" = 1e7,
    "8" = 1e8,
    "B" = 1e9
  )

  # Factors store integer codes underneath; look codes up by their labels.
  mag <- as.character(mag)
  # A blank code is handled exactly like "0": the amount is left unscaled.
  mag[mag %in% ""] <- "0"

  # match() yields NA for anything that is not a recognised code.
  multiplier <- unname(multipliers[match(mag, names(multipliers))])
  if (anyNA(multiplier)) {
    stop("Invalid Data")
  }

  calcAmt <- damage_amt * multiplier
  # Codes that mean "unusable" give exactly 0 whatever the recorded amount is
  # (plain multiplication would turn NA or Inf amounts into NA/NaN).
  calcAmt[rep_len(multiplier == 0, length(calcAmt))] <- 0
  calcAmt
}
# Property Damage: scale each recorded amount by its magnitude code, row by row.
pdamages_data_subset$Prop_Damage <- with(
  pdamages_data_subset,
  mapply(FUN = Damage_calc_fun, damage_amt = PROPDMG, mag = PROPDMGEXP)
)
# Crop Damage: same conversion applied to the crop columns.
cdamages_data_subset$Crop_Damage <- with(
  cdamages_data_subset,
  mapply(FUN = Damage_calc_fun, damage_amt = CROPDMG, mag = CROPDMGEXP)
)
# Fatalities: total deaths per event type, sorted so the deadliest come first.
fatality_data_agg <- aggregate(FATALITIES ~ EVTYPE, data = fatality_data_subset, FUN = sum, na.rm = TRUE)
fatality_data_agg <- fatality_data_agg[order(fatality_data_agg$FATALITIES, decreasing = TRUE), ]
# head() keeps at most ten rows; indexing with [1:10, ] would pad the table
# with all-NA rows whenever fewer than ten event types are present.
fatality_data_agg <- head(fatality_data_agg, 10)
fatality_data_agg
## EVTYPE FATALITIES
## 141 TORNADO 5633
## 26 EXCESSIVE HEAT 1903
## 35 FLASH FLOOD 978
## 57 HEAT 937
## 97 LIGHTNING 816
## 145 TSTM WIND 504
## 40 FLOOD 470
## 116 RIP CURRENT 368
## 75 HIGH WIND 248
## 2 AVALANCHE 224
# Injuries: total injuries per event type, sorted so the worst come first.
injury_data_agg <- aggregate(INJURIES ~ EVTYPE, data = injury_data_subset, FUN = sum, na.rm = TRUE)
injury_data_agg <- injury_data_agg[order(injury_data_agg$INJURIES, decreasing = TRUE), ]
# head() keeps at most ten rows; indexing with [1:10, ] would pad the table
# with all-NA rows whenever fewer than ten event types are present.
injury_data_agg <- head(injury_data_agg, 10)
injury_data_agg
## EVTYPE INJURIES
## 129 TORNADO 91346
## 135 TSTM WIND 6957
## 30 FLOOD 6789
## 20 EXCESSIVE HEAT 6525
## 85 LIGHTNING 5230
## 47 HEAT 2100
## 79 ICE STORM 1975
## 28 FLASH FLOOD 1777
## 121 THUNDERSTORM WIND 1488
## 45 HAIL 1361
# Property Damage: total dollar loss per event type, costliest first.
pdamages_data_agg <- aggregate(Prop_Damage ~ EVTYPE, data = pdamages_data_subset, FUN = sum, na.rm = TRUE)
pdamages_data_agg <- pdamages_data_agg[order(pdamages_data_agg$Prop_Damage, decreasing = TRUE), ]
# head() keeps at most ten rows; indexing with [1:10, ] would pad the table
# with all-NA rows whenever fewer than ten event types are present.
pdamages_data_agg <- head(pdamages_data_agg, 10)
pdamages_data_agg
## EVTYPE Prop_Damage
## 64 FLOOD 144657709807
## 182 HURRICANE/TYPHOON 69305840000
## 334 TORNADO 56947380617
## 282 STORM SURGE 43323536000
## 51 FLASH FLOOD 16822673979
## 106 HAIL 15735267513
## 174 HURRICANE 11868319010
## 342 TROPICAL STORM 7703890550
## 399 WINTER STORM 6688497251
## 159 HIGH WIND 5270046260
# Crop Damage: total dollar loss per event type, costliest first.
cdamages_data_agg <- aggregate(Crop_Damage ~ EVTYPE, data = cdamages_data_subset, FUN = sum, na.rm = TRUE)
cdamages_data_agg <- cdamages_data_agg[order(cdamages_data_agg$Crop_Damage, decreasing = TRUE), ]
# head() keeps at most ten rows; indexing with [1:10, ] would pad the table
# with all-NA rows whenever fewer than ten event types are present.
cdamages_data_agg <- head(cdamages_data_agg, 10)
cdamages_data_agg
## EVTYPE Crop_Damage
## 10 DROUGHT 13972566000
## 27 FLOOD 5661968450
## 78 RIVER FLOOD 5029459000
## 72 ICE STORM 5022113500
## 42 HAIL 3025954473
## 64 HURRICANE 2741910000
## 69 HURRICANE/TYPHOON 2607872800
## 23 FLASH FLOOD 1421317100
## 19 EXTREME COLD 1292973000
## 37 FROST/FREEZE 1094086000
# Two panels side by side; the deep bottom margin leaves room for the
# vertically drawn event-type labels.
par(
  mfrow = c(1, 2),
  mar = c(12, 4, 3, 2),
  mgp = c(3, 1, 0),
  cex = 0.8
)

# Left panel: fatalities for the ten deadliest event types.
with(
  fatality_data_agg,
  barplot(
    FATALITIES,
    names.arg = EVTYPE,
    las = 3,
    main = "Top 10 Fatalities By Event Type",
    ylab = "Frequency"
  )
)

# Right panel: injuries for the ten most injurious event types.
with(
  injury_data_agg,
  barplot(
    INJURIES,
    names.arg = EVTYPE,
    las = 3,
    main = "Top 10 Injuries By Event Type",
    ylab = "Frequency"
  )
)
# Two panels side by side; the deep bottom margin leaves room for the
# vertically drawn event-type labels.
par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)
# Property Damage
# The totals are held in dollars; divide by 1e9 so the bar heights agree with
# the "$billions" axis label.
barplot(pdamages_data_agg$Prop_Damage / 1e9,
        names.arg = pdamages_data_agg$EVTYPE,
        las = 3,
        main = "Top 10 Causes of Property Damage",
        ylab = "Cost ($billions)")
# Crop Damage
# Same dollars-to-billions scaling as the property panel.
barplot(cdamages_data_agg$Crop_Damage / 1e9,
        names.arg = cdamages_data_agg$EVTYPE,
        las = 3,
        main = "Top 10 Causes of Crop Damage",
        ylab = "Cost ($billions)")