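I analyzed the U.S. National Oceanic and Atmospheric Administration (NOAA) storm data from 1950 to the end of November 2011, which contains 902,297 storm event entries. The project goals were to understand, across the United States, which types of events are most harmful to population health and which types of events have the greatest economic consequences.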
The top three most harmful events for population health, ranked by total fatalities, were TORNADO, EXCESSIVE HEAT and FLASH FLOOD. The most harmful event was TORNADO, with a total of 5,633 fatalities and 91,346 injuries.
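The top three events that had the greatest economic consequences, ranked by total property damage, are FLOOD, HURRICANE/TYPHOON and TORNADO. In particular, FLOOD caused the highest total property damage at over $144 billion, together with $5.66 billion in crop damage. Note that FLOOD did not cause the highest crop damage: among the listed events, DROUGHT caused the most, at about $13.97 billion.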
library(plyr)
library(dplyr)
library(ggplot2)
library(lubridate)
library(gridExtra)
require(cowplot)
# Path to the bzip2-compressed storm CSV; read.csv() reads it directly.
gz <- "repdata-data-StormData.csv.bz2"
# Load with text columns kept as character, then wrap the result with tbl_df().
df <- tbl_df(read.csv(gz, stringsAsFactors = FALSE))
# Show a preview of the loaded data.
df
## Source: local data frame [902,297 x 37]
##
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## 7 1 11/16/1951 0:00:00 0100 CST 9 BLOUNT AL
## 8 1 1/22/1952 0:00:00 0900 CST 123 TALLAPOOSA AL
## 9 1 2/13/1952 0:00:00 2000 CST 125 TUSCALOOSA AL
## 10 1 2/13/1952 0:00:00 2000 CST 57 FAYETTE AL
## .. ... ... ... ... ... ... ...
## Variables not shown: EVTYPE (chr), BGN_RANGE (dbl), BGN_AZI (chr),
## BGN_LOCATI (chr), END_DATE (chr), END_TIME (chr), COUNTY_END (dbl),
## COUNTYENDN (lgl), END_RANGE (dbl), END_AZI (chr), END_LOCATI (chr),
## LENGTH (dbl), WIDTH (dbl), F (int), MAG (dbl), FATALITIES (dbl),
## INJURIES (dbl), PROPDMG (dbl), PROPDMGEXP (chr), CROPDMG (dbl),
## CROPDMGEXP (chr), WFO (chr), STATEOFFIC (chr), ZONENAMES (chr), LATITUDE
## (dbl), LONGITUDE (dbl), LATITUDE_E (dbl), LONGITUDE_ (dbl), REMARKS
## (chr), REFNUM (dbl)
# Total fatalities and injuries per event type, sorted so the deadliest
# event types come first.
dfSumHlth <- ddply(df, c("EVTYPE"), summarise,
                   TotalFatalities = sum(FATALITIES),
                   TotalInjuries = sum(INJURIES))
dfSumHlth <- arrange(dfSumHlth, desc(TotalFatalities))
# Keep at most the top 30 event types. head() stops at the rows that exist,
# whereas indexing with [1:30, ] pads the result with NA rows when there are
# fewer than 30 event types.
dfSumHlth <- head(dfSumHlth, 30)
# Freeze the sorted order as the factor level order so that plots keep it.
dfSumHlth$EVTYPE <- factor(dfSumHlth$EVTYPE, levels = dfSumHlth$EVTYPE)
# Add zero-filled columns that will hold the damage amounts in millions;
# rows without positive damage keep this 0.
df[["propDmgMil"]] <- 0
df[["cropDmgMil"]] <- 0
# Convert crop and property damage amounts into millions.
#
# Magnitude codes: "K" for thousands, "M" for millions, and "B" for billions.
# Codes are matched case-insensitively, so "k" and "b" are scaled the same way
# as "K" and "B" instead of silently being read as millions.
#
# Only the first element of each input column is inspected and the converted
# amount is assigned to the whole cropDmgMil / propDmgMil column, so every row
# of dfRow is expected to share the same damage amount and magnitude code.
#
# Args:
#   dfRow: data frame with columns CROPDMG, CROPDMGEXP, PROPDMG, PROPDMGEXP,
#          cropDmgMil and propDmgMil.
#
# Returns:
#   dfRow, with cropDmgMil and/or propDmgMil overwritten when the matching
#   damage amount is positive. Zero, negative or missing amounts leave the
#   existing column value untouched.
fixUnits <- function(dfRow) {
  # Scale one amount to millions according to its magnitude code.
  toMillions <- function(amount, code) {
    code <- toupper(code)
    if (identical(code, "K")) {
      amount / 1000.0
    } else if (identical(code, "B")) {
      amount * 1000
    } else {
      # "M", blank and any other code leave the amount unchanged.
      # NOTE(review): blank or unrecognised codes are therefore still read as
      # millions, as before -- confirm this is the intended interpretation.
      amount
    }
  }

  crop <- dfRow$CROPDMG[[1]] # first value of the data frame column
  prop <- dfRow$PROPDMG[[1]]

  # The is.na() guards keep a missing amount from aborting the `if`.
  if (!is.na(crop) && crop > 0) {
    dfRow$cropDmgMil <- toMillions(crop, dfRow$CROPDMGEXP[[1]])
  }
  if (!is.na(prop) && prop > 0) {
    dfRow$propDmgMil <- toMillions(prop, dfRow$PROPDMGEXP[[1]])
  }
  dfRow
}
# Run fixUnits once per distinct combination of event type, damage amounts and
# magnitude codes; every row in such a group gets the same converted values.
dfNew <- ddply(df,
               .(EVTYPE, CROPDMG, CROPDMGEXP, PROPDMG, PROPDMGEXP,
                 propDmgMil, cropDmgMil),
               fixUnits)
# Total property and crop damage (in millions) per event type, with the
# largest property damage first.
dfSumEcon <- ddply(dfNew, c("EVTYPE"), summarise,
                   TotalPropDamage = sum(propDmgMil),
                   TotalCropDamage = sum(cropDmgMil))
dfSumEcon <- arrange(dfSumEcon, desc(TotalPropDamage))
# Keep at most the top 30 event types. head() also copes with an empty
# summary, where 1:numFactors would be c(1, 0) and index a spurious NA row.
numFactors <- min(nrow(dfSumEcon), 30)
dfSumEcon <- head(dfSumEcon, numFactors)
# Freeze the sorted order as the factor level order so that plots keep it.
dfSumEcon$EVTYPE <- factor(dfSumEcon$EVTYPE, levels = dfSumEcon$EVTYPE)
The top three most harmful events for population health, ranked by total fatalities, were TORNADO, EXCESSIVE HEAT and FLASH FLOOD. The most harmful event was TORNADO, with a total of 5,633 fatalities and 91,346 injuries.
# Horizontal bar chart of total fatalities per event type; each event type
# gets its own fill colour and the legend is hidden.
plot1 <- ggplot(dfSumHlth, aes(x = EVTYPE, y = TotalFatalities, fill = EVTYPE)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  xlab("") +
  ylab("Fatalities") +
  theme(legend.position = "none",
        axis.text.x = element_text(colour = "grey20", size = 8),
        axis.text.y = element_text(colour = "grey20", size = 8))

# Same chart layout for total injuries.
plot2 <- ggplot(dfSumHlth, aes(x = EVTYPE, y = TotalInjuries, fill = EVTYPE)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  xlab("") +
  ylab("Injuries") +
  theme(legend.position = "none",
        axis.text.x = element_text(colour = "grey20", size = 8),
        axis.text.y = element_text(colour = "grey20", size = 8))

# Put the two charts side by side and write the grid to a PNG file.
p <- plot_grid(plot1, plot2, ncol = 2)
save_plot("HealthPlot.png", p,
          ncol = 2, # the grid has two columns
          nrow = 1, # and a single row
          # each individual subplot should have an aspect ratio of 1.3
          base_aspect_ratio = 1.3)
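The top three events that had the greatest economic consequences, ranked by total property damage, are FLOOD, HURRICANE/TYPHOON and TORNADO. In particular, FLOOD caused the highest total property damage at over $144 billion, together with $5.66 billion in crop damage. Note that FLOOD did not cause the highest crop damage: among the listed events, DROUGHT caused the most, at about $13.97 billion.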
# Horizontal bar chart of total property damage per event type; each event
# type gets its own fill colour and the legend is hidden.
plot1 <- ggplot(dfSumEcon, aes(x = EVTYPE, y = TotalPropDamage, fill = EVTYPE)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  xlab("") +
  ylab("Property Damage(mil)") +
  theme(legend.position = "none",
        axis.text.x = element_text(colour = "grey20", size = 8),
        axis.text.y = element_text(colour = "grey20", size = 8))

# Same chart layout for total crop damage.
plot2 <- ggplot(dfSumEcon, aes(x = EVTYPE, y = TotalCropDamage, fill = EVTYPE)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  xlab("") +
  ylab("Crop Damage(mil)") +
  theme(legend.position = "none",
        axis.text.x = element_text(colour = "grey20", size = 8),
        axis.text.y = element_text(colour = "grey20", size = 8))

# Put the two charts side by side and write the grid to a PNG file.
p <- plot_grid(plot1, plot2, ncol = 2)
save_plot("EconomicPlot.png", p,
          ncol = 2, # the grid has two columns
          nrow = 1, # and a single row
          # each individual subplot should have an aspect ratio of 1.3
          base_aspect_ratio = 1.3)
# Print the health summary table: up to 30 event types, ordered by total
# fatalities (highest first), with their total injuries.
print(dfSumHlth)
## EVTYPE TotalFatalities TotalInjuries
## 1 TORNADO 5633 91346
## 2 EXCESSIVE HEAT 1903 6525
## 3 FLASH FLOOD 978 1777
## 4 HEAT 937 2100
## 5 LIGHTNING 816 5230
## 6 TSTM WIND 504 6957
## 7 FLOOD 470 6789
## 8 RIP CURRENT 368 232
## 9 HIGH WIND 248 1137
## 10 AVALANCHE 224 170
## 11 WINTER STORM 206 1321
## 12 RIP CURRENTS 204 297
## 13 HEAT WAVE 172 309
## 14 EXTREME COLD 160 231
## 15 THUNDERSTORM WIND 133 1488
## 16 HEAVY SNOW 127 1021
## 17 EXTREME COLD/WIND CHILL 125 24
## 18 STRONG WIND 103 280
## 19 BLIZZARD 101 805
## 20 HIGH SURF 101 152
## 21 HEAVY RAIN 98 251
## 22 EXTREME HEAT 96 155
## 23 COLD/WIND CHILL 95 12
## 24 ICE STORM 89 1975
## 25 WILDFIRE 75 911
## 26 HURRICANE/TYPHOON 64 1275
## 27 THUNDERSTORM WINDS 64 908
## 28 FOG 62 734
## 29 HURRICANE 61 46
## 30 TROPICAL STORM 58 340
# Print the economic summary table: up to 30 event types, ordered by total
# property damage (highest first), with their total crop damage. Both damage
# columns are in millions.
print(dfSumEcon)
## EVTYPE TotalPropDamage TotalCropDamage
## 1 FLOOD 144664.7098 5661.96845
## 2 HURRICANE/TYPHOON 69305.8400 2607.87280
## 3 TORNADO 57235.8605 574.95311
## 4 STORM SURGE 43323.5360 0.00500
## 5 FLASH FLOOD 16697.9115 1421.31710
## 6 HAIL 16059.9667 3465.53745
## 7 HURRICANE 11868.3190 2741.91000
## 8 THUNDERSTORM WINDS 7909.1529 282.65070
## 9 TROPICAL STORM 7703.8906 678.34600
## 10 WINTER STORM 6689.4973 26.94400
## 11 HIGH WIND 5305.0463 638.57130
## 12 RIVER FLOOD 5118.9455 5029.45900
## 13 WILDFIRE 4765.1140 295.47280
## 14 STORM SURGE/TIDE 4641.1880 0.85000
## 15 TSTM WIND 4539.9284 554.00735
## 16 ICE STORM 3994.9278 5022.11350
## 17 THUNDERSTORM WIND 3627.1211 414.84305
## 18 HURRICANE OPAL 3172.8460 19.00000
## 19 WILD/FOREST FIRE 3001.8295 106.79683
## 20 HEAVY RAIN/SEVERE WEATHER 2500.0000 0.00000
## 21 TORNADOES, TSTM WIND, HAIL 1600.0000 2.50000
## 22 SEVERE THUNDERSTORM 1205.3600 0.20000
## 23 LIGHTNING 1095.3593 12.09209
## 24 DROUGHT 1046.1060 13972.56600
## 25 HEAVY SNOW 934.2891 134.65310
## 26 HIGH WINDS 706.3237 40.72060
## 27 HEAVY RAIN 694.2481 733.39980
## 28 BLIZZARD 659.2139 112.06000
## 29 WILD FIRES 624.1000 0.00000
## 30 TYPHOON 600.2300 0.82500