Synopsis

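I analyzed the U.S. National Oceanic and Atmospheric Administration (NOAA) storm data from 1950 to the end of November 2011, which contains 902297 storm event entries. The project goals were to understand, across the United States, which types of events are most harmful to population health and which have the greatest economic consequences.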

The top three most harmful events for population health, ranked by total fatalities, were TORNADO, EXCESSIVE HEAT and FLASH FLOOD. The most harmful event was TORNADO, with a total of 5633 fatalities and 91346 injuries.

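The top three events that had the greatest economic consequences, ranked by total property damage, were FLOOD, HURRICANE/TYPHOON and TORNADO. In particular, FLOOD caused the highest total property damage, at over $144 billion, together with $5.66 billion in crop damage. (Among the events listed in the economic table below, the highest crop damage was caused by DROUGHT, at about $13.97 billion.)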

Data Processing

Load and initialize the required libraries

library(plyr)
library(dplyr)
library(ggplot2)
library(lubridate)
library(gridExtra)
require(cowplot)

Load the data with read.csv() and understand the profile of the data

# Path to the bzip2-compressed storm data file, expected in the working
# directory. read.csv() is given the compressed file directly.
gz <- "repdata-data-StormData.csv.bz2"

# Keep text columns as character (not factor) so that EVTYPE and the
# *DMGEXP unit codes can be compared as plain strings later on.
df <- read.csv(gz, stringsAsFactors = FALSE)
# as_tibble() replaces tbl_df(), which dplyr has deprecated; the tibble
# prints only the first rows and columns instead of the whole table.
df <- as_tibble(df)
df
## Source: local data frame [902,297 x 37]
## 
##    STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1        1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2        1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3        1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4        1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5        1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6        1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
## 7        1 11/16/1951 0:00:00     0100       CST      9     BLOUNT    AL
## 8        1  1/22/1952 0:00:00     0900       CST    123 TALLAPOOSA    AL
## 9        1  2/13/1952 0:00:00     2000       CST    125 TUSCALOOSA    AL
## 10       1  2/13/1952 0:00:00     2000       CST     57    FAYETTE    AL
## ..     ...                ...      ...       ...    ...        ...   ...
## Variables not shown: EVTYPE (chr), BGN_RANGE (dbl), BGN_AZI (chr),
##   BGN_LOCATI (chr), END_DATE (chr), END_TIME (chr), COUNTY_END (dbl),
##   COUNTYENDN (lgl), END_RANGE (dbl), END_AZI (chr), END_LOCATI (chr),
##   LENGTH (dbl), WIDTH (dbl), F (int), MAG (dbl), FATALITIES (dbl),
##   INJURIES (dbl), PROPDMG (dbl), PROPDMGEXP (chr), CROPDMG (dbl),
##   CROPDMGEXP (chr), WFO (chr), STATEOFFIC (chr), ZONENAMES (chr), LATITUDE
##   (dbl), LONGITUDE (dbl), LATITUDE_E (dbl), LONGITUDE_ (dbl), REMARKS
##   (chr), REFNUM (dbl)

Summarize the fatalities and injuries by event type using ddply

# Total fatalities and injuries per event type.
dfSumHlth <- ddply(df, "EVTYPE", summarise,
                   TotalFatalities = sum(FATALITIES),
                   TotalInjuries = sum(INJURIES))

# Rank event types from most to fewest fatalities.
dfSumHlth <- arrange(dfSumHlth, desc(TotalFatalities))

# Keep at most the 30 deadliest event types. head() returns only the rows
# that exist, whereas indexing with 1:30 would pad the result with all-NA
# rows when there are fewer than 30 event types.
dfSumHlth <- head(dfSumHlth, 30)

# Freeze the ranking as the factor level order so plots keep this ordering
# instead of sorting event types alphabetically.
dfSumHlth$EVTYPE <- factor(dfSumHlth$EVTYPE, levels = dfSumHlth$EVTYPE)

Summarize the economic impacts by event type using ddply. Note that the damage amounts must first be converted to a common unit (millions of dollars) before they are summed

# Convert the property and crop damage amounts into millions of dollars.
#
# Unit codes: "K" for thousands, "M" for millions, and "B" for billions.
# Codes are matched case-insensitively, so "k" and "b" are scaled like
# "K" and "B" instead of being silently counted as millions.
#
# Args:
#   dfRow: a data frame (one row or many) with the columns CROPDMG,
#          CROPDMGEXP, PROPDMG and PROPDMGEXP.
# Returns:
#   dfRow with the numeric columns propDmgMil and cropDmgMil set. Amounts
#   that are missing or not positive yield 0.
fixUnits <- function(dfRow) {
  # Vectorized conversion of one amount column with its unit-code column.
  toMillions <- function(amount, unitCode) {
    code <- toupper(as.character(unitCode))
    isThousands <- code %in% "K"
    isBillions <- code %in% "B"

    # NOTE(review): every other code (blank, digits, "H", "+", "-", "?")
    # is left unscaled, i.e. counted as millions, exactly as before.
    # Confirm how such records should be valued.
    millions <- amount
    millions[isThousands] <- amount[isThousands] / 1000.0
    millions[isBillions] <- amount[isBillions] * 1000

    # Zero or negative amounts contribute nothing; missing amounts are
    # treated the same way so that later sums do not become NA.
    millions[is.na(amount) | amount <= 0] <- 0
    millions
  }

  dfRow$propDmgMil <- toMillions(dfRow$PROPDMG, dfRow$PROPDMGEXP)
  dfRow$cropDmgMil <- toMillions(dfRow$CROPDMG, dfRow$CROPDMGEXP)
  dfRow
}

# One vectorized pass over the whole table; splitting the data into one
# group per distinct (event, amount, unit) combination is not needed.
dfNew <- fixUnits(df)

# Total property and crop damage (in millions) per event type.
dfSumEcon <- ddply(dfNew, "EVTYPE", summarise,
                   TotalPropDamage = sum(propDmgMil),
                   TotalCropDamage = sum(cropDmgMil))

# Rank event types from highest to lowest property damage.
dfSumEcon <- arrange(dfSumEcon, desc(TotalPropDamage))

# Keep at most the 30 costliest event types; head() is also safe when the
# summary is empty, where 1:numFactors would count backwards.
numFactors <- min(nrow(dfSumEcon), 30)
dfSumEcon <- head(dfSumEcon, numFactors)

# Freeze the ranking as the factor level order so plots keep this ordering.
dfSumEcon$EVTYPE <- factor(dfSumEcon$EVTYPE, levels = dfSumEcon$EVTYPE)

Results

Population Health Impacts

The top three most harmful events for population health, ranked by total fatalities, were TORNADO, EXCESSIVE HEAT and FLASH FLOOD. The most harmful event was TORNADO, with a total of 5633 fatalities and 91346 injuries.

# Left panel: total fatalities per event type as horizontal bars, one
# fill colour per event type, legend hidden, small grey axis labels.
plot1 <- ggplot(
  dfSumHlth,
  aes(x = EVTYPE, y = TotalFatalities, fill = EVTYPE)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(x = "", y = "Fatalities") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 8, colour = "grey20"),
    axis.text.y = element_text(size = 8, colour = "grey20")
  )

# Right panel: total injuries per event type, styled the same way.
plot2 <- ggplot(
  dfSumHlth,
  aes(x = EVTYPE, y = TotalInjuries, fill = EVTYPE)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(x = "", y = "Injuries") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 8, colour = "grey20"),
    axis.text.y = element_text(size = 8, colour = "grey20")
  )

# Place the two panels side by side and write them to a PNG file.
p <- plot_grid(plot1, plot2, ncol = 2)
save_plot(
  "HealthPlot.png", p,
  ncol = 2, # the grid has two columns ...
  nrow = 1, # ... in a single row
  # each individual subplot should have an aspect ratio of 1.3
  base_aspect_ratio = 1.3
)

Population Health Impact

Economic Impacts

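The top three events that had the greatest economic consequences, ranked by total property damage, were FLOOD, HURRICANE/TYPHOON and TORNADO. In particular, FLOOD caused the highest total property damage, at over $144 billion, together with $5.66 billion in crop damage. (Among the events listed in the economic table below, the highest crop damage was caused by DROUGHT, at about $13.97 billion.)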

# Left panel: total property damage per event type as horizontal bars, one
# fill colour per event type, legend hidden, small grey axis labels.
plot1 <- ggplot(
  dfSumEcon,
  aes(x = EVTYPE, y = TotalPropDamage, fill = EVTYPE)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(x = "", y = "Property Damage(mil)") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 8, colour = "grey20"),
    axis.text.y = element_text(size = 8, colour = "grey20")
  )

# Right panel: total crop damage per event type, styled the same way.
plot2 <- ggplot(
  dfSumEcon,
  aes(x = EVTYPE, y = TotalCropDamage, fill = EVTYPE)
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(x = "", y = "Crop Damage(mil)") +
  theme(
    legend.position = "none",
    axis.text.x = element_text(size = 8, colour = "grey20"),
    axis.text.y = element_text(size = 8, colour = "grey20")
  )

# Place the two panels side by side and write them to a PNG file.
p <- plot_grid(plot1, plot2, ncol = 2)
save_plot(
  "EconomicPlot.png", p,
  ncol = 2, # the grid has two columns ...
  nrow = 1, # ... in a single row
  # each individual subplot should have an aspect ratio of 1.3
  base_aspect_ratio = 1.3
)

Economic impact

Table - Population Health Impacts

# Print the health summary: event types ranked by total fatalities, with
# their total injuries alongside.
print(dfSumHlth)
##                     EVTYPE TotalFatalities TotalInjuries
## 1                  TORNADO            5633         91346
## 2           EXCESSIVE HEAT            1903          6525
## 3              FLASH FLOOD             978          1777
## 4                     HEAT             937          2100
## 5                LIGHTNING             816          5230
## 6                TSTM WIND             504          6957
## 7                    FLOOD             470          6789
## 8              RIP CURRENT             368           232
## 9                HIGH WIND             248          1137
## 10               AVALANCHE             224           170
## 11            WINTER STORM             206          1321
## 12            RIP CURRENTS             204           297
## 13               HEAT WAVE             172           309
## 14            EXTREME COLD             160           231
## 15       THUNDERSTORM WIND             133          1488
## 16              HEAVY SNOW             127          1021
## 17 EXTREME COLD/WIND CHILL             125            24
## 18             STRONG WIND             103           280
## 19                BLIZZARD             101           805
## 20               HIGH SURF             101           152
## 21              HEAVY RAIN              98           251
## 22            EXTREME HEAT              96           155
## 23         COLD/WIND CHILL              95            12
## 24               ICE STORM              89          1975
## 25                WILDFIRE              75           911
## 26       HURRICANE/TYPHOON              64          1275
## 27      THUNDERSTORM WINDS              64           908
## 28                     FOG              62           734
## 29               HURRICANE              61            46
## 30          TROPICAL STORM              58           340

Table - Economic Impacts (in millions of U.S. dollars)

# Print the economic summary: event types ranked by total property damage,
# with total crop damage alongside (both in millions).
print(dfSumEcon)
##                        EVTYPE TotalPropDamage TotalCropDamage
## 1                       FLOOD     144664.7098      5661.96845
## 2           HURRICANE/TYPHOON      69305.8400      2607.87280
## 3                     TORNADO      57235.8605       574.95311
## 4                 STORM SURGE      43323.5360         0.00500
## 5                 FLASH FLOOD      16697.9115      1421.31710
## 6                        HAIL      16059.9667      3465.53745
## 7                   HURRICANE      11868.3190      2741.91000
## 8          THUNDERSTORM WINDS       7909.1529       282.65070
## 9              TROPICAL STORM       7703.8906       678.34600
## 10               WINTER STORM       6689.4973        26.94400
## 11                  HIGH WIND       5305.0463       638.57130
## 12                RIVER FLOOD       5118.9455      5029.45900
## 13                   WILDFIRE       4765.1140       295.47280
## 14           STORM SURGE/TIDE       4641.1880         0.85000
## 15                  TSTM WIND       4539.9284       554.00735
## 16                  ICE STORM       3994.9278      5022.11350
## 17          THUNDERSTORM WIND       3627.1211       414.84305
## 18             HURRICANE OPAL       3172.8460        19.00000
## 19           WILD/FOREST FIRE       3001.8295       106.79683
## 20  HEAVY RAIN/SEVERE WEATHER       2500.0000         0.00000
## 21 TORNADOES, TSTM WIND, HAIL       1600.0000         2.50000
## 22        SEVERE THUNDERSTORM       1205.3600         0.20000
## 23                  LIGHTNING       1095.3593        12.09209
## 24                    DROUGHT       1046.1060     13972.56600
## 25                 HEAVY SNOW        934.2891       134.65310
## 26                 HIGH WINDS        706.3237        40.72060
## 27                 HEAVY RAIN        694.2481       733.39980
## 28                   BLIZZARD        659.2139       112.06000
## 29                 WILD FIRES        624.1000         0.00000
## 30                    TYPHOON        600.2300         0.82500