Overview:


In this document we shall examine the Storm data set from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This this data set contains data regarding adverse weather activities in the United States over the years. It notes both human (fatalities) and economic effects of such weather across multiple states. It also provides us detailed geocoding like latitiude and longitude, azimuths etc for detalied granular analysis of the effects of such storms.



Data Processing

We injest the data set from the csv file. Due to the large number of states, we have aggregated the states into regions as demarcated in the R states data set. This allows us for more concise and informative presentation of data. Also, weather is a regional phoenomenom and a study of weather patterns by region is extremely useful. This would also allow state officials to coordinate their state’s effort with other states in the region on a more regional basis. We have also converted the data to Quarterly segments. In addition to the regional data, this gives us a more concise and informative overview of the summary data.

Event Data is gathered from multiple sources. Therefore the coding of such events is quite disparate. For the purpose of our analysis we have performed a reasonable clean up of the Event Data. This helps in gaining better quality of Event and Fatality and Damage related data.

library(datasets)
library(plyr)
library(dplyr)  
library(lubridate)
library(tidyr)
library(ggplot2)
data(state)

setwd("~/GitHub/ReproducibleResearch/Storm")
stormsR <- read.csv("repdata-data-StormData.csv.bz2", nrows=-1)
stormsR$Date <- as.Date(sapply(strsplit(as.character(stormsR$BGN_DATE),' '),function(x) x[[1]]),format = '%m/%d/%Y')
stormsR$Year <- year(stormsR$Date)
stormsR$Month <- months.Date(stormsR$Date)
stormsR$MonthF <- factor(stormsR$Month,levels=month.name)
stormsR$MonthN <- as.integer(stormsR$MonthF)

# Filter Storms data with Valid States/Month/Damage or Fatality data
storms <- stormsR[(stormsR$STATE %in% state.abb & !is.na(stormsR$MonthN)) & 
    (stormsR$FATALITIES > 0 | stormsR$PROPDMG > 0 | stormsR$CROPDMG > 0) &
        (stormsR$PROPDMGEXP %in% c("T","M","B")) 
         ,]


# Recode storms data
storms$EVTYPE_R <- as.character(storms$EVTYPE)
storms$EVTYPE_R[grepl("TSTM",storms$EVTYPE_R) ] <- "THUNDERSTORM"
storms$EVTYPE_R[grepl("THUNDERSTORMS",storms$EVTYPE_R) ] <- "THUNDERSTORM"
storms$EVTYPE_R[grepl("HEAT",storms$EVTYPE_R) ] <- "HEAT"
storms$EVTYPE_R[grepl("WARM",storms$EVTYPE_R) ] <- "HEAT"
storms$EVTYPE_R[grepl("FLOOD",storms$EVTYPE_R) ] <- "FLOODING"
storms$EVTYPE_R[grepl("WIND",storms$EVTYPE_R) ] <- "HIGH WINDS"
storms$EVTYPE_R[grepl("WINDS",storms$EVTYPE_R) ] <- "HIGH WINDS"
storms$EVTYPE_R[grepl("COLD",storms$EVTYPE_R) ] <- "COLD"
storms$EVTYPE_R[grepl("HAIL",storms$EVTYPE_R) ] <- "HAIL"
storms$EVTYPE_R[grepl("HAILSTORM",storms$EVTYPE_R) ] <- "HAIL"
storms$EVTYPE_R[grepl("SNOW",storms$EVTYPE_R) ] <- "SNOW"
storms$EVTYPE_R[grepl("Freeze",storms$EVTYPE_R) ] <- "COLD"
storms$EVTYPE_R[grepl("FREEZE",storms$EVTYPE_R) ] <- "COLD"

storms$EVTYPE <- as.factor(storms$EVTYPE_R)

EVTYPECounts <- ddply(.data = storms,.variables =c("EVTYPE"),
               .fun = function(xx,var) {
                 c(Total= length(xx$Year),TotFatalities = sum(xx$FATALITIES)
                 ) 
               }
)

# Storm Fatalities by Recoded Events
EVTYPECounts_20 <- head(arrange(EVTYPECounts,desc(TotFatalities)),20)
colnames(EVTYPECounts_20) <- c("Event Type", "Event Count", "Fatality Count")
print(knitr::kable(EVTYPECounts_20,caption = "Top 20 Events By Fatalities"))

# Remove raw data from memory
rm(stormsR)

# Prepare regional tables
# join by interval and weekday + MeanSteps
state_divisions <- as.data.frame(cbind(state.abb,as.character(state.division)))
colnames(state_divisions) <- c("STATE","Region")

storms <- merge(x = storms, y = state_divisions, by = "STATE", all.x=TRUE)

storms$Quarter <- ifelse((as.integer(storms$MonthN) < 4), "Q1", ifelse((as.integer(storms$MonthN) < 7), "Q2", ifelse((as.integer(storms$MonthN) < 10), "Q3", "Q4")))
y_storms <- table(storms$Year)
m_storms <- table(storms$MonthF)
s_storms <- table(storms$STATE)

# Create Lookup Table
dataCounts <- ddply(.data = storms,.variables =c("Quarter","Region"),
               .fun = function(xx,var) {
                 c(Total= length(xx$Year)
                 ) 
               }
)

s1 <- tidyr::spread(dataCounts,Quarter,Total)
print(knitr::kable(s1,caption = "Events - Counts By Regional Divisions"))

# Initial activity by state
## 
## 
## Table: Top 20 Events By Fatalities
## 
## Event Type            Event Count   Fatality Count
## -------------------  ------------  ---------------
## TORNADO                      4474             3742
## FLOODING                     3128              514
## HIGH WINDS                    882               94
## HURRICANE/TYPHOON              50               61
## WILDFIRE                      145               55
## THUNDERSTORM                  546               54
## BLIZZARD                       44               42
## TROPICAL STORM                101               34
## HURRICANE                      72               29
## WINTER STORM                  139               25
## HEAT                            9               21
## ICE STORM                     156               20
## LANDSLIDE                      32               19
## SNOW                          132               14
## STORM SURGE                    44               13
## STORM SURGE/TIDE               10               11
## COLD                           20               10
## FOG                             4                8
## GLAZE                           2                6
## WINTER WEATHER/MIX              1                6
## 
## 
## Table: Events - Counts By Regional Divisions
## 
## Region                 Q1     Q2    Q3    Q4
## -------------------  ----  -----  ----  ----
## East North Central    216    943   505   134
## East South Central    341    620   171   162
## Middle Atlantic       141    262   432    76
## Mountain               60    163   181    48
## New England            57     64    98    55
## Pacific               212     65    93   207
## South Atlantic        462    529   530   197
## West North Central    214   1366   548   170
## West South Central    367    956   256   381



Computational Assumption
Computing fatalities is straightforward, whislt computing storm damages relies on the coding of the Exponential. There is no direct documentation of the coding. However research points us to this quote “NWS damage surveys, newspaper clipping services, the insurance industry and the general public, among others.” at https://www.ncdc.noaa.gov/stormevents/faq.jsp. We approxiamte the codes M to be millions,T to be thousands and B to be Billions. Such codes are widely used and we therefore adopt that standard.

We are unsure of the other codes, so we have excluded them from our computation.

# Computation for damage estimates
damageEst <- function (Amt, Exp) {
  TotalAmt = sum(Amt * 
              ifelse(as.character(Exp) %in% c('T'),.1,
                        ifelse(as.character(Exp) %in% c('M'),1,
                                    ifelse(as.character(Exp) %in% c('B'),1000,0)))
                )  

  TotalAmt
}



Results

We lay out in tables below Fatalitiy counts and Damage estimates by Region and Quarter. Also presented is a a grouping of the top two Events causing Fatalities per region and in another table their resulting damage estimates. Also for visual purposes we present graphs of the geographical length and width of Events. Since data shows that Tornadoes are the most dangerous event, we also plot their magnitude by region.

The Appendix contains analysis for a single State (Alabama). Each state manager can plug their states letters to obtain analysis for their own state.

# Compute Fatalities by Region and Quarter
stormsF <- storms %>% group_by(Region,Quarter)  %>%
                   summarise(TotFatalities = sum(FATALITIES)) 

stormsD <- storms %>% group_by(Region,Quarter)  %>%
                   summarise(TotDamage = sum(damageEst(PROPDMG, PROPDMGEXP) + damageEst(CROPDMG, CROPDMGEXP))) 

Region_Fatalities <- tidyr::spread(stormsF,Quarter, TotFatalities)
colnames(Region_Fatalities) <- c("Region", "Q1(F)", "Q2(F)","Q3(F)", "Q4(F)")
Region_Damage <- tidyr::spread(stormsD,Quarter, TotDamage)
colnames(Region_Damage) <- c("Region", "Q1(D)", "Q2(D)","Q3(D)", "Q4(D)")
Region <- merge(x = Region_Fatalities, y = Region_Damage, by = "Region", all.x=TRUE)

print(knitr::kable(Region,caption = "Regional - Quarterly Fatalities & Damage Costs"))

stormsQF <- storms %>% group_by(Region,Quarter,EVTYPE)  %>%
                   summarise(TotFatalities = sum(FATALITIES)) %>%
                   top_n(2,TotFatalities) 

stormsQF_spread <- tidyr::spread(stormsQF,Quarter, TotFatalities)

print(knitr::kable(stormsQF_spread,caption = "Regional - Quarterly Fatalities By Top 2 Event Types",na.print = ""))

stormsQD <- storms %>% group_by(Region,Quarter,EVTYPE)  %>%
                   summarise(TotDamage = sum(damageEst(PROPDMG, PROPDMGEXP)+ damageEst(CROPDMG, CROPDMGEXP))) %>%
                    top_n(2,TotDamage)   

stormsQD_spread <- tidyr::spread(stormsQD,Quarter, TotDamage)

print(knitr::kable(stormsQD_spread,caption = "Regional - Quarterly Damage Estimates By Top 2 Event Types"))

# Plot that gives us an idea of the dispersion of inclement weather incidents in each region
ggplot(data = storms, aes(x=LENGTH,y=WIDTH))  +facet_wrap(~Region) + geom_point() + ggtitle("Geographical Length and Width of Events By Region")

ggplot(data = storms, aes(x=MAG,y=Region))  + geom_point() + ggtitle("Magnitude of Tornadoes in Each Region")

ggplot(data = storms, aes(x=FATALITIES,y=Region))  + geom_point() + ggtitle("Fatalities By Region")

## 
## 
## Table: Regional - Quarterly Fatalities & Damage Costs
## 
## Region                Q1(F)   Q2(F)   Q3(F)   Q4(F)       Q1(D)      Q2(D)      Q3(D)      Q4(D)
## -------------------  ------  ------  ------  ------  ----------  ---------  ---------  ---------
## East North Central       34     539     117      59     1939.80   12264.04   14751.73    1044.89
## East South Central      290     598      34     131     8060.77   15525.56   31585.51    1464.06
## Middle Atlantic          49      78      65      13     1108.99    3843.13    6639.95     483.89
## Mountain                 14       2      15       9     1354.10    3422.47    2624.87    3113.18
## New England               7     102      10      12      727.26     969.60    1958.30     489.64
## Pacific                  33      10      13      82   117981.71     873.95    1261.82    5923.91
## South Atlantic          304     158     161      58     7492.53    3036.77   35313.07   17245.48
## West North Central       75     545      64      46     1706.83   22135.33    5600.68    1211.79
## West South Central      226     653      44     144     3561.71   20945.07   65453.31    6498.69
## 
## 
## Table: Regional - Quarterly Fatalities By Top 2 Event Types
## 
## Region               EVTYPE                Q1    Q2   Q3    Q4
## -------------------  ------------------  ----  ----  ---  ----
## East North Central   FLOODING               7    15   25    NA
## East North Central   HIGH WINDS            NA    NA   NA     7
## East North Central   TORNADO               17   518   62    52
## East South Central   FLOODING              28    24   10    NA
## East South Central   HIGH WINDS            NA    NA   NA     2
## East South Central   HURRICANE/TYPHOON     NA    NA   16    NA
## East South Central   TORNADO              253   570   NA   127
## Middle Atlantic      BLIZZARD              17    NA   NA    NA
## Middle Atlantic      FLOODING              13    26   51    NA
## Middle Atlantic      HIGH WINDS            NA    NA   NA     2
## Middle Atlantic      TORNADO               NA    50    8    11
## Mountain             BLIZZARD               3    NA   NA     7
## Mountain             FLOODING               3     1    9     1
## Mountain             HIGH WINDS            NA    NA   NA     1
## Mountain             TORNADO                4     1    4    NA
## Mountain             WINTER STORM           3    NA   NA    NA
## New England          FLOODING               2     5    4     7
## New England          HIGH WINDS             4    NA   NA    NA
## New England          TORNADO               NA    97    6     3
## Pacific              FLOODING              20     3    4    NA
## Pacific              HIGH WINDS             7    NA    8    NA
## Pacific              LANDSLIDE             NA    NA   NA    14
## Pacific              TORNADO               NA     6   NA    NA
## Pacific              WILDFIRE              NA    NA   NA    30
## South Atlantic       FLOODING              NA    16   58    NA
## South Atlantic       HIGH WINDS            NA    NA   NA    10
## South Atlantic       HURRICANE/TYPHOON     NA    NA   35    NA
## South Atlantic       THUNDERSTORM          25    NA   NA    NA
## South Atlantic       TORNADO              227   141   NA    37
## West North Central   BLIZZARD               4    NA   NA    NA
## West North Central   FLOODING               4    16    7    17
## West North Central   HEAT                  NA    NA    7    NA
## West North Central   TORNADO               63   523   46    23
## West South Central   FLOODING              NA    46   12    42
## West South Central   STORM SURGE/TIDE      NA    NA   11    NA
## West South Central   TORNADO              195   578   NA    93
## West South Central   WILDFIRE              13    NA   NA    NA
## 
## 
## Table: Regional - Quarterly Damage Estimates By Top 2 Event Types
## 
## Region               EVTYPE                      Q1        Q2         Q3         Q4
## -------------------  ------------------  ----------  --------  ---------  ---------
## East North Central   FLOODING                    NA   3597.85   12341.39         NA
## East North Central   HAIL                        NA        NA         NA     202.20
## East North Central   TORNADO                 449.16   5930.86    1198.05     603.20
## East North Central   WINTER STORM            806.34        NA         NA         NA
## East South Central   FLOODING                    NA   6880.66         NA         NA
## East South Central   HIGH WINDS                  NA        NA         NA     113.63
## East South Central   HURRICANE/TYPHOON           NA        NA   18650.70         NA
## East South Central   STORM SURGE                 NA        NA   11263.00         NA
## East South Central   TORNADO                1702.92   7736.31         NA    1199.80
## East South Central   WINTER STORM           5004.90        NA         NA         NA
## Middle Atlantic      FLOODING                578.95   2295.28    5363.40     168.11
## Middle Atlantic      HIGH WINDS                  NA        NA         NA     114.23
## Middle Atlantic      SNOW                    140.91        NA         NA         NA
## Middle Atlantic      TORNADO                     NA   1376.40     771.70         NA
## Mountain             BLIZZARD                103.50        NA         NA         NA
## Mountain             FLOODING               1075.45        NA     542.86      72.53
## Mountain             HAIL                        NA    852.40     807.94    2954.55
## Mountain             WILD/FOREST FIRE            NA   1543.90         NA         NA
## New England          FLOODING                287.46    210.69    1453.30      91.83
## New England          ICE STORM               346.62        NA         NA         NA
## New England          TORNADO                     NA    725.90     378.50     252.50
## Pacific              FLOODING             117139.36        NA     113.48    1273.71
## Pacific              HEAVY RAIN              344.80        NA         NA         NA
## Pacific              LANDSLIDE                   NA    104.94         NA         NA
## Pacific              WILD/FOREST FIRE            NA        NA     921.69         NA
## Pacific              WILDFIRE                    NA    535.40         NA    2784.72
## South Atlantic       FLOODING                    NA    517.81         NA         NA
## South Atlantic       HURRICANE                   NA        NA    7108.23         NA
## South Atlantic       HURRICANE OPAL              NA        NA         NA    3109.00
## South Atlantic       HURRICANE/TYPHOON           NA        NA   19462.24   10200.00
## South Atlantic       THUNDERSTORM           1616.70        NA         NA         NA
## South Atlantic       TORNADO                3751.35   2060.74         NA         NA
## West North Central   FLOODING                148.98   7488.21    1234.61         NA
## West North Central   ICE STORM                   NA        NA         NA     216.03
## West North Central   TORNADO                1166.30   9955.61    1550.54     454.60
## West South Central   FLOODING                    NA        NA         NA    1856.08
## West South Central   HAIL                   1055.30        NA         NA         NA
## West South Central   HURRICANE/TYPHOON           NA        NA   23311.06         NA
## West South Central   STORM SURGE                 NA        NA   31738.52         NA
## West South Central   TORNADO                1158.80   6287.34         NA    2059.35
## West South Central   TROPICAL STORM              NA   5184.00         NA         NA



Appendix

library(plyr)
library(dplyr)  
state_divs <- state_divisions[order(state_divisions$Region),]
print(knitr::kable(state_divs,caption = "States In Each Region"))

stormsF_State <- storms %>% group_by(STATE)  %>%
                   summarise(TotFatalities = sum(FATALITIES)) 

print(knitr::kable(stormsF_State,caption = "Fatalities By State"))

# Compute Fatalities by State and Quarter
stormsState_F <- storms %>% filter(STATE %in% c("AL")) %>% group_by(Quarter,EVTYPE)  %>%
                   summarise(TotFatalities = sum(FATALITIES)) %>%   top_n(5,TotFatalities) 

stormsState_D <- storms %>% filter(STATE %in% c("AL")) %>% group_by(Quarter,EVTYPE)  %>%
                   summarise(TotDamage = sum(damageEst(PROPDMG, PROPDMGEXP) + damageEst(CROPDMG, CROPDMGEXP))) %>% top_n(5,TotDamage) 

State_Fatalities <- tidyr::spread(stormsState_F,Quarter, TotFatalities)
#colnames(State_Fatalities) <- c("Q1(F)", "Q2(F)","Q3(F)", "Q4(F)")

State_Damage <- tidyr::spread(stormsState_D,Quarter, TotDamage)
#colnames(State_Damage) <- c("Q1(D)", "Q2(D)","Q3(D)", "Q4(D)")


print(knitr::kable(State_Fatalities,caption = "State - Quarterly Fatalities"))
print(knitr::kable(State_Damage,caption = "State - Quarterly DamageEstimates"))
## 
## 
## Table: States In Each Region
## 
##      STATE   Region             
## ---  ------  -------------------
## 13   IL      East North Central 
## 14   IN      East North Central 
## 22   MI      East North Central 
## 35   OH      East North Central 
## 49   WI      East North Central 
## 1    AL      East South Central 
## 17   KY      East South Central 
## 24   MS      East South Central 
## 42   TN      East South Central 
## 30   NJ      Middle Atlantic    
## 32   NY      Middle Atlantic    
## 38   PA      Middle Atlantic    
## 3    AZ      Mountain           
## 6    CO      Mountain           
## 12   ID      Mountain           
## 26   MT      Mountain           
## 28   NV      Mountain           
## 31   NM      Mountain           
## 44   UT      Mountain           
## 50   WY      Mountain           
## 7    CT      New England        
## 19   ME      New England        
## 21   MA      New England        
## 29   NH      New England        
## 39   RI      New England        
## 45   VT      New England        
## 2    AK      Pacific            
## 5    CA      Pacific            
## 11   HI      Pacific            
## 37   OR      Pacific            
## 47   WA      Pacific            
## 8    DE      South Atlantic     
## 9    FL      South Atlantic     
## 10   GA      South Atlantic     
## 20   MD      South Atlantic     
## 33   NC      South Atlantic     
## 40   SC      South Atlantic     
## 46   VA      South Atlantic     
## 48   WV      South Atlantic     
## 15   IA      West North Central 
## 16   KS      West North Central 
## 23   MN      West North Central 
## 25   MO      West North Central 
## 27   NE      West North Central 
## 34   ND      West North Central 
## 41   SD      West North Central 
## 4    AR      West South Central 
## 18   LA      West South Central 
## 36   OK      West South Central 
## 43   TX      West South Central 
## 
## 
## Table: Fatalities By State
## 
## STATE    TotFatalities
## ------  --------------
## AK                   3
## AL                 413
## AR                 273
## AZ                   3
## CA                  89
## CO                  16
## CT                  10
## DE                   4
## FL                 222
## GA                 185
## HI                   0
## IA                  69
## ID                   1
## IL                 139
## IN                 150
## KS                 125
## KY                  99
## LA                  90
## MA                 106
## MD                   9
## ME                   4
## MI                 193
## MN                  86
## MO                 366
## MS                 272
## MT                   1
## NC                 140
## ND                  31
## NE                  37
## NH                   6
## NJ                  25
## NM                   3
## NV                   4
## NY                  58
## OH                 189
## OK                 166
## OR                  24
## PA                 122
## RI                   0
## SC                  42
## SD                  16
## TN                 269
## TX                 538
## UT                   9
## VA                  51
## VT                   5
## WA                  22
## WI                  78
## WV                  28
## WY                   3
## 
## 
## Table: State - Quarterly Fatalities
## 
## EVTYPE                  Q1    Q2   Q3   Q4
## ---------------------  ---  ----  ---  ---
## FLOODING                 4     2    2    0
## HAIL                     0     0   NA   NA
## HEAVY RAIN              NA    NA    2   NA
## HEAVY RAIN/LIGHTNING    NA     0   NA   NA
## HIGH SURF               NA    NA   NA    0
## HIGH WINDS               0     1    0    2
## HURRICANE               NA    NA    2   NA
## HURRICANE ERIN          NA    NA    0   NA
## HURRICANE OPAL          NA    NA   NA    0
## HURRICANE/TYPHOON       NA    NA    0   NA
## ICE STORM                0    NA   NA    1
## LIGHTNING               NA     0    0   NA
## STORM SURGE/TIDE        NA    NA    0   NA
## THUNDERSTORM             0     0   NA   NA
## TORNADO                 33   307    1   52
## TROPICAL STORM          NA     0    0   NA
## WINTER STORM             4    NA   NA   NA
## 
## 
## Table: State - Quarterly DamageEstimates
## 
## EVTYPE                    Q1        Q2        Q3      Q4
## ------------------  --------  --------  --------  ------
## FLOODING              351.50   1020.75     80.80      NA
## HAIL                      NA      6.55        NA      NA
## HEAVY RAIN                NA        NA     55.00      NA
## HIGH SURF                 NA        NA        NA     8.0
## HIGH WINDS                NA      7.70     79.00   110.0
## HURRICANE                 NA        NA    242.16      NA
## HURRICANE OPAL            NA        NA        NA    52.0
## HURRICANE/TYPHOON         NA        NA   3646.50      NA
## ICE STORM               3.81        NA        NA    14.4
## LIGHTNING                 NA      7.10        NA      NA
## THUNDERSTORM            4.50        NA        NA      NA
## TORNADO               479.25   5029.39        NA   632.7
## WINTER STORM         5000.00        NA        NA      NA