Synopsis

Storms and high winds were responsible for the largest number of injuries, fatalities and costs. The majority of this damage took place in eastern states. Something mysterious happened in California!

Data Processing

Read the data file into R.

# Load the bzip2-compressed storm data. stringsAsFactors is set explicitly
# because its default became FALSE in R 4.0.0, and the categorisation step
# below calls levels() on EVTYPE, which needs a factor column.
d <- read.csv(bzfile("repdata%2Fdata%2FStormData.csv.bz2"),
              stringsAsFactors = TRUE)

Consider only events that show financial or human costs and discard the columns we are not using:

# Keep only rows that record some harm: at least one injury, one fatality,
# or a non-zero property or crop damage figure.
d <- d %>%
  filter(INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0)

# Narrow the table to the columns used in the rest of the analysis.
d1 <- d %>%
  select(STATE, EVTYPE, FATALITIES, INJURIES,
         PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)

Aggregate the event types into a smaller number of categories (the categorization is somewhat arbitrary!):

# Every event-type label defined on the EVTYPE factor.
n <- levels(d1$EVTYPE)

# Positions in `n` of the labels that match each category's keywords
# (matching is case-insensitive).
wind <- which(grepl(".*HURRICANE|TORNADO|WIND|WND|GUSTNADO.*", n, ignore.case = TRUE))
heat <- which(grepl(".*HEAT|WARM|HOT.*", n, ignore.case = TRUE))
water <- which(grepl(".*FLOOD|WATER|RAIN|TIDE|PRECIPITATION|SHOWERS|WET.*", n, ignore.case = TRUE))
cold <- which(grepl(".*SNOW|COLD|BLIZZARD|COOL|WINTER|ICE|FREEZE|AVALANCHE.*", n, ignore.case = TRUE))
fire <- which(grepl(".*FIRE|SMOKE.*", n, ignore.case = TRUE))
storm <- which(grepl(".*STORM|SMOKE|TYPHOON|WET|RAIN|HAIL|FREEZE|LIGHTNING|TSTM.*", n, ignore.case = TRUE))
sea <- which(grepl(".*CURRENT.*", n, ignore.case = TRUE))

# Start Event as a copy of EVTYPE, then collapse its levels into categories.
d1 <- d1 %>% mutate(Event = EVTYPE)

# "Other" lists every label first so it acts as the default. Several keywords
# (e.g. RAIN, WET, FREEZE, SMOKE) appear in more than one pattern; a label
# matched by several categories ends up in the one listed last, so the order
# of this list matters.
category_labels <- list(
  Other = n,
  Wind  = n[wind],
  Heat  = n[heat],
  Water = n[water],
  Cold  = n[cold],
  Fire  = n[fire],
  Storm = n[storm],
  Sea   = n[sea]
)
levels(d1$Event) <- category_labels

Decode the damage values so that they are all expressed in 1000s of dollars (see http://www.nws.noaa.gov/wsom/manual/archives/NF429405.HTML#appendixc):

# Convert a damage figure to thousands of dollars from its exponent code:
# "K" is returned unchanged, "M" is multiplied by 1000 and "B" by 1000000.
# Any other code yields 0.
# NOTE(review): the comparison is case-sensitive, so a lowercase code such as
# "k" or "m" would also yield 0 - confirm whether the data contains any.
to_thousands <- function(amount, exp_code) {
  ifelse(exp_code == "K", amount,
         ifelse(exp_code == "M", amount * 1000,
                ifelse(exp_code == "B", amount * 1000000, 0)))
}

# Property and crop damage, both expressed in thousands of dollars.
d1 <- d1 %>%
  mutate(prop = to_thousands(PROPDMG, PROPDMGEXP),
         crop = to_thousands(CROPDMG, CROPDMGEXP))

Summarise the data by state and add the latitude and longitude of each state's centre:

# Total injuries, fatalities and combined crop + property damage for every
# Event/STATE pair, ordered by event and then state. d1 is left grouped by
# Event and STATE.
d1 <- d1 %>% group_by(Event, STATE)
d3 <- d1 %>%
  summarise(Injuries = sum(INJURIES),
            Fatalities = sum(FATALITIES),
            Expenses = sum(crop) + sum(prop)) %>%
  arrange(Event, STATE)

# Optional threshold on the summary, currently disabled:
#d3<-filter(d3,Fatalities>5|Injuries>25)

# Look-up table of state abbreviations with the coordinates of each state's
# centre, taken from the built-in `state` data sets.
data(state)
state.location <- data.frame(STATE = state.abb,
                             Longitude = state.center$x,
                             Latitude = state.center$y)

# Attach the coordinates to the summary. Rows whose STATE has no match in the
# look-up table are dropped by merge().
d4 <- merge(d3, state.location, by = "STATE")

Create sets with only the maximum cause for each measure so that we can plot them.

# For each state, keep the event category (or categories, if tied) with the
# largest total of each measure. d4 is left grouped by STATE.
d4 <- group_by(d4, STATE)
max_injuries <- filter(d4, Injuries == max(Injuries))
max_fatalities <- filter(d4, Fatalities == max(Fatalities))
# Fixed: this previously filtered on Fatalities, which made max_expenses a
# copy of max_fatalities instead of the costliest category per state.
max_expenses <- filter(d4, Expenses == max(Expenses))

Results

Across all states, the events that caused the most injuries were:

# Regroup the event-level data by category only, total each measure over all
# states, and list the categories from most to fewest injuries.
d1 <- group_by(d1, Event)
across_states <- d1 %>%
  summarise(Injuries = sum(INJURIES),
            Fatalities = sum(FATALITIES),
            Expenses = sum(crop) + sum(prop)) %>%
  arrange(desc(Injuries))
across_states
## # A tibble: 8 × 4
##    Event Injuries Fatalities  Expenses
##   <fctr>    <dbl>      <dbl>     <dbl>
## 1   Wind    93313       6171  82514046
## 2  Storm    21944       2188 184770764
## 3   Heat     9243       3178    924805
## 4  Water     8674       1534 179835593
## 5   Cold     3159        994   3599336
## 6  Other     2058        413  15828881
## 7   Fire     1608         90   8899910
## 8    Sea      529        577       163

The events that caused the most fatalities were:

# Same totals, ordered from most to fewest fatalities.
across_states %>% arrange(desc(Fatalities))
## # A tibble: 8 × 4
##    Event Injuries Fatalities  Expenses
##   <fctr>    <dbl>      <dbl>     <dbl>
## 1   Wind    93313       6171  82514046
## 2   Heat     9243       3178    924805
## 3  Storm    21944       2188 184770764
## 4  Water     8674       1534 179835593
## 5   Cold     3159        994   3599336
## 6    Sea      529        577       163
## 7  Other     2058        413  15828881
## 8   Fire     1608         90   8899910

The events that caused the most financial damage (in thousands of dollars) were:

# Same totals, ordered from highest to lowest combined damage.
across_states %>% arrange(desc(Expenses))
## # A tibble: 8 × 4
##    Event Injuries Fatalities  Expenses
##   <fctr>    <dbl>      <dbl>     <dbl>
## 1  Storm    21944       2188 184770764
## 2  Water     8674       1534 179835593
## 3   Wind    93313       6171  82514046
## 4  Other     2058        413  15828881
## 5   Fire     1608         90   8899910
## 6   Cold     3159        994   3599336
## 7   Heat     9243       3178    924805
## 8    Sea      529        577       163
# Map of the leading cause of injuries and of fatalities in each state.
# Injuries are drawn as triangles (pch 17) offset by +1 degree of longitude,
# fatalities as circles (pch 16) offset by -1 degree, matching the title.
# Colour is the event category; marker size is log10 of the measure plotted.
# Fixes relative to the earlier version:
#   * colours came from d4$Event, which is not aligned with max_injuries rows;
#   * injury markers were sized by fatalities and vice versa;
#   * the symbols were the opposite of what the title states.
# NOTE(review): log10() of a total of 0 or 1 is not a positive size - check
# how states with such totals should be drawn.
map("state")
points(max_injuries$Longitude + 1,
       max_injuries$Latitude,
       pch = 17,
       col = as.integer(max_injuries$Event),
       cex = log10(max_injuries$Injuries))

# Costliest category per state, used by the damage map. Fixed: this
# previously filtered on Fatalities instead of Expenses. The identical
# recomputation of max_fatalities that stood here was redundant and removed.
max_expenses <- filter(d4, Expenses == max(Expenses))

points(max_fatalities$Longitude - 1,
       max_fatalities$Latitude,
       pch = 16,
       col = as.integer(max_fatalities$Event),
       cex = log10(max_fatalities$Fatalities))
title("Injuries (Triangles) and Fatalities (Circles)")

# One legend entry and one colour per event category; the colour index is the
# category's position among the factor levels, as used for the points above.
event_levels <- levels(max_injuries$Event)
legend("topright", legend = event_levels, col = seq_along(event_levels),
       pch = 16, horiz = TRUE)

The figure shows that the source of injuries was in the eastern states due to wind. Fatalities were more varied in source including water, heat and cold.

# Map of the rows in max_expenses: one filled circle per row at the state
# centre, coloured by event category and sized by log10 of Expenses.
map("state")
points(max_expenses$Longitude,
       max_expenses$Latitude,
       pch = 16,
       col = as.integer(max_expenses$Event),
       cex = log10(max_expenses$Expenses))

title("Cause of the most damage across states")

# One legend entry and one colour per event category. The colour index is the
# category's position among the factor levels, as used for the points above.
# This replaces a colour vector sized by the number of rows, and takes the
# levels from the data frame actually plotted here.
event_levels <- levels(max_expenses$Event)
legend("topright", legend = event_levels, col = seq_along(event_levels),
       pch = 16, horiz = TRUE)

The figure shows that the majority of the damage was in the east of the country and was wind related. There is a region of the midwest where damage was largely related to cold weather.