Fatalities, Injuries & Financial Consequences of Storms in the United States

Summary

In this report, we analyze the impact of storm events in the United States, both as they affect the population physically and as they affect the economy. More specifically, we summarize which types of events have the most serious consequences in each of these categories.

Data Processing

# Load the raw storm-event records. read.csv() opens the bzip2-compressed
# file directly, so no separate decompression step is needed.
storm <- read.csv(file = "repdata-data-StormData.csv.bz2")

# Print the column names and types as a quick structural check.
str(object = storm)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : Factor w/ 16335 levels "1/1/1966 0:00:00",..: 6523 6523 4242 11116 2224 2224 2260 383 3980 3980 ...
##  $ BGN_TIME  : Factor w/ 3608 levels "00:00:00 AM",..: 272 287 2705 1683 2584 3186 242 1683 3186 3186 ...
##  $ TIME_ZONE : Factor w/ 22 levels "ADT","AKS","AST",..: 7 7 7 7 7 7 7 7 7 7 ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: Factor w/ 29601 levels "","5NM E OF MACKINAC BRIDGE TO PRESQUE ISLE LT MI",..: 13513 1873 4598 10592 4372 10094 1973 23873 24418 4598 ...
##  $ STATE     : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ EVTYPE    : Factor w/ 985 levels "   HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : Factor w/ 35 levels "","  N"," NW",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_LOCATI: Factor w/ 54429 levels "","- 1 N Albion",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ END_DATE  : Factor w/ 6663 levels "","1/1/1993 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ END_TIME  : Factor w/ 3647 levels ""," 0900CST",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : Factor w/ 24 levels "","E","ENE","ESE",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ END_LOCATI: Factor w/ 34506 levels "","- .5 NNW",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ WFO       : Factor w/ 542 levels ""," CI","$AC",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ STATEOFFIC: Factor w/ 250 levels "","ALABAMA, Central",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ ZONENAMES : Factor w/ 25112 levels "","                                                                                                                               "| __truncated__,..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : Factor w/ 436781 levels "","-2 at Deer Park\n",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...
# Convert property damage (PROPDMG) to millions of US dollars using the
# magnitude code in the neighbouring PROPDMGEXP column. Codes present in the
# data: "" "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K"
# "m" "M"
#
# Every code is handled in a single lookup. The earlier version rescaled only
# B, K, h/H, 0, 1 and 2, so amounts coded 3-8 or carrying a blank/symbol code
# were left as-is and therefore counted as if they were already in millions.

# Power of ten (in dollars) implied by each code, matched case-insensitively.
# A digit d is read as 10^d, the same convention the 0/1/2 rules used.
damage_power <- c(
  H = 2, K = 3, M = 6, B = 9,
  "0" = 0, "1" = 1, "2" = 2, "3" = 3, "4" = 4,
  "5" = 5, "6" = 6, "7" = 7, "8" = 8
)

# Codes missing from the lookup (including the empty string) come back as NA.
prop_power <- unname(damage_power[toupper(as.character(storm$PROPDMGEXP))])

# NOTE(review): blank, "-", "?" and "+" carry no magnitude, so they are
# treated as plain dollars (10^0) -- confirm against the NOAA documentation.
prop_power[is.na(prop_power)] <- 0

# Millions are 10^6 dollars, so shift every exponent down by six; M/m rows
# are multiplied by 10^0 and stay unchanged.
storm$PROPDMG <- storm$PROPDMG * 10^(prop_power - 6)

Summarizing Necessary Data

# Sum `values` within each event type and rank the types by that total.
#
# values:     numeric vector, one entry per storm record.
# types:      event type of each record (same length as `values`).
# total_name: name given to the column holding the totals.
#
# Returns a data frame with a `Type` column and a totals column named
# `total_name`, ordered from the largest total to the smallest, with default
# row names.
total_by_type <- function(values, types, total_name = "Total") {
  totals <- sort(tapply(values, types, sum, na.rm = TRUE), decreasing = TRUE)
  ranked <- data.frame(Type = names(totals), Total = as.vector(totals))
  colnames(ranked) <- c("Type", total_name)
  ranked
}

fatalitiesByType <- total_by_type(storm$FATALITIES, storm$EVTYPE)

injuriesByType <- total_by_type(storm$INJURIES, storm$EVTYPE)

costByType <- total_by_type(
  storm$PROPDMG, storm$EVTYPE,
  total_name = "Total.Costs"
)
Top Ten Events - By Fatality:
# Show the first ten rows of the fatality totals (sorted largest first).
head(fatalitiesByType, n = 10L)
##              Type Total
## 1         TORNADO  5633
## 2  EXCESSIVE HEAT  1903
## 3     FLASH FLOOD   978
## 4            HEAT   937
## 5       LIGHTNING   816
## 6       TSTM WIND   504
## 7           FLOOD   470
## 8     RIP CURRENT   368
## 9       HIGH WIND   248
## 10      AVALANCHE   224
Top Ten Events - By Injury:
# Show the first ten rows of the injury totals (sorted largest first).
head(injuriesByType, n = 10L)
##                 Type Total
## 1            TORNADO 91346
## 2          TSTM WIND  6957
## 3              FLOOD  6789
## 4     EXCESSIVE HEAT  6525
## 5          LIGHTNING  5230
## 6               HEAT  2100
## 7          ICE STORM  1975
## 8        FLASH FLOOD  1777
## 9  THUNDERSTORM WIND  1488
## 10              HAIL  1361
Top Ten Events - By Cost:
# Show the first ten rows of the property-damage totals (sorted largest first).
head(costByType, n = 10L)
##                 Type Total.Costs
## 1              FLOOD      144665
## 2  HURRICANE/TYPHOON       69306
## 3            TORNADO       57102
## 4        STORM SURGE       43324
## 5        FLASH FLOOD       16436
## 6               HAIL       15819
## 7          HURRICANE       11868
## 8     TROPICAL STORM        7704
## 9       WINTER STORM        6688
## 10         HIGH WIND        5305

Results

library(ggplot2)

top_fatalities <- head(fatalitiesByType, 10)
top_injuries <- head(injuriesByType, 10)
top_costs <- head(costByType, 10)

# Draw a bar chart with one bar per event type.
#
# types:   event type labels for the x axis.
# totals:  bar heights, one per entry of `types`.
# y_label: y-axis label.
# title:   plot title.
# outline: bar border colour.
# fill:    bar fill colour.
#
# Returns the ggplot object; printing it renders the chart.
#
# Built with ggplot() rather than qplot(): qplot(x, y) adds a point layer by
# default, which left a stray point at the top of every bar, and qplot() is
# deprecated in current ggplot2 releases.
plot_top_events <- function(types, totals, y_label, title, outline, fill) {
  bars <- data.frame(type = types, total = totals)
  ggplot(bars, aes(x = type, y = total)) +
    geom_bar(stat = "identity", colour = outline, fill = fill) +
    xlab("Events") +
    ylab(y_label) +
    ggtitle(title) +
    # Tilt the event names so long labels do not overlap.
    theme(axis.text.x = element_text(angle = 60, hjust = 1))
}

plot1 <- plot_top_events(
  top_fatalities$Type, top_fatalities$Total,
  y_label = "Number of Fatalities",
  title = "Top 10 Weather Related Causes of Fatalities",
  outline = "deepskyblue4", fill = "deepskyblue3"
)

plot2 <- plot_top_events(
  top_injuries$Type, top_injuries$Total,
  y_label = "Number of Injuries",
  title = "Top 10 Weather Related Causes of Injuries",
  outline = "darkorange2", fill = "darkorange1"
)

plot3 <- plot_top_events(
  top_costs$Type, top_costs$Total.Costs,
  y_label = "Costs in Millions (US Dollars)",
  title = "Top 10 Most Expensive Weather Events",
  outline = "green4", fill = "green3"
)

plot1
plot2
plot3

plot of chunk unnamed-chunk-7plot of chunk unnamed-chunk-7plot of chunk unnamed-chunk-7