This data has been analysed to show the various types of storm events that have the most impact on human health and economic consequences. Record numbers have been reduced by considering only those data from 1/1/1996 onwards when all event type criteria were used. A subset of the data are used for analysis which with 45 event types have been matched Table 2.1.1. This is considered to be a representative sample of the full dataset. The analysis shows that tornado, excessive heat and flood have the highest deaths and injuries. Economic loss also leads with tornado and flood.
Libraries to be used are loaded.
library(stringr)
library(dplyr)
library(lubridate)
library(xtable)
library(ggplot2)
The data are loaded from repdata-data-StormData.csv.bz2, and additionally a text file of official event types has been created by copying and pasting from Table 2.1.1 in NATIONAL WEATHER SERVICE INSTRUCTION 10-1605.
storm <- read.csv("repdata-data-StormData.csv.bz2")
table_event <- read.csv("StormDataEventTable.csv")
table_event$Designator <- str_sub(table_event$Event.Name,start = -1)
table_event$Event.Name <- gsub('.{2}$', '', table_event$Event.Name)
table_event$Event.Name <- str_to_upper(trimws(table_event$Event.Name, which = "both"))
storm$begin <-parse_date_time(storm$BGN_DATE, "%m/%d/%Y %H:%M:%S")
storm$year <- year(storm$begin)
storm1 <- select(storm, c(STATE__, BGN_DATE, COUNTY,STATE, EVTYPE, END_DATE, FATALITIES,
INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP, begin, year))
# filter out data 1995 and earlier as non-comparative data.
storm1$cutoff <- parse_date_time("1995-12-31","Ymd")
storm1 <- filter(storm1, begin > cutoff)
storm1$EVTYPE1 <- str_to_upper(as.character(trimws(storm1$EVTYPE, "both")))
# filter to exclude all events without fatalities or damage
storm1$damage <- storm1$FATALITIES+storm1$INJURIES+storm1$PROPDMG+storm1$CROPDMG
damage_event <- storm1[which(storm1$damage>0),]
table_event$Event.Name1 <- as.character(table_event$Event.Name)
damage_event$EVTYPE1 <- as.character(damage_event$EVTYPE1)
# match elements of table 2.1.1 with data event names
damage_event$mapped <- NA
for(i in 1:nrow(table_event)){
for(j in 1:nrow(damage_event)){
if (table_event$Event.Name[i] == damage_event$EVTYPE1[j])
damage_event$mapped[j] <- table_event$Event.Name[i]
}
}
# reduce dataset to obsevations with event types mapped from Table 2.1.1
good <- complete.cases((damage_event$mapped))
damage_event_mg <- damage_event[good,]
Crop and property damages ($) are combined, and injuries and deaths (counts) are combined for each weather event. The *EXP fields for each of CROP and PROP are recoded as: -H/h multiplied by 100 (hundred) -K/k multiplied by 1000 (thousand) -M/m multiplied by 1000000 (million) -B/b multiplied by 1000000000 (billion)
The totals for prop/crop damage are converted to $ millions for ease of reading in tables and plots.
damage_event_mg$PROPDMGEXP<-as.character(damage_event_mg$PROPDMGEXP)
for(i in 1:nrow(damage_event_mg)){
if(damage_event_mg$PROPDMGEXP[i]=="H"|damage_event_mg$PROPDMGEXP[i]=="h")
damage_event_mg$propcost[i] <- damage_event_mg$PROPDMG[i]*100
if(damage_event_mg$PROPDMGEXP[i]=="K"|damage_event_mg$PROPDMGEXP[i]=="k")
damage_event_mg$propcost[i] <- damage_event_mg$PROPDMG[i]*1000
if(damage_event_mg$PROPDMGEXP[i]=="M"|damage_event_mg$PROPDMGEXP[i]=="m")
damage_event_mg$propcost[i] <- damage_event_mg$PROPDMG[i]*1000000
if(damage_event_mg$PROPDMGEXP[i]=="B"|damage_event_mg$PROPDMGEXP[i]=="b")
damage_event_mg$propcost[i] <- damage_event_mg$PROPDMG[i]*1000000000
}
damage_event_mg$CROPDMGEXP<-as.character(damage_event_mg$CROPDMGEXP)
for(i in 1:nrow(damage_event_mg)){
if(damage_event_mg$CROPDMGEXP[i]=="H"|damage_event_mg$CROPDMGEXP[i]=="h")
damage_event_mg$cropcost[i] <- damage_event_mg$CROPDMG[i]*100
if(damage_event_mg$CROPDMGEXP[i]=="K"|damage_event_mg$CROPDMGEXP[i]=="k")
damage_event_mg$cropcost[i] <- damage_event_mg$CROPDMG[i]*1000
if(damage_event_mg$CROPDMGEXP[i]=="M"|damage_event_mg$CROPDMGEXP[i]=="m")
damage_event_mg$cropcost[i] <- damage_event_mg$CROPDMG[i]*1000000
if(damage_event_mg$CROPDMGEXP[i]=="B"|damage_event_mg$CROPDMGEXP[i]=="b")
damage_event_mg$cropcost[i] <- damage_event_mg$CROPDMG[i]*1000000000
}
damage_event_mg$cropProp <- damage_event_mg$cropcost+damage_event_mg$propcost
damage_event_mg$casualties <-damage_event_mg$FATALITIES + damage_event_mg$INJURIES
The full dataset has 902297 records. After reducing by select (by appropriate variables) and filter (by date), the dataset has 201318 rows. Further reduction to keep only records matching Table 2.1.1 brought the final dataset down to 135671 records, a sample of more than half the date-filtered data.
by_event <- group_by(damage_event_mg,mapped)
Ev_sum <- summarise(by_event,sum(casualties),sum(cropProp)/1000000 )
Ev_sum1 <- as.data.frame(Ev_sum)
names(Ev_sum1) <- c("Event_Type","Casualties","Economic_Loss_millions")
Ev_sum1_cas <- arrange(Ev_sum1, desc(Casualties))
Ev_sum1_cas <- select(Ev_sum1_cas, Event_Type, Casualties)
Ev_sum1_Ec <- arrange(Ev_sum1, desc(Economic_Loss_millions))
Ev_sum1_Ec <- select(Ev_sum1_Ec, Event_Type, Economic_Loss_millions)
TopTenCas <- slice(Ev_sum1_cas, 1:10)
TopTenEc <- slice(Ev_sum1_Ec,1:10)
The table of events by casualties shows the highest number is due to Tornado with over 22,000 people injured or killed in the period 1 January 1996 to 30 November 2011. Following that is excessive heat and flood. These figures are an underestimate of the total numbers but do give a relative comparison for the various types of events.
xt <- xtable(TopTenCas)
print(xt, type="html", decimal.mark=",")
Event_Type | Casualties | |
---|---|---|
1 | TORNADO | 22178 |
2 | EXCESSIVE HEAT | 8188 |
3 | FLOOD | 7172 |
4 | LIGHTNING | 4792 |
5 | FLASH FLOOD | 2561 |
6 | THUNDERSTORM WIND | 1530 |
7 | WINTER STORM | 1483 |
8 | HEAT | 1459 |
9 | HIGH WIND | 1318 |
10 | WILDFIRE | 986 |
These figures are also shown in the plot of casualties.
barplot(TopTenCas$Casualties, names.arg = TopTenCas$Event_Type, xlab= "Event type",
ylab="Count of casualties", main="Casualties by Event Type")
The table of events by economic loss shows the leading causes are flood, tornado and hail. Again, these figures are an underestimate of the total numbers but do give a relative comparison for the various types of events.
xt <- xtable(TopTenEc)
print(xt, type="html", decimal.mark=",")
Event_Type | Economic_Loss_millions | |
---|---|---|
1 | FLOOD | 149087.72 |
2 | TORNADO | 25263.73 |
3 | HAIL | 18211.89 |
4 | FLASH FLOOD | 17005.10 |
5 | DROUGHT | 14455.35 |
6 | TROPICAL STORM | 8331.13 |
7 | HIGH WIND | 6031.79 |
8 | WILDFIRE | 5082.34 |
9 | STORM SURGE/TIDE | 4642.38 |
10 | THUNDERSTORM WIND | 3780.99 |
These figures are also shown in the plot of economic losses.
barplot(TopTenEc$Economic_Loss_millions, names.arg = TopTenEc$Event_Type,xlab=
"Event type", ylab="Economic losses ($millions)", main="Economic Losses by Event Type")
Priorities for government spending should look at flood mitigation measures (eg levees, township locations) and early warning sytems for tornadoes and hail. Education and infrastructure to assist people to cope with heat would be useful.