The NOAA storm data are analyzed here to give a sense of the severity of typical events for various types of weather. Some events happen more often in some areas, while other events are very infrequent, so using averages (means) in this report helps us get a sense of what to expect if we know the typical number of events of a given type of weather in a given locale.
The analysis of the NOAA storm data shows that heat, hurricanes/tropical storms, ice and tornados are the weather events most harmful to human health. In general, weather events cause more injuries than fatalities, though the opposite is true for cold temperatures. The most economically expensive events are ice, hurricanes/tropical storms, fire and floods. Ice might not seem as dramatic as a hurricane, but it can have devastating effects on crops, which are counted in the tally of economic losses.
To begin, the data are loaded into R for processing. Each of the almost 1 million rows in the data set represents a single storm event. We are interested in the event types and their associated fatalities, injuries, property damage and crop damage, so we extract just those columns.
# Read the bzip2-compressed storm CSV, then keep seven columns selected by
# position (column 8 and columns 23 through 28). Later code reads EVTYPE,
# FATALITIES and INJURIES from this subset by name and the remaining four
# columns by position (4 to 7) as damage values and their magnitude codes.
storm_data <- read.csv(
  file = bzfile(description = "repdata-data-StormData.csv.bz2")
)
storm_damage <- storm_data[, c(8, 23:28)]
Then we group data manually into 14 categories of interest by searching the event type for appropriate keywords. For example, for “THUNDERSTORM”, we include lightning. The data contain many repeated entries due to typos or similar words used to describe phenomena. Here, we reclassify the data into broader categories as well. For example, the category of “SNOW” includes snow squalls and combinations of snow and wind.
# Select the rows of `data` whose EVTYPE contains any of `keywords`.
#
# Matching is case-insensitive, so each keyword is written once instead of
# enumerating "SNOW"/"Snow"/"snow" by hand. The hand-written lists missed some
# spellings (e.g. a value such as "Tstm Wind" or "Wintry Mix") and relied on
# loose fragments such as "eat", which also matches "Weather".
#
# keywords: character vector of regular-expression fragments, OR-ed together.
# data:     data frame with an EVTYPE column (defaults to storm_damage).
# Returns the matching rows of `data` with all columns kept.
select_events <- function(keywords, data = storm_damage) {
  pattern <- paste(keywords, collapse = "|")
  data[grepl(pattern, data$EVTYPE, ignore.case = TRUE), ]
}

# Each section below produces three kinds of objects:
#   <category>_data        the matching rows, reused for the cost calculation
#   <category>_fatalities  mean fatalities per event in the category
#   <category>_injuries    mean injuries per event in the category
#   <category>_harm        a one-row matrix (event, fatalities, injuries)
# A single event may fall into several categories (for example a thunderstorm
# wind event matches both THUNDERSTORM and WIND).

# THUNDERSTORM (includes lightning)
thunder_data <- select_events(c("THUNDER", "TSTM", "LIGHTNING"))
thunder_fatalities <- mean(thunder_data$FATALITIES)
thunder_injuries <- mean(thunder_data$INJURIES)
thunder_harm <- cbind(
  event = "THUNDERSTORM",
  fatalities = thunder_fatalities,
  injuries = thunder_injuries
)

# HEAT
heat_data <- select_events("HEAT")
heat_fatalities <- mean(heat_data$FATALITIES)
heat_injuries <- mean(heat_data$INJURIES)
heat_harm <- cbind(
  event = "HEAT",
  fatalities = heat_fatalities,
  injuries = heat_injuries
)

# COLD
cold_data <- select_events("COLD")
cold_fatalities <- mean(cold_data$FATALITIES)
cold_injuries <- mean(cold_data$INJURIES)
cold_harm <- cbind(
  event = "COLD",
  fatalities = cold_fatalities,
  injuries = cold_injuries
)

# ICE (includes frost)
ice_data <- select_events(c("ICE", "FROST"))
ice_fatalities <- mean(ice_data$FATALITIES)
ice_injuries <- mean(ice_data$INJURIES)
ice_harm <- cbind(
  event = "ICE",
  fatalities = ice_fatalities,
  injuries = ice_injuries
)

# HAIL
hail_data <- select_events("HAIL")
hail_fatalities <- mean(hail_data$FATALITIES)
hail_injuries <- mean(hail_data$INJURIES)
hail_harm <- cbind(
  event = "HAIL",
  fatalities = hail_fatalities,
  injuries = hail_injuries
)

# FLOOD
flood_data <- select_events("FLOOD")
flood_fatalities <- mean(flood_data$FATALITIES)
flood_injuries <- mean(flood_data$INJURIES)
flood_harm <- cbind(
  event = "FLOOD",
  fatalities = flood_fatalities,
  injuries = flood_injuries
)

# SNOW (and winter storms and wintry mix precipitation)
snow_data <- select_events(c("SNOW", "MIX"))
snow_fatalities <- mean(snow_data$FATALITIES)
snow_injuries <- mean(snow_data$INJURIES)
snow_harm <- cbind(
  event = "SNOW",
  fatalities = snow_fatalities,
  injuries = snow_injuries
)

# RAIN
rain_data <- select_events("RAIN")
rain_fatalities <- mean(rain_data$FATALITIES)
rain_injuries <- mean(rain_data$INJURIES)
rain_harm <- cbind(
  event = "RAIN",
  fatalities = rain_fatalities,
  injuries = rain_injuries
)

# HURRICANE and tropical storms
hurricane_data <- select_events(c("HURRICANE", "TROPICAL"))
hurricane_fatalities <- mean(hurricane_data$FATALITIES)
hurricane_injuries <- mean(hurricane_data$INJURIES)
hurricane_harm <- cbind(
  event = "HURRICANE/TROPICAL STORM",
  fatalities = hurricane_fatalities,
  injuries = hurricane_injuries
)

# TORNADO
tornado_data <- select_events("TORNADO")
tornado_fatalities <- mean(tornado_data$FATALITIES)
tornado_injuries <- mean(tornado_data$INJURIES)
tornado_harm <- cbind(
  event = "TORNADO",
  fatalities = tornado_fatalities,
  injuries = tornado_injuries
)

# DROUGHT
drought_data <- select_events("DROUGHT")
drought_fatalities <- mean(drought_data$FATALITIES)
drought_injuries <- mean(drought_data$INJURIES)
drought_harm <- cbind(
  event = "DROUGHT",
  fatalities = drought_fatalities,
  injuries = drought_injuries
)

# FIRE
fire_data <- select_events("FIRE")
fire_fatalities <- mean(fire_data$FATALITIES)
fire_injuries <- mean(fire_data$INJURIES)
fire_harm <- cbind(
  event = "FIRE",
  fatalities = fire_fatalities,
  injuries = fire_injuries
)

# SURF (dangerous surf and high seas)
# "SEAS" is anchored as a whole word: as a bare fragment it also matches any
# event type containing "UNSEASONABLY" or "Unseasonable", which would pull
# temperature events into the surf category.
surf_data <- select_events(c("SURF", "\\bSEAS\\b"))
surf_fatalities <- mean(surf_data$FATALITIES)
surf_injuries <- mean(surf_data$INJURIES)
surf_harm <- cbind(
  event = "SURF",
  fatalities = surf_fatalities,
  injuries = surf_injuries
)

# WIND
wind_data <- select_events("WIND")
wind_fatalities <- mean(wind_data$FATALITIES)
wind_injuries <- mean(wind_data$INJURIES)
wind_harm <- cbind(
  event = "WIND",
  fatalities = wind_fatalities,
  injuries = wind_injuries
)
We’ve calculated the mean number of fatalities and injuries for each storm event and now combine the data into a data set that has the events and their injuries or fatalities.
# Stack the fourteen one-row category summaries into a single data frame with
# the columns event, fatalities and injuries.
harm <- as.data.frame(rbind(
  thunder_harm, snow_harm, heat_harm, cold_harm, ice_harm, hail_harm,
  flood_harm, rain_harm, surf_harm, fire_harm, drought_harm, tornado_harm,
  hurricane_harm, wind_harm
))
# Columns 2 and 3 (the two means) are converted to numeric by going through
# character first, which also works if they arrived as factors.
harm[2:3] <- lapply(
  harm[2:3],
  function(column) as.numeric(as.character(column))
)
We must also calculate the economic costs of events in terms of property damage and damage to crops. The data contain a base value and an “exponent” code that gives the order of magnitude of the cost (e.g., “K” is “thousands”). Therefore, the total losses to property and crops have to be determined by multiplying each base value by the multiplier that its “exponent” code stands for. For simplicity, the damage to property and the damage to crops are added together.
# Multipliers for the damage magnitude codes: "K"/"k" = thousands,
# "M"/"m" = millions, "B" = billions. Any other code (including an empty one)
# is treated as unclear and contributes nothing to the total.
factors <- list(
  "K" = 1000, "k" = 1000,
  "M" = 1000000, "m" = 1000000,
  "B" = 1000000000
)

# Translate a vector of magnitude codes into numeric multipliers.
#
# codes:  character or factor vector of magnitude codes.
# lookup: named list mapping a code to its multiplier (defaults to factors).
# Returns a numeric vector as long as `codes`; codes that are not names of
# `lookup` (exact, case-sensitive match) map to 0.
exponent_multiplier <- function(codes, lookup = factors) {
  position <- match(as.character(codes), names(lookup))
  multiplier <- unlist(lookup, use.names = FALSE)[position]
  multiplier[is.na(multiplier)] <- 0 # excluding unclear data
  multiplier
}

# Mean combined cost per event for one category.
#
# event_rows: one of the *_data subsets. Columns are read by position:
#             4 and 6 are base amounts, 5 and 7 are their magnitude codes
#             (presumably property damage, then crop damage -- confirm
#             against the column selection that builds storm_damage).
# Returns sum(base4 * multiplier5 + base6 * multiplier7) / number of rows.
#
# Each base amount is scaled by its OWN code. The earlier per-row loops
# assigned the second code's multiplier to the first amount's variable, which
# overwrote it and left the second multiplier stale or undefined.
mean_event_cost <- function(event_rows) {
  first_amount <- event_rows[[4]] * exponent_multiplier(event_rows[[5]])
  second_amount <- event_rows[[6]] * exponent_multiplier(event_rows[[7]])
  sum(first_amount + second_amount) / nrow(event_rows)
}

# Category label -> rows belonging to that category, in the order in which
# the categories appear in the cost table.
category_rows <- list(
  "COLD" = cold_data,
  "DROUGHT" = drought_data,
  "FIRE" = fire_data,
  "FLOOD" = flood_data,
  "HAIL" = hail_data,
  "HEAT" = heat_data,
  "HURRICANE/TROPICAL STORM" = hurricane_data,
  "ICE" = ice_data,
  "RAIN" = rain_data,
  "SNOW" = snow_data,
  "SURF" = surf_data,
  "THUNDERSTORM" = thunder_data,
  "TORNADO" = tornado_data,
  "WIND" = wind_data
)

# One row per category: the label and the mean cost per event, rounded to the
# nearest dollar. The cost column stays numeric so that it can be plotted on a
# continuous axis; formatting it as text is left to whatever displays it.
cost <- data.frame(
  event = names(category_rows),
  cost = round(unname(vapply(category_rows, mean_event_cost, numeric(1)))),
  row.names = NULL,
  stringsAsFactors = FALSE
)
This bar chart compares the average injuries and fatalities per event for the different storm categories. It shows that the most harmful events are heat, hurricanes/tropical storms, ice, and tornados. Heat in particular causes the most injuries and the most deaths per event.
library(reshape2)
library(ggplot2)
# Reshape harm to long form: one row per (event, harm type) pair, with the
# harm type ("fatalities" or "injuries") in `harm` and its mean in `count`.
# The id column is named explicitly instead of letting melt() guess it.
harm.molten <- melt(
  harm,
  id.vars = "event",
  value.name = "count",
  variable.name = "harm"
)
# Side-by-side bars per event, one bar for each harm type. Built with
# ggplot() + geom_bar(stat = "identity") because qplot() is deprecated in
# current ggplot2 releases.
q <- ggplot(harm.molten, aes(x = event, y = count, fill = harm)) +
  geom_bar(stat = "identity", position = "dodge")
# Rotate the event labels so the longer category names do not overlap.
q + theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Average Injuries and Fatalities Per Weather Event")
The following bar chart shows the economic costs of weather events, which include damage to property and crops. In this case, most weather events cause several thousand dollars of damage, with the most costly being ice, hurricanes/tropical storms, fire, and flooding. The cost is in US dollars.
# Plot from a copy whose cost column is guaranteed numeric: if cost$cost
# arrives as formatted text (or a factor), the bars would otherwise be placed
# on a discrete axis instead of having heights proportional to the cost.
cost_for_plot <- cost
cost_for_plot$cost <- as.numeric(as.character(cost_for_plot$cost))
# One bar per event category. Built with ggplot() + geom_bar(stat =
# "identity") because qplot() is deprecated in current ggplot2 releases.
q <- ggplot(cost_for_plot, aes(x = event, y = cost)) +
  geom_bar(stat = "identity", position = "dodge")
# Rotate the event labels so the longer category names do not overlap.
q + ylab("cost per event (US dollars)") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(title = "Cost Per Weather Event")