This document analyses which types of weather events are most harmful to population health and which have the greatest economic consequences. The analysis covers weather events in the USA between 1950 and November 2011.

Data Processing

Load Data and group data

The raw data is loaded and summed by event type; for economic impact it is additionally grouped by the unit of measurement of the damage figure.

# Read the raw storm data set from the working directory
stormData <- read.csv("repdata-data-StormData.csv")

# Total injuries and total fatalities for every event type
injuryData <- with(stormData, aggregate(INJURIES, list(EVTYPE), sum))
fatalData <- with(stormData, aggregate(FATALITIES, list(EVTYPE), sum))

# Total property and crop damage for every (event type, damage unit) pair;
# the unit column is kept so the figures can be scaled afterwards
propData <- with(stormData, aggregate(PROPDMG, list(EVTYPE, PROPDMGEXP), sum))
cropData <- with(stormData, aggregate(CROPDMG, list(EVTYPE, CROPDMGEXP), sum))

# Remove the raw table now that the four aggregates have been computed
rm(stormData)

Transform and regroup data

All rows with zero totals are removed. Since the same weather event sometimes appears with different letter casing, all event names are converted to upper case and the data is regrouped. For economic impact, values are multiplied by a thousand, a million or a billion depending on the unit of measurement.

# Clean up the per-event injury totals
names(injuryData) <- c('name', 'value')
# upper-cased copy of the event label, used as the regrouping key
injuryData$upName <- toupper(injuryData$name)
injuryData$value <- as.numeric(injuryData$value)

# keep only the events that have a positive injury count
hasInjuries <- injuryData$value > 0
injuryData <- injuryData[hasInjuries, ]

# merge rows whose event labels differ only by letter case
injuryData <- aggregate(injuryData$value, by = list(injuryData$upName), FUN = sum)
names(injuryData) <- c('name', 'injuries')

# Clean up the per-event fatality totals
names(fatalData) <- c('name', 'value')
# upper-cased copy of the event label, used as the regrouping key
fatalData$upName <- toupper(fatalData$name)
fatalData$value <- as.numeric(fatalData$value)

# keep only the events that have a positive fatality count
hasFatalities <- fatalData$value > 0
fatalData <- fatalData[hasFatalities, ]

# merge rows whose event labels differ only by letter case
fatalData <- aggregate(fatalData$value, by = list(fatalData$upName), FUN = sum)
names(fatalData) <- c('name', 'fatalities')

# Clean up the property damage totals
names(propData) <- c('name', 'mulLabel', 'value')
# drop (event, unit) pairs with no recorded damage
propData <- propData[propData$value > 0, ]

# upper-case both the event label and the unit label (so k and K match)
propData$nameUp <- toupper(propData$name)
propData$mulLabelUp <- toupper(propData$mulLabel)

# translate the unit label into a numeric factor; any label other than
# K, M or B leaves the value unscaled (factor 1)
unitPos <- match(propData$mulLabelUp, c('K', 'M', 'B'))
propData$mul <- ifelse(is.na(unitPos), 1, c(1000, 1000000, 1000000000)[unitPos])

# scale to plain dollar amounts and total them per upper-cased event label
propData$flatValue <- propData$mul * propData$value
propData <- aggregate(propData$flatValue, by = list(propData$nameUp), FUN = sum)
names(propData) <- c('name', 'propertyDamage')

# Clean up the crop damage totals
names(cropData) <- c('name', 'mulLabel', 'value')
# drop (event, unit) pairs with no recorded damage
cropData <- cropData[cropData$value > 0, ]

# upper-case both the event label and the unit label (so k and K match)
cropData$nameUp <- toupper(cropData$name)
cropData$mulLabelUp <- toupper(cropData$mulLabel)

# translate the unit label into a numeric factor; any label other than
# K, M or B leaves the value unscaled (factor 1)
unitPos <- match(cropData$mulLabelUp, c('K', 'M', 'B'))
cropData$mul <- ifelse(is.na(unitPos), 1, c(1000, 1000000, 1000000000)[unitPos])

# scale to plain dollar amounts and total them per upper-cased event label
cropData$flatValue <- cropData$mul * cropData$value
cropData <- aggregate(cropData$flatValue, by = list(cropData$nameUp), FUN = sum)
names(cropData) <- c('name', 'cropDamage')

Plot 3 weather events with the most impact

Economic data has been rescaled depending on its magnitude to make the graph more readable.

# plot human casualties
library(ggplot2)

# Outer join on the shared 'name' column so that an event present in only
# one of the two tables is still kept.
humanData <- merge(fatalData, injuryData, all = TRUE)
# An event missing from one table comes back as NA after the outer join
# (zero totals were filtered out of both tables before this point). Treat
# those as 0 so that ordering and plotting work on real numbers.
humanData$fatalities[is.na(humanData$fatalities)] <- 0
humanData$injuries[is.na(humanData$injuries)] <- 0

# Rank by fatalities, break ties by injuries, and keep the three worst events.
humanData <- humanData[order(-humanData$fatalities, -humanData$injuries), ]
# head() returns fewer rows when fewer than three events exist, whereas
# [1:3, ] would pad the result with all-NA rows.
humanData <- head(humanData, 3)
humanData$name <- as.factor(humanData$name)

# Long format: one row per (event, casualty type) for the grouped plot.
fatalities <- data.frame(name = humanData$name,
                         value = humanData$fatalities,
                         type = "Fatalities")
injuries <- data.frame(name = humanData$name,
                       value = humanData$injuries,
                       type = "Injuries")
total <- rbind(fatalities, injuries)

ggplot(data = total, aes(x = name, y = value, group = type)) +
    geom_line(aes(color = type)) +
    xlab("Weather event") +
    ylab("Number of people") +
    ggtitle("Top 3 weather event with most human casualties (Figure 1)")

# plot economy impact

# Outer join on the shared 'name' column so that an event with only
# property damage or only crop damage is still kept.
economyData <- merge(propData, cropData, all = TRUE)
# An event missing from one table comes back as NA after the outer join.
# Left as NA it would make max() below return NA, and the if() that picks
# the display unit would stop with "missing value where TRUE/FALSE needed".
economyData$propertyDamage[is.na(economyData$propertyDamage)] <- 0
economyData$cropDamage[is.na(economyData$cropDamage)] <- 0

# Rank by property damage, break ties by crop damage, keep the top three.
economyData <- economyData[order(-economyData$propertyDamage, -economyData$cropDamage), ]
# head() returns fewer rows when fewer than three events exist, whereas
# [1:3, ] would pad the result with all-NA rows.
economyData <- head(economyData, 3)
economyData$name <- as.factor(economyData$name)

# Long format: one row per (event, damage type) for the grouped plot.
propertyDamage <- data.frame(name = economyData$name,
                             value = economyData$propertyDamage,
                             type = "Property damage")
cropDamage <- data.frame(name = economyData$name,
                         value = economyData$cropDamage,
                         type = "Crops damage")
total <- rbind(propertyDamage, cropDamage)

# Rescale the values to the largest unit that fits the maximum, and record
# that unit for the axis label. Values below one thousand stay in dollars.
unit <- ''
maxValue <- max(total$value)
if (maxValue >= 1000000000) {
    unit <- '(Billions)'
    total$value <- total$value / 1000000000
} else if (maxValue >= 1000000) {
    unit <- '(Millions)'
    total$value <- total$value / 1000000
} else if (maxValue >= 1000) {
    unit <- '(Thousands)'
    total$value <- total$value / 1000
}

ggplot(data = total, aes(x = name, y = value, group = type)) +
    geom_line(aes(color = type)) +
    xlab("Weather event") +
    # paste() already inserts one space; trimws() removes the trailing one
    # left behind when no unit applies
    ylab(trimws(paste("U.S. dollars", unit))) +
    ggtitle("Top 3 weather event with most economic impact (Figure 2)")

Results

According to Figure 1, the three weather events most destructive to the human population are flash floods, excessive heat and tornadoes; the event causing the most casualties is the tornado, with over 5,000 deaths and nearly 100,000 injuries.

According to Figure 2, the three weather events most destructive to the economy are floods, hurricanes and tornadoes; the event with the greatest impact is the flood, causing over 144 billion U.S. dollars of property damage and nearly 6 billion U.S. dollars of crop damage.