The US National Oceanic and Atmospheric Administration (NOAA) maintains a database that records natural events in the United States and the damage they have caused to both property and people.
The seven main features considered in this report are:
1. EVTYPE - Type of event
2. FATALITIES - Number of fatalities
3. INJURIES - Number of injuries
4. PROPDMG - Measured damage to properties
5. CROPDMG - Measured damage to crops
6. PROPDMGEXP - Magnitude of the damage to properties
(expressed as a multiplier)
7. CROPDMGEXP - Magnitude of the damage to crops (expressed
as a multiplier)
The following calculations create the variables that are used later for plotting:
library("ggplot2")
library("gridExtra")
# Load the raw storm events data
dat <- read.csv("repdata_data_StormData.csv")

# Calculation for Number of Fatalities and Injuries for each Event:
# sum per event type, rank from highest to lowest, keep the top ten.
fat <- aggregate(dat$FATALITIES, list(dat$EVTYPE), sum)
inj <- aggregate(dat$INJURIES, list(dat$EVTYPE), sum)
fat <- fat[order(fat$x, decreasing = TRUE), ]
inj <- inj[order(inj$x, decreasing = TRUE), ]
# head() instead of [1:10, ]: with fewer than ten event types the index
# form would pad the result with all-NA rows.
fat <- head(fat, 10)
inj <- head(inj, 10)

# Placeholder columns for the converted damage amounts. They are numeric
# rather than "" so that amounts stored into them are not coerced to text.
dat$PROPDMGEQ <- numeric(nrow(dat))
dat$CROPDMGEQ <- numeric(nrow(dat))
The measured property and crop damage values were converted to actual amounts by multiplying each measured value by the magnitude given in the corresponding multiplier column (PROPDMGEXP or CROPDMGEXP). The conversion was done as follows:
# Conversion of the expressions to actual calculated amounts
#
# damage_multiplier() maps a vector of exponent codes (PROPDMGEXP or
# CROPDMGEXP) to numeric multipliers, case-insensitively:
#   h -> 1e2, k -> 1e3, m -> 1e6, b -> 1e9, digit d (0 to 8) -> 10^d
# Any other code (blank, missing, unrecognised) gets a multiplier of 1,
# i.e. the measured value is kept unchanged.
# Returns an unnamed numeric vector of the same length as `exp_code`.
damage_multiplier <- function(exp_code) {
  known <- c(
    h = 1e2, k = 1e3, m = 1e6, b = 1e9,
    "0" = 1, "1" = 1e1, "2" = 1e2, "3" = 1e3, "4" = 1e4,
    "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8
  )
  # as.character() also handles factor columns; tolower() folds K/k, M/m, ...
  mult <- unname(known[tolower(as.character(exp_code))])
  # Codes without an entry in the table come back as NA from the lookup
  mult[is.na(mult)] <- 1
  mult
}

# One vectorised multiplication per column. Both columns share the same
# table, so the "h"/"H" (hundreds) code is applied to property damage too.
dat$PROPDMGEQ <- dat$PROPDMG * damage_multiplier(dat$PROPDMGEXP)
dat$CROPDMGEQ <- dat$CROPDMG * damage_multiplier(dat$CROPDMGEXP)
# Calculation of the total damages of each event
# as.double() guards against the converted amounts being stored as text.
dat$PROPDMGEQ <- as.double(dat$PROPDMGEQ)
dat$CROPDMGEQ <- as.double(dat$CROPDMGEQ)

# Sum the amounts per event type (result columns: Group.1, x)
propag <- aggregate(dat$PROPDMGEQ, list(dat$EVTYPE), sum)
cropag <- aggregate(dat$CROPDMGEQ, list(dat$EVTYPE), sum)

# Rank from the largest total down
propag <- propag[order(propag$x, decreasing = TRUE), ]
cropag <- cropag[order(cropag$x, decreasing = TRUE), ]

# Keep the ten largest. head() instead of [1:10, ]: with fewer than ten
# event types the index form would pad the result with all-NA rows.
propag <- head(propag, 10)
cropag <- head(cropag, 10)
The results were plotted as follows:
# Builds one bar chart from an aggregated table with columns Group.1
# (event type) and x (total): bars are ordered by decreasing x and filled
# by event type, with the legend titled "Event Types".
event_bar_chart <- function(totals, title, y_label) {
  ggplot(totals, aes(reorder(Group.1, -x), x, fill = Group.1)) +
    geom_bar(stat = "identity") +
    scale_fill_discrete(name = "Event Types") +
    ggtitle(title) +
    xlab("Events") +
    ylab(y_label)
}

# Figure 1: fatalities (top panel) and injuries (bottom panel)
p <- event_bar_chart(fat, "Top 10 Most Fatal Events", "No. of Fatalities")
p1 <- event_bar_chart(
  inj,
  "Top 10 Events with the Most Injuries",
  "No. of Injuries"
)
grid.arrange(p, p1, nrow = 2)

# Figure 2: property damage (top panel) and crop damage (bottom panel)
p2 <- event_bar_chart(
  propag,
  "Top 10 Events with the Most Property Damage Recorded",
  "Cost of Property Damage ($)"
)
p3 <- event_bar_chart(
  cropag,
  "Top 10 Events with the Most Damage Done to Crops",
  "Cost of Crop Damage ($)"
)
grid.arrange(p2, p3, nrow = 2)
The results show which ten weather events wreak the most havoc. Concerned government officials can prepare for disasters according to this ranking of damage to property and human life. Each state can prioritize its preparations for the events most likely to occur in its region, based on this report.