Severe weather events harm communities and municipalities in terms of both public health and economic cost. This analysis identifies the most harmful event type in each respect. First, I separately calculate total fatalities and total injuries for each type of event. The result shows that Tornado is the most harmful event type, with the highest totals of both fatalities and injuries. Second, I calculate total property damage and total crop damage for each type of event. The result shows that Tornado is the most harmful in terms of property damage and Drought is the most harmful in terms of crop damage.
I unzip the data and load it into R with the functions “bunzip2” and “read.csv”.
library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.21.0 (2016-10-30) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
## The following objects are masked from 'package:base':
##
## attach, detach, gc, load, save
## R.utils v2.4.0 (2016-09-13) successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
## The following object is masked from 'package:RevoMods':
##
## timestamp
## The following object is masked from 'package:utils':
##
## timestamp
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, parse, warnings
# Decompress the raw storm archive and load it into `StormData`.
# The archive is expanded only when the CSV is not already on disk, and the
# .bz2 source is kept (`remove = FALSE`). By default bunzip2() deletes its
# input and stops when the output file already exists, which makes a second
# run of this script fail.
if (!file.exists("repdata_data_StormData.csv")) {
  bunzip2(
    "repdata_data_StormData.csv.bz2",
    "repdata_data_StormData.csv",
    remove = FALSE
  )
}
StormData <- read.csv("repdata_data_StormData.csv")
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec =
## dec, : EOF within quoted string
The text in some rows is badly formatted: the fields contain free-form sentences, which would be hard to use because parsing the text takes a long time and some of those rows lack a monetary value. Hence, I decide to remove them and use only the rows with an exact monetary value. Only the first 547363 rows are in a usable format, so I subset them from StormData.
# Keep only the leading 547363 rows of the raw table; every column is retained.
StormData_monetary <- StormData[seq_len(547363), ]
The variables are of class “factor”. To use them, I make several transformations: transform EVTYPE from class “factor” to “character”; transform FATALITIES and INJURIES from class “factor” to “numeric”; transform PROPDMG and CROPDMG from class “factor” to “numeric”.
# Event type is stored as plain text.
StormData_monetary[["EVTYPE"]] <- as.character(StormData_monetary[["EVTYPE"]])

# The four count/amount columns become numeric. Each is converted to
# character first, then to numeric, so the result is the value spelled out
# in the column rather than an internal factor code.
StormData_monetary[c("FATALITIES", "INJURIES", "PROPDMG", "CROPDMG")] <- lapply(
  StormData_monetary[c("FATALITIES", "INJURIES", "PROPDMG", "CROPDMG")],
  function(column) as.numeric(as.character(column))
)
For PROPDMG and CROPDMG, the alphabetical characters in PROPDMGEXP and CROPDMGEXP represent the magnitude of the values, so I multiply each value by its magnitude.
# Number of rows in the cleaned subset.
StormData_num <- as.numeric(nrow(StormData_monetary))

# Multiplier applied for each exponent code in PROPDMGEXP / CROPDMGEXP.
# Letters are magnitudes (h/H = hundred, k/K = thousand, m/M = million,
# B = billion), a digit d means 10^d, and "+", "-" and "?" zero the amount.
# One table serves both columns so the two are always scaled the same way.
damage_multiplier <- c(
  "h" = 1e2, "H" = 1e2,
  "k" = 1e3, "K" = 1e3,
  "m" = 1e6, "M" = 1e6,
  "B" = 1e9,
  "0" = 1e0, "1" = 1e1, "2" = 1e2, "3" = 1e3, "4" = 1e4,
  "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8,
  "+" = 0, "-" = 0, "?" = 0
)

# Scale a vector of damage amounts by the multiplier of its exponent codes.
#   amount   numeric vector of damage figures
#   exponent factor or character vector of exponent codes, same length
# Returns a numeric vector the same length as `amount`. Codes that are not in
# the table (blank and NA included) leave the amount unchanged.
scale_damage <- function(amount, exponent) {
  multiplier <- unname(damage_multiplier[as.character(exponent)])
  multiplier[is.na(multiplier)] <- 1
  amount * multiplier
}

# Whole-column arithmetic replaces the row-by-row loops: same result for
# every code except "6", which is now 10^6 instead of 10^7.
StormData_monetary$PROPDMG <- scale_damage(
  StormData_monetary$PROPDMG,
  StormData_monetary$PROPDMGEXP
)
StormData_monetary$CROPDMG <- scale_damage(
  StormData_monetary$CROPDMG,
  StormData_monetary$CROPDMGEXP
)
I calculate the sums of FATALITIES and INJURIES for each EVTYPE.
# Total fatalities per event type.
population_health_FATALITIES <- aggregate(
  FATALITIES ~ EVTYPE,
  data = StormData_monetary,
  FUN = sum
)

# Total injuries per event type.
population_health_INJURIES <- aggregate(
  INJURIES ~ EVTYPE,
  data = StormData_monetary,
  FUN = sum
)
Then I sort them in decreasing order and extract the 10 largest.
# Rank event types by total fatalities, largest first, and keep the top 10.
# head() returns only rows that exist, so a table with fewer than 10 event
# types is not padded with all-NA rows the way `[1:10, ]` would pad it.
FATALITIES_order <- population_health_FATALITIES[
  order(population_health_FATALITIES$FATALITIES, decreasing = TRUE),
]
FATALITIES_order10 <- head(FATALITIES_order, 10)

# Same ranking for total injuries.
INJURIES_order <- population_health_INJURIES[
  order(population_health_INJURIES$INJURIES, decreasing = TRUE),
]
INJURIES_order10 <- head(INJURIES_order, 10)
Then I plot them in one figure to show the most harmful events together with their fatalities and injuries.
library(ggplot2)
library(gridExtra)
# Base plots: event type on the x aesthetic for each top-10 table.
g1 <- ggplot(FATALITIES_order10, aes(x = EVTYPE))
g2 <- ggplot(INJURIES_order10, aes(x = EVTYPE))

# Two stacked panels. Each bar is weighted by the event type's total and the
# coordinates are flipped so the event names run along the vertical axis.
grid.arrange(
  g1 +
    geom_bar(aes(weight = FATALITIES)) +
    coord_flip() +
    ggtitle("Highest Fatalities"),
  g2 +
    geom_bar(aes(weight = INJURIES)) +
    coord_flip() +
    ggtitle("Highest Injuries"),
  nrow = 2
)
The figure shows that Tornado is the highest in both fatalities and injuries. I then do the same for PROPDMG and CROPDMG.
# Total property damage and total crop damage per event type.
property_damage <- aggregate(PROPDMG ~ EVTYPE, StormData_monetary, FUN = sum)
crop_damage <- aggregate(CROPDMG ~ EVTYPE, StormData_monetary, FUN = sum)

# Rank event types by total damage, largest first, and keep the top 10.
# head() returns only rows that exist, so a table with fewer than 10 event
# types is not padded with all-NA rows the way `[1:10, ]` would pad it.
property_order <- property_damage[
  order(property_damage$PROPDMG, decreasing = TRUE),
]
property_order10 <- head(property_order, 10)
crop_order <- crop_damage[
  order(crop_damage$CROPDMG, decreasing = TRUE),
]
crop_order10 <- head(crop_order, 10)

# Two stacked panels. Each bar is weighted by the event type's total and the
# coordinates are flipped so the event names run along the vertical axis.
g3 <- ggplot(property_order10, aes(EVTYPE))
g4 <- ggplot(crop_order10, aes(EVTYPE))
grid.arrange(
  g3 +
    geom_bar(aes(weight = PROPDMG)) +
    coord_flip() +
    ggtitle("Highest Property Damage"),
  g4 +
    geom_bar(aes(weight = CROPDMG)) +
    coord_flip() +
    ggtitle("Highest Crop Damage"),
  nrow = 2
)
The figure shows that Tornado is the highest in property damage and Drought is the highest in crop damage.