The Most Harmful Weather Events in the US for Population Health and the Economy

Synopsis

Severe weather events harm communities and municipalities in terms of both public health and the economy. In this analysis I try to find the most harmful type of event in each respect. First, I separately total fatalities and injuries by event type. The result shows that Tornado is the most harmful, with both the highest fatalities and the highest injuries. Second, I total property damage and crop damage by event type. The result shows that Tornado is the most harmful in terms of property damage, and Drought is the most harmful in terms of crop damage.

Data Processing

I decompress the data with the function “bunzip2” and load it into R with the function “read.csv”.

library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.21.0 (2016-10-30) successfully loaded. See ?R.oo for help.
## 
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
## 
##     getClasses, getMethods
## The following objects are masked from 'package:base':
## 
##     attach, detach, gc, load, save
## R.utils v2.4.0 (2016-09-13) successfully loaded. See ?R.utils for help.
## 
## Attaching package: 'R.utils'
## The following object is masked from 'package:RevoMods':
## 
##     timestamp
## The following object is masked from 'package:utils':
## 
##     timestamp
## The following objects are masked from 'package:base':
## 
##     cat, commandArgs, getOption, inherits, isOpen, parse, warnings
# Decompress the raw storm data only when the CSV is not already on disk, and
# keep the archive (remove = FALSE). By default bunzip2() deletes its input
# file and refuses to overwrite an existing output file, so re-running the
# report would otherwise fail on the second run.
if (!file.exists("repdata_data_StormData.csv")) {
  bunzip2("repdata_data_StormData.csv.bz2", "repdata_data_StormData.csv",
          remove = FALSE)
}

# Load the decompressed CSV into a data frame.
StormData <- read.csv("repdata_data_StormData.csv")
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec =
## dec, : EOF within quoted string

Some rows are badly formatted: their fields contain free-text sentences rather than values. They would be hard to use, because parsing the text takes a long time and some of them lack a monetary value. Hence, I decided to remove them and use only the rows with an exact monetary value. Only the first 547363 rows are in a usable format, so I subset them from StormData.

# Number of leading rows to keep (the rows described above as usable).
usable_rows <- 547363

# Keep only those leading rows. The count is clamped to the number of rows
# actually present: indexing a data frame past its last row does not fail but
# silently appends all-NA rows, which would break the later processing steps.
StormData_monetary <- StormData[seq_len(min(usable_rows, nrow(StormData))), ]

The variables are of class “factor”. To use them, I make several transformations: transform EVTYPE from class “factor” to “character”; transform FATALITIES & INJURIES from class “factor” to “numeric”; transform PROPDMG & CROPDMG from class “factor” to “numeric”.

# Event type labels become plain character strings.
StormData_monetary[["EVTYPE"]] <- as.character(StormData_monetary[["EVTYPE"]])

# Casualty counts and damage amounts become numeric. Each column goes through
# as.character() first so that the printed value is parsed, not the internal
# integer code of a factor level.
count_and_damage_cols <- c("FATALITIES", "INJURIES", "PROPDMG", "CROPDMG")
StormData_monetary[count_and_damage_cols] <- lapply(
  StormData_monetary[count_and_damage_cols],
  function(column) as.numeric(as.character(column))
)

For PROPDMG and CROPDMG, the alphabetical character in PROPDMGEXP and CROPDMGEXP represents the magnitude of the value, so I multiply each value by its magnitude.

# Number of rows in the working data set (also used by the CROPDMG step below).
StormData_num <- as.numeric(nrow(StormData_monetary))

# Multiplier applied to PROPDMG for each PROPDMGEXP code:
#   - letters: h/H = hundred, K = thousand, m/M = million, B = billion
#   - digits 0-8: the power of ten (so "6" is 1e6, in line with "5" and "7")
#   - "+", "-" and "?": the damage amount is set to zero
prop_multiplier <- c(
  "K" = 1e3, "M" = 1e6, "m" = 1e6, "B" = 1e9,
  "H" = 1e2, "h" = 1e2,
  "0" = 1, "1" = 1e1, "2" = 1e2, "3" = 1e3, "4" = 1e4,
  "5" = 1e5, "6" = 1e6, "7" = 1e7, "8" = 1e8,
  "+" = 0, "-" = 0, "?" = 0
)

# Look up the multiplier of every row at once instead of looping row by row.
# as.character() is needed because PROPDMGEXP may be a factor, and indexing by
# a factor would use its integer codes rather than its labels.
prop_scale <- unname(prop_multiplier[as.character(StormData_monetary$PROPDMGEXP)])

# Codes without an entry in the table (including the empty code and NA) leave
# the recorded amount unchanged.
prop_scale[is.na(prop_scale)] <- 1

StormData_monetary$PROPDMG <- StormData_monetary$PROPDMG * prop_scale

# Multiplier applied to CROPDMG for each CROPDMGEXP code:
#   - letters: k/K = thousand, m/M = million, B = billion
#   - digits: the power of ten ("0" = 1, "2" = 100)
#   - "?": the damage amount is set to zero
crop_multiplier <- c(
  "K" = 1e3, "k" = 1e3, "M" = 1e6, "m" = 1e6, "B" = 1e9,
  "0" = 1, "2" = 1e2,
  "?" = 0
)

# Look up the multiplier of every row at once instead of looping row by row.
# as.character() is needed because CROPDMGEXP may be a factor, and indexing by
# a factor would use its integer codes rather than its labels.
crop_scale <- unname(crop_multiplier[as.character(StormData_monetary$CROPDMGEXP)])

# Codes without an entry in the table (including the empty code and NA) leave
# the recorded amount unchanged.
crop_scale[is.na(crop_scale)] <- 1

StormData_monetary$CROPDMG <- StormData_monetary$CROPDMG * crop_scale

Result

I calculate the sum of FATALITIES and of INJURIES for each EVTYPE.

# Total fatalities for each event type.
population_health_FATALITIES <- aggregate(
  FATALITIES ~ EVTYPE,
  data = StormData_monetary,
  FUN = sum
)

# Total injuries for each event type.
population_health_INJURIES <- aggregate(
  INJURIES ~ EVTYPE,
  data = StormData_monetary,
  FUN = sum
)

Then I sort them in decreasing order and extract the 10 largest.

# Event types ranked by total fatalities, largest first.
# TRUE is spelled out because the shorthand T is an ordinary variable that can
# be reassigned.
FATALITIES_order <- population_health_FATALITIES[
  order(population_health_FATALITIES$FATALITIES, decreasing = TRUE),
]
# Ten deadliest event types. head() returns fewer rows when fewer than ten
# event types exist, whereas indexing with [1:10, ] would pad with NA rows.
FATALITIES_order10 <- head(FATALITIES_order, 10)

# Event types ranked by total injuries, largest first.
INJURIES_order <- population_health_INJURIES[
  order(population_health_INJURIES$INJURIES, decreasing = TRUE),
]
# Ten event types with the most injuries.
INJURIES_order10 <- head(INJURIES_order, 10)

Then I plot them in one figure to show the most harmful events together with their fatalities and injuries.

library(ggplot2)
library(gridExtra)


# Base plots: one bar position per event type.
g1 <- ggplot(FATALITIES_order10, aes(EVTYPE))
g2 <- ggplot(INJURIES_order10, aes(EVTYPE))

# Bar length is the total for the event type (passed as the bar weight);
# coord_flip() lays the bars out horizontally.
fatalities_panel <- g1 +
  geom_bar(aes(weight = FATALITIES)) +
  coord_flip() +
  ggtitle("Highest Fatalities")

injuries_panel <- g2 +
  geom_bar(aes(weight = INJURIES)) +
  coord_flip() +
  ggtitle("Highest Injuries")

# Stack the two panels in a single figure.
grid.arrange(fatalities_panel, injuries_panel, nrow = 2)

The figure shows that Tornado is the highest in both fatalities and injuries. Then I do the same for PROPDMG and CROPDMG.

# Total property damage and total crop damage for each event type.
property_damage <- aggregate(PROPDMG ~ EVTYPE, data = StormData_monetary, FUN = sum)
crop_damage <- aggregate(CROPDMG ~ EVTYPE, data = StormData_monetary, FUN = sum)

# Event types ranked by property damage, largest first.
# TRUE is spelled out because the shorthand T is an ordinary variable that can
# be reassigned.
property_order <- property_damage[
  order(property_damage$PROPDMG, decreasing = TRUE),
]
# Ten costliest event types for property. head() returns fewer rows when fewer
# than ten event types exist, whereas [1:10, ] would pad with NA rows.
property_order10 <- head(property_order, 10)

# Event types ranked by crop damage, largest first.
crop_order <- crop_damage[
  order(crop_damage$CROPDMG, decreasing = TRUE),
]
# Ten costliest event types for crops.
crop_order10 <- head(crop_order, 10)

# Base plots: one bar position per event type.
g3 <- ggplot(property_order10, aes(EVTYPE))
g4 <- ggplot(crop_order10, aes(EVTYPE))

# Bar length is the total damage for the event type (passed as the bar
# weight); coord_flip() lays the bars out horizontally.
# The panel titles previously misspelled "Damage" as "Damamge".
property_panel <- g3 +
  geom_bar(aes(weight = PROPDMG)) +
  coord_flip() +
  ggtitle("Highest Property Damage")

crop_panel <- g4 +
  geom_bar(aes(weight = CROPDMG)) +
  coord_flip() +
  ggtitle("Highest Crop Damage")

# Stack the two panels in a single figure.
grid.arrange(property_panel, crop_panel, nrow = 2)

The figure shows that Tornado is the highest in property damage and Drought is the highest in crop damage.