TORNADO is the most harmful event with respect to population health. FLOOD is the event that has the greatest economic consequences.
# Load the raw storm-event table from the working directory.
mydata <- read.csv(file = "StormData.csv", header = TRUE)
Remove harmless events, and remove rows whose “PROPDMGEXP” value is missing, “+”, “-”, or “?”.
# Exponent codes in PROPDMGEXP that can be turned into a numeric multiplier.
# Note: this variable shares its name with base::exp(); calls such as exp(1)
# still resolve to the function.
exp <- c(
  "0", "1", "2", "3", "4", "5", "6", "7", "8",
  "B", "h", "H", "K", "m", "M"
)

# Keep only events that caused some harm (injuries, fatalities, property or
# crop damage) and whose property-damage exponent is one of the codes above.
mysubset <- subset(
  mydata,
  (INJURIES > 0 | FATALITIES > 0 | PROPDMG > 0 | CROPDMG > 0) &
    PROPDMGEXP %in% exp
)

# Translate every exponent code into its multiplier with one case-insensitive
# lookup. Unlike `df[rows, ]$col <- value`, this does not fail when a code has
# no matching rows, and it does not depend on the order of the replacements.
# NOTE(review): digit codes are used at face value (so "0" zeroes the damage),
# exactly as this analysis has always done -- confirm that this is intended
# rather than a power of ten.
exp_multiplier <- c(
  setNames(0:8, 0:8),
  H = 100, K = 1000, M = 1e+06, B = 1e+09
)
mysubset$PROPDMGEXP <- unname(
  exp_multiplier[toupper(as.character(mysubset$PROPDMGEXP))]
)

mysubset$EVTYPE <- as.character(mysubset$EVTYPE)

# Total number of injuries per event type.
sumharm <- aggregate(
  INJURIES ~ EVTYPE,
  data = mysubset,
  FUN = sum,
  na.rm = TRUE
)

# Most harmful event type(s) with respect to population health
sumharm$EVTYPE[sumharm$INJURIES == max(sumharm$INJURIES)]
[1] "TORNADO"
library(ggplot2)

# Top five most harmful event types, ranked by total injuries.
# head() returns fewer rows when fewer than five event types exist, whereas
# indexing with 1:5 would pad the result with all-NA rows.
sumharm2 <- head(sumharm[order(-sumharm$INJURIES), ], n = 5L)

# Bar chart of the top five. Fixing the discrete axis limits keeps the bars in
# rank order; by default a character x variable is drawn alphabetically.
ggplot(sumharm2, aes(x = EVTYPE, y = INJURIES)) +
  geom_bar(stat = "identity") +
  scale_x_discrete(limits = sumharm2$EVTYPE)
# Property damage in dollars for each event: the reported amount times the
# numeric multiplier held in PROPDMGEXP.
mysubset[["PROPDMGEXP"]] <- as.numeric(mysubset[["PROPDMGEXP"]])
mysubset[["myharm"]] <- mysubset[["PROPDMG"]] * mysubset[["PROPDMGEXP"]]

# Total property damage per event type.
sumharm <- aggregate(
  myharm ~ EVTYPE,
  data = mysubset,
  FUN = sum,
  na.rm = TRUE
)

# Event type(s) with the greatest economic consequences
sumharm$EVTYPE[sumharm$myharm == max(sumharm$myharm)]
[1] "FLOOD"