The most harmful events with respect to population health and have the greatest economic consequences are tornado, flood and hurricane in US
I sumed the number of injuries for all types of events and found the most harmful events with respect to population health is tornado. I also sumed the numbers of PROPDMG and CROPDMG for all types of events and found flood has the highest number for PROPDMG while hurricane has highest number for CROPDMG. Therefore, I concluded The most harmful events with respect to population health and have the greatest economic consequences are tornado , flood and hurricane in US.
library(data.table)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
download.file(
"https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
"PA2.csv.bz2")
df <- fread("PA2.csv.bz2")
df$EVTYPE <- tolower(df$EVTYPE)
df$EVTYPE <- as.factor(df$EVTYPE)
df$PROPDMGEXP <- tolower(df$PROPDMGEXP)
df$CROPDMGEXP <- tolower(df$CROPDMGEXP)
df_injuries <- tapply(df$INJURIES, df$EVTYPE, sum)
df_injuries <- df_injuries[order(df_injuries, decreasing = T)]
barplot(df_injuries[1:3])
df <- mutate(df, propdamage = PROPDMG * case_when(
PROPDMGEXP == "b" ~ 10^9,
PROPDMGEXP == "h" ~ 10^2,
PROPDMGEXP == "K" ~ 10^3,
PROPDMGEXP == "M" ~ 10^6,
PROPDMGEXP == 0 ~ 0,
PROPDMGEXP == 1 ~ 1,
PROPDMGEXP == 2 ~ 2,
PROPDMGEXP == 3 ~ 3,
PROPDMGEXP == 4 ~ 4,
PROPDMGEXP == 5 ~ 5,
PROPDMGEXP == 6 ~ 6,
PROPDMGEXP == 7 ~ 7,
PROPDMGEXP == 8 ~ 8,
TRUE ~ 0
))
df <- mutate(df, cropdamage = CROPDMG * case_when(
PROPDMGEXP == "b" ~ 10^9,
PROPDMGEXP == "h" ~ 10^2,
PROPDMGEXP == "K" ~ 10^3,
PROPDMGEXP == "M" ~ 10^6,
PROPDMGEXP == 0 ~ 0,
PROPDMGEXP == 1 ~ 1,
PROPDMGEXP == 2 ~ 2,
PROPDMGEXP == 3 ~ 3,
PROPDMGEXP == 4 ~ 4,
PROPDMGEXP == 5 ~ 5,
PROPDMGEXP == 6 ~ 6,
PROPDMGEXP == 7 ~ 7,
PROPDMGEXP == 8 ~ 8,
TRUE ~ 0
))
df_DMG <- aggregate(df[,c("propdamage", "cropdamage")], list(df$EVTYPE), sum)
colnames(df_DMG)[1] <- "EVTYPE"
df_DMG[order(df_DMG$propdamage, decreasing = T), ][1:5,]
## EVTYPE propdamage cropdamage
## 154 flood 122500000000 3.3500e+10
## 372 hurricane/typhoon 65500000000 7.2971e+11
## 599 storm surge 42560000000 0.0000e+00
## 363 hurricane 5700000000 8.0100e+11
## 758 tornado 5300000511 0.0000e+00
df_DMG[order(df_DMG$cropdamage, decreasing = T), ][1:5,]
## EVTYPE propdamage cropdamage
## 363 hurricane 5.700e+09 8.0100e+11
## 372 hurricane/typhoon 6.550e+10 7.2971e+11
## 154 flood 1.225e+11 3.3500e+10
## 370 hurricane opal/high winds 1.000e+08 1.0000e+10
## 875 wildfire 1.040e+09 6.5000e+09