Create our analysis dataset by including the interested variables from storm.data.
analysis_data <- storm.data[,c(8,23:28)]
head(analysis_data)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25.0 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25.0 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
### Create a new variable (health_impact), which is the total number of deaths + injuries.
analysis_data$health_impact <- analysis_data[,2] + analysis_data[,3]
#str(analysis_data)
#summary(analysis_data)
#View(table(analysis_data$PROPDMGEXP))
we only report the top 10 most harmful events to population health based on the numbers of deaths plus injuries.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#View(table(analysis_data$EVTYPE))
health_impact <- analysis_data %>% select(health_impact, EVTYPE) %>%
group_by(EVTYPE) %>%
summarise(health_sum = sum(health_impact)) %>%
slice_max(health_sum, n=10)
#View(health_impact)
table(analysis_data$PROPDMGEXP)
##
## - ? + 0 1 2 3 4 5 6
## 465934 1 8 5 216 25 13 4 4 28 4
## 7 8 B h H K m M
## 5 1 40 1 6 424665 7 11330
head(analysis_data)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25.0 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25.0 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
## health_impact
## 1 15
## 2 0
## 3 2
## 4 2
## 5 2
## 6 6
#In order to correctly calculate the the economic damage from property and crop, we have to convert the character unit into numbers.
PROPDMGEXP_1 <- rep(0, length(analysis_data$PROPDMGEXP))
PROPDMGEXP_1[which(analysis_data$PROPDMGEXP %in% c("K","k"))] = 10^3
PROPDMGEXP_1[which(analysis_data$PROPDMGEXP %in% c("M","m"))] = 10^6
PROPDMGEXP_1[which(analysis_data$PROPDMGEXP %in% c("B","b"))] = 10^9
table(analysis_data$CROPDMGEXP)
##
## ? 0 2 B k K m M
## 618413 7 19 1 9 21 281832 1 1994
CROPDMGEXP_1 <- rep(0, length(analysis_data$CROPDMGEXP))
CROPDMGEXP_1[which(analysis_data$CROPDMGEXP %in% c("K","k"))] = 10^3
CROPDMGEXP_1[which(analysis_data$CROPDMGEXP %in% c("M","m"))] = 10^6
CROPDMGEXP_1[which(analysis_data$CROPDMGEXP %in% c("B","b"))] = 10^9
### Create another new variable (economy_impact), which is the total economic loss of property + crop.
analysis_data$economy_impact <- analysis_data$PROPDMG*PROPDMGEXP_1 + analysis_data$CROPDMG*CROPDMGEXP_1
#View(analysis_data)
economy_impact <- analysis_data %>% select(economy_impact, EVTYPE) %>%
group_by(EVTYPE) %>%
summarise(economy_sum = sum(economy_impact)) %>%
slice_max(economy_sum, n=10)
#View(economy_impact)