Data Processing

1. Library packages

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
## Warning: package 'tidyr' was built under R version 3.6.3
library(forcats)
## Warning: package 'forcats' was built under R version 3.6.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.3

2. Data downloading

# URL of the bzip2-compressed storm data CSV.
storm_data_ul <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# Fetch the archive only when no local copy exists, so re-running the analysis
# does not download it again. `mode = "wb"` writes the compressed file
# byte-for-byte on every platform.
if (!file.exists("storm_data")) {
  download.file(storm_data_ul, "storm_data", mode = "wb")
}

3. Data processing

The following code prepares the health-related data.

# Load the downloaded storm data into a data frame.
storm <- read.csv("storm_data")

# Convert to a tibble. `as_tibble()` replaces `tbl_df()`, which is deprecated
# as of dplyr 1.0.0 and emitted a deprecation warning here.
storm_tb <- as_tibble(storm)
# Keep the event type and the two casualty counts, and add their row-wise sum.
harmful_health <- storm_tb %>%
  select(EVTYPE, FATALITIES, INJURIES) %>%
  mutate(sum = FATALITIES + INJURIES)

# Casualty totals per event type, ordered from the largest combined total down.
# `.groups = "drop"` makes the ungrouping explicit instead of relying on the
# default (which prints a message).
total <- harmful_health %>%
  group_by(EVTYPE) %>%
  summarise(
    FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES),
    TOTAL = sum(sum),
    .groups = "drop"
  ) %>%
  arrange(desc(TOTAL))

# Top ten event types. head() returns at most ten rows and never indexes past
# the end of the table, unlike `total[1:10, ]`.
plot <- head(total, 10)

# Long-format slice for the plot: one row per event type holding the fatality
# total in `count`, tagged with the measure it represents.
fatalities_count <- plot %>%
  select(EVTYPE, count = FATALITIES) %>%
  mutate(harmful_type = "FATALITIES")

# Long-format slice for the plot: one row per event type holding the injury
# total in `count`, tagged with the measure it represents.
injuries_count <- plot %>%
  select(EVTYPE, count = INJURIES) %>%
  mutate(harmful_type = "INJURIES")

# Stack the two slices and fix the factor levels of EVTYPE to their order of
# first appearance, so the x axis keeps the ranking instead of sorting
# alphabetically.
Type_count_plot <- rbind(fatalities_count, injuries_count) %>%
  mutate(EVTYPE = fct_inorder(as.character(EVTYPE)))

The following code prepares the property and crop damage data. Each damage value is computed by multiplying the reported amount by the multiplier that its exponent code stands for.

# Keep the event type plus the damage amounts and their exponent codes.
consequences <- storm_tb %>%
  select(EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)

unique(consequences$PROPDMGEXP)
##  [1] K M   B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels:  - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
# Lookup table from exponent code to numeric multiplier:
#   ""            -> 1 (no exponent given)
#   digit d       -> 10^d
#   H / K / M / B -> 1e2 / 1e3 / 1e6 / 1e9 (matched case-insensitively)
#   + - ?         -> 0, so rows with an invalid exponent do not count
prop_exp_codes <- c(
  "", "0", "1", "2", "3", "4", "5", "6", "7", "8",
  "H", "K", "M", "B",
  "+", "-", "?"
)
prop_exp_values <- c(
  1, 1, 10, 100, 1000, 10000, 1e+05, 1e+06, 1e+07, 1e+08,
  100, 1000, 1e+06, 1e+09,
  0, 0, 0
)

# Create the whole PROPEXP column in one assignment. match() is used rather
# than a named vector because an empty string cannot be looked up by name.
consequences$PROPEXP <- prop_exp_values[
  match(toupper(as.character(consequences$PROPDMGEXP)), prop_exp_codes)
]

# Any code missing from the table is treated like an invalid exponent (0)
# rather than left as NA, which would turn the per-event sums into NA.
consequences$PROPEXP[is.na(consequences$PROPEXP)] <- 0


unique(consequences$CROPDMGEXP)
## [1]   M K m B ? 0 k 2
## Levels:  ? 0 2 B k K m M
# Lookup table from exponent code to numeric multiplier:
#   ""        -> 1 (no exponent given)
#   "0", "2"  -> 1, 100
#   K / M / B -> 1e3 / 1e6 / 1e9 (matched case-insensitively)
#   ?         -> 0, so rows with an invalid exponent do not count
crop_exp_codes <- c("", "0", "2", "K", "M", "B", "?")
crop_exp_values <- c(1, 1, 100, 1000, 1e+06, 1e+09, 0)

# Create the whole CROPEXP column in one assignment. match() is used rather
# than a named vector because an empty string cannot be looked up by name.
consequences$CROPEXP <- crop_exp_values[
  match(toupper(as.character(consequences$CROPDMGEXP)), crop_exp_codes)
]

# Any code missing from the table is treated like an invalid exponent (0)
# rather than left as NA, which would turn the per-event sums into NA.
consequences$CROPEXP[is.na(consequences$CROPEXP)] <- 0

# Damage value = reported amount times the multiplier from the exponent code,
# computed for property damage and for crop damage.
consequences <- mutate(
  consequences,
  PROPDMGVAL = PROPDMG * PROPEXP,
  CROPDMGVAL = CROPDMG * CROPEXP
)

# Keep the event type and the two damage values, then add their row-wise sum.
consequences_val <- select(consequences, EVTYPE, PROPDMGVAL, CROPDMGVAL)
consequences_vals <- mutate(consequences_val, sum = PROPDMGVAL + CROPDMGVAL)

# Damage totals per event type, ordered from the largest combined total down.
# `.groups = "drop"` makes the ungrouping explicit instead of relying on the
# default (which prints a message).
total_con <- consequences_vals %>%
  group_by(EVTYPE) %>%
  summarise(
    PROPDMGVAL = sum(PROPDMGVAL),
    CROPDMGVAL = sum(CROPDMGVAL),
    TOTAL = sum(sum),
    .groups = "drop"
  ) %>%
  arrange(desc(TOTAL))

# Top ten event types. head() returns at most ten rows and never indexes past
# the end of the table, unlike `total_con[1:10, ]`.
plot_con <- head(total_con, 10)

# Long-format slice for the plot: one row per event type holding the property
# damage total in `count`, tagged with the measure it represents.
PROPDMGVAL_count <- plot_con %>%
  select(EVTYPE, count = PROPDMGVAL) %>%
  mutate(consequence_type = "PROPDMGVAL")

# Long-format slice for the plot: one row per event type holding the crop
# damage total in `count`, tagged with the measure it represents.
CROPDMGVAL_count <- plot_con %>%
  select(EVTYPE, count = CROPDMGVAL) %>%
  mutate(consequence_type = "CROPDMGVAL")
# Stack the two slices and fix the factor levels of EVTYPE to their order of
# first appearance, so the x axis keeps the ranking instead of sorting
# alphabetically.
Type_count_con <- rbind(PROPDMGVAL_count, CROPDMGVAL_count) %>%
  mutate(EVTYPE = fct_inorder(as.character(EVTYPE)))

Result

1. Across the United States, which types of events are most harmful with respect to population health?

# Stacked bar chart: one bar per event type, filled by harm type
# (fatalities vs. injuries). The x labels are rotated so the event names
# do not overlap.
g <- ggplot(Type_count_plot, aes(x = EVTYPE, y = count, fill = harmful_type)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Across the United States, \nwhich types of events are most harmful with respect to population health?",
    x = "EVTYPE",
    y = "Count"
  )
g

Tornadoes caused the most severe harm to population health, counting both fatalities and injuries.

2. Across the United States, which types of events have the greatest economic consequences?

# Stacked bar chart: one bar per event type, filled by damage type
# (property vs. crop). The x labels are rotated so the event names do not
# overlap.
g <- ggplot(Type_count_con, aes(x = EVTYPE, y = count, fill = consequence_type)) +
  geom_col() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Across the United States, \nwhich types of events have the greatest economic consequences?",
    x = "EVTYPE",
    y = "Dollar"
  )
g

Floods caused the most severe economic consequences. The economic consequences include both property damage and crop damage.