In this report we aim to compare various weather events along various types of harm. First we will see which event types are particularly harmful to the population itself by looking at injury and fatality numbers. In the second part we will look at the damage to property and crops. Finally we compute the Spearman Rank Correlation Matrix for the event types ranked by their harm to the population and their property and crop damage.
# NOTE(review): hard-coded absolute path to a directory on one user's machine;
# this call fails wherever that directory does not exist. The relative file
# name passed to fread() further down is resolved against this directory.
setwd("C:/Users/Apple/Desktop/RStudio Tour/assignment/project5.2")
library(data.table)
## Warning: package 'data.table' was built under R version 4.0.2
# Read the storm-event CSV (bz2 archive, per its file name) into a data.table;
# the first row holds the column names and fields are comma-separated.
storm.file <- "repdata_data_StormData.csv.bz2"
dt <- fread(storm.file, sep = ",", header = TRUE)
# Show which columns are available.
colnames(dt)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
To evaluate the health impact, the total fatalities and injuries are calculated for each event type (EVTYPE).
# Total fatalities per event type, then ranked from most to fewest.
fatalities.by.event <- dt[, list(FATALITIES = sum(FATALITIES)), by = "EVTYPE"]
dt.fatalities <- fatalities.by.event[order(-FATALITIES)]
dt.fatalities
## EVTYPE FATALITIES
## 1: TORNADO 5633
## 2: EXCESSIVE HEAT 1903
## 3: FLASH FLOOD 978
## 4: HEAT 937
## 5: LIGHTNING 816
## ---
## 981: SLEET STORM 0
## 982: DENSE SMOKE 0
## 983: LAKESHORE FLOOD 0
## 984: ASTRONOMICAL LOW TIDE 0
## 985: VOLCANIC ASHFALL 0
# Total injuries per event type, then ranked from most to fewest.
injuries.by.event <- dt[, list(INJURIES = sum(INJURIES)), by = "EVTYPE"]
dt.injuries <- injuries.by.event[order(-INJURIES)]
dt.injuries
## EVTYPE INJURIES
## 1: TORNADO 91346
## 2: TSTM WIND 6957
## 3: FLOOD 6789
## 4: EXCESSIVE HEAT 6525
## 5: LIGHTNING 5230
## ---
## 981: SLEET STORM 0
## 982: DENSE SMOKE 0
## 983: LAKESHORE FLOOD 0
## 984: ASTRONOMICAL LOW TIDE 0
## 985: VOLCANIC ASHFALL 0
The data provide two measures of economic impact, PROPDMG (property damage) and CROPDMG (crop damage). The actual dollar amount of each figure is determined by the accompanying PROPDMGEXP and CROPDMGEXP exponent codes. According to the linked reference, these codes translate into multipliers as follows:
H, h -> hundreds = x100
K, k -> kilos = x1,000
M, m -> millions = x1,000,000
B, b -> billions = x1,000,000,000
(+) -> x1
(-) -> x0
(?) -> x0
blank -> x0
digits 0-8 -> x10
# Keep only the damage amounts and their exponent codes, grouped by event type.
dt.damage <- dt[, list(PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP), by = "EVTYPE"]
# Collect every distinct exponent code that occurs in either column, sorted.
symbol <- sort(union(dt.damage$PROPDMGEXP, dt.damage$CROPDMGEXP))
symbol
## [1] "" "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "k" "K" "m"
## [20] "M"
# Lookup table from damage-exponent code to numeric multiplier.
# Each symbol is paired with its multiplier explicitly, so the mapping does not
# depend on the locale-dependent order in which sort() arranges the symbols.
covert.multiplier <- data.table(
  symbol = c("", "-", "?", "+", as.character(0:8),
             "H", "h", "K", "k", "M", "m", "B", "b"),
  multiplier = c(0, 0, 0, 1, rep(10, 9),
                 10^2, 10^2, 10^3, 10^3, 10^6, 10^6, 10^9, 10^9)
)

# Fail loudly on any code missing from the table, instead of letting match()
# return NA and silently turning whole event-type totals into NA.
unknown.exp <- setdiff(
  c(dt.damage$PROPDMGEXP, dt.damage$CROPDMGEXP),
  covert.multiplier$symbol
)
if (length(unknown.exp) > 0) {
  stop(
    "Unmapped damage exponent symbol(s): ",
    paste(unknown.exp, collapse = ", "),
    call. = FALSE
  )
}

# Per-row multipliers for property and crop damage.
PROPDMG.multiplier <- covert.multiplier$multiplier[
  match(dt.damage$PROPDMGEXP, covert.multiplier$symbol)
]
CROPDMG.multiplier <- covert.multiplier$multiplier[
  match(dt.damage$CROPDMGEXP, covert.multiplier$symbol)
]

# Convert the damage into $USD.
# Columns are built directly so PRO and CRO stay numeric; going through cbind()
# with EVTYPE would coerce them to character and require parsing them back.
dt.economic <- data.table(
  PRO = dt.damage$PROPDMG * PROPDMG.multiplier,
  CRO = dt.damage$CROPDMG * CROPDMG.multiplier,
  EVTYPE = dt.damage$EVTYPE
)

# Sum property and crop damage per event type and rank from largest to smallest.
dt.ecodmg <- dt.economic[, .(ECONOMICDMG = sum(PRO, CRO)),
                         by = EVTYPE][order(-ECONOMICDMG)]
dt.ecodmg
## EVTYPE ECONOMICDMG
## 1: FLOOD 150319678250
## 2: HURRICANE/TYPHOON 71913712800
## 3: TORNADO 57352117607
## 4: STORM SURGE 43323541000
## 5: HAIL 18758224527
## ---
## 981: DROWNING 0
## 982: GUSTY THUNDERSTORM WIND 0
## 983: HIGH SURF ADVISORIES 0
## 984: SLEET STORM 0
## 985: VOLCANIC ASHFALL 0
The top 10 events with the highest total fatalities and injuries are shown graphically.
library(ggplot2)
# Shared x-axis styling: rotate the event-type labels by 90 degrees.
rotated.labels <- theme(
  axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)
)

# Bar chart of the first ten rows of the fatalities ranking, tallest bar first.
ggplot(
  dt.fatalities[1:10],
  aes(reorder(EVTYPE, -FATALITIES), FATALITIES)
) +
  geom_bar(stat = "identity") +
  labs(x = "EVENT TYPE", y = "FATALITIES", title = "TOTAL FATALITIES") +
  rotated.labels

# Bar chart of the first ten rows of the injuries ranking, tallest bar first.
ggplot(
  dt.injuries[1:10],
  aes(reorder(EVTYPE, -INJURIES), INJURIES)
) +
  geom_bar(stat = "identity") +
  labs(x = "EVENT TYPE", y = "INJURIES", title = "TOTAL INJURIES") +
  rotated.labels
As shown in the figures, tornadoes cause the highest total counts of both fatalities and injuries.
# Bar chart of the first ten rows of the economic-damage ranking, tallest bar
# first, with the event-type labels rotated by 90 degrees.
ggplot(
  dt.ecodmg[1:10],
  aes(x = reorder(EVTYPE, -ECONOMICDMG), y = ECONOMICDMG)
) +
  geom_bar(stat = "identity") +
  labs(
    x = "EVENT TYPE",
    y = "ECONOMIC DAMAGE",
    title = "TOTAL IMPACT ON ECONOMIC"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
According to the plot, floods have the greatest economic consequences.