Introduction

In this report we aim to compare various weather events along various types of harm. First we will see which event types are particularly harmful to the population itself by looking at injury and fatality numbers. In the second part we will look at the damage to property and crops. Finally we compute the Spearman Rank Correlation Matrix for the event types ranked by their harm to the population and their property and crop damage.

Download and Read Data

# Load the storm data set into `dt` and list its columns.
#
# The file is resolved relative to the current working directory instead of
# switching to a machine-specific absolute path with setwd(), so the report
# can be run from any checkout that has the data file next to it.
library(data.table)
## Warning: package 'data.table' was built under R version 4.0.2
storm.file <- "repdata_data_StormData.csv.bz2"

# Stop early with an actionable message rather than a generic read error.
if (!file.exists(storm.file)) {
  stop("Storm data file '", storm.file, "' not found in ", getwd(),
       call. = FALSE)
}

dt <- fread(storm.file, header = TRUE, sep = ",")
names(dt)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

Processing Data

Health Impact

To evaluate the health impact, the total fatalities and total injuries are calculated for each event type (EVTYPE).

# Total fatalities per event type, listed from the deadliest downwards.
dt.fatalities <- dt[, .(FATALITIES = sum(FATALITIES)), by = EVTYPE]
dt.fatalities <- dt.fatalities[order(-FATALITIES)]
dt.fatalities
##                     EVTYPE FATALITIES
##   1:               TORNADO       5633
##   2:        EXCESSIVE HEAT       1903
##   3:           FLASH FLOOD        978
##   4:                  HEAT        937
##   5:             LIGHTNING        816
##  ---                                 
## 981:           SLEET STORM          0
## 982:           DENSE SMOKE          0
## 983:       LAKESHORE FLOOD          0
## 984: ASTRONOMICAL LOW TIDE          0
## 985:      VOLCANIC ASHFALL          0
# Total injuries per event type, listed from the most injurious downwards.
dt.injuries <- dt[, .(INJURIES = sum(INJURIES)), by = EVTYPE]
dt.injuries <- dt.injuries[order(-INJURIES)]
dt.injuries
##                     EVTYPE INJURIES
##   1:               TORNADO    91346
##   2:             TSTM WIND     6957
##   3:                 FLOOD     6789
##   4:        EXCESSIVE HEAT     6525
##   5:             LIGHTNING     5230
##  ---                               
## 981:           SLEET STORM        0
## 982:           DENSE SMOKE        0
## 983:       LAKESHORE FLOOD        0
## 984: ASTRONOMICAL LOW TIDE        0
## 985:      VOLCANIC ASHFALL        0

Economic Impact

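The data provides two measures of economic impact, PROPDMG (property damage) and CROPDMG (crop damage). Each holds only a magnitude; the actual damage in currency is obtained by multiplying it by the factor encoded in the corresponding PROPDMGEXP or CROPDMGEXP column. According to the linked reference, the PROPDMGEXP and CROPDMGEXP symbols can be interpreted as follows: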

H, h -> hundreds = x100

K, k -> thousands = x1,000

M, m -> millions = x1,000,000

B, b -> billions = x1,000,000,000

(+) -> x1

(-) -> x0

(?) -> x0

blank -> x0

digits 0 to 8 -> x10

# Build `dt.ecodmg`: total economic damage (property + crops) per event type,
# largest first. PROPDMG/CROPDMG hold a magnitude and PROPDMGEXP/CROPDMGEXP
# hold a symbol that selects the multiplier applied to that magnitude.

# Plain column selection; no grouping is needed because nothing is aggregated.
dt.damage <- dt[, .(EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)]

#transfer the index
symbol <- sort(unique(c(dt.damage$PROPDMGEXP, dt.damage$CROPDMGEXP)))
symbol
##  [1] ""  "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "k" "K" "m"
## [20] "M"

# Pair every symbol with its multiplier explicitly. sort() orders strings
# according to the session locale, so a multiplier vector aligned with
# `symbol` purely by position would assign wrong factors under a different
# collation order.
exponent.table <- data.table(
  symbol = c("", "-", "?", "+", as.character(0:8),
             "B", "h", "H", "k", "K", "m", "M"),
  multiplier = c(0, 0, 0, 1, rep(10, 9),
                 10^9, 10^2, 10^2, 10^3, 10^3, 10^6, 10^6)
)

# Fail loudly on a symbol without a known multiplier instead of letting
# match() return NA and propagate into the totals.
unknown.symbol <- setdiff(symbol, exponent.table$symbol)
if (length(unknown.symbol) > 0) {
  stop("No multiplier defined for exponent symbol(s): ",
       paste(unknown.symbol, collapse = ", "), call. = FALSE)
}

# Multipliers for the symbols actually present, looked up by value.
multiplier <- exponent.table$multiplier[match(symbol, exponent.table$symbol)]

covert.multiplier <- data.table(symbol, multiplier)

PROPDMG.multiplier <- covert.multiplier$multiplier[match(
    dt.damage$PROPDMGEXP, covert.multiplier$symbol)]

CROPDMG.multiplier <- covert.multiplier$multiplier[match(
    dt.damage$CROPDMGEXP, covert.multiplier$symbol)]

#convert the damage into $USD
# Columns are passed to data.table() directly so the damage stays numeric;
# going through cbind() would coerce everything to character because EVTYPE
# is a character column.
dt.economic <- data.table(PRO = dt.damage$PROPDMG * PROPDMG.multiplier,
                          CRO = dt.damage$CROPDMG * CROPDMG.multiplier,
                          EVTYPE = dt.damage$EVTYPE)

#sum and arrange the damage by EVTYPE
dt.ecodmg <- dt.economic[, .(ECONOMICDMG = sum(PRO, CRO)),
                         by = EVTYPE][order(-ECONOMICDMG)]
dt.ecodmg
##                       EVTYPE  ECONOMICDMG
##   1:                   FLOOD 150319678250
##   2:       HURRICANE/TYPHOON  71913712800
##   3:                 TORNADO  57352117607
##   4:             STORM SURGE  43323541000
##   5:                    HAIL  18758224527
##  ---                                     
## 981:                DROWNING            0
## 982: GUSTY THUNDERSTORM WIND            0
## 983:    HIGH SURF ADVISORIES            0
## 984:             SLEET STORM            0
## 985:        VOLCANIC ASHFALL            0

Results

Health Impact

The top 10 events with the highest total fatalities and injuries are shown graphically.

library(ggplot2)

# Bar chart of the ten event types with the most fatalities, tallest bar
# first; event-type labels are rotated so they do not overlap.
ggplot(
  dt.fatalities[1:10],
  aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity") +
  labs(x = "EVENT TYPE", y = "FATALITIES", title = "TOTAL FATALITIES") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

# Same chart for the ten event types with the most injuries.
ggplot(
  dt.injuries[1:10],
  aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)
) +
  geom_bar(stat = "identity") +
  labs(x = "EVENT TYPE", y = "INJURIES", title = "TOTAL INJURIES") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

As shown in the figures, tornadoes cause the highest total counts of both fatalities and injuries.

Economic Impact

# Bar chart of the ten event types with the largest combined property and
# crop damage, tallest bar first; event-type labels are rotated for legibility.
ggplot(
  dt.ecodmg[1:10],
  aes(x = reorder(EVTYPE, -ECONOMICDMG), y = ECONOMICDMG)
) +
  geom_bar(stat = "identity") +
  labs(
    x = "EVENT TYPE",
    y = "ECONOMIC DAMAGE",
    title = "TOTAL IMPACT ON ECONOMIC"
  ) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))

According to the plot, floods have the greatest total economic consequences (property and crop damage combined).