date: 2021-11-12
This analysis is conducted to answer two questions: 1. Which weather event is the most harmful to population health? 2. Which weather event causes the greatest economic damage?
After processing the data, we conclude that: 1. Tornadoes are the event most harmful to population health. 2. Floods lead to the worst economic consequences.
# Download the compressed storm data once (skipped when the archive is already
# present in the working directory), then load it and print its structure.
if (!file.exists("StormData.csv.bz2")) {
  fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # Use R's default download method instead of forcing the external `curl`
  # binary, which is not installed on every system. mode = "wb" keeps the
  # bz2 archive byte-exact on Windows.
  download.file(fileUrl, destfile = "StormData.csv.bz2", mode = "wb")
}
# read.csv() is given the .bz2 archive directly; no separate unpack step.
stormdataraw <- read.csv("StormData.csv.bz2", header = TRUE, sep = ",")
str(stormdataraw)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : Factor w/ 16335 levels "1/1/1966 0:00:00",..: 6523 6523 4242 11116 2224 2224 2260 383 3980 3980 ...
## $ BGN_TIME : Factor w/ 3608 levels "00:00:00 AM",..: 272 287 2705 1683 2584 3186 242 1683 3186 3186 ...
## $ TIME_ZONE : Factor w/ 22 levels "ADT","AKS","AST",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: Factor w/ 29601 levels "","5NM E OF MACKINAC BRIDGE TO PRESQUE ISLE LT MI",..: 13513 1873 4598 10592 4372 10094 1973 23873 24418 4598 ...
## $ STATE : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : Factor w/ 35 levels ""," N"," NW",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_LOCATI: Factor w/ 54429 levels ""," Christiansburg",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_DATE : Factor w/ 6663 levels "","1/1/1993 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_TIME : Factor w/ 3647 levels ""," 0900CST",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : Factor w/ 24 levels "","E","ENE","ESE",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ END_LOCATI: Factor w/ 34506 levels ""," CANTON"," TULIA",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ WFO : Factor w/ 542 levels ""," CI","%SD",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ STATEOFFIC: Factor w/ 250 levels "","ALABAMA, Central",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ ZONENAMES : Factor w/ 25112 levels ""," "| __truncated__,..: 1 1 1 1 1 1 1 1 1 1 ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : Factor w/ 436781 levels "","\t","\t\t",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
This dataset has 902297 observations and 37 variables.
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the columns needed for the health and economic impact analysis.
varnames <- c(
  "BGN_DATE", "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
stormdata <- stormdataraw[, varnames]

# Parse the begin date (month/day/year; any trailing time part is ignored)
# and derive the calendar year of each event.
stormdata[["BGN_DATE"]] <- as.Date(stormdata[["BGN_DATE"]], "%m/%d/%Y")
stormdata[["YEAR"]] <- year(stormdata[["BGN_DATE"]])

# Restrict to events from 1996 onward that caused at least one fatality,
# injury, or some property or crop damage.
stormdata <- stormdata %>%
  filter(YEAR >= 1996) %>%
  filter(FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0)
Take a look at PROPDMGEXP and CROPDMGEXP: `table(stormdata$PROPDMGEXP)` and `table(stormdata$CROPDMGEXP)`.
Let’s convert the letter codes in PROPDMGEXP and CROPDMGEXP into numerical values, then create a new variable CROPDMGFACTOR to represent the magnitude of CROPDMG, and PROPDMGFACTOR to represent the magnitude of PROPDMG.
# Multiplier represented by each damage-exponent letter code.
expMultiplier <- c(K = 10^3, M = 10^6, B = 10^9)

# Translate a vector of exponent codes into numeric multipliers.
#
# exp_code: factor or character vector of codes ("", "K", "M", "B").
# Returns a numeric vector of the same length: 1 for a blank code, the
# matching power of ten for K/M/B (case and surrounding whitespace are
# ignored), and NA for any other code. A warning reports how many entries
# could not be mapped, so they are not dropped silently later on.
dmgFactor <- function(exp_code) {
  code <- toupper(trimws(as.character(exp_code)))
  multiplier <- unname(expMultiplier[code])
  multiplier[!is.na(code) & code == ""] <- 10^0
  unmapped <- sum(is.na(multiplier))
  if (unmapped > 0) {
    warning(unmapped, " damage exponent code(s) could not be mapped; ",
            "their multiplier is NA", call. = FALSE)
  }
  multiplier
}

stormdata$CROPDMGFACTOR <- dmgFactor(stormdata$CROPDMGEXP)
stormdata$PROPDMGFACTOR <- dmgFactor(stormdata$PROPDMGEXP)

# HEALTH: people harmed per event (fatalities plus injuries).
stormdata$HEALTH <- stormdata$FATALITIES + stormdata$INJURIES
# ECONOMY: property plus crop damage per event, scaled by the multipliers.
stormdata$ECONOMY <- stormdata$PROPDMG * stormdata$PROPDMGFACTOR +
  stormdata$CROPDMG * stormdata$CROPDMGFACTOR

# Only the event type and the two impact measures are needed from here on.
stormdata <- stormdata[c("EVTYPE", "HEALTH", "ECONOMY")]
# Normalise event type names to upper case so that spellings differing only
# in letter case collapse into one value.
stormdata[["EVTYPE"]] <- toupper(stormdata[["EVTYPE"]])
# Number of distinct event types (rows) after normalisation.
evtypeCounts <- as.data.frame(table(stormdata[["EVTYPE"]]))
dim(evtypeCounts)
## [1] 186 2
After converting all event types to uppercase, there are still 186 distinct event types, which does not match the 48 types listed in the documentation, so the event types need to be cleaned. We combine similar event-type names into the official names given in Table 2.1.1 (Storm Data Event Table) of the documentation: https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf.
## First, let's find out all recorded event types
evtypeUnique <- unique(stormdata$EVTYPE)
## Now we can combine similar events
# Pattern used throughout this section: grep() lists the raw names containing
# a keyword (printed below as knit output), a vector collects the names to
# merge, and a masked assignment writes the consolidated name to EVTYPENEW.
evtypeUnique[grep("THUNDERSTORM", evtypeUnique)]
## [1] "THUNDERSTORM" "THUNDERSTORM WIND (G40)"
## [3] "THUNDERSTORM WIND" "MARINE THUNDERSTORM WIND"
# "MARINE THUNDERSTORM WIND" is left out here; it keeps its own category
# further down.
a <- c("THUNDERSTORM", "THUNDERSTORM WIND (G40)", "THUNDERSTORM WIND")
evtypeUnique[grep("TSTM WIND", evtypeUnique)]
## [1] "TSTM WIND" "TSTM WIND/HAIL"
## [3] "TSTM WIND (G45)" "TSTM WIND 40"
## [5] "TSTM WIND 45" "TSTM WIND (41)"
## [7] "TSTM WIND (G40)" "TSTM WIND AND LIGHTNING"
## [9] " TSTM WIND (G45)" "TSTM WIND (G45)"
## [11] "TSTM WIND (G35)" " TSTM WIND"
## [13] "TSTM WIND G45" "NON-TSTM WIND"
## [15] "NON TSTM WIND" "MARINE TSTM WIND"
# "NON-TSTM WIND", "NON TSTM WIND" and "MARINE TSTM WIND" are excluded from b.
# NOTE(review): the names with leading whitespace are matched literally; the
# amount of whitespace must equal what is in the data -- confirm, or trim
# EVTYPE before matching.
b <- c( "TSTM WIND", "TSTM WIND/HAIL", "TSTM WIND (G45)", "TSTM WIND 40", "TSTM WIND 45",
"TSTM WIND (41)", "TSTM WIND (G40)", "TSTM WIND AND LIGHTNING", " TSTM WIND (G45)", "TSTM WIND (G45)",
"TSTM WIND (G35)", " TSTM WIND", "TSTM WIND G45" )
# This first assignment creates the EVTYPENEW column; rows not matched by any
# assignment in this section stay NA.
stormdata$EVTYPENEW[(stormdata$EVTYPE %in% a|stormdata$EVTYPE %in% b)] <- "THUNDERSTORM WIND"
# Astronomical tide, avalanche, blizzard: names already match the target
# category, so they are copied over unchanged.
evtypeUnique[grep("ASTR", evtypeUnique)]
## [1] "ASTRONOMICAL HIGH TIDE" "ASTRONOMICAL LOW TIDE"
# NOTE(review): "ASTRONOMICAL HIGH TIDE" is listed above but is not assigned a
# category here, so those rows keep EVTYPENEW = NA -- confirm this is intended.
stormdata$EVTYPENEW[(stormdata$EVTYPE== "ASTRONOMICAL LOW TIDE")] <- "ASTRONOMICAL LOW TIDE"
evtypeUnique[grep("AVA", evtypeUnique)]
## [1] "AVALANCHE"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "AVALANCHE")] <- "AVALANCHE"
evtypeUnique[grep("BLIZ", evtypeUnique)]
## [1] "BLIZZARD"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "BLIZZARD")] <- "BLIZZARD"
# Flood names are split into coastal flood (c), flood (d) and flash flood (e);
# lakeshore flood keeps its own category.
evtypeUnique[grep("FLOOD", evtypeUnique)]
## [1] "FLASH FLOOD" "FLOOD"
## [3] "ICE JAM FLOOD (MINOR" "EROSION/CSTL FLOOD"
## [5] "RIVER FLOODING" "COASTAL FLOODING"
## [7] "TIDAL FLOODING" "COASTAL FLOOD"
## [9] "RIVER FLOOD" "COASTAL FLOODING/EROSION"
## [11] "COASTAL FLOODING/EROSION" "FLOOD/FLASH/FLOOD"
## [13] "FLASH FLOOD/FLOOD" " FLASH FLOOD"
## [15] "LAKESHORE FLOOD"
# The variable below is named `c`; later calls of c(...) still reach the base
# function because R looks for a function when evaluating a call.
c <- c("COASTAL FLOOD", "COASTAL FLOODING", "COASTAL FLOODING/EROSION", "COASTAL FLOODING/EROSION", "EROSION/CSTL FLOOD" )
d <- c("FLOOD", "ICE JAM FLOOD (MINOR", "RIVER FLOODING", "TIDAL FLOODING", "RIVER FLOOD", "FLOOD/FLASH/FLOOD")
e <- c("FLASH FLOOD", "FLASH FLOOD/FLOOD", " FLASH FLOOD")
stormdata$EVTYPENEW[(stormdata$EVTYPE %in% c)] <- "COASTAL FLOOD"
stormdata$EVTYPENEW[(stormdata$EVTYPE %in% d)] <- "FLOOD"
stormdata$EVTYPENEW[(stormdata$EVTYPE %in% e)] <- "FLASH FLOOD"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "LAKESHORE FLOOD" )] <- "LAKESHORE FLOOD"
# Cold and wind chill: f collects the ordinary cold names, g the extreme ones.
evtypeUnique[grep("COLD", evtypeUnique)]
## [1] "EXTREME COLD" "UNSEASONABLE COLD"
## [3] "EXTENDED COLD" "COLD"
## [5] "COLD TEMPERATURE" "COLD AND SNOW"
## [7] "UNSEASONABLY COLD" "COLD WEATHER"
## [9] "EXTREME COLD/WIND CHILL" "COLD/WIND CHILL"
evtypeUnique[grep("CHILL", evtypeUnique)]
## [1] "EXTREME WINDCHILL" "EXTREME COLD/WIND CHILL"
## [3] "COLD/WIND CHILL"
f <- c("UNSEASONABLE COLD", "EXTENDED COLD", "COLD", "COLD TEMPERATURE", "COLD AND SNOW",
"UNSEASONABLY COLD", "COLD WEATHER", "COLD/WIND CHILL")
g <- c("EXTREME COLD", "EXTREME COLD/WIND CHILL", "EXTREME WINDCHILL")
stormdata$EVTYPENEW[(stormdata$EVTYPE %in% f)] <- "COLD/WIND CHILL"
stormdata$EVTYPENEW[(stormdata$EVTYPE %in% g)] <- "EXTREME COLD/WIND CHILL"
# Dense fog / dense smoke: copied over unchanged.
evtypeUnique[grep("DENS", evtypeUnique)]
## [1] "DENSE FOG" "DENSE SMOKE"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "DENSE FOG")] <- "DENSE FOG"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "DENSE SMOKE")] <- "DENSE SMOKE"
# Only "DROUGHT" is assigned; "DROWNING" merely shares the search prefix.
evtypeUnique[grep("DRO", evtypeUnique)]
## [1] "DROUGHT" "DROWNING"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "DROUGHT")] <- "DROUGHT"
# NOTE(review): "BLOWING DUST" is listed but not assigned in this section.
evtypeUnique[grep("DUST", evtypeUnique)]
## [1] "DUST STORM" "DUST DEVIL" "BLOWING DUST"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "DUST DEVIL")] <- "DUST DEVIL"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "DUST STORM")] <- "DUST STORM"
# Heat: "EXCESSIVE HEAT" stays separate; the other heat names merge into "HEAT".
evtypeUnique[grep("HEAT", evtypeUnique)]
## [1] "EXCESSIVE HEAT" "HEAT WAVE" "HEAT" "RECORD HEAT"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "EXCESSIVE HEAT")] <- "EXCESSIVE HEAT"
h <- c("HEAT WAVE" , "HEAT", "RECORD HEAT")
stormdata$EVTYPENEW[(stormdata$EVTYPE %in% h)] <- "HEAT"
# Frost/freeze: every name listed below except "FREEZING FOG" goes into i.
evtypeUnique[grep("FR", evtypeUnique)]
## [1] "FREEZING RAIN" "FREEZE" "DAMAGING FREEZE"
## [4] "EARLY FROST" "FREEZING SPRAY" "FREEZING DRIZZLE"
## [7] "HARD FREEZE" "FROST/FREEZE" "AGRICULTURAL FREEZE"
## [10] "FROST" "LIGHT FREEZING RAIN" "FREEZING FOG"
i <- c("FREEZING RAIN", "FREEZE", "DAMAGING FREEZE", "EARLY FROST", "FREEZING SPRAY",
"FREEZING DRIZZLE", "HARD FREEZE", "FROST/FREEZE", "AGRICULTURAL FREEZE", "FROST", "LIGHT FREEZING RAIN")
stormdata$EVTYPENEW[(stormdata$EVTYPE %in% i)] <- "FROST/FREEZE"
evtypeUnique[grep("CLOUD", evtypeUnique)]
## [1] "FUNNEL CLOUD"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "FUNNEL CLOUD")] <- "FUNNEL CLOUD"
# "DENSE FOG" was assigned above; only "FREEZING FOG" is assigned here.
# NOTE(review): plain "FOG" is listed but not assigned in this section.
evtypeUnique[grep("FOG", evtypeUnique)]
## [1] "FOG" "DENSE FOG" "FREEZING FOG"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "FREEZING FOG")] <- "FREEZING FOG"
# Hail: j omits "TSTM WIND/HAIL" and "MARINE HAIL" from the list below.
evtypeUnique[grep("HAIL", evtypeUnique)]
## [1] "HAIL" "TSTM WIND/HAIL" "SMALL HAIL" "GUSTY WIND/HAIL"
## [5] "MARINE HAIL"
j <- c("HAIL", "SMALL HAIL", "GUSTY WIND/HAIL")
stormdata$EVTYPENEW[(stormdata$EVTYPE %in% j)] <- "HAIL"
# Heavy rain: k takes the heavy/torrential/gusty rain names from the list below.
# NOTE(review): "RAIN/SNOW", "UNSEASONAL RAIN" and "RAIN" are listed but not
# assigned in this section.
evtypeUnique[grep("RAIN", evtypeUnique)]
## [1] "FREEZING RAIN" "HEAVY RAIN" "HEAVY RAIN/HIGH SURF"
## [4] "TORRENTIAL RAINFALL" "GUSTY WIND/RAIN" "GUSTY WIND/HVY RAIN"
## [7] "RAIN/SNOW" "UNSEASONAL RAIN" "LIGHT FREEZING RAIN"
## [10] "RAIN"
k <- c("HEAVY RAIN", "HEAVY RAIN/HIGH SURF", "TORRENTIAL RAINFALL", "GUSTY WIND/RAIN", "GUSTY WIND/HVY RAIN")
stormdata$EVTYPENEW[(stormdata$EVTYPE %in% k)] <- "HEAVY RAIN"
# Snow: l merges heavy snow and squall names, m the two lake-effect spellings.
# NOTE(review): the light/plain/blowing/late-season snow and snow-and-ice
# names listed below are not in l or m.
evtypeUnique[grep("SNOW", evtypeUnique)]
## [1] "HEAVY SNOW" "HEAVY SNOW SHOWER" "LIGHT SNOW"
## [4] "SNOW" "SNOW SQUALLS" "LIGHT SNOWFALL"
## [7] "COLD AND SNOW" "RAIN/SNOW" "SNOW AND ICE"
## [10] "SNOW SQUALL" "LAKE EFFECT SNOW" "BLOWING SNOW"
## [13] "EXCESSIVE SNOW" "LATE SEASON SNOW" "FALLING SNOW/ICE"
## [16] "LAKE-EFFECT SNOW"
l <- c("HEAVY SNOW", "HEAVY SNOW SHOWER", "SNOW SQUALLS", "SNOW SQUALL", "EXCESSIVE SNOW")
m <- c("LAKE EFFECT SNOW", "LAKE-EFFECT SNOW")
stormdata$EVTYPENEW[(stormdata$EVTYPE %in% l)] <- "HEAVY SNOW"
stormdata$EVTYPENEW[(stormdata$EVTYPE %in% m)] <- "LAKE-EFFECT SNOW"
# High surf: n omits "ROUGH SURF" and "HEAVY RAIN/HIGH SURF" from the list
# below; the latter is part of k above.
evtypeUnique[grep("SURF", evtypeUnique)]
## [1] "ROUGH SURF" "HEAVY SURF" "HIGH SURF"
## [4] "HEAVY RAIN/HIGH SURF" "HEAVY SURF AND WIND" " HIGH SURF ADVISORY"
## [7] "HAZARDOUS SURF" "HEAVY SURF/HIGH SURF"
n <- c("HEAVY SURF", "HIGH SURF", "HEAVY SURF AND WIND", " HIGH SURF ADVISORY", "HAZARDOUS SURF", "HEAVY SURF/HIGH SURF")
stormdata$EVTYPENEW[(stormdata$EVTYPE %in% n)] <- "HIGH SURF"
# High wind: o takes only the high/gusty/gradient wind names.
# NOTE(review): generic wind names such as "WIND", "WINDS", "WIND DAMAGE",
# "GUSTY WINDS", "WHIRLWIND" and the NON-TSTM variants are not in o.
evtypeUnique[grep("WIND", evtypeUnique)]
## [1] "TSTM WIND" "HIGH WIND"
## [3] "TSTM WIND/HAIL" "STRONG WIND"
## [5] "WINDS" "STRONG WINDS"
## [7] "EXTREME WINDCHILL" "WHIRLWIND"
## [9] "WIND DAMAGE" "GUSTY WIND/RAIN"
## [11] "GUSTY WIND/HVY RAIN" "WIND"
## [13] "TSTM WIND (G45)" "GUSTY WINDS"
## [15] "GUSTY WIND" "TSTM WIND 40"
## [17] "TSTM WIND 45" "TSTM WIND (41)"
## [19] "TSTM WIND (G40)" "GRADIENT WIND"
## [21] "TSTM WIND AND LIGHTNING" "HEAVY SURF AND WIND"
## [23] "HIGH WINDS" " TSTM WIND (G45)"
## [25] "TSTM WIND (G45)" "HIGH WIND (G40)"
## [27] "TSTM WIND (G35)" "GUSTY WIND/HAIL"
## [29] " TSTM WIND" "WIND AND WAVE"
## [31] "TSTM WIND G45" "NON-SEVERE WIND DAMAGE"
## [33] "THUNDERSTORM WIND (G40)" "NON-TSTM WIND"
## [35] "NON TSTM WIND" "EXTREME COLD/WIND CHILL"
## [37] "MARINE TSTM WIND" "MARINE HIGH WIND"
## [39] "THUNDERSTORM WIND" "COLD/WIND CHILL"
## [41] "MARINE THUNDERSTORM WIND" "MARINE STRONG WIND"
o <- c("HIGH WIND", "GUSTY WIND", "GRADIENT WIND", "HIGH WINDS", "HIGH WIND (G40)")
stormdata$EVTYPENEW[(stormdata$EVTYPE %in% o)] <- "HIGH WIND"
# Hurricanes and typhoons: all four spellings merge into one category.
evtypeUnique[grep("HURRI", evtypeUnique)]
## [1] "HURRICANE" "HURRICANE EDOUARD" "HURRICANE/TYPHOON"
evtypeUnique[grep("TYPH", evtypeUnique)]
## [1] "TYPHOON" "HURRICANE/TYPHOON"
p <- c("HURRICANE", "HURRICANE EDOUARD", "HURRICANE/TYPHOON", "TYPHOON")
stormdata$EVTYPENEW[(stormdata$EVTYPE %in% p)] <- "HURRICANE(TYPHOON)"
# Ice: only "ICE STORM" is assigned from the list below.
# NOTE(review): "BLACK ICE", "ICE ROADS" and "ICE ON ROAD" are not assigned
# in this section.
evtypeUnique[grep("ICE", evtypeUnique)]
## [1] "ICE STORM" "ICE JAM FLOOD (MINOR" "SNOW AND ICE"
## [4] "BLACK ICE" "ICE ROADS" "FALLING SNOW/ICE"
## [7] "ICE ON ROAD"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "ICE STORM")] <- "ICE STORM"
# The "LIGHT" search also returns light snow/rain names; only "LIGHTNING" is
# assigned here.
evtypeUnique[grep("LIGHT", evtypeUnique)]
## [1] "LIGHTNING" "LIGHT SNOW"
## [3] "LIGHT SNOWFALL" "TSTM WIND AND LIGHTNING"
## [5] "LIGHT FREEZING RAIN"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "LIGHTNING")] <- "LIGHTNING"
# Marine events keep their own categories.
# NOTE(review): "MARINE TSTM WIND" and "MARINE ACCIDENT" are listed but not
# assigned in this section; "MARINE TSTM WIND" looks like an abbreviation of
# "MARINE THUNDERSTORM WIND" -- confirm whether it should be merged.
evtypeUnique[grep("MARINE", evtypeUnique)]
## [1] "MARINE ACCIDENT" "MARINE TSTM WIND"
## [3] "MARINE HIGH WIND" "MARINE THUNDERSTORM WIND"
## [5] "MARINE STRONG WIND" "MARINE HAIL"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "MARINE HAIL")] <- "MARINE HAIL"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "MARINE HIGH WIND")] <- "MARINE HIGH WIND"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "MARINE STRONG WIND")] <- "MARINE STRONG WIND"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "MARINE THUNDERSTORM WIND")] <- "MARINE THUNDERSTORM WIND"
# Singular and plural spellings merge into "RIP CURRENT".
evtypeUnique[grep("RIP", evtypeUnique)]
## [1] "RIP CURRENTS" "RIP CURRENT"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "RIP CURRENTS"|stormdata$EVTYPE== "RIP CURRENT")] <- "RIP CURRENT"
evtypeUnique[grep("SEI", evtypeUnique)]
## [1] "SEICHE"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "SEICHE")] <- "SEICHE"
# Both storm surge spellings merge into "STORM SURGE/TIDE".
evtypeUnique[grep("SURGE", evtypeUnique)]
## [1] "STORM SURGE" "STORM SURGE/TIDE"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "STORM SURGE"|stormdata$EVTYPE== "STORM SURGE/TIDE")] <- "STORM SURGE/TIDE"
# "MARINE STRONG WIND" was assigned above; the two land spellings merge here.
evtypeUnique[grep("STRONG", evtypeUnique)]
## [1] "STRONG WIND" "STRONG WINDS" "MARINE STRONG WIND"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "STRONG WIND"|stormdata$EVTYPE== "STRONG WINDS")] <- "STRONG WIND"
evtypeUnique[grep("TORN", evtypeUnique)]
## [1] "TORNADO"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "TORNADO")] <- "TORNADO"
evtypeUnique[grep("TROP", evtypeUnique)]
## [1] "TROPICAL STORM" "TROPICAL DEPRESSION"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "TROPICAL DEPRESSION")] <- "TROPICAL DEPRESSION"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "TROPICAL STORM")] <- "TROPICAL STORM"
evtypeUnique[grep("TSU", evtypeUnique)]
## [1] "TSUNAMI"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "TSUNAMI")] <- "TSUNAMI"
# The "ASH" search also returns the flash flood names; only "VOLCANIC ASH" is
# assigned here.
evtypeUnique[grep("ASH", evtypeUnique)]
## [1] "FLASH FLOOD" "FLOOD/FLASH/FLOOD" "FLASH FLOOD/FLOOD"
## [4] " FLASH FLOOD" "VOLCANIC ASH"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "VOLCANIC ASH")] <- "VOLCANIC ASH"
# NOTE(review): "HIGH WATER" is listed but not assigned in this section.
evtypeUnique[grep("WATER", evtypeUnique)]
## [1] "WATERSPOUT" "HIGH WATER"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "WATERSPOUT" )] <- "WATERSPOUT"
# Both wildfire spellings merge into "WILDFIRE".
evtypeUnique[grep("WILD", evtypeUnique)]
## [1] "WILD/FOREST FIRE" "WILDFIRE"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "WILD/FOREST FIRE"|stormdata$EVTYPE=="WILDFIRE")] <- "WILDFIRE"
# Winter: "WINTER STORM" stays separate; the weather/mix spellings merge into
# "WINTER WEATHER".
evtypeUnique[grep("WINTER", evtypeUnique)]
## [1] "WINTER STORM" "WINTER WEATHER" "WINTER WEATHER MIX"
## [4] "WINTER WEATHER/MIX"
stormdata$EVTYPENEW[(stormdata$EVTYPE== "WINTER STORM")] <- "WINTER STORM"
q <- c( "WINTER WEATHER", "WINTER WEATHER MIX", "WINTER WEATHER/MIX")
stormdata$EVTYPENEW[(stormdata$EVTYPE %in% q)] <- "WINTER WEATHER"
Now let’s see how many event types there are.
# Number of distinct consolidated event types; table() leaves out rows whose
# EVTYPENEW is NA (event types that were not assigned a category).
evtypeNewCounts <- as.data.frame(table(stormdata[["EVTYPENEW"]]))
dim(evtypeNewCounts)
## [1] 46 2
Two of the event types listed in Table 2.1.1 were not found in the processed data, so we end up with 46 event types, each of which matches an entry in Table 2.1.1.
# Total fatalities plus injuries per consolidated event type.
# na.rm = TRUE is now passed to aggregate(), which forwards it to sum();
# previously it was an argument of with(), where it was silently ignored.
# Rows whose HEALTH or EVTYPENEW is NA are dropped by aggregate()'s default
# na.action for the formula interface.
healthImpact <- aggregate(HEALTH ~ EVTYPENEW, data = stormdata,
                          FUN = sum, na.rm = TRUE)
# Rank event types from most to least harmful.
healthImpact <- healthImpact[order(healthImpact$HEALTH, decreasing = TRUE), ]
# head() shows at most ten rows and never pads with NA rows when fewer exist.
head(healthImpact, 10)
## EVTYPENEW HEALTH
## 38 TORNADO 22178
## 11 EXCESSIVE HEAT 8188
## 14 FLOOD 7176
## 37 THUNDERSTORM WIND 5508
## 28 LIGHTNING 4792
## 13 FLASH FLOOD 2561
## 44 WILDFIRE 1543
## 19 HEAT 1531
## 45 WINTER STORM 1483
## 24 HURRICANE(TYPHOON) 1453
# Bar chart of the ten event types with the highest combined fatalities and
# injuries. reorder() sorts the bars from most to least harmful so the
# ranking is visible; a character x axis would otherwise be alphabetical.
healthImpactChart <- ggplot(
  head(healthImpact, 10),
  aes(x = reorder(EVTYPENEW, -HEALTH), y = HEALTH, fill = EVTYPENEW)
) +
  geom_col() +
  labs(
    title = "Top 10 Weather Events Most Harmful to Population Health",
    x = "Event Type",
    y = "Total Fatalities and Injuries",
    fill = "Event Type"
  ) +
  theme(
    plot.title = element_text(size = 14, hjust = 0.5),
    # Rotate the event names so the long labels do not overlap.
    axis.text.x = element_text(angle = 90)
  )
print(healthImpactChart)
# Total property plus crop damage per consolidated event type.
# na.rm = TRUE is now passed to aggregate(), which forwards it to sum();
# previously it was an argument of with(), where it was silently ignored.
# Rows whose ECONOMY or EVTYPENEW is NA are dropped by aggregate()'s default
# na.action for the formula interface.
economyImpact <- aggregate(ECONOMY ~ EVTYPENEW, data = stormdata,
                           FUN = sum, na.rm = TRUE)
# Rank event types from most to least costly.
economyImpact <- economyImpact[order(economyImpact$ECONOMY, decreasing = TRUE), ]
# head() shows at most ten rows and never pads with NA rows when fewer exist.
head(economyImpact, 10)
## EVTYPENEW ECONOMY
## 14 FLOOD 149075967950
## 24 HURRICANE(TYPHOON) 87068996810
## 35 STORM SURGE/TIDE 47835579000
## 38 TORNADO 24900370720
## 18 HAIL 17092055870
## 13 FLASH FLOOD 16557160610
## 8 DROUGHT 14413667000
## 37 THUNDERSTORM WIND 8930498480
## 40 TROPICAL STORM 8320186550
## 44 WILDFIRE 8162704630
# Bar chart of the ten event types with the highest total damage. ECONOMY is
# the sum of property and crop damage, so the axis label names both.
# reorder() sorts the bars from most to least costly so the ranking is
# visible; a character x axis would otherwise be alphabetical.
economyImpactChart <- ggplot(
  head(economyImpact, 10),
  aes(x = reorder(EVTYPENEW, -ECONOMY), y = ECONOMY, fill = EVTYPENEW)
) +
  geom_col() +
  labs(
    title = "Top 10 Weather Events with the Worst Economic Consequences",
    x = "Event Type",
    y = "Total Property and Crop Damage",
    fill = "Event Type"
  ) +
  theme(
    plot.title = element_text(size = 14, hjust = 0.5),
    # Rotate the event names so the long labels do not overlap.
    axis.text.x = element_text(angle = 90)
  )
print(economyImpactChart)
From the plot we can tell that floods lead to the worst economic consequences.
date: 2021-11-12