This raport summarizes exploration of the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The aim of this exploration is to determine which types of weather events are most harmful accross the United States with respect to:
The data was downloaded from https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2 on January 23, 2015.
if(!file.exists("./data")){dir.create("./data")}
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if(!file.exists("./data/stormData.csv.bz2")){download.file(url, destfile = "./data/stormData.csv.bz2")}
(files <- list.files("./data/", full.names=T))
## [1] "./data/stormData.csv.bz2"
dat <- read.csv(files[1])
### load required packages
if(!("dplyr" %in% rownames(installed.packages()))) {
install.packages("dplyr")
}
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
if(!("xtable" %in% rownames(installed.packages()))) {
install.packages("xtable")
}
require(xtable)
## Loading required package: xtable
The original data has 902297 observations of 37 variables. However, upon initial exploration it was determined that only some variables were needed for the analysis. These were:
- event type (EVTYPE)
- FATALITIES
- INJURIES
- properity damage (PROPDMG)
- Order of magnitude for properity damage (PROPDMGEXP)
- crop damage (CROPDMG)
- Order of magnitude for crop damage (CROPDMGEXP)
Subset the variables of interest
dat2 <- select(dat, EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
rm(dat)
We are also only interested in observations that have more than 0 injuries/fatalities/crop or properity damage. So select only those observations
dat3 <- filter(dat2, FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0)
str(dat3)
## 'data.frame': 254633 obs. of 7 variables:
## $ EVTYPE : Factor w/ 985 levels " HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
rm(dat2)
The event type (EVTYPE) has 985 types of weather events. However, the official NOAA NCDC publication there should be 48 such events (Table 2.1.1). So first clean up the event type variable.
### first change everything to upper case
dat3$EVTYPE <- as.factor(toupper(dat3$EVTYPE))
nlevels(dat3$EVTYPE)
## [1] 447
### create new variable for cleaned event
dat3 <- mutate(dat3, cleanEv = "other")
dat3$cleanEv[grep(".*UR.*ICAN.*", dat3$EVTYPE)] <- "HURRICANE/TYPHOON"
dat3$cleanEv[grep("TYPHOON", dat3$EVTYPE)] <- "HURRICANE/TYPHOON"
dat3$cleanEv[grep(".LOW TIDE*.*", dat3$EVTYPE)] <- "LOW TIDE"
dat3$cleanEv[grep(".HIGH TIDE.*", dat3$EVTYPE)] <- "HIGH TIDE"
dat3$cleanEv[grep("AVALAN", dat3$EVTYPE)] <- "AVALANCHE"
dat3$cleanEv[grep("BLIZ", dat3$EVTYPE)] <- "BLIZZARD"
dat3$cleanEv[grep("FLOOD", dat3$EVTYPE)] <- "FLOOD"
dat3$cleanEv[grep("FLASH FLOOD", dat3$EVTYPE)] <- "FLASH FLOOD"
dat3$cleanEv[grep("^(COASTAL FLOOD)", dat3$EVTYPE)] <- "COASTAL FLOOD"
dat3$cleanEv[grep("^(LAKESHORE FLOOD)", dat3$EVTYPE)] <- "LAKESHORE FLOOD"
dat3$cleanEv[grep("^(COLD/WIND CHILL)|^COLD|^(WIND CHILL)", dat3$EVTYPE)] <- "COLD/WIND CHILL"
dat3$cleanEv[grep("^(EXTREME COLD/WIND CHILL)|^(EXTREME COLD)|^(EXTREME WIND CHILL)", dat3$EVTYPE)] <- "EXTREME COLD/WIND CHILL"
dat3$cleanEv[grep("FREEZ.* FOG", dat3$EVTYPE)] <- "FREEZING FOG"
dat3$cleanEv[grep("DENSE FOG", dat3$EVTYPE)] <- "DENSE FOG"
dat3$cleanEv[grep("DENSE SMOKE", dat3$EVTYPE)] <- "DENSE SMOKE"
dat3$cleanEv[grep("DROUGHT", dat3$EVTYPE)] <- "DROUGHT"
dat3$cleanEv[grep("DUST DEVIL", dat3$EVTYPE)] <- "DUST DEVIL"
dat3$cleanEv[grep("DUST STORM", dat3$EVTYPE)] <- "DUST STORM"
dat3$cleanEv[grep("HEAT", dat3$EVTYPE)] <- "HEAT"
dat3$cleanEv[grep("EXCES.* HEAT", dat3$EVTYPE)] <- "EXCESSIVE HEAT"
dat3$cleanEv[grep("FROST|FREEZE", dat3$EVTYPE)] <- "FROST/FREEZE"
dat3$cleanEv[grep("FUNNEL CLOUD", dat3$EVTYPE)] <- "FUNNEL CLOUD"
dat3$cleanEv[grep("HAIL", dat3$EVTYPE)] <- "HAIL"
dat3$cleanEv[grep("HEAVY RAIN", dat3$EVTYPE)] <- "HEAVY RAIN"
dat3$cleanEv[grep("HEAVY SNOW", dat3$EVTYPE)] <- "HEAVY SNOW"
dat3$cleanEv[grep("HIGH SURF", dat3$EVTYPE)] <- "HIGH SURF"
dat3$cleanEv[grep("HIGH WIND", dat3$EVTYPE)] <- "HIGH WIND"
dat3$cleanEv[grep("ICE STORM", dat3$EVTYPE)] <- "ICE STORM"
dat3$cleanEv[grep("LAKE.*EFFECT SNOW", dat3$EVTYPE)] <- "LAKE EFFECT SNOW"
dat3$cleanEv[grep("^LIGHTNING$", dat3$EVTYPE)] <- "LIGHTNING"
dat3$cleanEv[grep("MARINE HAIL", dat3$EVTYPE)] <- "MARINE HAIL"
dat3$cleanEv[grep("MARINE HIGH WIND", dat3$EVTYPE)] <- "MARINE HIGH WIND"
dat3$cleanEv[grep("STRONG WIND", dat3$EVTYPE)] <- "STRONG WIND"
dat3$cleanEv[grep("MARINE STRONG WIND", dat3$EVTYPE)] <- "MARINE STRONG WIND"
dat3$cleanEv[grep("THUNDE.*R.*STORM.* WIND.*", dat3$EVTYPE)] <- "THUNDERSTORM WIND"
dat3$cleanEv[grep("MARINE T", dat3$EVTYPE)] <- "MARINE THUNDERSTORM WIND"
dat3$cleanEv[grep("RIP", dat3$EVTYPE)] <- "RIP CURRENT"
dat3$cleanEv[grep("SEI", dat3$EVTYPE)] <- "SEICHE"
dat3$cleanEv[grep("SLE", dat3$EVTYPE)] <- "SLEET"
dat3$cleanEv[grep("STORM S.*", dat3$EVTYPE)] <- "STORM SURGE/TIDE"
dat3$cleanEv[grep("TORN", dat3$EVTYPE)] <- "TORNADO"
dat3$cleanEv[grep("WATERSP", dat3$EVTYPE)] <- "WATERSPOUT"
dat3$cleanEv[grep("TROPICAL S", dat3$EVTYPE)] <- "TROPICAL STORM"
dat3$cleanEv[grep("TROPICAL D", dat3$EVTYPE)] <- "TROPICAL DEPRESSION"
dat3$cleanEv[grep("VOLCAN", dat3$EVTYPE)] <- "VOLCANIC ASH"
dat3$cleanEv[grep("WILD", dat3$EVTYPE)] <- "WILDFIRE"
dat3$cleanEv[grep("WINTER S", dat3$EVTYPE)] <- "WINTER STORM"
dat3$cleanEv[grep("WINTER W", dat3$EVTYPE)] <- "WINTER WEATHER"
dat3$cleanEv <- as.factor(dat3$cleanEv)
nlevels(dat3$cleanEv)
## [1] 48
#clean up properity damage exponent variable
sum(is.na(dat3$PROPDMGEXP))
## [1] 0
levels(dat3$PROPDMGEXP)
## [1] "" "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K"
## [18] "m" "M"
dat3 <- mutate(dat3, propDmgExp="0")
dat3$propDmgExp[dat3$PROPDMGEXP=="0" | dat3$PROPDMGEXP==""] <- 1
dat3$propDmgExp[dat3$PROPDMGEXP=="2" | dat3$PROPDMGEXP=="h" | dat3$PROPDMGEXP=="H"] <- 2
dat3$propDmgExp[dat3$PROPDMGEXP=="3" | dat3$PROPDMGEXP=="K"] <- 3
dat3$propDmgExp[dat3$PROPDMGEXP=="4"] <- 4
dat3$propDmgExp[dat3$PROPDMGEXP=="5"] <- 5
dat3$propDmgExp[dat3$PROPDMGEXP=="6" | dat3$PROPDMGEXP=="m" | dat3$PROPDMGEXP=="M"] <- 6
dat3$propDmgExp[dat3$PROPDMGEXP=="7"] <- 7
dat3$propDmgExp[dat3$PROPDMGEXP=="B"] <- 9
dat3$propDmgExp <- as.numeric(dat3$propDmgExp)
#create new properity damage variable from the propdmg*10^propdmgexp
sum(is.na(dat3$PROPDMG))
## [1] 0
dat3 <- mutate(dat3, ProperityDamage=PROPDMG*10^propDmgExp)
##clean up crop damage exponent variable
sum(is.na(dat3$CROPDMGEXP))
## [1] 0
levels(dat3$CROPDMGEXP)
## [1] "" "?" "0" "2" "B" "k" "K" "m" "M"
dat3 <- mutate(dat3, cropDmgExp="0")
dat3$cropDmgExp[dat3$CROPDMGEXP=="0" | dat3$CROPDMGEXP==""] <- 1
dat3$cropDmgExp[dat3$CROPDMGEXP=="2"] <- 2
dat3$cropDmgExp[dat3$CROPDMGEXP=="K" | dat3$CROPDMGEXP=="k"] <- 3
dat3$cropDmgExp[dat3$CROPDMGEXP=="m" | dat3$CROPDMGEXP=="M"] <- 6
dat3$cropDmgExp[dat3$CROPDMGEXP=="B"] <- 9
dat3$cropDmgExp <- as.numeric(dat3$cropDmgExp)
#create new crop damage variable from the cropdmg*10^cropdmgexp
sum(is.na(dat3$CROPDMG))
## [1] 0
dat3 <- mutate(dat3, CropDamage=CROPDMG*10^cropDmgExp)
sum(is.na(dat3$FATALITIES))
## [1] 0
sum(is.na(dat3$INJURIES))
## [1] 0
The injury and fatalities variables look ok
Additionally from the variable ‘cleanEv’ remove the ‘other’ category since it is a mix of, often unrelated, events.
datClean <- select(dat3, FATALITIES, INJURIES, cleanEv, ProperityDamage, CropDamage)
datClean <- filter(datClean, !cleanEv=="other")
str(datClean)
## 'data.frame': 189325 obs. of 5 variables:
## $ FATALITIES : num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ cleanEv : Factor w/ 48 levels "AVALANCHE","BLIZZARD",..: 41 41 41 41 41 41 41 41 41 41 ...
## $ ProperityDamage: num 25000 2500 25000 2500 2500 2500 2500 2500 25000 25000 ...
## $ CropDamage : num 0 0 0 0 0 0 0 0 0 0 ...
summary(datClean)
## FATALITIES INJURIES cleanEv
## Min. : 0.0000 Min. : 0.0000 THUNDERSTORM WIND:55892
## 1st Qu.: 0.0000 1st Qu.: 0.0000 TORNADO :39962
## Median : 0.0000 Median : 0.0000 HAIL :26606
## Mean : 0.0753 Mean : 0.6953 FLASH FLOOD :21597
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 LIGHTNING :13293
## Max. :583.0000 Max. :1700.0000 FLOOD :10630
## (Other) :21345
## ProperityDamage CropDamage
## Min. :0.000e+00 Min. :0.000e+00
## 1st Qu.:2.000e+03 1st Qu.:0.000e+00
## Median :1.000e+04 Median :0.000e+00
## Mean :2.228e+06 Mean :2.551e+05
## 3rd Qu.:5.000e+04 3rd Qu.:0.000e+00
## Max. :1.150e+11 Max. :5.000e+09
##
rm(dat3)
byEventInjury <- datClean %>%
group_by(cleanEv) %>%
summarize(injuryMean=mean(INJURIES),
injuryTotal=sum(INJURIES)) %>%
arrange(desc(injuryTotal))
iTot <- as.data.frame(byEventInjury[1:20,])
iTot$cleanEv <- droplevels(iTot$cleanEv)
iTot$cleanEv <- factor(iTot$cleanEv, levels=iTot$cleanEv[order(-iTot$injuryTotal)])
xtTot <- xtable(iTot)
print(xtTot, type = "html")
cleanEv | injuryMean | injuryTotal | |
---|---|---|---|
1 | TORNADO | 2.29 | 91364.00 |
2 | FLOOD | 0.64 | 6795.00 |
3 | EXCESSIVE HEAT | 9.20 | 6525.00 |
4 | LIGHTNING | 0.39 | 5230.00 |
5 | HEAT | 9.96 | 2699.00 |
6 | THUNDERSTORM WIND | 0.04 | 2413.00 |
7 | ICE STORM | 2.79 | 1992.00 |
8 | FLASH FLOOD | 0.08 | 1800.00 |
9 | WILDFIRE | 1.28 | 1606.00 |
10 | HIGH WIND | 0.24 | 1507.00 |
11 | HAIL | 0.06 | 1466.00 |
12 | WINTER STORM | 0.89 | 1353.00 |
13 | HURRICANE/TYPHOON | 5.75 | 1333.00 |
14 | HEAVY SNOW | 0.74 | 1034.00 |
15 | BLIZZARD | 3.16 | 805.00 |
16 | WINTER WEATHER | 0.98 | 538.00 |
17 | RIP CURRENT | 0.82 | 529.00 |
18 | DUST STORM | 4.27 | 440.00 |
19 | TROPICAL STORM | 0.91 | 383.00 |
20 | DENSE FOG | 4.62 | 342.00 |
par(mar=c(12, 4, 4, 2))
with(iTot, plot(cleanEv, injuryTotal/1000, type="l", las=2, main="Top 20 weather events having the highest total number of injuries", ylab="Total number of injuries (x1000)" ))
byEventInjury <- arrange(byEventInjury, desc(injuryMean))
itMean <- as.data.frame(byEventInjury[1:20,])
itMean$cleanEv <- droplevels(itMean$cleanEv)
itMean$cleanEv <- factor(itMean$cleanEv, levels=itMean$cleanEv[order(-itMean$injuryMean)])
xtMean <- xtable(itMean)
print(xtMean, type = "html")
cleanEv | injuryMean | injuryTotal | |
---|---|---|---|
1 | HEAT | 9.96 | 2699.00 |
2 | EXCESSIVE HEAT | 9.20 | 6525.00 |
3 | HURRICANE/TYPHOON | 5.75 | 1333.00 |
4 | DENSE FOG | 4.62 | 342.00 |
5 | DUST STORM | 4.27 | 440.00 |
6 | BLIZZARD | 3.16 | 805.00 |
7 | ICE STORM | 2.79 | 1992.00 |
8 | TORNADO | 2.29 | 91364.00 |
9 | WILDFIRE | 1.28 | 1606.00 |
10 | WATERSPOUT | 1.12 | 72.00 |
11 | HIGH SURF | 1.09 | 204.00 |
12 | WINTER WEATHER | 0.98 | 538.00 |
13 | TROPICAL STORM | 0.91 | 383.00 |
14 | WINTER STORM | 0.89 | 1353.00 |
15 | RIP CURRENT | 0.82 | 529.00 |
16 | EXTREME COLD/WIND CHILL | 0.82 | 255.00 |
17 | HEAVY SNOW | 0.74 | 1034.00 |
18 | FLOOD | 0.64 | 6795.00 |
19 | AVALANCHE | 0.63 | 170.00 |
20 | MARINE STRONG WIND | 0.48 | 22.00 |
par(mar=c(12, 4, 4, 2))
with(itMean, plot(cleanEv, injuryMean, type="l", las=2, main="Top 20 weather events having the highest average number of injuries", ylab="Average number of injuries" ))
Summing up total number of injuries for each weather event we observe that in total tornados cause the most injuries in the US. However, when we look at average number of injuries per weather event, heat it the leading cause of injury.
byEventFatality <- datClean %>%
group_by(cleanEv) %>%
summarize(fatalMean=mean(FATALITIES),
fatalTotal=sum(FATALITIES)) %>%
arrange(desc(fatalTotal))
fTot <- as.data.frame(byEventFatality[1:20,])
fTot$cleanEv <- droplevels(fTot$cleanEv)
fTot$cleanEv <- factor(fTot$cleanEv, levels=fTot$cleanEv[order(-fTot$fatalTotal)])
xtfTot <- xtable(fTot)
print(xtfTot, type = "html")
cleanEv | fatalMean | fatalTotal | |
---|---|---|---|
1 | TORNADO | 0.14 | 5658.00 |
2 | EXCESSIVE HEAT | 2.71 | 1922.00 |
3 | HEAT | 4.49 | 1216.00 |
4 | FLASH FLOOD | 0.05 | 1035.00 |
5 | LIGHTNING | 0.06 | 816.00 |
6 | RIP CURRENT | 0.90 | 577.00 |
7 | FLOOD | 0.05 | 483.00 |
8 | HIGH WIND | 0.05 | 297.00 |
9 | EXTREME COLD/WIND CHILL | 0.92 | 287.00 |
10 | AVALANCHE | 0.84 | 225.00 |
11 | WINTER STORM | 0.14 | 217.00 |
12 | THUNDERSTORM WIND | 0.00 | 199.00 |
13 | COLD/WIND CHILL | 1.14 | 158.00 |
14 | HIGH SURF | 0.78 | 146.00 |
15 | HURRICANE/TYPHOON | 0.57 | 133.00 |
16 | HEAVY SNOW | 0.09 | 127.00 |
17 | STRONG WIND | 0.03 | 111.00 |
18 | BLIZZARD | 0.40 | 101.00 |
19 | HEAVY RAIN | 0.09 | 99.00 |
20 | WILDFIRE | 0.07 | 90.00 |
par(mar=c(12, 4, 4, 2))
with(fTot, plot(cleanEv, fatalTotal/1000, type="l", las=2, main="Top 20 weather events having the highest total number of fatalities", ylab="Total number of fatalities (x1000)" ))
byEventFatality <- arrange(byEventFatality, desc(fatalMean))
fMean <- as.data.frame(byEventFatality[1:20,])
fMean$cleanEv <- droplevels(fMean$cleanEv)
fMean$cleanEv <- factor(fMean$cleanEv, levels=fMean$cleanEv[order(-fMean$fatalMean)])
xtfMean <- xtable(fMean)
print(xtfMean, type = "html")
cleanEv | fatalMean | fatalTotal | |
---|---|---|---|
1 | HEAT | 4.49 | 1216.00 |
2 | EXCESSIVE HEAT | 2.71 | 1922.00 |
3 | COLD/WIND CHILL | 1.14 | 158.00 |
4 | EXTREME COLD/WIND CHILL | 0.92 | 287.00 |
5 | RIP CURRENT | 0.90 | 577.00 |
6 | AVALANCHE | 0.84 | 225.00 |
7 | HIGH SURF | 0.78 | 146.00 |
8 | HURRICANE/TYPHOON | 0.57 | 133.00 |
9 | BLIZZARD | 0.40 | 101.00 |
10 | MARINE STRONG WIND | 0.30 | 14.00 |
11 | DENSE FOG | 0.24 | 18.00 |
12 | DUST STORM | 0.21 | 22.00 |
13 | SLEET | 0.20 | 2.00 |
14 | TROPICAL STORM | 0.16 | 66.00 |
15 | WINTER STORM | 0.14 | 217.00 |
16 | TORNADO | 0.14 | 5658.00 |
17 | MARINE THUNDERSTORM WIND | 0.13 | 19.00 |
18 | ICE STORM | 0.12 | 89.00 |
19 | WINTER WEATHER | 0.11 | 61.00 |
20 | STORM SURGE/TIDE | 0.11 | 24.00 |
par(mar=c(12, 4, 4, 2))
with(fMean, plot(cleanEv, fatalMean, type="l", las=2, main="Top 20 weather events having the highest average number of fatalities", ylab="Average number of fatalities" ))
Again we observe that out of all weather events tornados cause the most fatalities in total across US. However, when we look at average number of fatalities per weather event, heat it the leading cause of death.
byEventPropDMG <- datClean %>%
group_by(cleanEv) %>%
summarize(propDmgMean=round(mean(ProperityDamage)/(10^6), digits=2),
propDmgTotal = round(sum(ProperityDamage)/(10^9), digits=2)) %>%
arrange(desc(propDmgTotal))
pTot <- as.data.frame(byEventPropDMG[1:20,])
pTot$cleanEv <- droplevels(pTot$cleanEv)
pTot$cleanEv <- factor(pTot$cleanEv, levels=pTot$cleanEv[order(-pTot$propDmgTotal)])
xtpTot <- xtable(pTot)
print(xtpTot, type = "html")
cleanEv | propDmgMean | propDmgTotal | |
---|---|---|---|
1 | FLOOD | 14.13 | 150.18 |
2 | HURRICANE/TYPHOON | 367.48 | 85.26 |
3 | TORNADO | 1.47 | 58.55 |
4 | STORM SURGE/TIDE | 214.13 | 47.96 |
5 | FLASH FLOOD | 0.81 | 17.59 |
6 | HAIL | 0.60 | 16.02 |
7 | WILDFIRE | 6.78 | 8.49 |
8 | TROPICAL STORM | 18.32 | 7.71 |
9 | WINTER STORM | 4.46 | 6.75 |
10 | HIGH WIND | 0.98 | 6.11 |
11 | THUNDERSTORM WIND | 0.10 | 5.43 |
12 | ICE STORM | 5.53 | 3.95 |
13 | HEAVY RAIN | 2.82 | 3.23 |
14 | DROUGHT | 3.93 | 1.05 |
15 | HEAVY SNOW | 0.68 | 0.95 |
16 | LIGHTNING | 0.07 | 0.93 |
17 | BLIZZARD | 2.59 | 0.66 |
18 | COASTAL FLOOD | 1.78 | 0.41 |
19 | STRONG WIND | 0.05 | 0.18 |
20 | HIGH SURF | 0.60 | 0.11 |
par(mar=c(12, 4, 4, 2))
with(pTot, plot(cleanEv, propDmgTotal, type="l", las=2, main="Top 20 weather events having the highest cost in properity damage", ylab="Total damage (Bln US $)"))
byEventPropDMG <- arrange(byEventPropDMG, desc(propDmgMean))
pMean <- as.data.frame(byEventPropDMG[1:20,])
pMean$cleanEv <- droplevels(pMean$cleanEv)
pMean$cleanEv <- factor(pMean$cleanEv, levels=pMean$cleanEv[order(-pMean$propDmgMean)])
xtpMean <- xtable(pMean)
print(xtpMean, type = "html")
cleanEv | propDmgMean | propDmgTotal | |
---|---|---|---|
1 | HURRICANE/TYPHOON | 367.48 | 85.26 |
2 | STORM SURGE/TIDE | 214.13 | 47.96 |
3 | TROPICAL STORM | 18.32 | 7.71 |
4 | FLOOD | 14.13 | 150.18 |
5 | WILDFIRE | 6.78 | 8.49 |
6 | ICE STORM | 5.53 | 3.95 |
7 | WINTER STORM | 4.46 | 6.75 |
8 | DROUGHT | 3.93 | 1.05 |
9 | HEAVY RAIN | 2.82 | 3.23 |
10 | BLIZZARD | 2.59 | 0.66 |
11 | COASTAL FLOOD | 1.78 | 0.41 |
12 | LAKESHORE FLOOD | 1.51 | 0.01 |
13 | TORNADO | 1.47 | 58.55 |
14 | HIGH TIDE | 1.18 | 0.01 |
15 | HIGH WIND | 0.98 | 6.11 |
16 | WATERSPOUT | 0.95 | 0.06 |
17 | FLASH FLOOD | 0.81 | 17.59 |
18 | HEAVY SNOW | 0.68 | 0.95 |
19 | HAIL | 0.60 | 16.02 |
20 | HIGH SURF | 0.60 | 0.11 |
par(mar=c(12, 4, 4, 2))
with(pMean, plot(cleanEv, propDmgMean, type="l", las=2, main="Top 20 weather events having the highest average cost in properity damage", ylab="Average damage (Mln US$)"))
Summing up total cost in properity damage for each weather event we observe that tornados cause the most properity damage across the US. However, when we look at average cost of properity damage per weather event, huracaines/typhoons cause the most properity damage across US.
byEventCropDMG <- datClean %>%
group_by(cleanEv) %>%
summarize(cropDmgMean=round(mean(CropDamage)/(10^6), digits=2),
cropDmgTotal = round(sum(CropDamage)/(10^9), digits=2)) %>%
arrange(desc(cropDmgTotal))
cTot <- as.data.frame(byEventCropDMG[1:20,])
cTot$cleanEv <- droplevels(cTot$cleanEv)
cTot$cleanEv <- factor(cTot$cleanEv, levels=cTot$cleanEv[order(-cTot$cropDmgTotal)])
xtcTot <- xtable(cTot)
print(xtcTot, type = "html")
cleanEv | cropDmgMean | cropDmgTotal | |
---|---|---|---|
1 | DROUGHT | 52.53 | 13.97 |
2 | FLOOD | 1.02 | 10.85 |
3 | HURRICANE/TYPHOON | 23.73 | 5.51 |
4 | ICE STORM | 7.04 | 5.02 |
5 | HAIL | 0.12 | 3.11 |
6 | FROST/FREEZE | 12.88 | 2.00 |
7 | FLASH FLOOD | 0.07 | 1.53 |
8 | EXTREME COLD/WIND CHILL | 4.22 | 1.31 |
9 | HEAVY RAIN | 0.69 | 0.79 |
10 | HIGH WIND | 0.11 | 0.70 |
11 | TROPICAL STORM | 1.65 | 0.69 |
12 | THUNDERSTORM WIND | 0.01 | 0.63 |
13 | EXCESSIVE HEAT | 0.69 | 0.49 |
14 | TORNADO | 0.01 | 0.42 |
15 | HEAT | 1.52 | 0.41 |
16 | WILDFIRE | 0.32 | 0.40 |
17 | HEAVY SNOW | 0.10 | 0.13 |
18 | BLIZZARD | 0.44 | 0.11 |
19 | COLD/WIND CHILL | 0.48 | 0.07 |
20 | STRONG WIND | 0.02 | 0.07 |
par(mar=c(12, 4, 4, 2))
with(cTot, plot(cleanEv, cropDmgTotal, type="l", las=2, main="Top 20 weather events having the highest total cost due to crop damage", ylab="Total damage (Bln US $)"))
byEventCropDMG <- arrange(byEventCropDMG, desc(cropDmgMean))
cMean <- as.data.frame(byEventCropDMG[1:20,])
cMean$cleanEv <- droplevels(cMean$cleanEv)
cMean$cleanEv <- factor(cMean$cleanEv, levels=cMean$cleanEv[order(-cMean$cropDmgMean)])
xtcMean <- xtable(cMean)
print(xtcMean, type = "html")
cleanEv | cropDmgMean | cropDmgTotal | |
---|---|---|---|
1 | DROUGHT | 52.53 | 13.97 |
2 | HURRICANE/TYPHOON | 23.73 | 5.51 |
3 | FROST/FREEZE | 12.88 | 2.00 |
4 | ICE STORM | 7.04 | 5.02 |
5 | EXTREME COLD/WIND CHILL | 4.22 | 1.31 |
6 | TROPICAL STORM | 1.65 | 0.69 |
7 | HEAT | 1.52 | 0.41 |
8 | FLOOD | 1.02 | 10.85 |
9 | HEAVY RAIN | 0.69 | 0.79 |
10 | EXCESSIVE HEAT | 0.69 | 0.49 |
11 | COLD/WIND CHILL | 0.48 | 0.07 |
12 | BLIZZARD | 0.44 | 0.11 |
13 | WILDFIRE | 0.32 | 0.40 |
14 | HAIL | 0.12 | 3.11 |
15 | HIGH WIND | 0.11 | 0.70 |
16 | HEAVY SNOW | 0.10 | 0.13 |
17 | FLASH FLOOD | 0.07 | 1.53 |
18 | WINTER WEATHER | 0.03 | 0.02 |
19 | DUST STORM | 0.03 | 0.00 |
20 | STRONG WIND | 0.02 | 0.07 |
par(mar=c(12, 4, 4, 2))
with(cMean, plot(cleanEv, cropDmgMean, type="l", las=2, main="Top 20 weather events having the highest average cost due to crop damage", ylab="Average damage (Mln US $)"))
Drought causes the most damage to crop across US.