SYNOPSIS

This raport summarizes exploration of the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage. The aim of this exploration is to determine which types of weather events are most harmful accross the United States with respect to:

  1. population health
  2. economic consequences

DATA IMPORT AND PROCESSING

The data was downloaded from https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2 on January 23, 2015.

Data import

if(!file.exists("./data")){dir.create("./data")}
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
if(!file.exists("./data/stormData.csv.bz2")){download.file(url, destfile = "./data/stormData.csv.bz2")}
(files <- list.files("./data/", full.names=T))
## [1] "./data/stormData.csv.bz2"
dat <- read.csv(files[1])
### load required packages
if(!("dplyr" %in% rownames(installed.packages()))) {
    install.packages("dplyr")
}
require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
if(!("xtable" %in% rownames(installed.packages()))) {
    install.packages("xtable")
}
require(xtable)
## Loading required package: xtable

Data processing

The original data has 902297 observations of 37 variables. However, upon initial exploration it was determined that only some variables were needed for the analysis. These were:

    - event type (EVTYPE)
    - FATALITIES
    - INJURIES
    - properity damage (PROPDMG)
    - Order of magnitude for properity damage (PROPDMGEXP)
    - crop damage (CROPDMG)
    - Order of magnitude for crop damage (CROPDMGEXP)
    

Subset the variables of interest

dat2 <- select(dat, EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
rm(dat)

We are also only interested in observations that have more than 0 injuries/fatalities/crop or properity damage. So select only those observations

dat3 <- filter(dat2, FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0)
str(dat3)
## 'data.frame':    254633 obs. of  7 variables:
##  $ EVTYPE    : Factor w/ 985 levels "   HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
rm(dat2)

Clean up event type variable

The event type (EVTYPE) has 985 types of weather events. However, the official NOAA NCDC publication there should be 48 such events (Table 2.1.1). So first clean up the event type variable.

### first change everything to upper case
dat3$EVTYPE <- as.factor(toupper(dat3$EVTYPE))
nlevels(dat3$EVTYPE)
## [1] 447
### create new variable for cleaned event
dat3 <- mutate(dat3, cleanEv = "other")
dat3$cleanEv[grep(".*UR.*ICAN.*", dat3$EVTYPE)] <- "HURRICANE/TYPHOON"
dat3$cleanEv[grep("TYPHOON", dat3$EVTYPE)] <- "HURRICANE/TYPHOON"
dat3$cleanEv[grep(".LOW TIDE*.*", dat3$EVTYPE)] <- "LOW TIDE"
dat3$cleanEv[grep(".HIGH TIDE.*", dat3$EVTYPE)] <- "HIGH TIDE"
dat3$cleanEv[grep("AVALAN", dat3$EVTYPE)] <- "AVALANCHE"
dat3$cleanEv[grep("BLIZ", dat3$EVTYPE)] <- "BLIZZARD"
dat3$cleanEv[grep("FLOOD", dat3$EVTYPE)] <- "FLOOD"
dat3$cleanEv[grep("FLASH FLOOD", dat3$EVTYPE)] <- "FLASH FLOOD"
dat3$cleanEv[grep("^(COASTAL FLOOD)", dat3$EVTYPE)] <- "COASTAL FLOOD"
dat3$cleanEv[grep("^(LAKESHORE FLOOD)", dat3$EVTYPE)] <- "LAKESHORE FLOOD"
dat3$cleanEv[grep("^(COLD/WIND CHILL)|^COLD|^(WIND CHILL)", dat3$EVTYPE)] <- "COLD/WIND CHILL"
dat3$cleanEv[grep("^(EXTREME COLD/WIND CHILL)|^(EXTREME COLD)|^(EXTREME WIND CHILL)", dat3$EVTYPE)] <- "EXTREME COLD/WIND CHILL"
dat3$cleanEv[grep("FREEZ.* FOG", dat3$EVTYPE)] <- "FREEZING FOG"
dat3$cleanEv[grep("DENSE FOG", dat3$EVTYPE)] <- "DENSE FOG"
dat3$cleanEv[grep("DENSE SMOKE", dat3$EVTYPE)] <- "DENSE SMOKE"
dat3$cleanEv[grep("DROUGHT", dat3$EVTYPE)] <- "DROUGHT"
dat3$cleanEv[grep("DUST DEVIL", dat3$EVTYPE)] <- "DUST DEVIL"
dat3$cleanEv[grep("DUST STORM", dat3$EVTYPE)] <- "DUST STORM"
dat3$cleanEv[grep("HEAT", dat3$EVTYPE)] <- "HEAT"
dat3$cleanEv[grep("EXCES.* HEAT", dat3$EVTYPE)] <- "EXCESSIVE HEAT"
dat3$cleanEv[grep("FROST|FREEZE", dat3$EVTYPE)] <- "FROST/FREEZE"
dat3$cleanEv[grep("FUNNEL CLOUD", dat3$EVTYPE)] <- "FUNNEL CLOUD"
dat3$cleanEv[grep("HAIL", dat3$EVTYPE)] <- "HAIL"
dat3$cleanEv[grep("HEAVY RAIN", dat3$EVTYPE)] <- "HEAVY RAIN"
dat3$cleanEv[grep("HEAVY SNOW", dat3$EVTYPE)] <- "HEAVY SNOW"
dat3$cleanEv[grep("HIGH SURF", dat3$EVTYPE)] <- "HIGH SURF"
dat3$cleanEv[grep("HIGH WIND", dat3$EVTYPE)] <- "HIGH WIND"
dat3$cleanEv[grep("ICE STORM", dat3$EVTYPE)] <- "ICE STORM"
dat3$cleanEv[grep("LAKE.*EFFECT SNOW", dat3$EVTYPE)] <- "LAKE EFFECT SNOW"
dat3$cleanEv[grep("^LIGHTNING$", dat3$EVTYPE)] <- "LIGHTNING"
dat3$cleanEv[grep("MARINE HAIL", dat3$EVTYPE)] <- "MARINE HAIL"
dat3$cleanEv[grep("MARINE HIGH WIND", dat3$EVTYPE)] <- "MARINE HIGH WIND"
dat3$cleanEv[grep("STRONG WIND", dat3$EVTYPE)] <- "STRONG WIND"
dat3$cleanEv[grep("MARINE STRONG WIND", dat3$EVTYPE)] <- "MARINE STRONG WIND"
dat3$cleanEv[grep("THUNDE.*R.*STORM.* WIND.*", dat3$EVTYPE)] <- "THUNDERSTORM WIND"
dat3$cleanEv[grep("MARINE T", dat3$EVTYPE)] <- "MARINE THUNDERSTORM WIND"
dat3$cleanEv[grep("RIP", dat3$EVTYPE)] <- "RIP CURRENT"
dat3$cleanEv[grep("SEI", dat3$EVTYPE)] <- "SEICHE"
dat3$cleanEv[grep("SLE", dat3$EVTYPE)] <- "SLEET"
dat3$cleanEv[grep("STORM S.*", dat3$EVTYPE)] <- "STORM SURGE/TIDE"
dat3$cleanEv[grep("TORN", dat3$EVTYPE)] <- "TORNADO"
dat3$cleanEv[grep("WATERSP", dat3$EVTYPE)] <- "WATERSPOUT"
dat3$cleanEv[grep("TROPICAL S", dat3$EVTYPE)] <- "TROPICAL STORM"
dat3$cleanEv[grep("TROPICAL D", dat3$EVTYPE)] <- "TROPICAL DEPRESSION"
dat3$cleanEv[grep("VOLCAN", dat3$EVTYPE)] <- "VOLCANIC ASH"
dat3$cleanEv[grep("WILD", dat3$EVTYPE)] <- "WILDFIRE"
dat3$cleanEv[grep("WINTER S", dat3$EVTYPE)] <- "WINTER STORM"
dat3$cleanEv[grep("WINTER W", dat3$EVTYPE)] <- "WINTER WEATHER"

dat3$cleanEv <- as.factor(dat3$cleanEv)
nlevels(dat3$cleanEv)
## [1] 48

Clean up damage variables

#clean up properity damage exponent variable
sum(is.na(dat3$PROPDMGEXP))
## [1] 0
levels(dat3$PROPDMGEXP)
##  [1] ""  "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K"
## [18] "m" "M"
dat3 <- mutate(dat3, propDmgExp="0")
dat3$propDmgExp[dat3$PROPDMGEXP=="0" | dat3$PROPDMGEXP==""] <- 1
dat3$propDmgExp[dat3$PROPDMGEXP=="2" | dat3$PROPDMGEXP=="h" | dat3$PROPDMGEXP=="H"] <- 2
dat3$propDmgExp[dat3$PROPDMGEXP=="3" | dat3$PROPDMGEXP=="K"] <- 3
dat3$propDmgExp[dat3$PROPDMGEXP=="4"] <- 4
dat3$propDmgExp[dat3$PROPDMGEXP=="5"] <- 5
dat3$propDmgExp[dat3$PROPDMGEXP=="6" | dat3$PROPDMGEXP=="m" | dat3$PROPDMGEXP=="M"] <- 6
dat3$propDmgExp[dat3$PROPDMGEXP=="7"] <- 7
dat3$propDmgExp[dat3$PROPDMGEXP=="B"] <- 9
dat3$propDmgExp <- as.numeric(dat3$propDmgExp)

#create new properity damage variable from the propdmg*10^propdmgexp
sum(is.na(dat3$PROPDMG))
## [1] 0
dat3 <- mutate(dat3, ProperityDamage=PROPDMG*10^propDmgExp)

##clean up crop damage exponent variable
sum(is.na(dat3$CROPDMGEXP))
## [1] 0
levels(dat3$CROPDMGEXP)
## [1] ""  "?" "0" "2" "B" "k" "K" "m" "M"
dat3 <- mutate(dat3, cropDmgExp="0")
dat3$cropDmgExp[dat3$CROPDMGEXP=="0" | dat3$CROPDMGEXP==""] <- 1
dat3$cropDmgExp[dat3$CROPDMGEXP=="2"] <- 2
dat3$cropDmgExp[dat3$CROPDMGEXP=="K" | dat3$CROPDMGEXP=="k"] <- 3
dat3$cropDmgExp[dat3$CROPDMGEXP=="m" | dat3$CROPDMGEXP=="M"] <- 6
dat3$cropDmgExp[dat3$CROPDMGEXP=="B"] <- 9
dat3$cropDmgExp <- as.numeric(dat3$cropDmgExp)

#create new crop damage variable from the cropdmg*10^cropdmgexp
sum(is.na(dat3$CROPDMG))
## [1] 0
dat3 <- mutate(dat3, CropDamage=CROPDMG*10^cropDmgExp)

Clean up fatality and injury variables

sum(is.na(dat3$FATALITIES))
## [1] 0
sum(is.na(dat3$INJURIES))
## [1] 0

The injury and fatalities variables look ok

Generate new data set containing only the clean variables

Additionally from the variable ‘cleanEv’ remove the ‘other’ category since it is a mix of, often unrelated, events.

datClean <- select(dat3, FATALITIES, INJURIES, cleanEv, ProperityDamage, CropDamage)
datClean <- filter(datClean, !cleanEv=="other")
str(datClean)
## 'data.frame':    189325 obs. of  5 variables:
##  $ FATALITIES     : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES       : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ cleanEv        : Factor w/ 48 levels "AVALANCHE","BLIZZARD",..: 41 41 41 41 41 41 41 41 41 41 ...
##  $ ProperityDamage: num  25000 2500 25000 2500 2500 2500 2500 2500 25000 25000 ...
##  $ CropDamage     : num  0 0 0 0 0 0 0 0 0 0 ...
summary(datClean)
##    FATALITIES          INJURIES                      cleanEv     
##  Min.   :  0.0000   Min.   :   0.0000   THUNDERSTORM WIND:55892  
##  1st Qu.:  0.0000   1st Qu.:   0.0000   TORNADO          :39962  
##  Median :  0.0000   Median :   0.0000   HAIL             :26606  
##  Mean   :  0.0753   Mean   :   0.6953   FLASH FLOOD      :21597  
##  3rd Qu.:  0.0000   3rd Qu.:   0.0000   LIGHTNING        :13293  
##  Max.   :583.0000   Max.   :1700.0000   FLOOD            :10630  
##                                         (Other)          :21345  
##  ProperityDamage       CropDamage       
##  Min.   :0.000e+00   Min.   :0.000e+00  
##  1st Qu.:2.000e+03   1st Qu.:0.000e+00  
##  Median :1.000e+04   Median :0.000e+00  
##  Mean   :2.228e+06   Mean   :2.551e+05  
##  3rd Qu.:5.000e+04   3rd Qu.:0.000e+00  
##  Max.   :1.150e+11   Max.   :5.000e+09  
## 
rm(dat3)

RESULTS

Across the United States which weather events cause the most damage to public health?

byEventInjury <- datClean %>%
        group_by(cleanEv) %>%
        summarize(injuryMean=mean(INJURIES),
                  injuryTotal=sum(INJURIES)) %>%
        arrange(desc(injuryTotal))

Top 20 weather events having the highest total number of injuries across US

iTot <- as.data.frame(byEventInjury[1:20,])
iTot$cleanEv <- droplevels(iTot$cleanEv)
iTot$cleanEv  <- factor(iTot$cleanEv, levels=iTot$cleanEv[order(-iTot$injuryTotal)])
xtTot <- xtable(iTot)
print(xtTot, type = "html")
cleanEv injuryMean injuryTotal
1 TORNADO 2.29 91364.00
2 FLOOD 0.64 6795.00
3 EXCESSIVE HEAT 9.20 6525.00
4 LIGHTNING 0.39 5230.00
5 HEAT 9.96 2699.00
6 THUNDERSTORM WIND 0.04 2413.00
7 ICE STORM 2.79 1992.00
8 FLASH FLOOD 0.08 1800.00
9 WILDFIRE 1.28 1606.00
10 HIGH WIND 0.24 1507.00
11 HAIL 0.06 1466.00
12 WINTER STORM 0.89 1353.00
13 HURRICANE/TYPHOON 5.75 1333.00
14 HEAVY SNOW 0.74 1034.00
15 BLIZZARD 3.16 805.00
16 WINTER WEATHER 0.98 538.00
17 RIP CURRENT 0.82 529.00
18 DUST STORM 4.27 440.00
19 TROPICAL STORM 0.91 383.00
20 DENSE FOG 4.62 342.00
par(mar=c(12, 4, 4, 2))
with(iTot, plot(cleanEv, injuryTotal/1000, type="l", las=2, main="Top 20 weather events having the highest total number of injuries", ylab="Total number of injuries (x1000)" ))

Top 20 weather events having the highest average number of injuries

byEventInjury <- arrange(byEventInjury, desc(injuryMean))
itMean <- as.data.frame(byEventInjury[1:20,])
itMean$cleanEv <- droplevels(itMean$cleanEv)
itMean$cleanEv  <- factor(itMean$cleanEv, levels=itMean$cleanEv[order(-itMean$injuryMean)])
xtMean <- xtable(itMean)
print(xtMean, type = "html")
cleanEv injuryMean injuryTotal
1 HEAT 9.96 2699.00
2 EXCESSIVE HEAT 9.20 6525.00
3 HURRICANE/TYPHOON 5.75 1333.00
4 DENSE FOG 4.62 342.00
5 DUST STORM 4.27 440.00
6 BLIZZARD 3.16 805.00
7 ICE STORM 2.79 1992.00
8 TORNADO 2.29 91364.00
9 WILDFIRE 1.28 1606.00
10 WATERSPOUT 1.12 72.00
11 HIGH SURF 1.09 204.00
12 WINTER WEATHER 0.98 538.00
13 TROPICAL STORM 0.91 383.00
14 WINTER STORM 0.89 1353.00
15 RIP CURRENT 0.82 529.00
16 EXTREME COLD/WIND CHILL 0.82 255.00
17 HEAVY SNOW 0.74 1034.00
18 FLOOD 0.64 6795.00
19 AVALANCHE 0.63 170.00
20 MARINE STRONG WIND 0.48 22.00
par(mar=c(12, 4, 4, 2))
with(itMean, plot(cleanEv, injuryMean, type="l", las=2, main="Top 20 weather events having the highest average number of injuries", ylab="Average number of injuries" ))

Summing up total number of injuries for each weather event we observe that in total tornados cause the most injuries in the US. However, when we look at average number of injuries per weather event, heat it the leading cause of injury.

byEventFatality <- datClean %>%
        group_by(cleanEv) %>%
        summarize(fatalMean=mean(FATALITIES),
                  fatalTotal=sum(FATALITIES)) %>%
        arrange(desc(fatalTotal))

Top 20 weather events having the highest total number of fatalities across US

fTot <- as.data.frame(byEventFatality[1:20,])
fTot$cleanEv <- droplevels(fTot$cleanEv)
fTot$cleanEv  <- factor(fTot$cleanEv, levels=fTot$cleanEv[order(-fTot$fatalTotal)])
xtfTot <- xtable(fTot)
print(xtfTot, type = "html")
cleanEv fatalMean fatalTotal
1 TORNADO 0.14 5658.00
2 EXCESSIVE HEAT 2.71 1922.00
3 HEAT 4.49 1216.00
4 FLASH FLOOD 0.05 1035.00
5 LIGHTNING 0.06 816.00
6 RIP CURRENT 0.90 577.00
7 FLOOD 0.05 483.00
8 HIGH WIND 0.05 297.00
9 EXTREME COLD/WIND CHILL 0.92 287.00
10 AVALANCHE 0.84 225.00
11 WINTER STORM 0.14 217.00
12 THUNDERSTORM WIND 0.00 199.00
13 COLD/WIND CHILL 1.14 158.00
14 HIGH SURF 0.78 146.00
15 HURRICANE/TYPHOON 0.57 133.00
16 HEAVY SNOW 0.09 127.00
17 STRONG WIND 0.03 111.00
18 BLIZZARD 0.40 101.00
19 HEAVY RAIN 0.09 99.00
20 WILDFIRE 0.07 90.00
par(mar=c(12, 4, 4, 2))
with(fTot, plot(cleanEv, fatalTotal/1000, type="l", las=2, main="Top 20 weather events having the highest total number of fatalities", ylab="Total number of fatalities (x1000)" ))

Top 20 weather events having the highest average number of fatalities

byEventFatality <- arrange(byEventFatality, desc(fatalMean))
fMean <- as.data.frame(byEventFatality[1:20,])
fMean$cleanEv <- droplevels(fMean$cleanEv)
fMean$cleanEv  <- factor(fMean$cleanEv, levels=fMean$cleanEv[order(-fMean$fatalMean)])
xtfMean <- xtable(fMean)
print(xtfMean, type = "html")
cleanEv fatalMean fatalTotal
1 HEAT 4.49 1216.00
2 EXCESSIVE HEAT 2.71 1922.00
3 COLD/WIND CHILL 1.14 158.00
4 EXTREME COLD/WIND CHILL 0.92 287.00
5 RIP CURRENT 0.90 577.00
6 AVALANCHE 0.84 225.00
7 HIGH SURF 0.78 146.00
8 HURRICANE/TYPHOON 0.57 133.00
9 BLIZZARD 0.40 101.00
10 MARINE STRONG WIND 0.30 14.00
11 DENSE FOG 0.24 18.00
12 DUST STORM 0.21 22.00
13 SLEET 0.20 2.00
14 TROPICAL STORM 0.16 66.00
15 WINTER STORM 0.14 217.00
16 TORNADO 0.14 5658.00
17 MARINE THUNDERSTORM WIND 0.13 19.00
18 ICE STORM 0.12 89.00
19 WINTER WEATHER 0.11 61.00
20 STORM SURGE/TIDE 0.11 24.00
par(mar=c(12, 4, 4, 2))
with(fMean, plot(cleanEv, fatalMean, type="l", las=2, main="Top 20 weather events having the highest average number of fatalities", ylab="Average number of fatalities" ))

Again we observe that out of all weather events tornados cause the most fatalities in total across US. However, when we look at average number of fatalities per weather event, heat it the leading cause of death.

Across the United States which weather events cause the most economic damage?

byEventPropDMG <- datClean %>%
        group_by(cleanEv) %>%
        summarize(propDmgMean=round(mean(ProperityDamage)/(10^6), digits=2),
                  propDmgTotal = round(sum(ProperityDamage)/(10^9), digits=2)) %>%
        arrange(desc(propDmgTotal))

Top 20 weather events having the highest total cost in properity damage across US (in billions of US dollars)

pTot <- as.data.frame(byEventPropDMG[1:20,])
pTot$cleanEv <- droplevels(pTot$cleanEv)
pTot$cleanEv  <- factor(pTot$cleanEv, levels=pTot$cleanEv[order(-pTot$propDmgTotal)])
xtpTot <- xtable(pTot)
print(xtpTot, type = "html")
cleanEv propDmgMean propDmgTotal
1 FLOOD 14.13 150.18
2 HURRICANE/TYPHOON 367.48 85.26
3 TORNADO 1.47 58.55
4 STORM SURGE/TIDE 214.13 47.96
5 FLASH FLOOD 0.81 17.59
6 HAIL 0.60 16.02
7 WILDFIRE 6.78 8.49
8 TROPICAL STORM 18.32 7.71
9 WINTER STORM 4.46 6.75
10 HIGH WIND 0.98 6.11
11 THUNDERSTORM WIND 0.10 5.43
12 ICE STORM 5.53 3.95
13 HEAVY RAIN 2.82 3.23
14 DROUGHT 3.93 1.05
15 HEAVY SNOW 0.68 0.95
16 LIGHTNING 0.07 0.93
17 BLIZZARD 2.59 0.66
18 COASTAL FLOOD 1.78 0.41
19 STRONG WIND 0.05 0.18
20 HIGH SURF 0.60 0.11
par(mar=c(12, 4, 4, 2))
with(pTot, plot(cleanEv, propDmgTotal, type="l", las=2, main="Top 20 weather events having the highest cost in properity damage", ylab="Total damage (Bln US $)"))

Top 20 weather events having the highest average cost of properity damage (in mln of US dollars)

byEventPropDMG <- arrange(byEventPropDMG, desc(propDmgMean))
pMean <- as.data.frame(byEventPropDMG[1:20,])
pMean$cleanEv <- droplevels(pMean$cleanEv)
pMean$cleanEv  <- factor(pMean$cleanEv, levels=pMean$cleanEv[order(-pMean$propDmgMean)])
xtpMean <- xtable(pMean)
print(xtpMean, type = "html")
cleanEv propDmgMean propDmgTotal
1 HURRICANE/TYPHOON 367.48 85.26
2 STORM SURGE/TIDE 214.13 47.96
3 TROPICAL STORM 18.32 7.71
4 FLOOD 14.13 150.18
5 WILDFIRE 6.78 8.49
6 ICE STORM 5.53 3.95
7 WINTER STORM 4.46 6.75
8 DROUGHT 3.93 1.05
9 HEAVY RAIN 2.82 3.23
10 BLIZZARD 2.59 0.66
11 COASTAL FLOOD 1.78 0.41
12 LAKESHORE FLOOD 1.51 0.01
13 TORNADO 1.47 58.55
14 HIGH TIDE 1.18 0.01
15 HIGH WIND 0.98 6.11
16 WATERSPOUT 0.95 0.06
17 FLASH FLOOD 0.81 17.59
18 HEAVY SNOW 0.68 0.95
19 HAIL 0.60 16.02
20 HIGH SURF 0.60 0.11
par(mar=c(12, 4, 4, 2))
with(pMean, plot(cleanEv, propDmgMean, type="l", las=2, main="Top 20 weather events having the highest average cost in properity damage", ylab="Average damage (Mln US$)"))

Summing up total cost in properity damage for each weather event we observe that tornados cause the most properity damage across the US. However, when we look at average cost of properity damage per weather event, huracaines/typhoons cause the most properity damage across US.

byEventCropDMG <- datClean %>%
        group_by(cleanEv) %>%
        summarize(cropDmgMean=round(mean(CropDamage)/(10^6), digits=2),
                  cropDmgTotal = round(sum(CropDamage)/(10^9), digits=2)) %>%
        arrange(desc(cropDmgTotal))

Top 20 weather events having the highest total cost in crop damage across US (in billions of US dollars)

cTot <- as.data.frame(byEventCropDMG[1:20,])
cTot$cleanEv <- droplevels(cTot$cleanEv)
cTot$cleanEv  <- factor(cTot$cleanEv, levels=cTot$cleanEv[order(-cTot$cropDmgTotal)])
xtcTot <- xtable(cTot)
print(xtcTot, type = "html")
cleanEv cropDmgMean cropDmgTotal
1 DROUGHT 52.53 13.97
2 FLOOD 1.02 10.85
3 HURRICANE/TYPHOON 23.73 5.51
4 ICE STORM 7.04 5.02
5 HAIL 0.12 3.11
6 FROST/FREEZE 12.88 2.00
7 FLASH FLOOD 0.07 1.53
8 EXTREME COLD/WIND CHILL 4.22 1.31
9 HEAVY RAIN 0.69 0.79
10 HIGH WIND 0.11 0.70
11 TROPICAL STORM 1.65 0.69
12 THUNDERSTORM WIND 0.01 0.63
13 EXCESSIVE HEAT 0.69 0.49
14 TORNADO 0.01 0.42
15 HEAT 1.52 0.41
16 WILDFIRE 0.32 0.40
17 HEAVY SNOW 0.10 0.13
18 BLIZZARD 0.44 0.11
19 COLD/WIND CHILL 0.48 0.07
20 STRONG WIND 0.02 0.07
par(mar=c(12, 4, 4, 2))
with(cTot, plot(cleanEv, cropDmgTotal, type="l", las=2, main="Top 20 weather events having the highest total cost due to crop damage", ylab="Total damage (Bln US $)"))

Top 20 weather events having the highest average cost in crop damage across US (in millions of US dollars)

byEventCropDMG <- arrange(byEventCropDMG, desc(cropDmgMean))
cMean <- as.data.frame(byEventCropDMG[1:20,])
cMean$cleanEv <- droplevels(cMean$cleanEv)
cMean$cleanEv  <- factor(cMean$cleanEv, levels=cMean$cleanEv[order(-cMean$cropDmgMean)])
xtcMean <- xtable(cMean)
print(xtcMean, type = "html")
cleanEv cropDmgMean cropDmgTotal
1 DROUGHT 52.53 13.97
2 HURRICANE/TYPHOON 23.73 5.51
3 FROST/FREEZE 12.88 2.00
4 ICE STORM 7.04 5.02
5 EXTREME COLD/WIND CHILL 4.22 1.31
6 TROPICAL STORM 1.65 0.69
7 HEAT 1.52 0.41
8 FLOOD 1.02 10.85
9 HEAVY RAIN 0.69 0.79
10 EXCESSIVE HEAT 0.69 0.49
11 COLD/WIND CHILL 0.48 0.07
12 BLIZZARD 0.44 0.11
13 WILDFIRE 0.32 0.40
14 HAIL 0.12 3.11
15 HIGH WIND 0.11 0.70
16 HEAVY SNOW 0.10 0.13
17 FLASH FLOOD 0.07 1.53
18 WINTER WEATHER 0.03 0.02
19 DUST STORM 0.03 0.00
20 STRONG WIND 0.02 0.07
par(mar=c(12, 4, 4, 2))
with(cMean, plot(cleanEv, cropDmgMean, type="l", las=2, main="Top 20 weather events having the highest average cost due to crop damage", ylab="Average damage (Mln US $)"))

Drought causes the most damage to crop across US.