Set knitr options

knitr::opts_chunk$set(echo = TRUE)

Synopsis

Severe weather events can cause great harm, both to public health and to the economy. Identifying which event types have the greatest impact can help to show where efforts should be directed.
This study investigates severe weather events in the US, based on the National Oceanic and Atmospheric Administration (NOAA) time series, which include information on fatalities, injuries, and property and crop damage. Only data from 1996 through 2011 were included in this study.
The results show that tornadoes are the severe weather event with the greatest impact on public health, while floods cause the greatest economic damage.

Data Processing

Loading Packages

library(dplyr)
## Warning: package 'dplyr' was built under R version 4.0.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lubridate)
## Warning: package 'lubridate' was built under R version 4.0.5
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(ggplot2)

Download the dataset

filename <- "repdata_data_StormData.csv.zip"

Checking if archive already exists.

# Download the raw storm data once and cache it locally as "StormData".
# The existence check targets the same path that download.file() writes to
# (and that the subsequent read step opens), so a cached copy is actually
# reused instead of being downloaded again on every run.
destfile <- "StormData"
if (!file.exists(destfile)) {
  fileURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # mode = "wb" keeps the bzip2 archive byte-exact on Windows.
  download.file(url = fileURL, destfile = destfile, mode = "wb")
  # Exit if the file is not available
  if (!file.exists(destfile)) {
    stop("Can't locate file 'StormData'!")
  }
}

The source data file is downloaded from https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2.

Loading and preprocessing the data

# Read the bzip2-compressed CSV through a bzfile() connection.
# read.csv() already defaults to header = TRUE and sep = ",".
StormData <- read.csv(file = bzfile(description = "StormData"))

Structure of the dataset

str(StormData)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...

There are 902,297 observations with 37 variables in the file.

Only a subset is required for the analysis.
1. Relevant for the analysis are:
- the date (BGN_DATE);
- event type (EVTYPE);
- counter for the health impact (FATALITIES and INJURIES);
- monetary impact on crop and property (PROPDMG and CROPDMG) as well as their corresponding exponents (PROPDMGEXP and CROPDMGEXP).
2. According to the NOAA, the full set of weather events (48 event types) is available since 1996. Between 1950 and 1995 only a subset of these events (Tornado, Thunderstorm Wind and Hail) is available in the storm database. In order to have a comparable basis for the analysis, the dataset is limited to the observations between 1996 and 2011.
3. The dataset contains a lot of observations without any information about health and/or economic damages. These observations are excluded from the analysis.

# Keep only the columns needed for the analysis, parse the event start date
# and derive the calendar year from it.
StormDataSetup <- StormData %>%
  select(
    BGN_DATE, EVTYPE, PROPDMG, PROPDMGEXP,
    CROPDMG, CROPDMGEXP, FATALITIES, INJURIES
  ) %>%
  mutate(
    BGN_DATE = as.Date(BGN_DATE, format = "%m/%d/%Y"),
    BGN_YEAR = year(BGN_DATE)
  )

# Records before 1996 are dropped: 1950 - 1954 hold tornado events only and
# 1955 - 1995 hold tornado, thunderstorm wind and hail only; all 48 event
# types are recorded from 1996 on.
StormDataSetup2 <- StormDataSetup %>%
  filter(BGN_YEAR >= 1996)

# Retain rows that report at least one kind of damage or casualty.
StormDataTidy <- StormDataSetup2 %>%
  filter(PROPDMG > 0 | CROPDMG > 0 | FATALITIES > 0 | INJURIES > 0)

dim(StormDataTidy)
## [1] 201318      9
str(StormDataTidy)
## 'data.frame':    201318 obs. of  9 variables:
##  $ BGN_DATE  : Date, format: "1996-01-06" "1996-01-11" ...
##  $ EVTYPE    : chr  "WINTER STORM" "TORNADO" "TSTM WIND" "TSTM WIND" ...
##  $ PROPDMG   : num  380 100 3 5 2 400 12 8 12 75 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  38 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "K" "" "" "" ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ INJURIES  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_YEAR  : num  1996 1996 1996 1996 1996 ...
sum(is.na(StormDataTidy))
## [1] 0

The working dataset (StormDataTidy) contains 201,318 observations, 9 variables and no missing values.

Clean Dataset

Clean Event Type Data

There are a total of 222 unique Event Type values in the current tidy dataset.

length(unique(StormDataTidy$EVTYPE))
## [1] 222
unique(StormDataTidy$EVTYPE)
##   [1] "WINTER STORM"              "TORNADO"                  
##   [3] "TSTM WIND"                 "HIGH WIND"                
##   [5] "FLASH FLOOD"               "FREEZING RAIN"            
##   [7] "EXTREME COLD"              "LIGHTNING"                
##   [9] "HAIL"                      "FLOOD"                    
##  [11] "TSTM WIND/HAIL"            "EXCESSIVE HEAT"           
##  [13] "RIP CURRENTS"              "Other"                    
##  [15] "HEAVY SNOW"                "WILD/FOREST FIRE"         
##  [17] "ICE STORM"                 "BLIZZARD"                 
##  [19] "STORM SURGE"               "Ice jam flood (minor"     
##  [21] "DUST STORM"                "STRONG WIND"              
##  [23] "DUST DEVIL"                "Tstm Wind"                
##  [25] "URBAN/SML STREAM FLD"      "FOG"                      
##  [27] "ROUGH SURF"                "Heavy Surf"               
##  [29] "Dust Devil"                "HEAVY RAIN"               
##  [31] "Marine Accident"           "AVALANCHE"                
##  [33] "Freeze"                    "DRY MICROBURST"           
##  [35] "Strong Wind"               "WINDS"                    
##  [37] "COASTAL STORM"             "Erosion/Cstl Flood"       
##  [39] "River Flooding"            "WATERSPOUT"               
##  [41] "DAMAGING FREEZE"           "Damaging Freeze"          
##  [43] "HURRICANE"                 "TROPICAL STORM"           
##  [45] "Beach Erosion"             "High Surf"                
##  [47] "Heavy Rain/High Surf"      "Unseasonable Cold"        
##  [49] "Early Frost"               "Wintry Mix"               
##  [51] "Extreme Cold"              "DROUGHT"                  
##  [53] "Coastal Flooding"          "Torrential Rainfall"      
##  [55] "Landslump"                 "Hurricane Edouard"        
##  [57] "Coastal Storm"             "TIDAL FLOODING"           
##  [59] "Tidal Flooding"            "Strong Winds"             
##  [61] "EXTREME WINDCHILL"         "Glaze"                    
##  [63] "Extended Cold"             "Whirlwind"                
##  [65] "Heavy snow shower"         "Light snow"               
##  [67] "COASTAL FLOOD"             "Light Snow"               
##  [69] "MIXED PRECIP"              "COLD"                     
##  [71] "Freezing Spray"            "DOWNBURST"                
##  [73] "Mudslides"                 "Microburst"               
##  [75] "Mudslide"                  "Cold"                     
##  [77] "SNOW"                      "Coastal Flood"            
##  [79] "Snow Squalls"              "Wind Damage"              
##  [81] "Light Snowfall"            "Freezing Drizzle"         
##  [83] "Gusty wind/rain"           "GUSTY WIND/HVY RAIN"      
##  [85] "Wind"                      "Cold Temperature"         
##  [87] "Heat Wave"                 "Snow"                     
##  [89] "COLD AND SNOW"             "HEAVY SURF"               
##  [91] "RAIN/SNOW"                 "WIND"                     
##  [93] "FREEZE"                    "TSTM WIND (G45)"          
##  [95] "Gusty Winds"               "GUSTY WIND"               
##  [97] "TSTM WIND 40"              "TSTM WIND 45"             
##  [99] "HARD FREEZE"               "TSTM WIND (41)"           
## [101] "HEAT"                      "RIVER FLOOD"              
## [103] "TSTM WIND (G40)"           "RIP CURRENT"              
## [105] "HIGH SURF"                 "MUD SLIDE"                
## [107] "Frost/Freeze"              "SNOW AND ICE"             
## [109] "COASTAL FLOODING"          "AGRICULTURAL FREEZE"      
## [111] "WINTER WEATHER"            "STRONG WINDS"             
## [113] "SNOW SQUALL"               "ICY ROADS"                
## [115] "OTHER"                     "THUNDERSTORM"             
## [117] "Hypothermia/Exposure"      "HYPOTHERMIA/EXPOSURE"     
## [119] "Lake Effect Snow"          "Freezing Rain"            
## [121] "Mixed Precipitation"       "BLACK ICE"                
## [123] "COASTALSTORM"              "LIGHT SNOW"               
## [125] "DAM BREAK"                 "Gusty winds"              
## [127] "blowing snow"              "FREEZING DRIZZLE"         
## [129] "FROST"                     "GRADIENT WIND"            
## [131] "UNSEASONABLY COLD"         "GUSTY WINDS"              
## [133] "TSTM WIND AND LIGHTNING"   "gradient wind"            
## [135] "Gradient wind"             "Freezing drizzle"         
## [137] "WET MICROBURST"            "Heavy surf and wind"      
## [139] "FUNNEL CLOUD"              "TYPHOON"                  
## [141] "LANDSLIDES"                "HIGH SWELLS"              
## [143] "HIGH WINDS"                "SMALL HAIL"               
## [145] "UNSEASONAL RAIN"           "COASTAL FLOODING/EROSION" 
## [147] " TSTM WIND (G45)"          "TSTM WIND  (G45)"         
## [149] "HIGH WIND (G40)"           "TSTM WIND (G35)"          
## [151] "GLAZE"                     "COASTAL EROSION"          
## [153] "UNSEASONABLY WARM"         "SEICHE"                   
## [155] "COASTAL  FLOODING/EROSION" "HYPERTHERMIA/EXPOSURE"    
## [157] "WINTRY MIX"                "RIVER FLOODING"           
## [159] "ROCK SLIDE"                "GUSTY WIND/HAIL"          
## [161] "HEAVY SEAS"                " TSTM WIND"               
## [163] "LANDSPOUT"                 "RECORD HEAT"              
## [165] "EXCESSIVE SNOW"            "LAKE EFFECT SNOW"         
## [167] "FLOOD/FLASH/FLOOD"         "MIXED PRECIPITATION"      
## [169] "WIND AND WAVE"             "FLASH FLOOD/FLOOD"        
## [171] "LIGHT FREEZING RAIN"       "ICE ROADS"                
## [173] "HIGH SEAS"                 "RAIN"                     
## [175] "ROUGH SEAS"                "TSTM WIND G45"            
## [177] "NON-SEVERE WIND DAMAGE"    "WARM WEATHER"             
## [179] "THUNDERSTORM WIND (G40)"   "LANDSLIDE"                
## [181] "HIGH WATER"                " FLASH FLOOD"             
## [183] "LATE SEASON SNOW"          "WINTER WEATHER MIX"       
## [185] "ROGUE WAVE"                "FALLING SNOW/ICE"         
## [187] "NON-TSTM WIND"             "NON TSTM WIND"            
## [189] "MUDSLIDE"                  "BRUSH FIRE"               
## [191] "BLOWING DUST"              "VOLCANIC ASH"             
## [193] "   HIGH SURF ADVISORY"     "HAZARDOUS SURF"           
## [195] "WILDFIRE"                  "COLD WEATHER"             
## [197] "WHIRLWIND"                 "ICE ON ROAD"              
## [199] "SNOW SQUALLS"              "DROWNING"                 
## [201] "EXTREME COLD/WIND CHILL"   "MARINE TSTM WIND"         
## [203] "HURRICANE/TYPHOON"         "DENSE FOG"                
## [205] "WINTER WEATHER/MIX"        "FROST/FREEZE"             
## [207] "ASTRONOMICAL HIGH TIDE"    "HEAVY SURF/HIGH SURF"     
## [209] "TROPICAL DEPRESSION"       "LAKE-EFFECT SNOW"         
## [211] "MARINE HIGH WIND"          "THUNDERSTORM WIND"        
## [213] "TSUNAMI"                   "STORM SURGE/TIDE"         
## [215] "COLD/WIND CHILL"           "LAKESHORE FLOOD"          
## [217] "MARINE THUNDERSTORM WIND"  "MARINE STRONG WIND"       
## [219] "ASTRONOMICAL LOW TIDE"     "DENSE SMOKE"              
## [221] "MARINE HAIL"               "FREEZING FOG"

In the Event Type information we can see that there are many similar values, some with errors, others in the plural and mixed cases. For example, Strong Wind, STRONG WIND, Strong Winds, and STRONG WINDS.

The dataset was normalized by converting all Event Type values to uppercase and combining similar Event Type values into unique categories.

# Ordered normalization rules for EVTYPE, written as pattern = replacement.
# The rules are applied top to bottom and the ORDER MATTERS: once a value has
# been collapsed to a category name, later rules only see that category name
# (e.g. "TSTM WIND/HAIL" becomes "HAIL" before the WIND rule is reached).
evtype_rules <- c(
  # AVALANCHE (pattern targets the misspelling)
  ".*AVALANCE.*" = "AVALANCHE",
  # BLIZZARD
  ".*BLIZZARD.*" = "BLIZZARD",
  # CLOUD
  ".*CLOUD.*" = "CLOUD",
  # COLD
  ".*COLD.*" = "COLD",
  ".*FREEZ.*" = "COLD",
  ".*FROST.*" = "COLD",
  ".*ICE.*" = "COLD",
  ".*LOW TEMPERATURE RECORD.*" = "COLD",
  ".*LO.*TEMP.*" = "COLD",
  # DRY
  ".*DRY.*" = "DRY",
  # DUST
  ".*DUST.*" = "DUST",
  # FIRE
  ".*FIRE.*" = "FIRE",
  # FLOOD
  ".*FLOOD.*" = "FLOOD",
  # FOG
  ".*FOG.*" = "FOG",
  # HAIL
  ".*HAIL.*" = "HAIL",
  # HEAT
  ".*HEAT.*" = "HEAT",
  ".*WARM.*" = "HEAT",
  ".*HIGH.*TEMP.*" = "HEAT",
  ".*RECORD HIGH TEMPERATURES.*" = "HEAT",
  # HYPOTHERMIA/EXPOSURE
  ".*HYPOTHERMIA.*" = "HYPOTHERMIA/EXPOSURE",
  # LANDSLIDE
  ".*LANDSLIDE.*" = "LANDSLIDE",
  # LIGHTNING (including common misspellings)
  "^LIGHTNING.*" = "LIGHTNING",
  "^LIGNTNING.*" = "LIGHTNING",
  "^LIGHTING.*" = "LIGHTNING",
  # MICROBURST
  ".*MICROBURST.*" = "MICROBURST",
  # MUDSLIDE
  ".*MUDSLIDE.*" = "MUDSLIDE",
  ".*MUD SLIDE.*" = "MUDSLIDE",
  # MIXED PRECIPITATION
  ".*PRECIP.*" = "MIXED PRECIPITATION",
  # RAIN
  ".*RAIN.*" = "RAIN",
  # RIP CURRENT
  ".*RIP CURRENT.*" = "RIP CURRENT",
  # THUNDERSTORM WIND must be handled before the generic STORM rule;
  # otherwise it ends up in STORM while its abbreviation "TSTM WIND" ends up
  # in WIND, splitting one event type across two categories.
  ".*THUNDERSTORM.*WIND.*" = "WIND",
  # STORM
  ".*STORM.*" = "STORM",
  # TORNADO (including a misspelling and spout events)
  ".*TORNADO.*" = "TORNADO",
  ".*TORNDAO.*" = "TORNADO",
  ".*LANDSPOUT.*" = "TORNADO",
  ".*WATERSPOUT.*" = "TORNADO",
  # SURF
  ".*SURF.*" = "SURF",
  # VOLCANIC
  ".*VOLCANIC.*" = "VOLCANIC",
  # WET
  ".*WET.*" = "WET",
  # WIND
  ".*WIND.*" = "WIND",
  # WINTER
  ".*WINTER.*" = "WINTER",
  ".*WINTRY.*" = "WINTER",
  ".*SNOW.*" = "WINTER"
)

# Upper-case the event types and apply every rule in sequence.
#
# evtype: character vector of raw event type labels.
# rules:  named character vector; names are regular expressions, values are
#         the category each match is replaced with.
# Returns a character vector of the same length as `evtype`.
normalize_evtype <- function(evtype, rules = evtype_rules) {
  evtype <- toupper(evtype)
  for (i in seq_along(rules)) {
    evtype <- gsub(names(rules)[i], rules[[i]], evtype)
  }
  evtype
}

StormDataTidy$EVTYPE <- normalize_evtype(StormDataTidy$EVTYPE)

After tidying the dataset, the number of unique Event Type values was reduced to 55.

length(unique(StormDataTidy$EVTYPE))
## [1] 55
unique(StormDataTidy$EVTYPE)
##  [1] "STORM"                  "TORNADO"                "WIND"                  
##  [4] "FLOOD"                  "COLD"                   "LIGHTNING"             
##  [7] "HAIL"                   "HEAT"                   "RIP CURRENT"           
## [10] "OTHER"                  "WINTER"                 "FIRE"                  
## [13] "BLIZZARD"               "DUST"                   "URBAN/SML STREAM FLD"  
## [16] "FOG"                    "SURF"                   "RAIN"                  
## [19] "MARINE ACCIDENT"        "AVALANCHE"              "DRY"                   
## [22] "HURRICANE"              "BEACH EROSION"          "DROUGHT"               
## [25] "LANDSLUMP"              "HURRICANE EDOUARD"      "GLAZE"                 
## [28] "MIXED PRECIPITATION"    "DOWNBURST"              "MUDSLIDE"              
## [31] "MICROBURST"             "ICY ROADS"              "HYPOTHERMIA/EXPOSURE"  
## [34] "DAM BREAK"              "CLOUD"                  "TYPHOON"               
## [37] "LANDSLIDE"              "HIGH SWELLS"            "COASTAL EROSION"       
## [40] "SEICHE"                 "HYPERTHERMIA/EXPOSURE"  "ROCK SLIDE"            
## [43] "HEAVY SEAS"             "HIGH SEAS"              "ROUGH SEAS"            
## [46] "HIGH WATER"             "ROGUE WAVE"             "VOLCANIC"              
## [49] "DROWNING"               "HURRICANE/TYPHOON"      "ASTRONOMICAL HIGH TIDE"
## [52] "TROPICAL DEPRESSION"    "TSUNAMI"                "ASTRONOMICAL LOW TIDE" 
## [55] "DENSE SMOKE"

Clean Economic Data

According to the “National Weather Service”, information about Property Damage is logged using two variables: PROPDMG and PROPDMGEXP. PROPDMG is the mantissa (the significand) rounded to three significant digits and PROPDMGEXP is the exponent (the multiplier).
The same approach is used for Crop Damage where the CROPDMG variable is encoded by the CROPDMGEXP variable.

The documentation also specifies that the PROPDMGEXP and CROPDMGEXP are supposed to contain an alphabetical character used to signify magnitude and logs “K” for thousands, “M” for millions, and “B” for billions.

table(toupper(StormDataTidy$PROPDMGEXP))
## 
##             B      K      M 
##   8448     32 185474   7364
table(toupper(StormDataTidy$CROPDMGEXP))
## 
##             B      K      M 
## 102767      2  96787   1762

As we can see, some records have no exponent letter at all. For these no scaling is assumed, i.e. the recorded value is used as-is (a multiplier of 1).

In order to calculate costs, the PROPDMGEXP and CROPDMGEXP variables will be mapped to a multiplier factor which will then be used to calculate the actual costs for both property and crop damage. Two new variables will be created to store damage costs:

  • PROP_COST
  • CROP_COST
# Function to get Multiplier factor
#
# Maps damage exponent codes to numeric multipliers, case-insensitively:
#   ""  -> 1, "K" -> 10^3, "M" -> 10^6, "B" -> 10^9.
#
# exp: character vector of exponent codes (a single code works as well).
# Returns a numeric vector of the same length as `exp`; codes that are not
# listed above, including NA, yield NA instead of raising an error.
Multiplier <- function(exp) {
  codes <- c("", "K", "M", "B")
  factors <- c(10^0, 10^3, 10^6, 10^9)
  # match() is used rather than indexing by name because an empty string
  # never matches when used as a character index.
  factors[match(toupper(exp), codes)]
}
# Convert a damage mantissa and its exponent code into a cost expressed in
# billions: mantissa * Multiplier(code) / 10^9.
cost_in_billions <- function(amount, exponent) {
  (as.numeric(amount) * sapply(exponent, Multiplier)) / 10^9
}

# Property and crop damage costs (in billions)
StormDataTidy$PROP_COST <- cost_in_billions(
  StormDataTidy$PROPDMG, StormDataTidy$PROPDMGEXP
)
StormDataTidy$CROP_COST <- cost_in_billions(
  StormDataTidy$CROPDMG, StormDataTidy$CROPDMGEXP
)

New Variables

The distinction between Fatalities and Injuries is not important for the existing analysis. Therefore both variables are added to form a new variable HEALTHIMPACT.
A similar approach is used for the economic impact by adding the values of Crop and Property damages, and this is a new variable ECONOMICCOST.

# Derive the two summary measures in one pass:
#   HEALTHIMPACT = fatalities plus injuries,
#   ECONOMICCOST = property cost plus crop cost.
StormDataTidy <- StormDataTidy %>%
  mutate(
    HEALTHIMPACT = FATALITIES + INJURIES,
    ECONOMICCOST = PROP_COST + CROP_COST
  )

Results

The cleaned data frame (StormDataTidy) is aggregated per EVTYPE and sorted in descending order in the new data frame HEALTHIMPACT_DESC; the top 10 event types are plotted.

# Total health impact (fatalities + injuries) per event type, largest first.
HEALTHIMPACT_DESC <- StormDataTidy %>%
  group_by(EVTYPE) %>%
  summarise(HEALTHIMPACT = sum(HEALTHIMPACT)) %>%
  arrange(desc(HEALTHIMPACT))
#HEALTHIMPACT_DESC[1:10,]

# Bar chart of the ten event types with the highest health impact.
# reorder() with the negated total keeps the bars in descending order.
HEALTHIMPACT_G <- ggplot(HEALTHIMPACT_DESC[1:10, ],
                         aes(x = reorder(EVTYPE, -HEALTHIMPACT), y = HEALTHIMPACT)) +
  geom_bar(stat = "identity", fill = "dodgerblue3") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  # Axis label corrected: the original read "(Healh Impact" with a typo and
  # no closing parenthesis.
  xlab("Event type") + ylab("Nr. of Fatalities and Injuries (Health Impact)") +
  theme(legend.position = "none") +
  ggtitle("Fatalities and Injuries (Health Impact) in the US caused by Weather Event") +
  theme(plot.title = element_text(hjust = 0.5))
HEALTHIMPACT_G

The bar chart shows that tornadoes are the most harmful weather events for people’s health.

The cleaned data frame (StormDataTidy) is aggregated per EVTYPE and sorted in descending order in the new data frame ECONOMICCOST_DESC; the top 10 event types are plotted.

# Total economic cost (property + crop) per event type, largest first.
ECONOMICCOST_DESC <- StormDataTidy %>%
  group_by(EVTYPE) %>%
  summarise(ECONOMICCOST = sum(ECONOMICCOST)) %>%
  arrange(desc(ECONOMICCOST))
#ECONOMICCOST_DESC[1:10,]

# Bar chart of the ten event types with the highest economic cost.
# reorder() with the negated total keeps the bars in descending order.
ECONOMICCOST_G <- ggplot(ECONOMICCOST_DESC[1:10, ],
                         aes(x = reorder(EVTYPE, -ECONOMICCOST), y = ECONOMICCOST)) +
  geom_bar(stat = "identity", fill = "dodgerblue3") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  # PROP_COST and CROP_COST were divided by 10^9 when they were computed, so
  # the plotted unit is billions of USD, not plain USD.
  xlab("Event") + ylab("Economic Cost (billion USD)") +
  theme(legend.position = "none") +
  ggtitle("Economic Cost in the US caused by Weather Events") +
  theme(plot.title = element_text(hjust = 0.5))
ECONOMICCOST_G

The bar chart shows that floods cause the greatest economic damage.