Synopsis

The objective of the present work is to explore the NOAA storm database in order to analyze the impact of meteorological events on public health and the economy of the United States. Meteorological events cause deaths, injuries, and damage to property and crops.
This analysis identifies which types of events caused the most deaths and injuries, and which ones caused the greatest economic losses, between 1950 and November 2011.

My data analysis provides simple answers to the following questions:

  1. Which types of events have the greatest economic consequences?

  2. Which types of events are most harmful with respect to population health?

The data set was downloaded from the NOAA storm database; a copy is available at the URL used in the download step below. The analysis was run in R, using RStudio on a Windows 11 computer. This document was written in R Markdown, rendered with knitr, and originally published on RPubs for peer review.

Preparing data for analysis and processing

To reproduce this analysis the following libraries will be needed:

library(ggplot2)
library(gridExtra)
library (dplyr)
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:gridExtra':
## 
##     combine
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Downloading and setting up

# Download the compressed NOAA storm data once, then read it straight from
# the bzip2 archive. A single path variable keeps the existence check, the
# download target and the read in agreement.
storm_file <- "repdata-data-StormData.csv.bz2"
if (!file.exists(storm_file)) {
  fileURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # mode = "wb": the archive is binary, so request a binary transfer
  # explicitly instead of relying on platform defaults.
  download.file(fileURL, storm_file, mode = "wb")
}
data_raw <- read.csv(bzfile(storm_file))
dim(data_raw)
## [1] 902297     37

The data set has 902297 observations (rows) and 37 variables (columns).

The definition of each variable and the parameters for injuries and economic losses can be found in the National Weather Service Instruction 10-1605 and the National Climatic Data Center Storm Events FAQ.

Transforming data

# Keep only the columns this analysis uses, then derive the two working sets.
storm <- data_raw %>%
  select(EVTYPE, BGN_DATE, STATE, FATALITIES, INJURIES,
         PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)

# Events that harmed people: at least one fatality or injury.
health <- storm %>%
  filter(FATALITIES > 0 | INJURIES > 0)

# Events with an economic cost: some property or crop damage recorded.
economy <- storm %>%
  filter(PROPDMG > 0 | CROPDMG > 0)

Two variables, PROPDMGEXP and CROPDMGEXP, hold the power-of-10 multiplier codes that must be applied to PROPDMG and CROPDMG, respectively.

# List the distinct exponent codes present in the property-damage column.
unique(economy$PROPDMGEXP)
##  [1] "K" "M" "B" "m" ""  "+" "0" "5" "6" "4" "h" "2" "7" "3" "H" "-"
# List the distinct exponent codes present in the crop-damage column.
unique(economy$CROPDMGEXP)
## [1] ""  "M" "K" "m" "B" "?" "0" "k"

We need to convert these codes into numeric multipliers so that damage is expressed in dollars. We will consider “H” and “h” as 10^2, “K” and “k” as 10^3, “M” and “m” as 10^6, “B” as 10^9, the digits 0 to 8 as 10^0 to 10^8, “” (empty) as 1, and “+”, “-” and “?” as 0.

# Convert the damage exponent codes into numeric dollar multipliers.
# One lookup table serves both property and crop damage, so the two columns
# cannot drift apart the way separate hand-written assignments can.
# Mapping: "" -> 1; "+", "-", "?" -> 0; H -> 1e2; K -> 1e3; M -> 1e6;
# B -> 1e9; digits 0..8 -> 10^0..10^8.
exp_codes <- c("", "+", "-", "?", "H", "K", "M", "B",
               "0", "1", "2", "3", "4", "5", "6", "7", "8")
exp_values <- c(1, 0, 0, 0, 100, 1000, 1e+06, 1e+09,
                1, 10, 100, 1000, 10000, 1e+05, 1e+06, 1e+07, 1e+08)

# Map a vector of exponent codes to multipliers.
# Letters are matched case-insensitively ("k" and "K" are the same code).
# Codes that are not in the table yield NA and raise a warning, so rows are
# never dropped from the later aggregation without notice.
damage_multiplier <- function(code) {
  code <- as.character(code)
  multiplier <- exp_values[match(toupper(code), exp_codes)]
  unknown <- unique(code[is.na(multiplier)])
  if (length(unknown) > 0) {
    warning("Unmapped damage exponent code(s): ",
            paste(unknown, collapse = ", "), call. = FALSE)
  }
  multiplier
}

# Multiplier columns for property and crop damage.
economy$PROPW <- damage_multiplier(economy$PROPDMGEXP)
economy$CROPW <- damage_multiplier(economy$CROPDMGEXP)

# Total damage values in dollars.
economy$PROPVAL <- economy$PROPDMG * economy$PROPW
economy$CROPVAL <- economy$CROPDMG * economy$CROPW

# Total each measure per event type, then order from largest to smallest.
propdmg <- aggregate(PROPVAL ~ EVTYPE, data = economy, FUN = sum) %>%
  arrange(desc(PROPVAL))
cropdmg <- aggregate(CROPVAL ~ EVTYPE, data = economy, FUN = sum) %>%
  arrange(desc(CROPVAL))
inj <- aggregate(INJURIES ~ EVTYPE, data = health, FUN = sum) %>%
  arrange(desc(INJURIES))
fat <- aggregate(FATALITIES ~ EVTYPE, data = health, FUN = sum) %>%
  arrange(desc(FATALITIES))

Sorting the top 10 list of damages in property and crop

# Show the first ten rows of the property-damage totals.
head(propdmg, 10)
##               EVTYPE      PROPVAL
## 1              FLOOD 144657709807
## 2  HURRICANE/TYPHOON  69305840000
## 3            TORNADO  56947380617
## 4        STORM SURGE  43323536000
## 5        FLASH FLOOD  16822673979
## 6               HAIL  15735267513
## 7          HURRICANE  11868319010
## 8     TROPICAL STORM   7703890550
## 9       WINTER STORM   6688497251
## 10         HIGH WIND   5270046260
# Show the first ten rows of the crop-damage totals.
head(cropdmg, 10)
##               EVTYPE     CROPVAL
## 1            DROUGHT 13972566000
## 2              FLOOD  5661968450
## 3        RIVER FLOOD  5029459000
## 4          ICE STORM  5022113500
## 5               HAIL  3025954473
## 6          HURRICANE  2741910000
## 7  HURRICANE/TYPHOON  2607872800
## 8        FLASH FLOOD  1421317100
## 9       EXTREME COLD  1292973000
## 10      FROST/FREEZE  1094086000

Plotting damages in property and crop

Figure 1

# Figure 1: two base-graphics bar charts side by side (via par(mfrow)),
# showing the top 10 event types by property damage and by crop damage.
# head() is used instead of [1:10, ] so that a table with fewer than ten
# event types does not gain NA rows.
propdmg10 <- head(propdmg, 10)
cropdmg10 <- head(cropdmg, 10)
# par() returns the previous settings; keep them so they can be restored.
# The large bottom margin leaves room for the vertical event-type labels.
old_par <- par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)
barplot(propdmg10$PROPVAL/(10^9), las = 3, names.arg = propdmg10$EVTYPE,
    main = "Events by Property Damages\n(Top 10)", ylab = "Amount of damages ($ billions)", col = "dark blue")
barplot(cropdmg10$CROPVAL/(10^9), las = 3, names.arg = cropdmg10$EVTYPE, main = "Events by Crop Damages\n(Top 10)", ylab = "Amount of damages ($ billions)", col = "dark orange")
# Restore the graphics settings that were in effect before this figure.
par(old_par)

Sorting the top 10 list of injuries and fatalities

# Show the first ten rows of the injury totals.
head(inj, 10)
##               EVTYPE INJURIES
## 1            TORNADO    91346
## 2          TSTM WIND     6957
## 3              FLOOD     6789
## 4     EXCESSIVE HEAT     6525
## 5          LIGHTNING     5230
## 6               HEAT     2100
## 7          ICE STORM     1975
## 8        FLASH FLOOD     1777
## 9  THUNDERSTORM WIND     1488
## 10              HAIL     1361
# Show the first ten rows of the fatality totals.
head(fat, 10)
##            EVTYPE FATALITIES
## 1         TORNADO       5633
## 2  EXCESSIVE HEAT       1903
## 3     FLASH FLOOD        978
## 4            HEAT        937
## 5       LIGHTNING        816
## 6       TSTM WIND        504
## 7           FLOOD        470
## 8     RIP CURRENT        368
## 9       HIGH WIND        248
## 10      AVALANCHE        224

Plotting injuries and fatalities

Figure 2

# Figure 2: two base-graphics bar charts side by side (via par(mfrow)),
# showing the top 10 event types by injuries and by fatalities.
# head() is used instead of [1:10, ] so that a table with fewer than ten
# event types does not gain NA rows.
inj10 <- head(inj, 10)
fat10 <- head(fat, 10)
# par() returns the previous settings; keep them so they can be restored.
# The large bottom margin leaves room for the vertical event-type labels.
old_par <- par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)
barplot(inj10$INJURIES, las = 3, names.arg = inj10$EVTYPE, main = "Events by Amount of Injuries\n(Top 10)", ylab = "Total Number of Injuries", col = "purple")
barplot(fat10$FATALITIES, las = 3, names.arg = fat10$EVTYPE, main = "Events by Amount of Fatalities\n(Top 10)", ylab = "Total Number of Fatalities", col = "dark magenta")
# Restore the graphics settings that were in effect before this figure.
par(old_par)

Results

1. Which types of events have the greatest economic consequences?

According to the analysis, the greatest economic consequences for property were caused by floods, hurricanes/typhoons and tornadoes, and the greatest economic consequences for crops were caused by drought, floods, river floods and ice storms.

2. Which types of events are most harmful with respect to population health?

According to the analysis, the events most harmful to population health in terms of injuries were tornadoes, thunderstorm winds (TSTM WIND), floods, excessive heat and lightning, and the events most harmful in terms of fatalities were tornadoes, excessive heat, flash floods, heat and lightning.

Tornadoes are, by far, the meteorological events most harmful to population health in the United States, while floods cause the greatest property damage.