Processing the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database for patterns of Impacts to Health and Finances.

Synopsis

Severe weather produces recurring patterns of both health and financial loss, with consequences for individuals and for their communities as a whole. Some of these losses are preventable while others can only be treated after the fact, and deaths caused by severe weather are either preventable or an almost inconsolable eventuality.

Here we look for such patterns in the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, with the understanding that blizzards, tornadoes, and hurricanes take a great toll on life and property.

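Among events that recorded fatalities, injuries, property damage, and crop damage together, fatality and injury patterns suggest that floods are the biggest threat to health, and patterns of property and crop damage suggest that hurricanes are the main cause of financial loss.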

Data Processing

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(data.table)
## Warning: package 'data.table' was built under R version 4.1.2
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.1.2
# Source archive for the NOAA storm database (bzip2-compressed CSV).
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# Keep the archive in the working directory so the analysis runs on any
# machine rather than depending on one user's Windows profile path.
destfile <- "StormData.csv.bz2"
# Download only when the archive is not already present; mode = "wb" keeps
# the compressed file byte-exact on Windows.
if (!file.exists(destfile)) {
  download.file(url, destfile, mode = "wb")
}
# read.csv() reads the compressed file directly.
StormData <- read.csv(destfile, header = TRUE, sep = ",")
str(StormData)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...

Creating the table to study

# Columns needed for the health and economic impact analysis.
colHeaders <- c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
# Keep only those columns, in the order listed above.
File01 <- StormData[colHeaders]
head(File01)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO          0       15    25.0          K       0           
## 2 TORNADO          0        0     2.5          K       0           
## 3 TORNADO          0        2    25.0          K       0           
## 4 TORNADO          0        2     2.5          K       0           
## 5 TORNADO          0        2     2.5          K       0           
## 6 TORNADO          0        6     2.5          K       0

Keeping only events that recorded fatalities, injuries, property damage, and crop damage

# Keep only rows whose event type is not "?" and whose fatalities, injuries,
# property damage and crop damage are all strictly positive.
# NOTE(review): this drops every event that lacks any one of the four
# measures (e.g. damage without casualties), not just incomplete records;
# confirm this is the intended study population.
keep_rows <- with(
  File01,
  which(
    EVTYPE != "?" &
      FATALITIES > 0 & INJURIES > 0 &
      PROPDMG > 0 & CROPDMG > 0
  )
)
File02 <- File01[keep_rows, ]
head(File02)
##                         EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG
## 187910                 TORNADO          6      130       5          M       5
## 188605                 TORNADO         22      150     500          K      50
## 191345            WINTER STORM          3        5       5          M       5
## 192361 WINTER STORM HIGH WINDS          1       15      60          M       5
## 192372           WINTER STORMS         10       17     500          K     500
## 195017   TROPICAL STORM GORDON          8       43     500          K     500
##        CROPDMGEXP
## 187910          K
## 188605          M
## 191345          M
## 192361          M
## 192372          K
## 195017          K

Examining EVTYPE

# Count how many retained rows carry each raw event-type label.
table(File02[["EVTYPE"]])
## 
##                BLIZZARD          EXCESSIVE HEAT             FLASH FLOOD 
##                       3                       1                      11 
##                   FLOOD                    HEAT       HEAT WAVE DROUGHT 
##                      14                       2                       1 
##              HEAVY SNOW               HIGH WIND              HIGH WINDS 
##                       2                       7                       1 
##               HURRICANE       HURRICANE/TYPHOON               ICE STORM 
##                       1                       2                       1 
##       THUNDERSTORM WIND      THUNDERSTORM WINDS                 TORNADO 
##                       1                       3                      40 
##          TROPICAL STORM   TROPICAL STORM GORDON               TSTM WIND 
##                       4                       1                       4 
##                 TSUNAMI                WILDFIRE            WINTER STORM 
##                       1                       5                       3 
## WINTER STORM HIGH WINDS           WINTER STORMS 
##                       1                       1

Reclassifying EVTYPE for simplicity

# Ordered rules mapping a keyword pattern to a broad event category.
# Each pattern matches the whole label, so a label is replaced entirely by
# the category of the first rule it hits. Order matters: for example
# "WINTER STORM HIGH WINDS" becomes "WIND" because the WIND rule runs before
# the WINTER rule.
evtype_rules <- c(
  ".*HEAT.*"      = "HEAT",
  ".*FLOOD.*"     = "FLOOD",
  ".*WIND.*"      = "WIND",
  ".*BLIZZARD.*"  = "WINTER STORMS",
  ".*SNOW.*"      = "WINTER STORMS",
  ".*ICE.*"       = "WINTER STORMS",
  ".*WINTER.*"    = "WINTER STORMS",
  ".*HURRICANE.*" = "HURRICANE",
  ".*TROPICAL.*"  = "HURRICANE",
  ".*TSUNAMI.*"   = "HURRICANE"
)
for (rule_pattern in names(evtype_rules)) {
  File02$EVTYPE <- gsub(rule_pattern, evtype_rules[[rule_pattern]], File02$EVTYPE)
}
# Show the category counts after reclassification.
table(File02$EVTYPE)
## 
##         FLOOD          HEAT     HURRICANE       TORNADO      WILDFIRE 
##            25             4             9            40             5 
##          WIND WINTER STORMS 
##            17            10

Expanding PROPDMGEXP and CROPDMGEXP for calculations

# Replace the damage magnitude codes with their multipliers, written as
# text so the values can be converted to numbers afterwards:
#   B -> 1e9 (billions), M -> 1e6 (millions), K -> 1e3 (thousands).
# Matching ignores case so that lowercase codes ("b", "m", "k") are expanded
# too instead of turning into NA in the numeric conversion. The multipliers
# are explicit strings rather than numbers coerced to text by gsub().
exp_codes <- c(B = "1e9", M = "1e6", K = "1e3")
for (exp_col in c("PROPDMGEXP", "CROPDMGEXP")) {
  for (exp_code in names(exp_codes)) {
    File02[[exp_col]] <- gsub(
      exp_code, exp_codes[[exp_code]], File02[[exp_col]],
      ignore.case = TRUE
    )
  }
}

Change to numeric

# Convert both multiplier columns from text to numbers so they can be used
# in arithmetic.
exp_cols <- c("PROPDMGEXP", "CROPDMGEXP")
File02[exp_cols] <- lapply(File02[exp_cols], as.numeric)

Creating new Columns

# Damage amounts: reported figure multiplied by its magnitude multiplier.
File02[["PROPDMGAC"]] <- with(File02, PROPDMG * PROPDMGEXP)
File02[["CROPDMGACT"]] <- with(File02, CROPDMG * CROPDMGEXP)

Sorting Health Impact

# Per-row health impact: fatalities and injuries counted together.
health_per_row <- File02$FATALITIES + File02$INJURIES
# Total the health impact within each event category.
File03 <- aggregate(
  x = list(H_IMPACT = health_per_row),
  by = list(EVENT = File02$EVTYPE),
  FUN = sum
)
# Rank categories from the largest total to the smallest.
health_rank <- order(File03$H_IMPACT, decreasing = TRUE)
File03 <- File03[health_rank, ]
head(File03)
##           EVENT H_IMPACT
## 1         FLOOD     2796
## 7 WINTER STORMS     2077
## 4       TORNADO     1820
## 3     HURRICANE     1404
## 2          HEAT      425
## 6          WIND      196

Sorting Property and Crop Damages

# Per-row economic damage: property and crop damage amounts added together.
damage_per_row <- File02$PROPDMGAC + File02$CROPDMGACT
# Total the damage within each event category.
File04 <- aggregate(
  x = list(T_DAMAGES = damage_per_row),
  by = list(EVENT = File02$EVTYPE),
  FUN = sum
)
# Rank categories from the largest total to the smallest.
damage_rank <- order(File04$T_DAMAGES, decreasing = TRUE)
File04 <- File04[damage_rank, ]
head(File04)
##       EVENT   T_DAMAGES
## 3 HURRICANE 14194485000
## 6      WIND  1482028500
## 5  WILDFIRE  1240270000
## 4   TORNADO  1145427000
## 2      HEAT   493545000
## 1     FLOOD   339128000

Results

Plots of Weather Events most costly to health

# Dot plot of combined fatalities and injuries per event category, with the
# categories ordered along the x-axis by increasing total.
ggplot(File03, aes(x = reorder(EVENT, H_IMPACT), y = H_IMPACT, color = EVENT)) +
  geom_point() +
  xlab("Weather Events") +
  # Axis label spelling corrected (was "Injures").
  ylab("Sum of Fatalities and Injuries") +
  ggtitle("Weather Events most costly to health")

Plots of Weather Events most costly to property

# Dot plot of combined property and crop damage per event category, with the
# categories ordered along the x-axis by increasing total.
ggplot(
  File04,
  aes(x = reorder(EVENT, T_DAMAGES), y = T_DAMAGES, color = EVENT)
) +
  geom_point() +
  labs(
    x = "Weather Events",
    y = "Sum of Property and Crop Damages",
    title = "Weather Events most costly to Property"
  )

Conclusion

Floods are responsible for the greatest number of fatalities and injuries.

Hurricanes are responsible for causing the most property damage and crop damage costs.