Synopsis

This data analysis relies on U.S. National Oceanic and Atmospheric Administration (NOAA) storm data to determine 1) the types of events that are most harmful with respect to population health, and 2) the types of events that have the greatest economic consequences.

The analysis used to answer these questions involves several processing steps: parsing the event dates, normalizing the property and crop damage figures (which are recorded in different dollar units), identifying the ten worst event types in each dataset (by fatalities, injuries, property damage, and crop damage), and merging the datasets (property with crop, and fatalities with injuries) to determine the most damaging event types overall, economically and in terms of human health, respectively.

The analysis shows that the most damaging type of storm event with respect to population health is tornado, and the most damaging type of storm event in terms of economic consequences is flood.

Data Processing

# Load the packages once each; data.table supplies data.table()/melt() and
# ggplot2 supplies the plotting functions used later in this report.
library(data.table)
## Warning: package 'data.table' was built under R version 4.3.3
library(ggplot2)
library(knitr)
# NOTE(review): machine-specific working directory. It is kept because the
# archive and CSV below are referenced by relative path; consider replacing it
# with a project-relative location so the report runs on other machines.
setwd("C:/Users/Loaner - Kirsten/Desktop/Coursera")
# Extract the storm data archive into the working directory, replacing any
# previously extracted copy.
unzip("repdata_data_StormData.zip", overwrite = TRUE)
## Warning in unzip("repdata_data_StormData.zip", overwrite = T): error 1 in
## extracting from zip file
# Read the extracted comma-separated storm data into a data frame, then show
# its dimensions and its first six rows.
stormData <- read.csv(file = "repdata_data_StormData.csv")
dim(stormData)
## [1] 902297     37
head(stormData, n = 6L)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6
# List the column names of the raw storm data frame.
colnames(stormData)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

Across the US, which types of events have the greatest economic consequences?

Property and crop damage

# Columns needed for the health and economic analyses.
keepCols <- c(
  "BGN_DATE", "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)
# Work on a reduced copy that holds only those columns.
stormData_used <- stormData[, keepCols]
# Parse the begin date (month/day/year plus a time component) and keep the
# four-digit year as a number.
stormData_used$Year <- as.numeric(
  format(
    as.Date(stormData_used$BGN_DATE, format = "%m/%d/%Y %H:%M:%S"),
    format = "%Y"
  )
)
# Inspect which exponent codes occur in the property damage column.
unique(stormData_used$PROPDMGEXP)
##  [1] "K" "M" ""  "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
# Convert the property damage exponent codes to numeric powers of ten.
# Letter codes (matched case-insensitively) map to H = 2, K = 3, M = 6, B = 9;
# every other value is left untouched at this step.
stormData_used$PROPDMGEXP <- as.character(stormData_used$PROPDMGEXP)
stormData_used$PROPDMGEXP <- ifelse(
  toupper(stormData_used$PROPDMGEXP) %in% c("H", "K", "M", "B"),
  c(H = "2", K = "3", M = "6", B = "9")[toupper(stormData_used$PROPDMGEXP)],
  stormData_used$PROPDMGEXP
)
# Digit codes become the exponent directly; codes that are not numeric text
# (such as "", "+", "?", "-") turn into NA here, which triggers the warning.
stormData_used$PROPDMGEXP <- as.numeric(stormData_used$PROPDMGEXP)
## Warning: NAs introduced by coercion
# Treat the unparseable codes as exponent 0, i.e. a multiplier of 1.
stormData_used$PROPDMGEXP <- replace(
  stormData_used$PROPDMGEXP,
  is.na(stormData_used$PROPDMGEXP),
  0
)
# Property damage expressed in a single unit: mantissa times 10^exponent.
stormData_used$TOTALPROPDMG <- with(stormData_used, PROPDMG * 10^PROPDMGEXP)
# Inspect which exponent codes occur in the crop damage column.
unique(stormData_used$CROPDMGEXP)
## [1] ""  "M" "K" "m" "B" "?" "0" "k" "2"
# Convert the crop damage exponent codes to numeric powers of ten.
# Letter codes (matched case-insensitively) map to H = 2, K = 3, M = 6, B = 9;
# every other value is left untouched at this step.
stormData_used$CROPDMGEXP <- as.character(stormData_used$CROPDMGEXP)
stormData_used$CROPDMGEXP <- ifelse(
  toupper(stormData_used$CROPDMGEXP) %in% c("H", "K", "M", "B"),
  c(H = "2", K = "3", M = "6", B = "9")[toupper(stormData_used$CROPDMGEXP)],
  stormData_used$CROPDMGEXP
)
# Digit codes become the exponent directly; codes that are not numeric text
# (such as "" and "?") turn into NA here, which triggers the warning.
stormData_used$CROPDMGEXP <- as.numeric(stormData_used$CROPDMGEXP)
## Warning: NAs introduced by coercion
# Treat the unparseable codes as exponent 0, i.e. a multiplier of 1.
stormData_used$CROPDMGEXP <- replace(
  stormData_used$CROPDMGEXP,
  is.na(stormData_used$CROPDMGEXP),
  0
)
# Crop damage expressed in a single unit: mantissa times 10^exponent.
stormData_used$TOTALCROPDMG <- with(stormData_used, CROPDMG * 10^CROPDMGEXP)
head(stormData_used)
##             BGN_DATE  EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG
## 1  4/18/1950 0:00:00 TORNADO          0       15    25.0          3       0
## 2  4/18/1950 0:00:00 TORNADO          0        0     2.5          3       0
## 3  2/20/1951 0:00:00 TORNADO          0        2    25.0          3       0
## 4   6/8/1951 0:00:00 TORNADO          0        2     2.5          3       0
## 5 11/15/1951 0:00:00 TORNADO          0        2     2.5          3       0
## 6 11/15/1951 0:00:00 TORNADO          0        6     2.5          3       0
##   CROPDMGEXP Year TOTALPROPDMG TOTALCROPDMG
## 1          0 1950        25000            0
## 2          0 1950         2500            0
## 3          0 1951        25000            0
## 4          0 1951         2500            0
## 5          0 1951         2500            0
## 6          0 1951         2500            0

Across the US, which types of events are most harmful with respect to population health?

Fatalities and injuries caused by severe weather events

# Sum fatalities per event type and label the two resulting columns.
TotFatalities <- setNames(
  aggregate(
    x = stormData_used$FATALITIES,
    by = list(stormData_used$EVTYPE),
    FUN = sum
  ),
  c("Event", "Fatalities")
)
# Keep the ten event types with the highest fatality totals, largest first.
TotFatalities <- TotFatalities[order(-TotFatalities$Fatalities)[1:10], ]
TotFatalities
##              Event Fatalities
## 834        TORNADO       5633
## 130 EXCESSIVE HEAT       1903
## 153    FLASH FLOOD        978
## 275           HEAT        937
## 464      LIGHTNING        816
## 856      TSTM WIND        504
## 170          FLOOD        470
## 585    RIP CURRENT        368
## 359      HIGH WIND        248
## 19       AVALANCHE        224
# Sum injuries per event type and label the two resulting columns.
TotInjuries <- setNames(
  aggregate(
    x = stormData_used$INJURIES,
    by = list(stormData_used$EVTYPE),
    FUN = sum
  ),
  c("Event", "Injuries")
)
# Keep the ten event types with the highest injury totals, largest first.
TotInjuries <- TotInjuries[order(-TotInjuries$Injuries)[1:10], ]
TotInjuries
##                 Event Injuries
## 834           TORNADO    91346
## 856         TSTM WIND     6957
## 170             FLOOD     6789
## 130    EXCESSIVE HEAT     6525
## 464         LIGHTNING     5230
## 275              HEAT     2100
## 427         ICE STORM     1975
## 153       FLASH FLOOD     1777
## 760 THUNDERSTORM WIND     1488
## 244              HAIL     1361

Results

Across the US, which types of events are most harmful with respect to population health?

Fatalities and injuries caused by severe weather events

# Full outer join of the two top-ten tables on event type. An event type that
# appears in only one of the tables gets NA in the other measure, and ggplot
# drops those bars with a warning.
TotHealthDamage <- merge(x = TotFatalities, y = TotInjuries, by = "Event", all = TRUE)
# Reshape to long form (one row per event type and measure) for a grouped bar chart.
TotHealthDamage <- melt(data.table(TotHealthDamage), id.vars = "Event")
# The plotted values are counts of fatalities and injuries, so the y axis is
# labelled as a number of people rather than a dollar amount.
ggplot(TotHealthDamage, aes(Event, value)) +
  geom_bar(aes(fill = variable), position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  xlab("Event Type") +
  ylab("Number of people") +
  ggtitle("Fatalities and injuries by event type")
## Warning: Removed 6 rows containing missing values (`geom_bar()`).

##### Conclusion: Tornado

Across the US, which types of events have the greatest economic consequences?

Property and crop damage

# Sum normalised property damage per event type and label the two columns.
TotPropDmg <- setNames(
  aggregate(
    x = stormData_used$TOTALPROPDMG,
    by = list(stormData_used$EVTYPE),
    FUN = sum
  ),
  c("Event", "Prop_Cost")
)
# Keep the ten event types with the highest property damage, largest first.
TotPropDmg <- TotPropDmg[order(-TotPropDmg$Prop_Cost)[1:10], ]
TotPropDmg
##                 Event    Prop_Cost
## 170             FLOOD 144657709807
## 411 HURRICANE/TYPHOON  69305840000
## 834           TORNADO  56947380677
## 670       STORM SURGE  43323536000
## 153       FLASH FLOOD  16822673979
## 244              HAIL  15735267513
## 402         HURRICANE  11868319010
## 848    TROPICAL STORM   7703890550
## 972      WINTER STORM   6688497251
## 359         HIGH WIND   5270046295
# Sum normalised crop damage per event type and label the two columns.
TotCropDmg <- setNames(
  aggregate(
    x = stormData_used$TOTALCROPDMG,
    by = list(stormData_used$EVTYPE),
    FUN = sum
  ),
  c("Event", "Crop_Cost")
)
# Keep the ten event types with the highest crop damage, largest first.
TotCropDmg <- TotCropDmg[order(-TotCropDmg$Crop_Cost)[1:10], ]
TotCropDmg
##                 Event   Crop_Cost
## 95            DROUGHT 13972566000
## 170             FLOOD  5661968450
## 590       RIVER FLOOD  5029459000
## 427         ICE STORM  5022113500
## 244              HAIL  3025954473
## 402         HURRICANE  2741910000
## 411 HURRICANE/TYPHOON  2607872800
## 153       FLASH FLOOD  1421317100
## 140      EXTREME COLD  1292973000
## 212      FROST/FREEZE  1094086000
# Full outer join of the top-ten property and crop tables on event type, then
# reshape to long form (one row per event type and cost category).
TotEcoDamage <- melt(
  data.table(merge(x = TotPropDmg, y = TotCropDmg, by = "Event", all = TRUE)),
  id.vars = "Event"
)
# Grouped bar chart: one bar per cost category within each event type.
ggplot(TotEcoDamage, aes(x = Event, y = value, fill = variable)) +
  geom_bar(position = "dodge", stat = "identity") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    x = "Event Type",
    y = "Damage, USD",
    title = "Crop/Property damage by type"
  )
## Warning: Removed 10 rows containing missing values (`geom_bar()`).

##### Conclusion: Flood

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.