Synopsis

This project explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database, covering events from 1950-01-03 UTC to 2011-11-30 UTC. The results show that, across the United States, tornadoes are the most harmful event type with respect to population health (injuries and fatalities). Floods, on the other hand, have the greatest economic consequences, measured as the total dollar value of property and crop damage. We explore both findings in more detail below.

Load libraries

library(dplyr)
library(ggplot2)
library(ggrepel)
library(stringr)
library(lubridate)
library(knitr)

Data Processing

# Read the raw NOAA storm data into a data frame. The file name indicates a
# bzip2-compressed CSV, which read.csv() can read directly from the path.
df <- read.csv(
  file = "D:/Documents Ddrive/R/ReproducibleResearch/repdata_data_StormData.csv.bz2"
)

Check min and max data dates

# Earliest event start date in the data.
df$BGN_DATE %>%
  mdy_hms() %>%
  min()
## [1] "1950-01-03 UTC"
# Latest event end date; values that fail to parse become NA and are dropped
# before taking the maximum.
df$END_DATE %>%
  mdy_hms() %>%
  na.omit() %>%
  max()
## [1] "2011-11-30 UTC"

Question 1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

Group by event type, keep the top 3 by total injuries, and plot total injuries against total fatalities.

# Total injuries and fatalities per event type, keeping the three event types
# with the most injuries (ties are kept), ordered from most to fewest injuries.
# slice_max() replaces the superseded top_n(); the final arrange() makes the
# row order of the printed table explicit.
df_grp_ev <- df %>%
  group_by(EVTYPE) %>%
  summarise(
    total_injuries = sum(INJURIES),
    total_fatalities = sum(FATALITIES),
    .groups = "drop"
  ) %>%
  slice_max(total_injuries, n = 3) %>%
  arrange(desc(total_injuries))

kable(df_grp_ev)
EVTYPE total_injuries total_fatalities
TORNADO 91346 5633
TSTM WIND 6957 504
FLOOD 6789 470
# Scatter plot of the selected event types: injuries on the x axis, fatalities
# on the y axis, with each point labelled by its event type.
p1 <- ggplot(
  df_grp_ev,
  aes(total_injuries, total_fatalities, label = EVTYPE)
) +
  geom_point() +
  geom_text_repel()
p1

Summary 1:

We can see that Tornadoes cause the most injuries and fatalities.

Question 2. Across the United States, which types of events have the greatest economic consequences?

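First we clean up the property and crop damage data. The damage exponent codes (for example K, M and B) are converted to numeric multipliers and applied to the damage amounts, so that the resulting dollar values can be summed and graphed.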

# Keep only the columns needed for the damage analysis. Columns are selected
# by name so the code does not depend on the column order of the raw file.
df2 <- df[, c(
  "STATE", "EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)]

# Translate damage exponent codes into numeric multipliers.
#
# exp_code: vector of exponent codes (character, or coercible to character).
# Returns a numeric vector of the same length. Letters are matched
# case-insensitively: H = 100, K = 1,000, M = 1,000,000, B = 1,000,000,000.
# The symbols "-", "?", "+" and the digits "1" to "8" map to 1, and "0" maps
# to 0. Blank or unrecognised codes give NA.
# NOTE(review): treating digit codes as 1 and "0" as 0 is an analysis choice
# carried over unchanged so that the reported totals stay the same -- confirm
# against the NOAA documentation before reusing elsewhere.
damage_multiplier <- function(exp_code) {
  codes <- c("H", "K", "M", "B", "-", "?", "+", as.character(0:8))
  values <- c(1e2, 1e3, 1e6, 1e9, 1, 1, 1, 0, rep(1, 8))
  values[match(toupper(as.character(exp_code)), codes)]
}

# Clean propdmg: multiplier (kmd) and property damage in dollars (kmdp).
df2$kmd <- damage_multiplier(df2$PROPDMGEXP)
df2$kmdp <- df2$PROPDMG * df2$kmd

# Clean cropdmg: crop damage in dollars (kmdc).
df2$kmdc <- df2$CROPDMG * damage_multiplier(df2$CROPDMGEXP)

colnames(df2)
## [1] "STATE"      "EVTYPE"     "PROPDMG"    "PROPDMGEXP" "CROPDMG"   
## [6] "CROPDMGEXP" "kmd"        "kmdp"       "kmdc"
# Start from the two identifying columns (the first two columns of df2).
df3 <- df2[, c("STATE", "EVTYPE")]

# Attach the computed damage values under the original column names.
df3[["PROPDMG"]] <- df2[["kmdp"]]
df3[["CROPDMG"]] <- df2[["kmdc"]]

# Replace NAs with 0
df3[is.na(df3)] <- 0

Next, we group by event type, keep the top 3 by total property damage, and graph the results.

# Total property and crop damage per event type, keeping the three event types
# with the highest property damage (ties are kept), ordered from highest to
# lowest. slice_max() replaces the superseded top_n(); the final arrange()
# makes the row order of the printed table explicit.
df_grp_ev_dmg <- df3 %>%
  group_by(EVTYPE) %>%
  summarise(
    total_propdamage = sum(PROPDMG),
    total_cropdamage = sum(CROPDMG),
    .groups = "drop"
  ) %>%
  slice_max(total_propdamage, n = 3) %>%
  arrange(desc(total_propdamage))

kable(df_grp_ev_dmg)
EVTYPE total_propdamage total_cropdamage
FLOOD 144657709800 5661968450
HURRICANE/TYPHOON 69305840000 2607872800
TORNADO 56937160642 414953110
# Scatter plot of the selected event types: property damage on the x axis,
# crop damage on the y axis, with each point labelled by its event type.
p2 <- ggplot(
  df_grp_ev_dmg,
  aes(total_propdamage, total_cropdamage, label = EVTYPE)
) +
  geom_point() +
  geom_text_repel()
p2

Summary 2:

We can see that Floods cause the most property damage, and also the most crop damage among the three event types with the highest property damage.

Results:

We can see that Tornadoes cause the most injuries and fatalities and that Floods cause the most property and crop damage.