Based on an analysis of the data available in the NOAA Storm Database, tornadoes are, by far, the weather phenomenon responsible for the most economic damage in the United States. They are also responsible for the largest number of injuries and human fatalities.
The primary objective of this study is to examine the NOAA Storm Database and provide basic responses to inquiries concerning severe weather incidents.
Which weather events are the most detrimental to population health in the United States?
Which events in the United States have the most significant economic repercussions?
Loading R packages needed for the study.
library(tidyverse)
library(lubridate)
library(gridExtra)
Downloading the data in R
# Location of the bzip2-compressed Storm Data CSV.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
local_archive <- "StormData.csv.bz2"

# Fetch the archive only when it is not already on disk, so that re-running
# the analysis does not download it again. The default download method is
# used instead of forcing an external `curl` binary, and mode = "wb" keeps
# the compressed file intact on platforms that distinguish text from binary.
if (!file.exists(local_archive)) {
  download.file(fileUrl, local_archive, mode = "wb")
}

# read.csv() reads the .bz2 file directly; no manual decompression is needed.
meteo_data <- read.csv(local_archive, header = TRUE, sep = ",")
Taking a first look at the dataset.
# Print a compact overview of the loaded data frame: dimensions, column
# names, column types and the first few values of each column.
str(meteo_data)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
Parsing the date with lubridate package
# Convert the begin-date strings (month/day/year hour:minute:second, e.g.
# "4/18/1950 0:00:00") to date-times and write them back into the data frame.
BGN_DATE <- mdy_hms(meteo_data[["BGN_DATE"]])
meteo_data[["BGN_DATE"]] <- BGN_DATE

# Make sure the event type is stored as a character vector.
meteo_data[["EVTYPE"]] <- as.character(meteo_data[["EVTYPE"]])
Using aggregate to sum the fatalities and injuries caused by each weather phenomenon.
# Total fatalities per event type, then the table printed from the
# deadliest event type downwards.
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = meteo_data, FUN = sum)
fatalities %>% arrange(desc(FATALITIES))

# Total injuries per event type, printed the same way.
injuries <- aggregate(INJURIES ~ EVTYPE, data = meteo_data, FUN = sum)
injuries %>% arrange(desc(INJURIES))
Plotting the results. Tornadoes rank first for both fatalities and injuries.
# Bar chart of the top 10 event types by total fatalities, with bars ordered
# from the highest total to the lowest. Stored in g1 so it can be laid out
# next to the injuries chart.
g1 <- fatalities %>%
  arrange(desc(FATALITIES)) %>%
  top_n(n = 10, wt = FATALITIES) %>%
  ggplot(aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)) +
  geom_col() +
  # Rotate the event-type labels so that long names do not overlap.
  scale_x_discrete(guide = guide_axis(angle = 90)) +
  labs(x = NULL, title = "Top 10 Weather Events for fatalities")
# Bar chart of the top 10 event types by total injuries, with bars ordered
# from the highest total to the lowest.
g2 <- injuries %>%
  arrange(desc(INJURIES)) %>%
  top_n(n = 10, wt = INJURIES) %>%
  ggplot(aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)) +
  geom_col() +
  # Rotate the event-type labels so that long names do not overlap.
  scale_x_discrete(guide = guide_axis(angle = 90)) +
  labs(x = NULL, title = "Top 10 Weather Events for injuries")

# Draw the fatalities and injuries charts side by side.
grid.arrange(g1, g2, ncol = 2)
Calculating the economic burden.
# PROPDMG and CROPDMG hold only the leading figure of each damage estimate;
# the matching PROPDMGEXP / CROPDMGEXP column carries its magnitude (for
# example PROPDMG = 25 together with PROPDMGEXP = "K"). Summing the raw
# figures would mix thousands, millions and billions on one scale, so every
# value is converted to dollars before any aggregation.
#
# exp_code: character vector of magnitude codes.
# Returns a numeric vector of multipliers of the same length.
damage_multiplier <- function(exp_code) {
  # K / M / B = thousands / millions / billions; H is presumably hundreds.
  # NOTE(review): confirm against the NOAA Storm Data documentation.
  known <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(known[toupper(exp_code)])
  # Blank or unrecognised codes are left unscaled rather than dropped.
  # NOTE(review): confirm the intended treatment of those codes.
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Per-record damage expressed in dollars. Column names are kept so that the
# aggregated tables have the same shape as before.
damage_usd <- transmute(
  meteo_data,
  EVTYPE,
  PROPDMG = PROPDMG * damage_multiplier(PROPDMGEXP),
  CROPDMG = CROPDMG * damage_multiplier(CROPDMGEXP)
)

# Total crop damage per event type, printed from the costliest downwards.
crop_damage <- aggregate(CROPDMG ~ EVTYPE, damage_usd, sum)
arrange(crop_damage, desc(CROPDMG))

# Total property damage per event type, printed the same way.
prop_damage <- aggregate(PROPDMG ~ EVTYPE, damage_usd, sum)
arrange(prop_damage, desc(PROPDMG))

# Top 10 event types by property damage.
g3 <- prop_damage %>%
  arrange(desc(PROPDMG)) %>%
  top_n(n = 10, wt = PROPDMG) %>%
  ggplot(aes(x = reorder(EVTYPE, -PROPDMG), y = PROPDMG)) +
  geom_col() +
  scale_x_discrete(guide = guide_axis(angle = 90)) +
  labs(x = NULL, y = "Property damage (USD)",
       title = "Top 10 Events for property damage")

# Top 10 event types by crop damage.
g4 <- crop_damage %>%
  arrange(desc(CROPDMG)) %>%
  top_n(n = 10, wt = CROPDMG) %>%
  ggplot(aes(x = reorder(EVTYPE, -CROPDMG), y = CROPDMG)) +
  geom_col() +
  scale_x_discrete(guide = guide_axis(angle = 90)) +
  labs(x = NULL, y = "Crop damage (USD)",
       title = "Top 10 Events for crop damage")

# Draw the property and crop damage charts side by side.
grid.arrange(g3, g4, ncol = 2)

# Total damage per record in dollars (property + crop), added to the main
# data frame under the same column name as before.
# NOTE(review): the narrative conclusions about economic damage were written
# against unscaled sums and should be re-checked against these figures.
meteo_data <- mutate(meteo_data, TOTDMG = damage_usd$PROPDMG + damage_usd$CROPDMG)
Calculating the total economic consequences of the most severe weather conditions.
# Sum the combined (property + crop) damage per event type and print the
# table from the costliest event type downwards.
tot_damage <- aggregate(TOTDMG ~ EVTYPE, data = meteo_data, FUN = sum)
tot_damage %>% arrange(desc(TOTDMG))
# Bar chart of the top 10 event types by combined damage, with bars ordered
# from the highest total to the lowest. The plot is not assigned, so it is
# displayed immediately.
tot_damage %>%
  arrange(desc(TOTDMG)) %>%
  top_n(n = 10, wt = TOTDMG) %>%
  ggplot(aes(x = reorder(EVTYPE, -TOTDMG), y = TOTDMG)) +
  geom_col() +
  # Rotate the event-type labels so that long names do not overlap.
  scale_x_discrete(guide = guide_axis(angle = 90)) +
  labs(x = NULL, title = "Top 10 Events for economic damage")
Tornadoes are, by far, the weather phenomenon responsible for the most economic damage in the United States. They are also responsible for the largest number of injuries and human fatalities.