This report analyzes the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database to identify storm event types that are most harmful to population health and those that have the greatest economic consequences across the United States. The data spans from 1950 to November 2011.
# libraries
library(lubridate)
## Warning: 程序包'lubridate'是用R版本4.4.3 来建造的
##
## 载入程序包:'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(dplyr)
## Warning: 程序包'dplyr'是用R版本4.4.3 来建造的
##
## 载入程序包:'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: 程序包'ggplot2'是用R版本4.4.3 来建造的
library(forcats)
## Warning: 程序包'forcats'是用R版本4.4.3 来建造的
library(maps)
## Warning: 程序包'maps'是用R版本4.4.3 来建造的
# load the raw storm events table from the CSV file (relative path)
StormData <- read.csv(file = "repdata_data_StormData.csv")
To generate my results, I prepare some variables, such as the dates and the damage units. To obtain the total damage, it is necessary to use the PROPDMGEXP/CROPDMGEXP variables; however, the meaning of some of their values is not clearly documented, so I decided to keep only the following values from PROPDMGEXP/CROPDMGEXP: ‘h’, ‘k’, ‘m’ and ‘b’.
# dates
# BGN_DATE is parsed with a month/day/year pattern; as.Date() keeps only the
# calendar day, and year() extracts the calendar year from it
StormData$date <- as.Date(StormData$BGN_DATE, format = "%m/%d/%Y %H:%M:%S")
StormData$year <- year(StormData$date)
# units
# multiplier for each exponent code kept in the analysis:
# hundreds (H), thousands (K), millions (M) and billions (B)
dmg_multiplier <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
# codes are matched case-insensitively and with the same rule for property and
# crop damage; every other code (including the empty string) yields NA
StormData$units_p <- unname(
  dmg_multiplier[match(toupper(StormData$PROPDMGEXP), names(dmg_multiplier))]
)
StormData$units_c <- unname(
  dmg_multiplier[match(toupper(StormData$CROPDMGEXP), names(dmg_multiplier))]
)
Also, I generate different datasets that aggregate the data to present the results in a better way.
Total of fatalities and injuries by event type
# aggregation by evtype: total fatalities and total injuries per event type
data_evtype <- StormData %>%
  group_by(EVTYPE) %>%
  summarise(total_fat = sum(FATALITIES, na.rm = TRUE),
            total_inj = sum(INJURIES, na.rm = TRUE)) %>%
  # generate the total health damage (total fatalities + total injuries)
  mutate(total_hdam = total_fat + total_inj) %>%
  # most harmful event types first
  arrange(desc(total_hdam))
# get the top of evtype (head() does not pad with NA when fewer than ten exist)
top_evtype <- head(data_evtype$EVTYPE, 10)
Total of fatalities and injuries by event type over time
# aggregation by evtype and year: total fatalities and injuries per event type
# and year; `.groups = "drop"` returns an ungrouped table and makes the
# grouping of the result explicit, so summarise() no longer prints its message
data_evtype_y <- StormData %>%
  group_by(EVTYPE, year) %>%
  summarise(total_fat = sum(FATALITIES, na.rm = TRUE),
            total_inj = sum(INJURIES, na.rm = TRUE),
            .groups = "drop") %>%
  # generate the total health damage (total fatalities + total injuries)
  mutate(total_hdam = total_fat + total_inj) %>%
  # data just for the top evtype
  filter(EVTYPE %in% top_evtype)
# damage amounts: reported value scaled by the multiplier of its exponent code
# (rows whose multiplier is NA produce NA and are ignored by the sums below)
StormData$prop_dmg <- StormData$PROPDMG * StormData$units_p
StormData$crop_dmg <- StormData$CROPDMG * StormData$units_c
# aggregation by evtype: total property and total crop damage per event type
data_evtype_eco <- StormData %>%
  group_by(EVTYPE) %>%
  summarise(total_prop = sum(prop_dmg, na.rm = TRUE),
            total_crop = sum(crop_dmg, na.rm = TRUE)) %>%
  # generate the total economic damage (total property + total crop damage)
  mutate(total_edam = total_prop + total_crop) %>%
  # costliest event types first
  arrange(desc(total_edam))
# get the top of evtype (head() does not pad with NA when fewer than ten exist)
top_evtype_eco <- head(data_evtype_eco$EVTYPE, 10)
# aggregation by evtype and year: total property and crop damage per event
# type and year; `.groups = "drop"` returns an ungrouped table and makes the
# grouping of the result explicit, so summarise() no longer prints its message
data_evtype_y_eco <- StormData %>%
  group_by(EVTYPE, year) %>%
  summarise(total_prop = sum(prop_dmg, na.rm = TRUE),
            total_crop = sum(crop_dmg, na.rm = TRUE),
            .groups = "drop") %>%
  # generate the total economic damage (total property + total crop damage)
  mutate(total_edam = total_prop + total_crop) %>%
  # data just for the top evtype
  filter(EVTYPE %in% top_evtype_eco)
A weather event can have different impacts on a country. Two of the most important are related to public health and the economy. Below, I present some significant results on the impact of weather events in the USA.
Which types of events are most harmful to population health?
Table 1 shows the top ten weather events with the greatest health impact over the whole period covered by the data. Tornadoes have had the greatest impact in terms of total fatalities and injuries. Other significant events with a substantial impact include excessive heat, TSTM winds and flooding.
# first ten rows and first four columns of the health-impact ranking,
# rendered as a captioned table with readable column headers
table1 <- data_evtype[seq_len(10), seq_len(4)]
knitr::kable(
  table1,
  col.names = c("Event type", "Total fatalities",
                "Total injuries", "Total health damage"),
  caption = "Table 1. Impact in population health of weather events"
)
Event type | Total fatalities | Total injuries | Total health damage |
---|---|---|---|
TORNADO | 5633 | 91346 | 96979 |
EXCESSIVE HEAT | 1903 | 6525 | 8428 |
TSTM WIND | 504 | 6957 | 7461 |
FLOOD | 470 | 6789 | 7259 |
LIGHTNING | 816 | 5230 | 6046 |
HEAT | 937 | 2100 | 3037 |
FLASH FLOOD | 978 | 1777 | 2755 |
ICE STORM | 89 | 1975 | 2064 |
THUNDERSTORM WIND | 133 | 1488 | 1621 |
WINTER STORM | 206 | 1321 | 1527 |
It is also interesting to observe how total health damage has evolved over time. This graph shows years with peaks in the total number of fatalities and injuries. Notably, there was a peak in floods in the 1990s and in tornadoes in the 2010s. These peaks could be a sign of extreme weather events in recent years.
# yearly total of fatalities and injuries after 1990, one line per event type
ggplot(filter(data_evtype_y, year > 1990),
       aes(x = year, y = total_hdam, color = EVTYPE)) +
  # `linewidth` replaces the `size` argument for lines, which is deprecated
  # since ggplot2 3.4.0
  geom_line(linewidth = 1.2) +
  geom_point() +
  labs(title = "Total of fatalities and injuries per event type over time",
       x = "Year",
       y = "Total",
       color = "Event type") +
  theme_minimal() +
  # extra right margin and a centred title
  theme(plot.margin = margin(10, 30, 10, 10),
        plot.title = element_text(hjust = 0.5))
Which types of events have the greatest economic consequences?
The graph shows that floods are the event type with the greatest economic impact, followed by hurricanes/typhoons, tornadoes and storm surges.
# horizontal bar chart of the ten first rows of the economic ranking,
# with event types ordered by their total economic damage
data_evtype_eco[1:10, ] %>%
  ggplot(aes(x = fct_reorder(EVTYPE, total_edam), y = total_edam)) +
  geom_col(fill = "steelblue") +
  # flip the axes so the event type labels are on the vertical axis
  coord_flip() +
  labs(
    title = "Top 10 events with most economic impact",
    x = "Event type",
    y = "Total economic impact"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.margin = margin(10, 30, 10, 10)
  )