library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
library(knitr)
## Warning: package 'knitr' was built under R version 4.5.3
This report analyzes the NOAA Storm Database to identify the weather events that are most harmful to population health and those with the greatest economic consequences in the United States. Health impact is measured as the combined number of fatalities and injuries, and economic impact as the combined property and crop damage. The analysis shows that tornadoes cause by far the greatest harm to population health, while floods cause the greatest economic losses, followed by hurricanes/typhoons.
# Load the raw storm data CSV from the working directory.
# NOTE(review): the knitted output for this call reports "EOF within quoted
# string", so the file may not have been parsed completely -- confirm the row
# count against the source data before relying on the totals below.
storm <- read.csv(file = "repdata_data_StormData.csv", header = TRUE)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
## : EOF within quoted string
# Print the structure (column names, types, leading values) of the loaded data.
str(object = storm)
## 'data.frame': 831084 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Keep only the columns used below: event type, casualty counts, and the
# damage amounts together with their magnitude codes.
storm2 <- select(
  storm,
  all_of(c(
    "EVTYPE",
    "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP",
    "CROPDMG", "CROPDMGEXP"
  ))
)
# Sum fatalities and injuries per event type and rank the event types by the
# combined count, largest first.
health <- storm2 %>%
  group_by(EVTYPE) %>%
  summarise(
    fatalities = sum(FATALITIES),
    injuries = sum(INJURIES),
    total_harm = fatalities + injuries
  ) %>%
  arrange(desc(total_harm))

# The ten event types with the highest combined casualty count.
health_top <- slice_head(health, n = 10)
health_top
## # A tibble: 10 × 4
## EVTYPE fatalities injuries total_harm
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 5035 85049 90084
## 2 EXCESSIVE HEAT 1867 6387 8254
## 3 TSTM WIND 504 6957 7461
## 4 FLOOD 400 6658 7058
## 5 LIGHTNING 787 5019 5806
## 6 FLASH FLOOD 900 1747 2647
## 7 HEAT 872 1489 2361
## 8 ICE STORM 89 1975 2064
## 9 WINTER STORM 205 1319 1524
## 10 HIGH WIND 243 1125 1368
#' Convert damage magnitude codes to numeric multipliers
#'
#' The match is case-insensitive, so "k", "m" and "b" are treated the same as
#' "K", "M" and "B" instead of silently falling back to a multiplier of 1.
#'
#' @param exp Vector of magnitude codes (character or factor), e.g. the
#'   PROPDMGEXP or CROPDMGEXP column.
#' @return Numeric vector with one element per element of `exp`: 1e3 for "K",
#'   1e6 for "M", 1e9 for "B", 1 for any other non-missing code, and NA for NA.
convert_exp <- function(exp) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
  code <- toupper(as.character(exp))

  # Name-based lookup; codes without an entry (including "") come back as NA.
  mult <- unname(multipliers[code])

  # Unrecognised codes leave the amount unscaled; genuinely missing codes
  # stay NA.
  mult[is.na(mult) & !is.na(code)] <- 1
  mult
}
# Translate the magnitude codes into multipliers and scale the recorded
# property and crop damage amounts by them.
storm2 <- storm2 %>%
  mutate(
    prop_mult = convert_exp(PROPDMGEXP),
    crop_mult = convert_exp(CROPDMGEXP),
    prop_total = PROPDMG * prop_mult,
    crop_total = CROPDMG * crop_mult
  )
# Sum scaled property and crop damage per event type and rank the event types
# by the combined amount, largest first.
economic <- storm2 %>%
  group_by(EVTYPE) %>%
  summarise(
    property_damage = sum(prop_total),
    crop_damage = sum(crop_total),
    total_damage = property_damage + crop_damage
  ) %>%
  arrange(desc(total_damage))

# The ten event types with the highest combined damage.
economic_top <- slice_head(economic, n = 10)
economic_top
## # A tibble: 10 × 4
## EVTYPE property_damage crop_damage total_damage
## <chr> <dbl> <dbl> <dbl>
## 1 FLOOD 136668419717 5499074450 142167494167
## 2 HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 3 TORNADO 46877429590. 383018270 47260447860.
## 4 STORM SURGE 43323536000 5000 43323541000
## 5 FLASH FLOOD 14669748367. 1197630100 15867378467.
## 6 HAIL 12287866003. 2891413890 15179279893.
## 7 DROUGHT 1045992000 13938635000 14984627000
## 8 HURRICANE 11857819010 2731410000 14589229010
## 9 RIVER FLOOD 5118945500 5029459000 10148404500
## 10 ICE STORM 3936950360 5022033500 8958983860
# Horizontal bar chart of the ten event types with the most casualties;
# bars are ordered by the combined injury and fatality count.
ggplot(
  data = health_top,
  mapping = aes(x = reorder(EVTYPE, total_harm), y = total_harm)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  ggtitle("Top 10 Weather Events Harmful to Population Health") +
  xlab("Weather Event") +
  ylab("Total Injuries and Fatalities")
# Horizontal bar chart of the ten event types with the largest combined
# property and crop damage; bars are ordered by total damage.
# Values are plotted in billions so the axis shows readable numbers instead of
# scientific notation, and the axis label states the unit.
# NOTE(review): the unit is assumed to be US dollars -- confirm against the
# dataset documentation.
ggplot(
  economic_top,
  aes(
    x = reorder(EVTYPE, total_damage),
    y = total_damage / 1e9
  )
) +
  geom_col(fill = "darkred") +
  coord_flip() +
  labs(
    title = "Top 10 Weather Events Causing Economic Damage",
    x = "Weather Event",
    y = "Total Economic Damage (billions of USD)"
  )