This analysis assesses which types of weather events cause the most harm to people (injuries and fatalities) and to property and crops in the United States. The data were collected between 1950 and November 2011.
We found that:
The major events that cause damage are: FLOOD, HURRICANE/TYPHOON, TORNADO, and STORM SURGE
The major events that cause injuries are: TORNADO, TSTM WIND, FLOOD, EXCESSIVE HEAT, and LIGHTNING
The major events that cause fatalities are: TORNADO, EXCESSIVE HEAT, FLASH FLOOD, HEAT, and LIGHTNING
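The data were loaded, and the event-type, fatality, injury, property-damage, and crop-damage columns were selected as the relevant ones. Each damage value was then multiplied by the magnitude encoded in its accompanying exponent column (for example, K = 1,000, M = 1,000,000, B = 1,000,000,000), so that the full dollar amount is held in a single column.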
Each column was summed according to the event type. Events with zero harm were removed from the summary.
# Load the raw storm database from the bzip2-compressed CSV in the
# working directory.
storm_data <- read.csv(file = "repdata_data_StormData.csv.bz2")
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Keep only the event type plus the contiguous run of harm columns
# (FATALITIES through CROPDMGEXP).
storm_data_clean <- storm_data %>%
  select(EVTYPE, FATALITIES:CROPDMGEXP)

# List the distinct property-damage exponent codes present in the data.
print(unique(storm_data_clean[["PROPDMGEXP"]]))
## [1] "K" "M" "" "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
# List the distinct crop-damage exponent codes present in the data.
print(unique(storm_data_clean[["CROPDMGEXP"]]))
## [1] "" "M" "K" "m" "B" "?" "0" "k" "2"
# Translate a damage "exponent" code into the numeric factor by which the
# accompanying damage figure is multiplied.
#
# exponent: character vector of single-character codes, as found in
#           PROPDMGEXP / CROPDMGEXP.
# Returns:  numeric vector of the same length:
#             B/b -> 1e9, M/m -> 1e6, K/k -> 1e3, H/h -> 1e2,
#             digits 0-8 and "+" -> 10,
#             anything else ("", "?", "-", NA) -> 1 (value left unchanged).
damage_multiplier <- function(exponent) {
  code <- toupper(exponent)
  case_when(
    code == "B" ~ 1e9,
    code == "M" ~ 1e6,
    code == "K" ~ 1e3,
    code == "H" ~ 1e2,
    code %in% c(as.character(0:8), "+") ~ 10,
    TRUE ~ 1
  )
}

# Scale both damage columns to full dollar amounts using the same rule,
# then drop the exponent columns, which are no longer needed.
# Note: "H" is now recognised for crop damage as well as property damage;
# no crop record in this data set carries that code, so totals are unchanged.
storm_data_clean <- storm_data_clean %>%
  mutate(
    PROPDMG = PROPDMG * damage_multiplier(PROPDMGEXP),
    CROPDMG = CROPDMG * damage_multiplier(CROPDMGEXP)
  ) %>%
  select(-PROPDMGEXP, -CROPDMGEXP)
# Total fatalities, injuries and combined property + crop damage for each
# event type, keeping only event types with some recorded harm.
grouped_storm_data <- storm_data_clean %>%
  group_by(EVTYPE)

storm_data_summary <- grouped_storm_data %>%
  summarise(
    fatalities = sum(FATALITIES),
    injuries = sum(INJURIES),
    damage = sum(PROPDMG) + sum(CROPDMG)
  ) %>%
  rename(event = EVTYPE) %>%
  filter((fatalities + injuries + damage) > 0)
# Rank event types by total damage (largest first), ignoring those with none.
# The factor levels follow ascending damage so that a horizontal bar chart
# draws the most damaging event at the top.
damage_data <- storm_data_summary %>%
  filter(damage > 0) %>%
  select(event, damage) %>%
  arrange(desc(damage)) %>%
  mutate(event = factor(event, levels = event[order(damage)]))

print(damage_data)
## # A tibble: 431 × 2
## event damage
## <fct> <dbl>
## 1 FLOOD 150319678257
## 2 HURRICANE/TYPHOON 71913712800
## 3 TORNADO 57352118150
## 4 STORM SURGE 43323541000
## 5 HAIL 18758224587
## 6 FLASH FLOOD 17562132318
## 7 DROUGHT 15018672000
## 8 HURRICANE 14610229010
## 9 RIVER FLOOD 10148404500
## 10 ICE STORM 8967041810
## # ℹ 421 more rows
library(ggplot2)
# Horizontal bar chart of the 15 most damaging event types
# (damage_data is already sorted by descending damage).
# head() replaces `[1:15, ]` so that a table with fewer than 15 rows is not
# padded with all-NA rows.
g <- ggplot(head(damage_data, 15), aes(x = damage, y = event))
g +
  geom_col() +
  labs(
    title = "Total Property and Crop Damage",
    x = "Damage in USD",
    y = "Weather Event"
  )
The major events that cause damage are: FLOOD, HURRICANE/TYPHOON, TORNADO, and STORM SURGE
# Rank event types by total injuries (largest first), ignoring those with none.
# The factor levels follow ascending injuries so that a horizontal bar chart
# draws the most injurious event at the top.
injury_data <- storm_data_summary %>%
  filter(injuries > 0) %>%
  select(event, injuries) %>%
  arrange(desc(injuries)) %>%
  mutate(event = factor(event, levels = event[order(injuries)]))

print(injury_data)
## # A tibble: 158 × 2
## event injuries
## <fct> <dbl>
## 1 TORNADO 91346
## 2 TSTM WIND 6957
## 3 FLOOD 6789
## 4 EXCESSIVE HEAT 6525
## 5 LIGHTNING 5230
## 6 HEAT 2100
## 7 ICE STORM 1975
## 8 FLASH FLOOD 1777
## 9 THUNDERSTORM WIND 1488
## 10 HAIL 1361
## # ℹ 148 more rows
# Rank event types by total fatalities (largest first), ignoring those with
# none. The factor levels follow ascending fatalities so that a horizontal
# bar chart draws the deadliest event at the top.
fatality_data <- storm_data_summary %>%
  filter(fatalities > 0) %>%
  select(event, fatalities) %>%
  arrange(desc(fatalities)) %>%
  mutate(event = factor(event, levels = event[order(fatalities)]))

print(fatality_data)
## # A tibble: 168 × 2
## event fatalities
## <fct> <dbl>
## 1 TORNADO 5633
## 2 EXCESSIVE HEAT 1903
## 3 FLASH FLOOD 978
## 4 HEAT 937
## 5 LIGHTNING 816
## 6 TSTM WIND 504
## 7 FLOOD 470
## 8 RIP CURRENT 368
## 9 HIGH WIND 248
## 10 AVALANCHE 224
## # ℹ 158 more rows
# Horizontal bar chart of the 15 event types with the most injuries
# (injury_data is already sorted by descending injuries).
# head() replaces `[1:15, ]` so that a table with fewer than 15 rows is not
# padded with all-NA rows.
g <- ggplot(head(injury_data, 15), aes(x = injuries, y = event))
g +
  geom_col() +
  labs(
    title = "Injuries",
    x = "Person count",
    y = "Weather Event"
  )
The major events that cause injuries are: TORNADO, TSTM WIND, FLOOD, EXCESSIVE HEAT, and LIGHTNING
# Horizontal bar chart of the 15 event types with the most fatalities
# (fatality_data is already sorted by descending fatalities).
# head() replaces `[1:15, ]` so that a table with fewer than 15 rows is not
# padded with all-NA rows.
g <- ggplot(head(fatality_data, 15), aes(x = fatalities, y = event))
g +
  geom_col() +
  labs(
    title = "Fatalities",
    x = "Person count",
    y = "Weather Event"
  )
The major events that cause fatalities are: TORNADO, EXCESSIVE HEAT, FLASH FLOOD, HEAT, and LIGHTNING