Loading Data
data <- read.csv("repdata_data_StormData.csv.bz2")
head(data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
Most Harmful Events (Health Impact)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
health_data <- data %>%
group_by(EVTYPE) %>%
summarise(
fatalities = sum(FATALITIES, na.rm = TRUE),
injuries = sum(INJURIES, na.rm = TRUE)
) %>%
arrange(desc(fatalities + injuries))
head(health_data, 10)
## # A tibble: 10 × 3
## EVTYPE fatalities injuries
## <chr> <dbl> <dbl>
## 1 TORNADO 5633 91346
## 2 EXCESSIVE HEAT 1903 6525
## 3 TSTM WIND 504 6957
## 4 FLOOD 470 6789
## 5 LIGHTNING 816 5230
## 6 HEAT 937 2100
## 7 FLASH FLOOD 978 1777
## 8 ICE STORM 89 1975
## 9 THUNDERSTORM WIND 133 1488
## 10 WINTER STORM 206 1321
Top 10 Harmful Events Plot
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.3
top10_health <- head(health_data, 10)
ggplot(top10_health, aes(x = reorder(EVTYPE, fatalities + injuries),
y = fatalities + injuries)) +
geom_bar(stat = "identity", fill = "red") +
coord_flip() +
labs(title = "Top 10 Most Harmful Weather Events",
x = "Event Type",
y = "Total Fatalities + Injuries")

Economic Damage Analysis
# Convert exponent to multiplier
convert_exp <- function(exp) {
if (exp == "K") return(1000)
else if (exp == "M") return(1e6)
else if (exp == "B") return(1e9)
else return(1)
}
data$PROPDMGEXP <- as.character(data$PROPDMGEXP)
data$CROPDMGEXP <- as.character(data$CROPDMGEXP)
data$prop_dmg <- data$PROPDMG * sapply(data$PROPDMGEXP, convert_exp)
data$crop_dmg <- data$CROPDMG * sapply(data$CROPDMGEXP, convert_exp)
economic_data <- data %>%
group_by(EVTYPE) %>%
summarise(total_damage = sum(prop_dmg + crop_dmg, na.rm = TRUE)) %>%
arrange(desc(total_damage))
head(economic_data, 10)
## # A tibble: 10 × 2
## EVTYPE total_damage
## <chr> <dbl>
## 1 FLOOD 150319678257
## 2 HURRICANE/TYPHOON 71913712800
## 3 TORNADO 57340614060.
## 4 STORM SURGE 43323541000
## 5 HAIL 18752904943.
## 6 FLASH FLOOD 17562129167.
## 7 DROUGHT 15018672000
## 8 HURRICANE 14610229010
## 9 RIVER FLOOD 10148404500
## 10 ICE STORM 8967041360
Top 10 Economic Damage Plot
top10_econ <- head(economic_data, 10)
ggplot(top10_econ, aes(x = reorder(EVTYPE, total_damage),
y = total_damage)) +
geom_bar(stat = "identity", fill = "blue") +
coord_flip() +
labs(title = "Top 10 Events Causing Economic Damage",
x = "Event Type",
y = "Total Damage")
