In this analysis, we look at which types of events are most harmful to population health and which types of events have the greatest economic consequences. We see that tornadoes, excessive heat, thunderstorm winds and floods are the most harmful to public health, and that floods, hurricanes/typhoons and tornadoes have the greatest economic consequences. For this analysis we used the storm data from the NOAA Storm Database.
# One-time setup, kept disabled: fetch the compressed file and unpack it to
# "storm_data.csv" in the working directory.
#download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2", "storm_data.bz2")
#library(R.utils)
#bunzip2("storm_data.bz2", "storm_data.csv", remove = FALSE)
# Load the unpacked CSV. Cells holding "?" or an empty string are read as NA.
storm_data <- read.csv(
  file = "storm_data.csv",
  na.strings = c(NA, "?", "")
)
# Peek at the first rows.
head(x = storm_data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 <NA> <NA> <NA> <NA> 0 NA
## 2 0 <NA> <NA> <NA> <NA> 0 NA
## 3 0 <NA> <NA> <NA> <NA> 0 NA
## 4 0 <NA> <NA> <NA> <NA> 0 NA
## 5 0 <NA> <NA> <NA> <NA> 0 NA
## 6 0 <NA> <NA> <NA> <NA> 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 <NA> <NA> 14.0 100 3 0 0 15 25.0
## 2 0 <NA> <NA> 2.0 150 2 0 0 0 2.5
## 3 0 <NA> <NA> 0.1 123 2 0 0 2 25.0
## 4 0 <NA> <NA> 0.0 100 2 0 0 2 2.5
## 5 0 <NA> <NA> 0.0 150 2 0 0 2 2.5
## 6 0 <NA> <NA> 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 <NA> <NA> <NA> <NA> 3040 8812
## 2 K 0 <NA> <NA> <NA> <NA> 3042 8755
## 3 K 0 <NA> <NA> <NA> <NA> 3340 8742
## 4 K 0 <NA> <NA> <NA> <NA> 3458 8626
## 5 K 0 <NA> <NA> <NA> <NA> 3412 8642
## 6 K 0 <NA> <NA> <NA> <NA> 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 <NA> 1
## 2 0 0 <NA> 2
## 3 0 0 <NA> 3
## 4 0 0 <NA> 4
## 5 0 0 <NA> 5
## 6 0 0 <NA> 6
# Show the column names and storage types of the loaded table.
str(storm_data)
## 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr NA NA NA NA ...
## $ BGN_LOCATI: chr NA NA NA NA ...
## $ END_DATE : chr NA NA NA NA ...
## $ END_TIME : chr NA NA NA NA ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr NA NA NA NA ...
## $ END_LOCATI: chr NA NA NA NA ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr NA NA NA NA ...
## $ WFO : chr NA NA NA NA ...
## $ STATEOFFIC: chr NA NA NA NA ...
## $ ZONENAMES : chr NA NA NA NA ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr NA NA NA NA ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
# Columns that the health and economic questions rely on.
key_cols <- c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "CROPDMG")
# Number of missing cells across the whole table.
sum(is.na(storm_data))
## [1] 6645733
# Distribution of the key columns.
summary(storm_data[, key_cols])
## EVTYPE FATALITIES INJURIES PROPDMG
## Length:902297 Min. : 0.0000 Min. : 0.0000 Min. : 0.00
## Class :character 1st Qu.: 0.0000 1st Qu.: 0.0000 1st Qu.: 0.00
## Mode :character Median : 0.0000 Median : 0.0000 Median : 0.00
## Mean : 0.0168 Mean : 0.1557 Mean : 12.06
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000 3rd Qu.: 0.50
## Max. :583.0000 Max. :1700.0000 Max. :5000.00
## CROPDMG
## Min. : 0.000
## 1st Qu.: 0.000
## Median : 0.000
## Mean : 1.527
## 3rd Qu.: 0.000
## Max. :990.000
# Number of missing cells within the key columns only.
sum(is.na(storm_data[, key_cols]))
## [1] 1
library(dplyr)
library(ggplot2)
library(patchwork)
library(tidyr)
# Per event type: summed fatalities, summed injuries and their combined count,
# ordered so the event type with the most casualties comes first.
evtype_totals <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    total_fatalities = sum(FATALITIES),
    total_injuries = sum(INJURIES),
    total_casualties = total_fatalities + total_injuries
  ) %>%
  arrange(desc(total_casualties))
# Keep the ten leading rows and display them.
evtype_totals_10 <- evtype_totals[seq_len(10), ]
evtype_totals_10
## # A tibble: 10 × 4
## EVTYPE total_fatalities total_injuries total_casualties
## <chr> <dbl> <dbl> <dbl>
## 1 TORNADO 5633 91346 96979
## 2 EXCESSIVE HEAT 1903 6525 8428
## 3 TSTM WIND 504 6957 7461
## 4 FLOOD 470 6789 7259
## 5 LIGHTNING 816 5230 6046
## 6 HEAT 937 2100 3037
## 7 FLASH FLOOD 978 1777 2755
## 8 ICE STORM 89 1975 2064
## 9 THUNDERSTORM WIND 133 1488 1621
## 10 WINTER STORM 206 1321 1527
# Plot the top-10 casualty table without tornado, whose counts would otherwise
# flatten every other bar.
# Tornado is removed by name instead of by row position (the previous `2:10`
# silently assumed it was always row 1). `%in%` is used so a missing EVTYPE
# would be kept rather than dropped by an NA comparison.
long_evtype_totals_10 <- evtype_totals_10 %>%
  filter(!(EVTYPE %in% "TORNADO")) %>%
  pivot_longer(
    cols = c(total_fatalities, total_injuries, total_casualties),
    names_to = "CasualtyType",
    values_to = "CasualtyCount"
  )
# One dodged bar per casualty measure. reorder() ranks event types by the mean
# of their three values, so the x axis runs from most to least harmful.
# The title now states what is drawn: the top-10 event types minus tornado,
# i.e. nine event types (the old title promised ten).
ggplot(
  long_evtype_totals_10,
  aes(x = reorder(EVTYPE, -CasualtyCount), y = CasualtyCount, fill = CasualtyType)
) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(
    title = "Fatalities, Injuries, and Total Casualties for the Top 10 Most Harmful Events (Tornado Excluded)",
    x = "Event Type",
    y = "Casualty Count"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_manual(
    values = c(
      "total_fatalities" = "skyblue",
      "total_injuries" = "lightgreen",
      "total_casualties" = "orange"
    )
  )
# ---- Economic damage: turn (amount, exponent) pairs into dollar values ----
# PROPDMG / CROPDMG hold a number and PROPDMGEXP / CROPDMGEXP hold a magnitude
# code for it. Work on a copy so the loaded table stays untouched.
storm_data2 <- storm_data
unique(storm_data2$PROPDMGEXP)
## [1] "K" "M" NA "B" "m" "+" "0" "5" "6" "4" "2" "3" "h" "7" "H" "-" "1" "8"
unique(storm_data2$CROPDMGEXP)

# Why this was rewritten:
# Blank exponents are loaded as NA (see `na.strings` in the read.csv call), so
# listing "" in an `%in%` whitelist never matched anything. The earlier
# subset() + anti_join() steps therefore deleted every row whose property
# exponent was missing, and then every remaining row whose crop exponent was
# missing. An event with property damage but no crop damage (or the reverse)
# lost its valid damage as well. No rows are removed any more; each damage
# column is converted on its own.

# Convert damage amounts to dollars.
#   amount   numeric vector of recorded damage figures
#   exp_code character vector of magnitude codes, same length as `amount`
# Returns a numeric vector of the same length:
#   * "K"/"k", "M"/"m", "B"/"b" scale by 1e3, 1e6, 1e9;
#   * a missing code leaves the amount as recorded (the old loops' fallback);
#   * any other code ("+", "-", digits, "h", ...) is not one of the magnitudes
#     handled here, so that amount counts as 0. The earlier code excluded such
#     records as well.
# The result contains no NA as long as `amount` has none, so downstream sums
# work without na.rm.
scale_damage <- function(amount, exp_code) {
  known <- c(K = 1e3, M = 1e6, B = 1e9)
  # Look up by name; unknown and missing codes both come back as NA here.
  multiplier <- unname(known[toupper(exp_code)])
  multiplier[is.na(exp_code)] <- 1
  multiplier[is.na(multiplier)] <- 0
  amount * multiplier
}

storm_data2$exact_prop_dmg <- scale_damage(
  storm_data2$PROPDMG,
  storm_data2$PROPDMGEXP
)
storm_data2$exact_crop_dmg <- scale_damage(
  storm_data2$CROPDMG,
  storm_data2$CROPDMGEXP
)
# NOTE: damage totals printed further down in this rendered report were
# produced before this fix (i.e. after the rows were dropped) and must be
# regenerated by re-running the analysis.
# Group once by event type, then total each damage column separately,
# largest total first.
by_event <- group_by(storm_data2, EVTYPE)
prop_dmg_by_event <- by_event %>%
  summarise(total_prop_dmg = sum(exact_prop_dmg)) %>%
  arrange(desc(total_prop_dmg))
crop_dmg_by_event <- by_event %>%
  summarise(total_crop_dmg = sum(exact_crop_dmg)) %>%
  arrange(desc(total_crop_dmg))
# Combine the two summaries on event type and add the overall total.
dmg_merged <- merge(prop_dmg_by_event, crop_dmg_by_event, by = "EVTYPE")
dmg_merged[["total_dmg"]] <- with(dmg_merged, total_prop_dmg + total_crop_dmg)
# Most damaging event types first.
dmg_merged <- dmg_merged[order(-dmg_merged$total_dmg), ]
# Fraction of the total that is property damage, rounded to two decimals.
dmg_merged[["prop_percent"]] <- round(
  dmg_merged$total_prop_dmg / dmg_merged$total_dmg,
  2
)
# Keep the ten leading rows and display them.
top_10_dmg_merged <- dmg_merged[seq_len(10), ]
top_10_dmg_merged
## EVTYPE total_prop_dmg total_crop_dmg total_dmg prop_percent
## 23 FLOOD 132836489050 5170955450 138007444500 0.96
## 61 HURRICANE/TYPHOON 26740295000 2607872800 29348167800 0.91
## 98 TORNADO 16166771690 353376460 16520148150 0.98
## 56 HURRICANE 9716358000 2688910000 12405268000 0.78
## 74 RIVER FLOOD 5079635000 5028734000 10108369000 0.50
## 37 HAIL 7991783690 2028807900 10020591590 0.80
## 19 FLASH FLOOD 7327856080 1387439050 8715295130 0.84
## 63 ICE STORM 903037300 5022110000 5925147300 0.15
## 84 STORM SURGE/TIDE 4640643000 850000 4641493000 1.00
## 88 THUNDERSTORM WIND 3398942440 414705550 3813647990 0.89
# Stack the three damage totals into name/value pairs, one row per
# (event type, damage measure), as ggplot expects.
long_dmg_merged <- pivot_longer(
  top_10_dmg_merged,
  cols = c(total_prop_dmg, total_crop_dmg, total_dmg),
  names_to = "DamageType",
  values_to = "DamageAmount"
)
# Dodged bars per damage measure. reorder() places event types by the mean of
# their three amounts, smallest first.
ggplot(
  data = long_dmg_merged,
  mapping = aes(
    x = reorder(EVTYPE, DamageAmount),
    y = DamageAmount,
    fill = DamageType
  )
) +
  geom_bar(position = "dodge", stat = "identity") +
  labs(
    title = "Comparison of Property, Crop, and Total Damages for Top 10 Most Damaging Events",
    x = "Event Type",
    y = "Damage Amount (in dollars)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  scale_fill_manual(
    values = c(
      "total_prop_dmg" = "skyblue",
      "total_crop_dmg" = "lightgreen",
      "total_dmg" = "orange"
    )
  )
Tornadoes are by far the most harmful weather events in terms of human casualties, including both fatalities and injuries.
After excluding tornadoes, the most harmful events in terms of total casualties (fatalities and injuries combined) are Excessive Heat, Thunderstorm Wind, Flood, and Lightning. These events collectively account for a significant proportion of weather-related health impacts.
Floods cause the most significant economic damage in the United States, with total damages amounting to approximately $150 billion. The majority of this damage is to property.
Hurricanes/Typhoons and Tornadoes also cause substantial economic damages, primarily to property.
Events such as Droughts and River Floods cause significant crop damage, reflecting their impact on agriculture.