This report analyzes the NOAA storm database to identify the most harmful weather events in terms of public health impact and economic consequences. The analysis focuses on the number of fatalities, injuries, and financial damages caused by different weather/natural event types.
The dataset was loaded, and key variables such as property and crop damage were transformed to obtain real economic values. Fatalities and injuries were summed for each event type to determine public health impact.
# Inspect the dataset: print a compact summary of storm_data, listing each
# column's name, type, and first few values.
str(storm_data)
## Classes 'data.table' and 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
## - attr(*, ".internal.selfref")=<externalptr>
# Preview the first rows of storm_data to see the raw values in each column.
head(storm_data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## <num> <char> <char> <char> <num> <char> <char>
## 1: 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2: 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3: 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4: 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5: 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6: 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## <char> <num> <char> <char> <char> <char> <num> <lgcl>
## 1: TORNADO 0 0 NA
## 2: TORNADO 0 0 NA
## 3: TORNADO 0 0 NA
## 4: TORNADO 0 0 NA
## 5: TORNADO 0 0 NA
## 6: TORNADO 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES
## <num> <char> <char> <num> <num> <int> <num> <num> <num>
## 1: 0 14.0 100 3 0 0 15
## 2: 0 2.0 150 2 0 0 0
## 3: 0 0.1 123 2 0 0 2
## 4: 0 0.0 100 2 0 0 2
## 5: 0 0.0 150 2 0 0 2
## 6: 0 1.5 177 2 0 0 6
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE
## <num> <char> <num> <char> <char> <char> <char> <num>
## 1: 25.0 K 0 3040
## 2: 2.5 K 0 3042
## 3: 25.0 K 0 3340
## 4: 2.5 K 0 3458
## 5: 2.5 K 0 3412
## 6: 2.5 K 0 3450
## LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## <num> <num> <num> <char> <num>
## 1: 8812 3051 8806 1
## 2: 8755 0 0 2
## 3: 8742 0 0 3
## 4: 8626 0 0 4
## 5: 8642 0 0 5
## 6: 8748 0 0 6
# Convert damage exponent codes into numeric multipliers.
#
# Args:
#   exp: Vector of exponent codes (e.g. the PROPDMGEXP / CROPDMGEXP columns).
#
# Returns:
#   A numeric vector the same length as `exp`: 100 for "H"/"h", 1000 for
#   "K"/"k", 1e6 for "M"/"m", 1e9 for "B"/"b", and 1 for every other value
#   (including "" and NA).
#
# NOTE(review): every unrecognised code falls back to a multiplier of 1;
# confirm that this is the intended treatment for all codes in the raw data.
convert_exp <- function(exp) {
  codes <- c("H", "h", "K", "k", "M", "m", "B", "b")
  multipliers <- c(100, 100, 1e3, 1e3, 1e6, 1e6, 1e9, 1e9)

  # A table lookup (instead of nested ifelse() calls) always yields a numeric
  # vector, even for zero-length input, where ifelse() would return logical(0).
  result <- multipliers[match(exp, codes)]

  # Codes missing from the table (match() gave NA) get the neutral multiplier.
  result[is.na(result)] <- 1
  result
}
# Property damage: swap the exponent code for its numeric multiplier, then
# scale the reported figure by it to get the actual damage value.
storm_data$PROPDMGEXP <- convert_exp(storm_data$PROPDMGEXP)
storm_data$PROPDMGVAL <- with(storm_data, PROPDMG * PROPDMGEXP)

# Crop damage: same two steps on the crop columns.
storm_data$CROPDMGEXP <- convert_exp(storm_data$CROPDMGEXP)
storm_data$CROPDMGVAL <- with(storm_data, CROPDMG * CROPDMGEXP)
# Aggregate fatalities and injuries by event type, sorted from the highest to
# the lowest total fatalities.
health_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Fatalities = sum(FATALITIES, na.rm = TRUE),
    Total_Injuries = sum(INJURIES, na.rm = TRUE)
  ) %>%
  arrange(desc(Total_Fatalities))

# Top 10 most harmful events, ranked by total fatalities. slice_max() replaces
# the superseded top_n(); like top_n() it keeps ties, so more than 10 rows can
# be returned when event types tie at the cut-off.
top_health_events <- health_impact %>%
  slice_max(Total_Fatalities, n = 10)

# Print top harmful events to health
print(top_health_events)
## # A tibble: 10 × 3
## EVTYPE Total_Fatalities Total_Injuries
## <chr> <dbl> <dbl>
## 1 TORNADO 5633 91346
## 2 EXCESSIVE HEAT 1903 6525
## 3 FLASH FLOOD 978 1777
## 4 HEAT 937 2100
## 5 LIGHTNING 816 5230
## 6 TSTM WIND 504 6957
## 7 FLOOD 470 6789
## 8 RIP CURRENT 368 232
## 9 HIGH WIND 248 1137
## 10 AVALANCHE 224 170
# Aggregate economic impact by event type: total property damage, total crop
# damage, and their sum, sorted from the largest to the smallest combined loss.
economic_impact <- storm_data %>%
  group_by(EVTYPE) %>%
  summarise(
    Total_Property_Damage = sum(PROPDMGVAL, na.rm = TRUE),
    Total_Crop_Damage = sum(CROPDMGVAL, na.rm = TRUE),
    # summarise() evaluates its arguments in order, so the two totals defined
    # just above are already available here.
    Total_Economic_Loss = Total_Property_Damage + Total_Crop_Damage
  ) %>%
  arrange(desc(Total_Economic_Loss))

# Top 10 costliest events. slice_max() replaces the superseded top_n(); like
# top_n() it keeps ties, so more than 10 rows can be returned when event types
# tie at the cut-off.
top_economic_events <- economic_impact %>%
  slice_max(Total_Economic_Loss, n = 10)

print(top_economic_events)
## # A tibble: 10 × 4
## EVTYPE Total_Property_Damage Total_Crop_Damage Total_Economic_Loss
## <chr> <dbl> <dbl> <dbl>
## 1 FLOOD 144657709807 5661968450 150319678257
## 2 HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 3 TORNADO 56937160779. 414953270 57352114049.
## 4 STORM SURGE 43323536000 5000 43323541000
## 5 HAIL 15732267543. 3025954473 18758222016.
## 6 FLASH FLOOD 16140812067. 1421317100 17562129167.
## 7 DROUGHT 1046106000 13972566000 15018672000
## 8 HURRICANE 11868319010 2741910000 14610229010
## 9 RIVER FLOOD 5118945500 5029459000 10148404500
## 10 ICE STORM 3944927860 5022113500 8967041360
# Bar chart of total fatalities for the top event types, highest bar first.
ggplot(
  top_health_events,
  aes(x = reorder(EVTYPE, -Total_Fatalities), y = Total_Fatalities)
) +
  geom_col(fill = "blue") +
  labs(
    title = "Top 10 Weather Events Causing Fatalities",
    x = "Event Type",
    y = "Total Fatalities"
  ) +
  # Rotate the x-axis labels so the event names stay readable.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Bar chart of combined property and crop damage for the top event types,
# highest bar first; the y-axis is rescaled from dollars to billions.
ggplot(
  top_economic_events,
  aes(
    x = reorder(EVTYPE, -Total_Economic_Loss),
    y = Total_Economic_Loss / 1e9
  )
) +
  geom_col(fill = "green") +
  labs(
    title = "Top 10 Weather Events by Economic Damage",
    x = "Event Type",
    y = "Total Economic Damage (in Billion USD)"
  ) +
  # Rotate the x-axis labels so the event names stay readable.
  theme(axis.text.x = element_text(angle = 45, hjust = 1))