Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. With high numbers of fatalities, injuries, and property losses, severe weather has become one of the main concerns for the US government in ensuring people’s safety. This analysis identifies the most harmful storm types using the US NOAA Storm Database (1950 - 2011). The database tracks characteristics of major storms and weather events in the US, including when and where they occur, as well as estimates of fatalities, injuries, and crop and property damage. From our analysis, Tornado is the most harmful with respect to public health, causing a total of 96979 fatalities and injuries. Meanwhile, Flood has the greatest economic consequences, with a total loss of about $150.3 billion. Therefore, understanding the specific impacts of different storm types is essential for improving disaster preparedness and resource allocation, and for reducing both human casualties and economic losses.
# Create the local data directory "peer2" if it does not exist yet
if (!file.exists("peer2")) {
  dir.create("peer2")
}

# Source location of the bzip2-compressed NOAA storm data
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# Download the archive; mode = "wb" writes the file in binary mode
download.file(
  url = fileUrl,
  destfile = "./peer2/StormData.csv.bz2",
  mode = "wb"
)

# List the directory contents to confirm the archive was downloaded
list.files(path = "./peer2")
## [1] "peer2.zip" "StormData.csv.bz2"
# Record the time at which this chunk ran and display it
dateDownloaded <- date()
print(dateDownloaded)
## [1] "Sat May 2 19:10:21 2026"
# Load the storm data straight from the compressed CSV
storm_data <- read.csv(file = "./peer2/StormData.csv.bz2")

# Preview the first rows to check the columns that were read
head(storm_data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 K 0 3040 8812
## 2 K 0 3042 8755
## 3 K 0 3340 8742
## 4 K 0 3458 8626
## 5 K 0 3412 8642
## 6 K 0 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
# Upper-case the event labels so that case variants fall into the same group
# NOTE(review): the printed results list TSTM WIND at row 5 and again at
# row 779, which suggests some labels differ only by leading whitespace --
# consider trimws() here; confirm against the raw data.
storm_data$EVTYPE <- toupper(storm_data$EVTYPE)

# Pull the columns used by the analysis into standalone vectors
event_type <- storm_data[["EVTYPE"]]
fatalities <- storm_data[["FATALITIES"]]
injuries <- storm_data[["INJURIES"]]
prop_dmg <- storm_data[["PROPDMG"]]
crop_dmg <- storm_data[["CROPDMG"]]

# Total fatalities and total injuries per event type
sum_fatalities_event <- tapply(X = fatalities, INDEX = event_type, FUN = sum)
sum_injuries_event <- tapply(X = injuries, INDEX = event_type, FUN = sum)

# One row per event type: label, fatalities, injuries
df_health <- data.frame(
  events = names(sum_fatalities_event),
  fatality = as.numeric(sum_fatalities_event),
  injury = as.numeric(sum_injuries_event)
)

# Combined health impact = fatalities + injuries
df_health$total <- with(df_health, fatality + injury)
head(df_health)
## events fatality injury total
## 1 HIGH SURF ADVISORY 0 0 0
## 2 COASTAL FLOOD 0 0 0
## 3 FLASH FLOOD 0 0 0
## 4 LIGHTNING 0 0 0
## 5 TSTM WIND 0 0 0
## 6 TSTM WIND (G45) 0 0 0
# Rank event types by combined fatalities + injuries, largest first
sorted_health_df <- df_health[with(df_health, order(-total)), ]

# Show the five event types with the largest combined total
sorted_health_df[seq_len(5), ]
## events fatality injury total
## 758 TORNADO 5633 91346 96979
## 116 EXCESSIVE HEAT 1903 6525 8428
## 779 TSTM WIND 504 6957 7461
## 154 FLOOD 470 6789 7259
## 418 LIGHTNING 816 5230 6046
# Convert damage-exponent codes into numeric multipliers.
#
# Vectorised: accepts a single code or a whole column of codes and returns
# one multiplier per element, so it can be called directly on a column
# instead of once per row.
#
# Args:
#   exp: character vector (or factor) of exponent codes.
#          "H"/"h" -> 1e2, "K"/"k" -> 1e3, "M"/"m" -> 1e6, "B"/"b" -> 1e9
#          a single digit d -> 10^d
#          anything else (empty string, other symbols, NA) -> 1
#
# Returns:
#   Unnamed numeric vector with the same length as `exp`.
convert_exp <- function(exp) {
  # Coerce first so that factor input is matched by label, not by level code
  exp <- as.character(exp)

  letter_multipliers <- c(
    H = 1e2, h = 1e2,
    K = 1e3, k = 1e3,
    M = 1e6, m = 1e6,
    B = 1e9, b = 1e9
  )

  # Default multiplier for every unrecognised code
  multiplier <- rep(1, length(exp))

  # Letter codes: exact lookup in the table above
  letter_hit <- match(exp, names(letter_multipliers))
  has_letter <- !is.na(letter_hit)
  multiplier[has_letter] <- letter_multipliers[letter_hit[has_letter]]

  # Single-digit codes are powers of ten
  is_digit <- grepl("^[0-9]$", exp)
  multiplier[is_digit] <- 10^as.numeric(exp[is_digit])

  multiplier
}
# Replace the exponent codes with their numeric multipliers, one per row
storm_data$PROPDMGEXP <- vapply(storm_data$PROPDMGEXP, convert_exp, numeric(1))
storm_data$CROPDMGEXP <- vapply(storm_data$CROPDMGEXP, convert_exp, numeric(1))

# Scale the raw damage figures by their multipliers
prop_dmg <- prop_dmg * storm_data$PROPDMGEXP
crop_dmg <- crop_dmg * storm_data$CROPDMGEXP

# Preview the data with the converted exponent columns
head(storm_data)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE EVTYPE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL TORNADO
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL TORNADO
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL TORNADO
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL TORNADO
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL TORNADO
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL TORNADO
## BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1 0 0 NA
## 2 0 0 NA
## 3 0 0 NA
## 4 0 0 NA
## 5 0 0 NA
## 6 0 0 NA
## END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1 0 14.0 100 3 0 0 15 25.0
## 2 0 2.0 150 2 0 0 0 2.5
## 3 0 0.1 123 2 0 0 2 25.0
## 4 0 0.0 100 2 0 0 2 2.5
## 5 0 0.0 150 2 0 0 2 2.5
## 6 0 1.5 177 2 0 0 6 2.5
## PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1 1000 0 1 3040 8812
## 2 1000 0 1 3042 8755
## 3 1000 0 1 3340 8742
## 4 1000 0 1 3458 8626
## 5 1000 0 1 3412 8642
## 6 1000 0 1 3450 8748
## LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3051 8806 1
## 2 0 0 2
## 3 0 0 3
## 4 0 0 4
## 5 0 0 5
## 6 0 0 6
# Total property damage per event type
sum_prop_dmg_event <- tapply(prop_dmg, event_type, sum)
# Total crop damage per event type
sum_crop_dmg_event <- tapply(crop_dmg, event_type, sum)

# Both aggregates are combined column-wise below, so they must list the
# event types in the same order
stopifnot(identical(names(sum_prop_dmg_event), names(sum_crop_dmg_event)))

# One row per event type: label, property damage, crop damage.
# The labels come from the damage aggregates themselves rather than from the
# unrelated fatalities aggregate, so this frame cannot silently misalign if
# the health summary is ever computed on a different grouping.
df_economic <- data.frame(
  events = names(sum_prop_dmg_event),
  prop_dmg = as.numeric(sum_prop_dmg_event),
  crop_dmg = as.numeric(sum_crop_dmg_event)
)

# Combined economic impact = property damage + crop damage
df_economic$total <- df_economic$prop_dmg + df_economic$crop_dmg
head(df_economic)
## events prop_dmg crop_dmg total
## 1 HIGH SURF ADVISORY 200000 0 200000
## 2 COASTAL FLOOD 0 0 0
## 3 FLASH FLOOD 50000 0 50000
## 4 LIGHTNING 0 0 0
## 5 TSTM WIND 8100000 0 8100000
## 6 TSTM WIND (G45) 8000 0 8000
# Rank event types by combined property + crop damage, largest first
sorted_economic_df <- df_economic[with(df_economic, order(-total)), ]

# Show the five event types with the largest combined damage
sorted_economic_df[seq_len(5), ]
## events prop_dmg crop_dmg total
## 154 FLOOD 144657709807 5661968450 150319678257
## 372 HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 758 TORNADO 56947380677 414953270 57362333947
## 599 STORM SURGE 43323536000 5000 43323541000
## 212 HAIL 15735267513 3025954473 18761221986
# Figure 1: top 5 event types by injuries (left) and by fatalities (right).
# Each panel is ranked by the metric it plots, so the bars match the panel
# title. Taking the top 5 by combined total for both panels would leave out
# event types that rank high on a single metric only.
par(mfrow = c(1, 2), mar = c(10, 4, 3, 1))

# Left panel: five event types with the most injuries, largest first
top_injury <- sorted_health_df[order(-sorted_health_df$injury), ][1:5, ]
barplot(top_injury$injury, names.arg = top_injury$events,
        las = 2, cex.names = 0.6,
        main = "Top 5 Events with Highest Injury",
        xlab = "Events",
        ylab = "Total Cases",
        col = "Red")

# Right panel: five event types with the most fatalities, largest first
top_fatality <- sorted_health_df[order(-sorted_health_df$fatality), ][1:5, ]
barplot(top_fatality$fatality, names.arg = top_fatality$events,
        las = 2, cex.names = 0.6,
        main = "Top 5 Events with Highest Fatality",
        xlab = "Events",
        ylab = "Total Cases",
        col = "Blue")
Figure 1. Top 5 events with the highest numbers of injuries and fatalities.
Tornado is the most harmful event type, with a total of 96979 cases (5633 fatalities and 91346 injuries).
# Figure 2: top 5 event types by property damage (left) and by crop damage
# (right). Each panel is ranked by the metric it plots, so the bars match the
# panel title. The y-axis is labelled as a monetary amount because the values
# are damage totals, not case counts.
par(mfrow = c(1, 2), mar = c(10, 4, 3, 1))

# Left panel: five event types with the most property damage, largest first
top_prop <- sorted_economic_df[order(-sorted_economic_df$prop_dmg), ][1:5, ]
barplot(top_prop$prop_dmg, names.arg = top_prop$events,
        las = 2, cex.names = 0.6,
        main = "Top 5 Events with Highest Property Damage",
        xlab = "Events",
        ylab = "Total Damage (USD)",
        col = "Red")

# Right panel: five event types with the most crop damage, largest first
top_crop <- sorted_economic_df[order(-sorted_economic_df$crop_dmg), ][1:5, ]
barplot(top_crop$crop_dmg, names.arg = top_crop$events,
        las = 2, cex.names = 0.6,
        main = "Top 5 Events with Highest Crop Damage",
        xlab = "Events",
        ylab = "Total Damage (USD)",
        col = "Blue")
Figure 2. Top 5 events with the highest property and crop damage.
Flood has the greatest economic consequences, with a total loss of about $150.3 billion.