Synopsis

This project analyzes the NOAA Storm Database to assess the impact of severe weather events in the United States. We aim to identify the most harmful events to population health and those with the greatest economic consequences. The analysis will involve extracting relevant data on event types, fatalities, injuries, and damage costs, followed by aggregation for quantification. We will visualize the findings using bar charts to clarify the results. The insights gained will aid emergency management agencies and policymakers in prioritizing resources and interventions. Ultimately, this project seeks to enhance preparedness and resilience in communities facing severe weather events.

Data Processing

Download and read data

# Fetch the compressed NOAA storm data once and load it.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
destination_file <- "StormData.csv.bz2"

# Skip the download when the archive is already present, so re-running the
# analysis does not fetch the file again. mode = "wb" keeps the bz2 archive
# intact on platforms that distinguish text and binary transfers; the default
# download method is used so no external curl binary is required.
if (!file.exists(destination_file)) {
  download.file(url, destfile = destination_file, mode = "wb")
}

# read.csv() decompresses .bz2 files transparently.
storm_data_org <- read.csv(destination_file, header = TRUE)

Clean data: convert the BGN_DATE column to the Date type and derive a four-digit YEAR column from it. Remove unnecessary columns, keeping YEAR, EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, and CROPDMGEXP.

# Parse the event start date (stored as "m/d/Y 0:00:00" text) into a Date.
storm_data_org$BGN_DATE <- as.Date(
  storm_data_org$BGN_DATE,
  format = "%m/%d/%Y 0:00:00"
)

# Four-digit calendar year of the event, kept as a numeric column.
storm_data_org$YEAR <- as.numeric(format(storm_data_org$BGN_DATE, "%Y"))

# Keep only the columns used by the health and economic analyses.
kept_columns <- c(
  "YEAR", "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
storm_data <- storm_data_org[, kept_columns]

Clean EVTYPE column to more consistent and readable format.

# First normalisation pass over the free-text EVTYPE labels.
#
# Every sub() call below is case-insensitive, and each pattern is written so
# that a match spans the whole label (it ends in `.*` and starts with `^` or
# `.*`), which means a matching label is replaced entirely by the canonical
# name. The rules run in sequence on the same column, so each rule sees the
# output of the ones before it: do not reorder them without re-checking the
# resulting categories.

# Same eight-column selection as the earlier subsetting step; cleaning starts
# from the EVTYPE values as read from the file.
storm_data <- storm_data_org[,c('YEAR','EVTYPE','FATALITIES','INJURIES', 'PROPDMG', 'PROPDMGEXP', 'CROPDMG', 'CROPDMGEXP')]
# Strip leading whitespace so the `^`-anchored patterns below can match.
storm_data$EVTYPE <- gsub("^\\s+", "", storm_data$EVTYPE)
# Labels containing the fragment anywhere (`.*x.*`) or starting with it (`^x.*`).
storm_data$EVTYPE <- sub(".*AVALA.*", "AVALANCHE", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^astronomical.*", "Astronomical Low Tide", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*blizzard.*", "Blizzard", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^cold.*", "COLD", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^debris.*", "Debris Flow", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^dense fog.*", "Dense Fog", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^dense smoke.*", "Dense Smoke", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*drought.*", "Drought", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^dust devil.*", "Dust Devil", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^dust storm.*", "Dust Storm", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^flood.*", "FLOOD", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^funnel.*", "Funnel Cloud", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^(frost|freeze).*", "Freeze", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^(hail).*", "Hail", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^(heat|hot).*", "Heat", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^heavy rain.*", "Heavy Rain", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^heavy snow.*", "Heavy Snow", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^high surf.*", "High Surf", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^high wind.*", "High Wind", storm_data$EVTYPE,ignore.case = TRUE)
# Typhoons are folded into the Hurricane category.
storm_data$EVTYPE <- sub("^(hurricane|typh).*", "Hurricane", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^ice storm.*", "Ice Storm", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^lightning.*", "Lightning", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^rip.*", "Rip Current", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^seiche.*", "Seiche", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*sleet.*", "Sleet", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^storm surge.*", "Storm Surge", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^thunder.*", "Thunderstorm", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^tornado.*", "Tornado", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^tropical depression.*", "Tropical Depression", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^tropical storm.*", "Tropical Storm", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^tsunami.*", "Tsunami", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^volc.*", "Volcanic Ash", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^waterspout.*", "Waterspout", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^winter storm.*", "Winter Storm", storm_data$EVTYPE,ignore.case = TRUE)
# Labels starting with "summary", "none" or "other" carry no event type.
storm_data$EVTYPE <- sub("^(summary|none|other).*", "unknown", storm_data$EVTYPE,ignore.case = TRUE)

# Second normalisation pass: map the remaining label variants (abbreviations,
# alternative wordings and what look like misspellings such as "ligntn" or
# "tunder") onto the canonical categories. As in the first pass, each rule is
# case-insensitive, replaces the whole label on a match, and operates on the
# output of the rules above it, so the order of these statements matters.
storm_data$EVTYPE <- sub(".*blowing sno.*|^blowing snow.*", "Blizzard", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^(coastal f|lakeshore|cstl|coastalflo|tidal flo|beach flo|coastal s).*|.*tidal flo.*", "Coastal Flood", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*cool.*|^(cool|hypoth|hyperth).*", "COLD", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^(record cold|extreme cold|excessive cold|extended cold|prolong cold|unusually cold|unseasonably cold|extreme/record cold|severe cold|unseasonable cold|RECORD  COLD).*|.*ow temp.*", "COLD", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*slid.*|^landslump", "Debris Flow", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^fog$|.*dense fog.*", "Dense Fog", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^smoke.*", "Dense Smoke", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^dry$|.*dry$|^(warm dry|dry hot|dry p|mild/dry|record dry|mild and dry|dry s|dry w|excessively dry|dry c|dryn|below normal prec|record low ra).*", "Drought", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^(DUST DEVEL|whirlwin).*", "Dust Devil", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^(dustst|blowing du|saharan).*", "Dust Storm", storm_data$EVTYPE,ignore.case = TRUE)
# All inland flood variants (flash, river, urban, stream, dam failure, ...)
# collapse into the single FLOOD category.
storm_data$EVTYPE <- sub("^(flash floo|breakup flood|river flood|urban flood|local flood|stream flood|minor flood|rural flood|major flood|street flood|lake flood|dam f|highway flood|snowmelt flood|dam b).*|.*(street flood|flash flood|small flood|stream flood|urban flood|jam flood|STREAM  FLOOD).*", "FLOOD", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*fld.*", "FLOOD", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*clou.*", "Funnel Cloud", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^(freezing f|ice f|fog and).*", "Freezing Fog", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*(freeze|frost).*|^(freezing r|freezing d|freezing s|light freez).*", "Freeze", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^(glaze|icy).*|^ice$|^(ice j|black ic|ice p|ice ro|patchy ic|ice on r).*", "Freeze", storm_data$EVTYPE,ignore.case = TRUE)
# NOTE(review): labels that merely contain "freezing prec"/"freezing rai"
# (not at the start, which was handled above) end up as Hail - confirm intended.
storm_data$EVTYPE <- sub(".*hail$|.*(freezing prec|freezing rai).*", "Hail", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*(igh temp|warm|hot|heat).*", "Heat", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^(rains|rain a|rain/w|rain d).*|^rain$|^(heavy ra|hvy ra|record ra|excessive ra|torrential rain|wet wea|record/exce|monthly ra|unseasonal ra|early ra|prolonged ra|locally heavy ra).*", "Heavy Rain", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*(ly wet|ve wet|wet mo|wet y).*", "Heavy Rain", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*(vy prec|al prec|ve prec|ly prec|rd prec).*", "Heavy Rain", storm_data$EVTYPE,ignore.case = TRUE)
# Any label still mentioning snow at this point becomes Heavy Snow.
storm_data$EVTYPE <- sub(".*snow.*", "Heavy Snow", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*(surf|wave).*|^(high sea|heavy sea|rough sea|high ti).*", "High Surf", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*remn.*", "Hurricane", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*ICE/S.*", "Ice Storm", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^(lighti|ligntn).*", "Lightning", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*out tid.*", "Rip Current", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*(xed prec|heavy m).*", "Sleet", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^(marine a|marine h|marine m|marine s|coastal stor|coastalst).*", "Marine High Wind", storm_data$EVTYPE,ignore.case = TRUE)
# NOTE(review): any label containing "ero" is filed under Astronomical Low
# Tide - confirm this is the intended category.
storm_data$EVTYPE <- sub(".*ero.*", "Astronomical Low Tide", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*(thu|metro).*|^(tstm|tunder).*", "Thunderstorm", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*nad.*", "Tornado", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^water.*|^wayt.*", "Waterspout", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^dri.*", "Drought", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*fir.*", "Wildfire", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*micro.*|^micro.*|.*mico.*", "Thunderstorm", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^marine tstm.*", "Marine Thunderstorm", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^wind.*", "High Wind", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^vog.*", "Volcanic Ash", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*stre.*|^urban.*", "FLOOD", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*bur.*", "Thunderstorm", storm_data$EVTYPE,ignore.case = TRUE)
# Remaining wind-related labels, including any that still contain "tstm".
storm_data$EVTYPE <- sub("^gust.*|^strong.*|.*tstm.*|^high wind.*|.*winds.*|^wnd$|^gra.*|.*(wind dam|TURBULENCE).*", "High Wind", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*chill.*", "COLD", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*low wind.*", "Thunderstorm", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*swel.*", "High Surf", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*show.*|^rain.*", "Heavy Rain", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*(surge|rising wat|high wa).*", "FLOOD", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^(winter m|winter w|WINTRY M|WINTERY M).*", "Winter Weather", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^drow.*", "Rip Current", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub(".*landspout.*", "Tornado", storm_data$EVTYPE,ignore.case = TRUE)
storm_data$EVTYPE <- sub("^torn.*", "Tornado", storm_data$EVTYPE,ignore.case = TRUE)
# Catch-all: whatever still contains one of these fragments, or is exactly
# "high"/"excessive", is treated as an unclassifiable label.
storm_data$EVTYPE <- sub(".*(record|sever|northern|red|temp|county|SOUTHEAST|patt).*|^(high|EXCESSIVE)$", "unknown", storm_data$EVTYPE,ignore.case = TRUE)
# Replace every literal "?" with the text "unknown".
storm_data$EVTYPE <- gsub("\\?", "unknown", storm_data$EVTYPE, ignore.case = TRUE)

# The replacement names above use mixed casing ("COLD", "Heat", "unknown");
# upper-casing makes each category a single consistent label.
storm_data$EVTYPE<- toupper(storm_data$EVTYPE)
# List the distinct categories left after cleaning.
unique(storm_data$EVTYPE)
##  [1] "TORNADO"               "THUNDERSTORM"          "HAIL"                 
##  [4] "FREEZE"                "HEAVY SNOW"            "ICE STORM"            
##  [7] "WINTER STORM"          "HURRICANE"             "COLD"                 
## [10] "HEAVY RAIN"            "LIGHTNING"             "DENSE FOG"            
## [13] "RIP CURRENT"           "FLOOD"                 "HIGH WIND"            
## [16] "FUNNEL CLOUD"          "HEAT"                  "WATERSPOUT"           
## [19] "BLIZZARD"              "COASTAL FLOOD"         "HIGH SURF"            
## [22] "UNKNOWN"               "AVALANCHE"             "MARINE HIGH WIND"     
## [25] "DUST STORM"            "SLEET"                 "DUST DEVIL"           
## [28] "WILDFIRE"              "DEBRIS FLOW"           "DROUGHT"              
## [31] "WINTER WEATHER"        "TROPICAL STORM"        "FREEZING FOG"         
## [34] "ASTRONOMICAL LOW TIDE" "VOLCANIC ASH"          "SEICHE"               
## [37] "TROPICAL DEPRESSION"   "DENSE SMOKE"           "MARINE THUNDERSTORM"  
## [40] "TSUNAMI"

PROPDMG and CROPDMG represent the amount of property damage and crop damage, respectively, without any currency units.

PROPDMGEXP and CROPDMGEXP are indicators that express the magnitude of the above amounts as powers of 10.

Create new columns PROPAMT and CROPAMT that hold the complete damage amounts.

# Convert the recorded damage figures into absolute amounts.
#
# PROPDMGEXP / CROPDMGEXP hold a magnitude letter for the matching *DMG
# column: H = hundreds, K = thousands, M = millions, B = billions. Any other
# code (blank, digits, symbols) leaves the recorded value unscaled, exactly
# as before.
#
# Written in base R so this step does not rely on dplyr already being
# attached when it runs (the library() calls appear further down), and so
# the property and crop columns share one implementation.

# Scale `amount` by the multiplier its magnitude code stands for.
#   amount:         numeric vector of recorded damage values.
#   magnitude_code: vector of magnitude letters, same length as `amount`.
# Returns a numeric vector the same length as `amount`.
scale_damage <- function(amount, magnitude_code) {
  multipliers <- c(H = 100, K = 1000, M = 10^6, B = 10^9)
  scale_by <- unname(multipliers[toupper(as.character(magnitude_code))])
  # Unrecognised or missing codes: keep the amount as recorded.
  scale_by[is.na(scale_by)] <- 1
  amount * scale_by
}

storm_data$PROPDMGEXP <- toupper(storm_data$PROPDMGEXP)
storm_data$CROPDMGEXP <- toupper(storm_data$CROPDMGEXP)

eco_data <- storm_data
eco_data$PROPAMT <- scale_damage(eco_data$PROPDMG, eco_data$PROPDMGEXP)
eco_data$CROPAMT <- scale_damage(eco_data$CROPDMG, eco_data$CROPDMGEXP)

Result

The most harmful event type with respect to population health across the US

library(ggplot2)
library(dplyr)
library(tidyr)

# Population-health impact by event type.
#
# Only events with at least one injury or fatality are kept. The data are
# split at 1993 and each period is summarised and plotted separately.
health_data <- subset(storm_data, INJURIES != 0 | FATALITIES != 0)

clean1 <- subset(health_data, YEAR < 1993)
clean2 <- subset(health_data, YEAR > 1992)

# Total injuries and fatalities per event type for one period.
sum_health_by_event <- function(events) {
  events %>%
    group_by(EVTYPE) %>%
    summarize(
      total_injury = sum(INJURIES),
      total_fatality = sum(FATALITIES)
    )
}

# Long form of the per-event totals: one row per event type and measure,
# which is the shape the stacked bar chart needs.
health_totals_long <- function(totals) {
  pivot_longer(
    totals,
    cols = c(total_injury, total_fatality),
    names_to = "type",
    values_to = "count"
  )
}

# Stacked bar chart of injuries and fatalities per event type.
# `period` is the year range shown in the plot title, e.g. "1950-1992".
plot_health_by_event <- function(long_totals, period) {
  ggplot(long_totals, aes(x = EVTYPE, y = count, fill = type)) +
    geom_col(position = "stack") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
    labs(
      title = paste0("Injuries and Fatalities by Event Type (", period, ")"),
      x = "Event Type",
      y = "Count",
      fill = "Type"
    )
}

sumbyevent1 <- sum_health_by_event(clean1)
pivot_sum1 <- health_totals_long(sumbyevent1)
plot_health_by_event(pivot_sum1, "1950-1992")

sumbyevent2 <- sum_health_by_event(clean2)
pivot_sum2 <- health_totals_long(sumbyevent2)
plot_health_by_event(pivot_sum2, "1993-2011")

# Injury and fatality totals for heat and tornado events from 1993 onwards.
heat2 <- clean2[which(clean2$EVTYPE == "HEAT"), , drop = FALSE]
tornado2 <- clean2[which(clean2$EVTYPE == "TORNADO"), , drop = FALSE]
with(heat2, sum(INJURIES))
## [1] 8849
with(heat2, sum(FATALITIES))
## [1] 2995
with(tornado2, sum(INJURIES))
## [1] 23371
with(tornado2, sum(FATALITIES))
## [1] 1649

The data are separated into two groups by year because records before 1993 contain only “TORNADO”, “THUNDERSTORM”, and “HAIL” events. From 1950 to 1992, tornadoes caused the most injuries and fatalities. From 1993 to 2011, tornadoes caused the most injuries and fatalities combined, with 23371 injuries and 1649 fatalities, while heat caused the most fatalities (2995).

The event with greatest economic consequences across US

# Economic damage by event type, split at 1993 like the health analysis.
eco_data1 <- eco_data[which(eco_data$YEAR < 1993), , drop = FALSE]
eco_data2 <- eco_data[which(eco_data$YEAR > 1992), , drop = FALSE]

# Property and crop damage totals per event type, 1950-1992.
eco_sum1 <- summarize(
  group_by(eco_data1, EVTYPE),
  total_prop = sum(PROPAMT),
  total_crop = sum(CROPAMT)
)

pivot_sum_eco1 <- eco_sum1 %>%
  pivot_longer(
    cols = c(total_prop, total_crop),
    names_to = "type",
    values_to = "count"
  )

# Property and crop damage totals per event type, 1993-2011.
eco_sum2 <- summarize(
  group_by(eco_data2, EVTYPE),
  total_prop = sum(PROPAMT),
  total_crop = sum(CROPAMT)
)

pivot_sum_eco2 <- eco_sum2 %>%
  pivot_longer(
    cols = c(total_prop, total_crop),
    names_to = "type",
    values_to = "count"
  )

# Stacked bars: one bar per event type, split into property and crop damage.
ggplot(pivot_sum_eco2, aes(x = EVTYPE, y = count, fill = type)) +
  geom_bar(position = "stack", stat = "identity") +
  labs(
    title = "Economic damage by Event Type (1993-2011)",
    x = "Event Type",
    y = "Count",
    fill = "Type"
  ) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Damage totals for individual event types.
# Hail, thunderstorm and tornado come from the pre-1993 records; drought and
# flood come from the records from 1993 onwards.
hail_dmg <- eco_data1[which(eco_data1$EVTYPE == "HAIL"), , drop = FALSE]
TSTM_dmg <- eco_data1[which(eco_data1$EVTYPE == "THUNDERSTORM"), , drop = FALSE]
tornado_dmg <- eco_data1[which(eco_data1$EVTYPE == "TORNADO"), , drop = FALSE]
dry_dmg <- eco_data2[which(eco_data2$EVTYPE == "DROUGHT"), , drop = FALSE]
FLOOD_dmg <- eco_data2[which(eco_data2$EVTYPE == "FLOOD"), , drop = FALSE]

print("HAIL prop damage(1950-1992) is")
## [1] "HAIL prop damage(1950-1992) is"
with(hail_dmg, sum(PROPAMT))
## [1] 0
print("HAIL crop damage(1950-1992) is")
## [1] "HAIL crop damage(1950-1992) is"
with(hail_dmg, sum(CROPAMT))
## [1] 0
print("TSTM prop damage(1950-1992) is")
## [1] "TSTM prop damage(1950-1992) is"
with(TSTM_dmg, sum(PROPAMT))
## [1] 0
print("TSTM crop damage(1950-1992) is")
## [1] "TSTM crop damage(1950-1992) is"
with(TSTM_dmg, sum(CROPAMT))
## [1] 0
# Totals labelled "(in billion)" are divided by 1e9 before printing.
print("TORNADO prop damage(1950-1992) (in billion) is")
## [1] "TORNADO prop damage(1950-1992) (in billion) is"
with(tornado_dmg, sum(PROPAMT)) / 1e9
## [1] 30.5982
print("TORNADO crop damage(1950-1992) is")
## [1] "TORNADO crop damage(1950-1992) is"
with(tornado_dmg, sum(CROPAMT))
## [1] 0
print("DROUGHT crop damage(1993-2011) (in billion) is")
## [1] "DROUGHT crop damage(1993-2011) (in billion) is"
with(dry_dmg, sum(CROPAMT)) / 1e9
## [1] 13.97262
print("FLOOD crop damage(1993-2011) (in billion)is")
## [1] "FLOOD crop damage(1993-2011) (in billion)is"
with(FLOOD_dmg, sum(CROPAMT)) / 1e9
## [1] 12.38899
print("FLOOD prop damage(1993-2011) (in billion)is")
## [1] "FLOOD prop damage(1993-2011) (in billion)is"
with(FLOOD_dmg, sum(PROPAMT)) / 1e9
## [1] 215.0909

The data are separated into two groups by year because records before 1993 contain only “TORNADO”, “THUNDERSTORM”, and “HAIL” events. From 1950 to 1992, tornadoes caused the most economic damage (0 crop damage and 30.6 billion in property damage), while neither thunderstorms nor hail caused any recorded damage. From 1993 to 2011, drought caused the most crop damage (14 billion), and floods caused the most economic damage in total, with 12.4 billion in crop damage and 215.1 billion in property damage.