Reproducible Research - Damage Analysis of Storm Data

Synopsis

Storm Data is an official publication of the National Oceanic and Atmospheric Administration (NOAA) which documents:

  1. The occurrence of storms and other significant weather phenomena having sufficient intensity to cause loss of life, injuries, significant property damage, and/or disruption to commerce

  2. Rare, unusual, weather phenomena that generate media attention, such as snow flurries in South Florida or the San Diego coastal area

  3. Other significant meteorological events, such as record maximum or minimum temperatures or precipitation that occur in connection with another event.

In this report, the data are aggregated and plotted to answer two questions: (1) which types of events are most harmful with respect to public health, and (2) which types of events have the greatest economic consequences.

Data Processing

Loading data

# Read the bzip2-compressed CSV straight from the working directory
# (read.csv decompresses .bz2 files transparently) and list its columns.
StormData <- read.csv(file = "StormData.csv.bz2")
names(StormData)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

Identify Event Type labels

# Frequency table of the raw event-type labels, one row per distinct label
# (columns Var1 = label, Freq = count), sorted by label.
event_types <- as.data.frame(table(StormData$EVTYPE))
label_order <- order(event_types$Var1)
event_types <- event_types[label_order, ]

Data cleaning

# Collapse the free-text EVTYPE labels into a small set of broad categories.
#
# Each rule is a case-insensitive regular expression (name) and the category
# assigned to every label it matches (value). Rules are applied strictly in
# the order listed, and each rule sees the labels already rewritten by the
# rules before it, so the sequence is significant: e.g. a label containing
# both "wind" and "thunder" ends up as "WIND" because the wind rule runs first.
StormData$EVTYPE <- as.character(StormData$EVTYPE)

evtype_rules <- c(
  # "and" must be a whole word: a bare substring match would also catch
  # single events such as "LANDSLIDE" or "LANDSPOUT" and mislabel them as
  # combined events before the later rules get a chance to classify them.
  "/|&|\\band\\b"                 = "Multiple Event",
  "volc"                          = "Volcano",
  "wind|wnd"                      = "WIND",
  "funnel|tornado"                = "Tornado",
  "glaze"                         = "Glaze",
  "hail"                          = "Hail",
  "dust"                          = "DUST",
  "flood"                         = "FLOOD",
  "ic(e|y)"                       = "Ice",
  "fire|smoke"                    = "FIRE",
  "thunder"                       = "Thunder Storm",
  "slide|eros"                    = "Erosion",
  "rain"                          = "Rain",
  "freez|cold|snow|chill|winter"  = "Cold Weather",
  "TROPICAL.STORM"                = "TROPICAL STORM",
  "heat"                          = "Heat",
  "(hurri|opal)"                  = "Hurricane"
)

for (i in seq_along(evtype_rules)) {
  matched <- grepl(names(evtype_rules)[i], StormData$EVTYPE, ignore.case = TRUE)
  StormData$EVTYPE[matched] <- evtype_rules[[i]]
}

Separate the data

Question 1. Only three columns are required to assess the impact on population health: event type (EVTYPE), fatalities (FATALITIES), and injuries (INJURIES).

# Keep only the columns needed for the public-health question.
health_cols <- c("EVTYPE", "FATALITIES", "INJURIES")
health <- StormData[, health_cols]

Question 2. There are 5 columns that are required to determine the influence on economic consequences

# Keep only the columns needed for the economic question: the damage
# figures and their accompanying exponent codes.
economic_cols <- c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
economic <- StormData[, economic_cols]

Analysing Question 1

Calculate the Health aggregates

# Total fatalities and injuries per event type.
health.totals <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = health,
  FUN = sum,
  na.rm = TRUE
)

# Combined casualty count, used only for ranking.
health.totals$TOTAL <- with(health.totals, FATALITIES + INJURIES)

# Rank by combined casualties (largest first) and keep the first 25 rows.
casualty_rank <- order(-health.totals$TOTAL)
health.totals <- health.totals[casualty_rank[1:25], ]

Find the observations with highest fatalities and highest injuries

Here we keep the event types whose fatality total is above the median of the top 25 event types, and likewise for injuries.

# Event types whose fatality total exceeds the median of the retained rows.
fatality_cutoff <- median(health.totals$FATALITIES)
high_fatalities <- health.totals[which(health.totals$FATALITIES > fatality_cutoff), ]

# Same selection for injuries.
injury_cutoff <- median(health.totals$INJURIES)
high_injuries <- health.totals[which(health.totals$INJURIES > injury_cutoff), ]

Plot

# Two panels side by side; the large bottom margin leaves room for the
# vertically drawn (las = 3) event-type labels.
par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)

barplot(
  height = high_fatalities$FATALITIES,
  names.arg = high_fatalities$EVTYPE,
  main = "Events with Highest Fatalities",
  ylab = "Number of fatalities",
  las = 3,
  col = "wheat3"
)

barplot(
  height = high_injuries$INJURIES,
  names.arg = high_injuries$EVTYPE,
  main = "Events with Highest Injuries",
  ylab = "Number of injuries",
  las = 3,
  col = "wheat3"
)

Analysing Question 2

Economic:

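The PROPDMGEXP (property damage exponent) column has missing and invalid values. The valid codes map to the following multipliers: o (one) = 1, h (hundred) = 100, k (thousand) = 1,000, m (million) = 1,000,000, b (billion) = 1,000,000,000. Missing or invalid codes are treated as o, i.e. a multiplier of 1.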

# Normalise the property-damage exponent codes: anything that is not one of
# the recognised codes (including missing values) is replaced by "O".
prop_codes <- as.character(economic$PROPDMGEXP)
prop_codes[!(prop_codes %in% c("H", "K", "M", "B", "h", "m", "O"))] <- "O"
economic$PROPDMGEXP <- prop_codes

# Translate each code (case-insensitively) into its numeric multiplier.
prop_multipliers <- c(H = 100, K = 1000, M = 1000000, B = 1000000000, O = 1)
economic$PROPDMGMLT <- unname(prop_multipliers[toupper(prop_codes)])

Repeat the above steps for CROPDMGEXP

# Normalise the crop-damage exponent codes: anything that is not one of the
# recognised codes (including missing values) is replaced by "O".
crop_codes <- as.character(economic$CROPDMGEXP)
crop_codes[!(crop_codes %in% c("K", "M", "B", "k", "m", "O"))] <- "O"
economic$CROPDMGEXP <- crop_codes

# Translate each code (case-insensitively) into its numeric multiplier.
crop_multipliers <- c(K = 1000, M = 1000000, B = 1000000000, O = 1)
economic$CROPDMGMLT <- unname(crop_multipliers[toupper(crop_codes)])

Calculate Totals

# PROPDMG / CROPDMG hold only the mantissa of each damage estimate; the real
# dollar amount is the mantissa times the multiplier derived from the
# exponent code (PROPDMGMLT / CROPDMGMLT). Summing the raw mantissas would
# count 5 "K" and 5 "B" as the same amount, so scale first. Amounts are
# expressed in billions of dollars to match the axis labels of the plots.
economic_cost <- transform(
  economic,
  PROPDMG = PROPDMG * PROPDMGMLT / 1e9,
  CROPDMG = CROPDMG * CROPDMGMLT / 1e9
)

# Total damage per event type (column names are kept as PROPDMG / CROPDMG
# so that the ranking and plotting code downstream is unaffected).
propdmg <- aggregate(PROPDMG ~ EVTYPE, data = economic_cost, FUN = sum)
cropdmg <- aggregate(CROPDMG ~ EVTYPE, data = economic_cost, FUN = sum)

Find the observations with highest property damages and highest crop damages

# Keep the eight event types with the largest property-damage totals,
# ordered from largest to smallest.
propdmg <- propdmg[order(-propdmg$PROPDMG)[1:8], ]

# Same ranking for crop damage.
cropdmg <- cropdmg[order(-cropdmg$CROPDMG)[1:8], ]

Plot

# Two panels side by side; the large bottom margin leaves room for the
# vertically drawn (las = 3) event-type labels.
par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)

barplot(
  height = propdmg$PROPDMG,
  names.arg = propdmg$EVTYPE,
  main = "Events with Highest Property Damages",
  ylab = "Damage Cost ($ billions)",
  las = 3,
  col = "wheat3"
)

barplot(
  height = cropdmg$CROPDMG,
  names.arg = cropdmg$EVTYPE,
  main = "Events With Highest Crop Damages",
  ylab = "Damage Cost ($ billions)",
  las = 3,
  col = "wheat3"
)

Result

From the above analysis, it is evident that tornadoes are the most harmful with respect to public health, while floods, wind, and storms have the greatest economic consequences.