Storm Data is an official publication of the National Oceanic and Atmospheric Administration (NOAA) which documents:
The occurrence of storms and other significant weather phenomena having sufficient intensity to cause loss of life, injuries, significant property damage, and/or disruption to commerce
Rare or unusual weather phenomena that generate media attention, such as snow flurries in South Florida or the San Diego coastal area
Other significant meteorological events, such as record maximum or minimum temperatures or precipitation that occur in connection with another event.
In this report, the data are summarised and plotted to answer two questions: 1. which types of events are most harmful with respect to public health, and 2. which types of events have the greatest economic consequences.
# Load the bzip2-compressed Storm Data CSV into a data frame.
StormData <- read.csv(file = "StormData.csv.bz2")
# List the available column names.
names(StormData)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Tabulate the raw event types, ordered by Var1 (kept for inspection).
event_types <- as.data.frame(table(StormData$EVTYPE))
event_types <- event_types[order(event_types$Var1), ]

# Collapse the free-text EVTYPE labels into a small set of broad categories.
StormData$EVTYPE <- as.character(StormData$EVTYPE)

# Each rule maps a case-insensitive regular expression to the category that
# replaces every label it matches. Rules run top to bottom and later rules
# see the output of earlier ones, so the order matters.
event_rules <- c(
  # "and" is anchored on word boundaries so that a label that merely
  # contains those letters (such as "LANDSLIDE") is not classed as a
  # combined event and can still reach its own rule further down.
  "/|&|\\band\\b"                = "Multiple Event",
  "volc"                         = "Volcano",
  "wind|wnd"                     = "WIND",
  "funnel|tornado"               = "Tornado",
  "glaze"                        = "Glaze",
  "hail"                         = "Hail",
  "dust"                         = "DUST",
  "flood"                        = "FLOOD",
  "ic(e|y)"                      = "Ice",
  "fire|smoke"                   = "FIRE",
  "thunder"                      = "Thunder Storm",
  "slide|eros"                   = "Erosion",
  "rain"                         = "Rain",
  "freez|cold|snow|chill|winter" = "Cold Weather",
  "TROPICAL.STORM"               = "TROPICAL STORM",
  "heat"                         = "Heat",
  "(hurri|opal)"                 = "Hurricane"
)
for (rule in names(event_rules)) {
  matched <- grepl(rule, StormData$EVTYPE, ignore.case = TRUE)
  StormData$EVTYPE[matched] <- event_rules[[rule]]
}
Question 1. Only three columns are required to find out about the impact on population health: event type, fatalities and injuries.
# Keep only the columns needed for the public-health question.
health <- StormData[, c("EVTYPE", "FATALITIES", "INJURIES"), drop = FALSE]
Question 2. Five columns are required to assess the economic consequences: event type, property damage and its exponent, and crop damage and its exponent.
# Keep only the columns needed for the economic-consequences question.
economic <- StormData[
  ,
  c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"),
  drop = FALSE
]
# Sum fatalities and injuries per event type.
health.totals <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = health,
  FUN = sum,
  na.rm = TRUE
)
# Combined casualty count used for ranking.
health.totals$TOTAL <- with(health.totals, FATALITIES + INJURIES)
# Rank by combined casualties, largest first, and keep the first 25 rows.
ranking <- order(-health.totals$TOTAL)
health.totals <- health.totals[ranking[1:25], ]
Here we keep the event types whose fatalities (and, separately, injuries) are above the median of the 25 event types retained above.
# Event types whose fatality count is above the median of health.totals.
fatality_cutoff <- median(health.totals$FATALITIES)
high_fatalities <- health.totals[
  which(health.totals$FATALITIES > fatality_cutoff),
]
# Event types whose injury count is above the median of health.totals.
injury_cutoff <- median(health.totals$INJURIES)
high_injuries <- health.totals[
  which(health.totals$INJURIES > injury_cutoff),
]

# Two panels side by side; the large bottom margin leaves room for the
# vertical event-type labels.
par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)
barplot(
  height = high_fatalities$FATALITIES,
  names.arg = high_fatalities$EVTYPE,
  las = 3,
  main = "Events with Highest Fatalities",
  ylab = "Number of fatalities",
  col = "wheat3"
)
barplot(
  height = high_injuries$INJURIES,
  names.arg = high_injuries$EVTYPE,
  las = 3,
  main = "Events with Highest Injuries",
  ylab = "Number of injuries",
  col = "wheat3"
)
Economic:
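The PROPDMGEXP (property damage exponent) column has missing and invalid values, which are treated here as "O" (a multiplier of 1). The recognised levels and their multipliers are: 1. O (one) = 1 2. H (hundred) = 100 3. K (thousand) = 1,000 4. M (million) = 1,000,000 5. B (billion) = 1,000,000,000. The resulting multiplier is stored in PROPDMGMLT.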
# Normalise the property-damage exponent: any value outside the recognised
# codes (including missing values) is replaced by "O".
prop_codes <- c("H", "K", "M", "B", "h", "m", "O")
prop_exp <- as.character(economic$PROPDMGEXP)
prop_exp[!(prop_exp %in% prop_codes)] <- "O"
economic$PROPDMGEXP <- prop_exp

# Translate each code, ignoring case, into its numeric multiplier.
prop_scale <- c(H = 100, K = 1000, M = 1000000, B = 1000000000, O = 1)
economic$PROPDMGMLT <- unname(prop_scale[toupper(prop_exp)])
Repeat the above steps for CROPDMGEXP (the crop damage exponent), storing the multiplier in CROPDMGMLT.
# Normalise the crop-damage exponent: any value outside the recognised
# codes (including missing values) is replaced by "O".
crop_codes <- c("K", "M", "B", "k", "m", "O")
crop_exp <- as.character(economic$CROPDMGEXP)
crop_exp[!(crop_exp %in% crop_codes)] <- "O"
economic$CROPDMGEXP <- crop_exp

# Translate each code, ignoring case, into its numeric multiplier.
crop_scale <- c(K = 1000, M = 1000000, B = 1000000000, O = 1)
economic$CROPDMGMLT <- unname(crop_scale[toupper(crop_exp)])
# Convert the recorded damage figures to dollars with the multipliers
# derived from the exponent columns, then express them in billions of
# dollars to match the axis labels. Summing the raw PROPDMG / CROPDMG
# values would add together figures recorded in different magnitudes
# (hundreds, thousands, millions, billions).
damage <- data.frame(
  EVTYPE = economic$EVTYPE,
  PROPDMG = economic$PROPDMG * economic$PROPDMGMLT / 1e9,
  CROPDMG = economic$CROPDMG * economic$CROPDMGMLT / 1e9,
  stringsAsFactors = FALSE
)
propdmg <- aggregate(PROPDMG ~ EVTYPE, data = damage, FUN = sum)
cropdmg <- aggregate(CROPDMG ~ EVTYPE, data = damage, FUN = sum)
# Finding events with highest property damage
# (head() returns at most 8 rows and never pads with NA rows)
propdmg <- head(propdmg[order(-propdmg$PROPDMG), ], 8)
# Finding events with highest crop damage
cropdmg <- head(cropdmg[order(-cropdmg$CROPDMG), ], 8)

# Two panels side by side; the large bottom margin leaves room for the
# vertical event-type labels.
par(mfrow = c(1, 2), mar = c(12, 4, 3, 2), mgp = c(3, 1, 0), cex = 0.8)
barplot(propdmg$PROPDMG, las = 3, names.arg = propdmg$EVTYPE,
        main = "Events with Highest Property Damages",
        ylab = "Damage Cost ($ billions)",
        col = "wheat3")
barplot(cropdmg$CROPDMG, las = 3, names.arg = cropdmg$EVTYPE,
        main = "Events With Highest Crop Damages",
        ylab = "Damage Cost ($ billions)",
        col = "wheat3")
From the above analysis, it is evident that tornadoes are the most harmful with respect to public health, while floods, wind and storms have the greatest economic consequences.