Synopsis

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database (start in the year 1950 and end in November 2011). This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

Data Processing

# Loading data
data <- read.csv("./StormData.csv.bz2")
dim(data)
## [1] 902297     37
head(data)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
## 4 TORNADO         0                                               0
## 5 TORNADO         0                                               0
## 6 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
## 4         NA         0                       0.0   100 2   0          0
## 5         NA         0                       0.0   150 2   0          0
## 6         NA         0                       1.5   177 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
## 4        2     2.5          K       0                                    
## 5        2     2.5          K       0                                    
## 6        6     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3
## 4     3458      8626          0          0              4
## 5     3412      8642          0          0              5
## 6     3450      8748          0          0              6
# Checking for Missing values
sapply(data , function(x) sum(is.na(x)))
##    STATE__   BGN_DATE   BGN_TIME  TIME_ZONE     COUNTY COUNTYNAME 
##          0          0          0          0          0          0 
##      STATE     EVTYPE  BGN_RANGE    BGN_AZI BGN_LOCATI   END_DATE 
##          0          0          0          0          0          0 
##   END_TIME COUNTY_END COUNTYENDN  END_RANGE    END_AZI END_LOCATI 
##          0          0     902297          0          0          0 
##     LENGTH      WIDTH          F        MAG FATALITIES   INJURIES 
##          0          0     843563          0          0          0 
##    PROPDMG PROPDMGEXP    CROPDMG CROPDMGEXP        WFO STATEOFFIC 
##          0          0          0          0          0          0 
##  ZONENAMES   LATITUDE  LONGITUDE LATITUDE_E LONGITUDE_    REMARKS 
##          0         47          0         40          0          0 
##     REFNUM 
##          0

Alphabetical characters are used to signify magnitude of property damage and crop damage that include “K” for thousands, “M” for millions, and “B” for billions.So we have to convert them into numericals

# Converting property damage and crop damage
unique(data$PROPDMGEXP)
##  [1] K M   B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels:  - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
data$PROPEXP[data$PROPDMGEXP == "K"] <- 1000
data$PROPEXP[data$PROPDMGEXP == "M"] <- 1e+06
data$PROPEXP[data$PROPDMGEXP == ""]  <- 1
data$PROPEXP[data$PROPDMGEXP == "B"] <- 1e+09
data$PROPEXP[data$PROPDMGEXP == "m"] <- 1e+06
data$PROPEXP[data$PROPDMGEXP == "0"] <- 1
data$PROPEXP[data$PROPDMGEXP == "5"] <- 1e+05
data$PROPEXP[data$PROPDMGEXP == "6"] <- 1e+06
data$PROPEXP[data$PROPDMGEXP == "4"] <- 10000
data$PROPEXP[data$PROPDMGEXP == "2"] <- 100
data$PROPEXP[data$PROPDMGEXP == "3"] <- 1000
data$PROPEXP[data$PROPDMGEXP == "h"] <- 100
data$PROPEXP[data$PROPDMGEXP == "7"] <- 1e+07
data$PROPEXP[data$PROPDMGEXP == "H"] <- 100
data$PROPEXP[data$PROPDMGEXP == "1"] <- 10
data$PROPEXP[data$PROPDMGEXP == "8"] <- 1e+08
data$PROPEXP[data$PROPDMGEXP == "+"] <- 0
data$PROPEXP[data$PROPDMGEXP == "-"] <- 0
data$PROPEXP[data$PROPDMGEXP == "?"] <- 0
data$PROPDMGVAL <- data$PROPDMG * data$PROPEXP
head(data$PROPDMGVAL)
## [1] 25000  2500 25000  2500  2500  2500
# Similarly converting crop damage
unique(data$CROPDMGEXP)
## [1]   M K m B ? 0 k 2
## Levels:  ? 0 2 B k K m M
data$CROPEXP[data$CROPDMGEXP == "M"] <- 1e+06
data$CROPEXP[data$CROPDMGEXP == "K"] <- 1000
data$CROPEXP[data$CROPDMGEXP == "m"] <- 1e+06
data$CROPEXP[data$CROPDMGEXP == "B"] <- 1e+09
data$CROPEXP[data$CROPDMGEXP == "0"] <- 1
data$CROPEXP[data$CROPDMGEXP == "k"] <- 1000
data$CROPEXP[data$CROPDMGEXP == "2"] <- 100
data$CROPEXP[data$CROPDMGEXP == ""] <- 1
data$CROPEXP[data$CROPDMGEXP == "?"] <- 0
data$CROPDMGVAL <- data$CROPDMG * data$CROPEXP
head(data$CROPDMGVAL)
## [1] 0 0 0 0 0 0

Answering the given questions

1.Across the United States, which types of events (as indicated in the “EVTYPE”) are most harmful with respect to population health?

Aggregating and Sorting

fatalities <- aggregate(FATALITIES ~ EVTYPE, data=data, FUN = sum)
fatalities <- fatalities[order(fatalities$FATALITIES, decreasing=TRUE),]
injuries <- aggregate(INJURIES ~ EVTYPE, data=data, FUN = sum)
injuries <- injuries[order(injuries$INJURIES, decreasing=TRUE),]
head(fatalities)
##             EVTYPE FATALITIES
## 834        TORNADO       5633
## 130 EXCESSIVE HEAT       1903
## 153    FLASH FLOOD        978
## 275           HEAT        937
## 464      LIGHTNING        816
## 856      TSTM WIND        504
head(injuries)
##             EVTYPE INJURIES
## 834        TORNADO    91346
## 856      TSTM WIND     6957
## 170          FLOOD     6789
## 130 EXCESSIVE HEAT     6525
## 464      LIGHTNING     5230
## 275           HEAT     2100

Plot 1

par(mfrow=c(1,2))
p1 <- barplot (height = fatalities$FATALITIES[1:10], names.arg = fatalities$EVTYPE[1:10], las = 2, cex.names= 0.6, col = "light blue" , main = "Fatalities Count vs Event Type")

p2 <- barplot (height = injuries$INJURIES[1:10], names.arg = injuries$EVTYPE[1:10], las = 2, cex.names= 0.6, col = "light blue" , main = "Injuries Count vs Event Type")

2.Across the United States, which types of events have the greatest economic consequences?

Aggregating and Sorting

property <- aggregate(PROPDMGVAL ~ EVTYPE, data, FUN = sum)
property <- property[order(property$PROPDMGVAL, decreasing=TRUE),]
crop <- aggregate(CROPDMGVAL ~ EVTYPE, data, FUN = sum)
crop <- crop[order(crop$CROPDMGVAL, decreasing=TRUE),]
head(property)
##                EVTYPE   PROPDMGVAL
## 170             FLOOD 144657709807
## 411 HURRICANE/TYPHOON  69305840000
## 834           TORNADO  56947380617
## 670       STORM SURGE  43323536000
## 153       FLASH FLOOD  16822673979
## 244              HAIL  15735267513
head(crop)
##          EVTYPE  CROPDMGVAL
## 95      DROUGHT 13972566000
## 170       FLOOD  5661968450
## 590 RIVER FLOOD  5029459000
## 427   ICE STORM  5022113500
## 244        HAIL  3025954473
## 402   HURRICANE  2741910000

Plot 2

par(mfrow=c(1,2))
barplot (height = property$PROPDMGVAL[1:10]/(10^9), names.arg = property$EVTYPE[1:10], las = 2, cex.names= 0.6,col = "blue" , main = "Property Damage vs Event Type")

barplot (height = crop$CROPDMGVAL[1:10]/(10^9), names.arg = crop$EVTYPE[1:10], las = 2, cex.names= 0.6,col = "blue" , main = "Crop Damage vs Event Type")

Results

From Plot 1 , Tornados are the most harmful events that caused maximum number of fatalities and injuries followed by Excessive Heat and Thunderstorm wind in case of fatalities and injuries respectively.

From Plot 2 , Floods are the events that have the greatest economic consequences causing the maximum property damage while Drought caused the maximum crop damage followed by Hurricanes and Floods for property damage and crop damage respectively