Exploring the NOAA Storm Database

Synopsis

Natural calamities such as storms and floods can damage both public health and the economy. Much of this damage can be prevented or reduced. This exercise helps the responsible authorities understand the severity of these events by answering two questions: which events cause the most harm to public health, and which events have the greatest negative impact on the economy.

Data Processing

Downloading the required content for the project:

library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## Registered S3 method overwritten by 'R.oo':
##   method        from       
##   throw.default R.methodsS3
## R.oo v1.22.0 (2018-04-21) successfully loaded. See ?R.oo for help.
## 
## Attaching package: 'R.oo'
## The following objects are masked from 'package:methods':
## 
##     getClasses, getMethods
## The following objects are masked from 'package:base':
## 
##     attach, detach, gc, load, save
## R.utils v2.9.0 successfully loaded. See ?R.utils for help.
## 
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
## 
##     timestamp
## The following objects are masked from 'package:base':
## 
##     cat, commandArgs, getOption, inherits, isOpen, nullfile,
##     parse, warnings
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Download the compressed storm data once; skip the download when a local
# copy already exists. The existence check uses the same relative path as
# `destfile` (a leading "/" would test the filesystem root instead, so the
# guard would never see the downloaded file).
if (!file.exists("stormData.csv.bz2")) {
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = "stormData.csv.bz2",
    mode = "wb" # binary transfer keeps the bzip2 archive intact on Windows
  )
}

Decompress the bzip2 archive

# Decompress the archive only when the CSV is not already on disk.
# `remove = FALSE` keeps the .bz2 file after extraction; the literal FALSE is
# used because the short alias `F` is an ordinary variable that can be
# reassigned.
if (!file.exists("stormdata.csv")) {
  bunzip2("stormData.csv.bz2", destname = "stormdata.csv", remove = FALSE)
}

Load the data file into a variable.

# Read the decompressed, comma-separated file; the first row holds the column names.
stormdata <- read.csv(file = "stormdata.csv", sep = ",", header = TRUE)

```

View Headers

# Preview the first six records to see the available columns.
head(stormdata, n = 6L)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
## 4 TORNADO         0                                               0
## 5 TORNADO         0                                               0
## 6 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
## 4         NA         0                       0.0   100 2   0          0
## 5         NA         0                       0.0   150 2   0          0
## 6         NA         0                       1.5   177 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
## 4        2     2.5          K       0                                    
## 5        2     2.5          K       0                                    
## 6        6     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3
## 4     3458      8626          0          0              4
## 5     3412      8642          0          0              5
## 6     3450      8748          0          0              6

Results

The first analysis examines the impact of severe weather events on public health.

# Total fatalities and injuries per event type.
populationhealth <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = stormdata, FUN = sum, na.rm = TRUE
)
# Rank event types by combined casualties, highest first.
populationhealth <- arrange(populationhealth, desc(FATALITIES + INJURIES))
# head() keeps at most ten rows; indexing with 1:10 would pad the result with
# all-NA rows whenever fewer than ten event types are present.
populationhealth <- head(populationhealth, 10)
populationhealth
##               EVTYPE FATALITIES INJURIES
## 1            TORNADO       5633    91346
## 2     EXCESSIVE HEAT       1903     6525
## 3          TSTM WIND        504     6957
## 4              FLOOD        470     6789
## 5          LIGHTNING        816     5230
## 6               HEAT        937     2100
## 7        FLASH FLOOD        978     1777
## 8          ICE STORM         89     1975
## 9  THUNDERSTORM WIND        133     1488
## 10      WINTER STORM        206     1321
# Stacked bar chart of fatalities and injuries for the event types held in
# `populationhealth`.
x <- populationhealth$EVTYPE

# Drop the EVTYPE column and transpose, so each event type becomes one column
# (one bar) and FATALITIES / INJURIES become the stacked rows.
health <- as.matrix(t(populationhealth[, -1]))
colnames(health) <- x

# Rotate the event labels (las = 2) and widen the bottom margin so every label
# is drawn; horizontal labels this long overlap and are dropped by the axis.
# The previous graphics settings are restored once the plot is complete.
old_par <- par(mar = c(11, 4, 4, 2))
barplot(health, col = c("grey", "blue"), las = 2, cex.names = 0.8,
        main = "Impact of Severe Weather Events on Population Health", log = "y")
# bty accepts "o" (box) or "n" (no box); "o" draws the box explicitly.
legend("topright", c("Fatalities", "Injuries"), fill = c("grey", "blue"), bty = "o")
par(old_par)

The second analysis examines the economic impact of severe weather events.

To do so, we must first scale the values of PROPDMG and CROPDMG by the multiplier encoded in the corresponding EXP field (PROPDMGEXP and CROPDMGEXP). Looking at these fields, we see some anomalies in the data.

# Count how often each property-damage exponent code occurs.
table(stormdata[["PROPDMGEXP"]])
## 
##             -      ?      +      0      1      2      3      4      5 
## 465934      1      8      5    216     25     13      4      4     28 
##      6      7      8      B      h      H      K      m      M 
##      4      5      1     40      1      6 424665      7  11330
# Count how often each crop-damage exponent code occurs.
table(stormdata[["CROPDMGEXP"]])
## 
##             ?      0      2      B      k      K      m      M 
## 618413      7     19      1      9     21 281832      1   1994

Convert the exponent codes into numeric multipliers and use them to calculate the property damage.

# Scale PROPDMG by the multiplier that its PROPDMGEXP code stands for.
# `codes` and `multipliers` are paired by position. The codes "+", "-" and "?"
# carry a multiplier of 0, i.e. the damage for those rows is set to zero.
# Rows whose code is not listed keep their PROPDMG value unchanged.
stormdata$PROPDMG <- local({
  codes <- c(
    "K", "M", "H", "h", "", "B", "m",
    "0", "1", "2", "3", "4", "5", "6", "7", "8",
    "+", "-", "?"
  )
  multipliers <- c(
    1000, 10^6, 100, 100, 1, 10^9, 10^6,
    10^(0:8),
    0, 0, 0
  )
  # as.character() makes the lookup work whether the column is a factor or text.
  multiplier <- multipliers[match(as.character(stormdata$PROPDMGEXP), codes)]
  multiplier[is.na(multiplier)] <- 1
  ifelse(multiplier == 0, 0, stormdata$PROPDMG * multiplier)
})
head(stormdata[, c("EVTYPE", "PROPDMG", "PROPDMGEXP")])
##    EVTYPE PROPDMG PROPDMGEXP
## 1 TORNADO   25000          K
## 2 TORNADO    2500          K
## 3 TORNADO   25000          K
## 4 TORNADO    2500          K
## 5 TORNADO    2500          K
## 6 TORNADO    2500          K

Convert the exponent codes into numeric multipliers and use them to calculate the crop damage.

# Scale CROPDMG by the multiplier that its CROPDMGEXP code stands for.
# `codes` and `multipliers` are paired by position. The code "?" carries a
# multiplier of 0, i.e. the damage for those rows is set to zero.
# Rows whose code is not listed keep their CROPDMG value unchanged.
stormdata$CROPDMG <- local({
  codes <- c("M", "K", "m", "B", "k", "0", "2", "", "?")
  multipliers <- c(10^6, 1000, 10^6, 10^9, 1000, 1, 100, 1, 0)
  # as.character() makes the lookup work whether the column is a factor or text.
  multiplier <- multipliers[match(as.character(stormdata$CROPDMGEXP), codes)]
  multiplier[is.na(multiplier)] <- 1
  ifelse(multiplier == 0, 0, stormdata$CROPDMG * multiplier)
})
head(stormdata[, c("EVTYPE", "CROPDMG", "CROPDMGEXP")])
##    EVTYPE CROPDMG CROPDMGEXP
## 1 TORNADO       0           
## 2 TORNADO       0           
## 3 TORNADO       0           
## 4 TORNADO       0           
## 5 TORNADO       0           
## 6 TORNADO       0

Property Damage

Aggregate the property damage by event type and sort the output in descending order.

# Sum the property damage per event type, then keep the ten largest totals.
prop <- aggregate(PROPDMG ~ EVTYPE, data = stormdata, FUN = sum, na.rm = TRUE)
# Negating the totals makes order() rank them from largest to smallest.
prop_ranking <- order(-prop$PROPDMG)
prop <- head(prop[prop_ranking, ], n = 10)
print(prop)
##                EVTYPE      PROPDMG
## 170             FLOOD 144657709807
## 411 HURRICANE/TYPHOON  69305840000
## 834           TORNADO  56947380617
## 670       STORM SURGE  43323536000
## 153       FLASH FLOOD  16822673979
## 244              HAIL  15735267513
## 402         HURRICANE  11868319010
## 848    TROPICAL STORM   7703890550
## 972      WINTER STORM   6688497251
## 359         HIGH WIND   5270046260

Crop Damage

Aggregate the crop damage by event type and sort the output in descending order.

# Sum the crop damage per event type, then keep the ten largest totals.
crop <- aggregate(CROPDMG ~ EVTYPE, data = stormdata, FUN = sum, na.rm = TRUE)
# Negating the totals makes order() rank them from largest to smallest.
crop_ranking <- order(-crop$CROPDMG)
crop <- head(crop[crop_ranking, ], n = 10)
print(crop)
##                EVTYPE     CROPDMG
## 95            DROUGHT 13972566000
## 170             FLOOD  5661968450
## 590       RIVER FLOOD  5029459000
## 427         ICE STORM  5022113500
## 244              HAIL  3025954473
## 402         HURRICANE  2741910000
## 411 HURRICANE/TYPHOON  2607872800
## 153       FLASH FLOOD  1421317100
## 140      EXTREME COLD  1292973000
## 212      FROST/FREEZE  1094086000

Bar graphs displaying property and crop damages

# Plot the top-10 property and crop damage totals side by side.
# Margins (bottom, left, top, right): the bottom margin leaves room for the
# vertical event labels (las = 2); the left margin is 5 lines because the
# y-axis title is drawn on margin line 3 and is clipped by a 3-line margin.
# The previous graphics settings are saved and restored after plotting.
old_par <- par(mfrow = c(1, 2), mar = c(11, 5, 3, 2))
# Damage totals are divided by 10^9 so the y-axis reads in billions.
barplot(prop$PROPDMG / 10^9, names.arg = prop$EVTYPE, las = 2, col = "yellow",
        ylab = "Prop.damage(billions)", main = "Events Vs Top10 Prop.Damages")
barplot(crop$CROPDMG / 10^9, names.arg = crop$EVTYPE, las = 2, col = "coral",
        ylab = "Crop damage(billions)", main = "Events Vs Top10 Crop.Damages")
par(old_par)