Synopsis

The U.S. National Oceanic and Atmospheric Administration (NOAA) provides a database that tracks characteristics of major storms and weather events including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

It’s important to know which kinds of events cause public health and economic problems for communities and municipalities: many severe events result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

The goal of this document is to explore the dataset and answer two basic questions:

1. Across the United States, which types of events are most harmful with respect to population health?

2. Across the United States, which types of events have the greatest economic consequences?

Data Processing

Loading libraries:

require(ggplot2)
## Loading required package: ggplot2

Downloading file

# Fetch the compressed NOAA storm data archive once; later runs reuse the
# local copy.
filename <- "repdata_data_StormData.csv.bz2"
if (!file.exists(filename)) {
  fileURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # Download under a temporary name first, so an interrupted transfer never
  # leaves a truncated archive that the file.exists() check above would accept.
  partFile <- paste0(filename, ".part")
  # Use R's default download method instead of requiring an external curl
  # binary, and force binary mode so the bz2 archive is not altered on Windows.
  status <- download.file(fileURL, partFile, mode = "wb")
  if (status != 0 || !file.rename(partFile, filename)) {
    unlink(partFile)
    stop("Could not download ", fileURL, call. = FALSE)
  }
}

Loading and processing file

Creating a dataframe with the file:

# Parse the comma-separated archive (read.table decompresses bz2 files
# transparently) and preview the first six records.
eventsDF <- read.table(file = filename, sep = ",", header = TRUE)
head(eventsDF, n = 6L)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
## 4 TORNADO         0                                               0
## 5 TORNADO         0                                               0
## 6 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
## 4         NA         0                       0.0   100 2   0          0
## 5         NA         0                       0.0   150 2   0          0
## 6         NA         0                       1.5   177 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
## 4        2     2.5          K       0                                    
## 5        2     2.5          K       0                                    
## 6        6     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3
## 4     3458      8626          0          0              4
## 5     3412      8642          0          0              5
## 6     3450      8748          0          0              6

Number of rows and columns of the dataset:

# Row count followed by column count.
c(nrow(eventsDF), ncol(eventsDF))
## [1] 902297     37

Some values in the EVTYPE column mix lower- and upper-case letters, so they are normalized to upper case:

# Upper-case every event type so that spellings differing only in case are
# grouped together by the aggregations below.
eventsDF[["EVTYPE"]] <- toupper(eventsDF[["EVTYPE"]])

Results

1. Across the United States, which types of events are most harmful with respect to population health?

Two variables measure the impact on population health: fatalities and injuries.

Fatalities

Creating a dataset with the total number of fatalities by event type, and selecting the top 10 events.

# Total fatalities per event type; event types without any fatality are dropped.
fatalDF <- aggregate(FATALITIES ~ EVTYPE, data = eventsDF, FUN = sum)
fatalDF <- fatalDF[which(fatalDF$FATALITIES >= 1), ]
# Rank from most to fewest fatalities and keep the ten leading event types.
fatalDF <- fatalDF[order(fatalDF$FATALITIES, decreasing = TRUE), ]
fatalDF <- head(fatalDF, n = 10)
fatalDF
##             EVTYPE FATALITIES
## 754        TORNADO       5633
## 109 EXCESSIVE HEAT       1903
## 132    FLASH FLOOD        978
## 237           HEAT        937
## 408      LIGHTNING        816
## 777      TSTM WIND        504
## 148          FLOOD        470
## 520    RIP CURRENT        368
## 315      HIGH WIND        248
## 11       AVALANCHE        224

Plotting the results:

# Bar chart of the ten event types with the most fatalities; bar height and
# fill colour both encode the fatality count.
g1 <- ggplot(
  data = fatalDF,
  mapping = aes(x = EVTYPE, y = FATALITIES, fill = FATALITIES)
)
g1 +
  geom_bar(stat = "identity") +
  labs(title = "Weather Events with Most Fatalities", x = "EVENT TYPE") +
  # Rotate the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90))

Injuries

Creating a dataset with the total number of injuries by event type, and selecting the top 10 events.

# Sum injuries for each event type and discard types with no injuries at all.
injuryDF <- aggregate(INJURIES ~ EVTYPE, data = eventsDF, FUN = sum)
injuryDF <- injuryDF[which(injuryDF$INJURIES >= 1), ]
# Sort by injury count, highest first, then retain only the first ten rows.
injuryDF <- injuryDF[order(injuryDF$INJURIES, decreasing = TRUE), ]
injuryDF <- head(injuryDF, n = 10)
injuryDF
##                EVTYPE INJURIES
## 754           TORNADO    91346
## 777         TSTM WIND     6957
## 148             FLOOD     6789
## 109    EXCESSIVE HEAT     6525
## 408         LIGHTNING     5230
## 237              HEAT     2100
## 383         ICE STORM     1975
## 132       FLASH FLOOD     1777
## 684 THUNDERSTORM WIND     1488
## 206              HAIL     1361

Plotting the results:

# Bar chart of the ten event types with the most injuries; bar height and
# fill colour both encode the injury count.
g2 <- ggplot(
  data = injuryDF,
  mapping = aes(x = EVTYPE, y = INJURIES, fill = INJURIES)
)
g2 +
  geom_bar(stat = "identity") +
  labs(title = "Weather Events with Most Injuries", x = "EVENT TYPE") +
  # Rotate the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90))

The events that appear in both top-10 lists (fatalities and injuries) are:

# Event types present in both top-ten tables, in the order of the injuries table.
intersect(injuryDF$EVTYPE, fatalDF$EVTYPE)
## [1] "TORNADO"        "TSTM WIND"      "FLOOD"          "EXCESSIVE HEAT"
## [5] "LIGHTNING"      "HEAT"           "FLASH FLOOD"

2. Across the United States, which types of events have the greatest economic consequences?

The dataset’s codebook describes the exponent codes that accompany the damage figures. To obtain the correct dollar amount, each property and crop damage value must be multiplied by the multiplier that corresponds to its exponent code:

# Dollar multiplier for each recognised damage-magnitude code:
# H = hundreds, K = thousands, M = millions, B = billions.
expMultipliers <- c(H = 100, K = 1000, M = 1000000, B = 1000000000)

# Translate a vector of magnitude codes into numeric multipliers.
# Codes are matched case-insensitively, so a lowercase "b" is honoured like
# "B" instead of silently falling through. Any other code, including blank or
# missing ones, gets a multiplier of 1.
expToMultiplier <- function(codes) {
  hit <- match(toupper(as.character(codes)), names(expMultipliers))
  multiplier <- unname(expMultipliers[hit])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Multiplier columns for property damage and crop damage respectively.
eventsDF$DMGEXPVAL <- expToMultiplier(eventsDF$PROPDMGEXP)
eventsDF$CRPEXPVAL <- expToMultiplier(eventsDF$CROPDMGEXP)

Dataset with the top 10 events by damage of property in $USD:

# Property damage in dollars (amount times its multiplier), summed per event
# type and stored under the column name VAL.
dmgDF <- setNames(
  aggregate((PROPDMG * DMGEXPVAL) ~ EVTYPE, data = eventsDF, FUN = sum),
  c("EVTYPE", "VAL")
)
# Ignore event types whose total is below one dollar.
dmgDF <- dmgDF[which(dmgDF$VAL >= 1), ]
# Costliest first; keep the ten leading event types.
dmgDF <- dmgDF[order(dmgDF$VAL, decreasing = TRUE), ]
dmgDF <- head(dmgDF, n = 10)
dmgDF
##                EVTYPE          VAL
## 148             FLOOD 144657709807
## 367 HURRICANE/TYPHOON  69305840000
## 754           TORNADO  56937160779
## 595       STORM SURGE  43323536000
## 132       FLASH FLOOD  16140812067
## 206              HAIL  15732267543
## 358         HURRICANE  11868319010
## 768    TROPICAL STORM   7703890550
## 888      WINTER STORM   6688497251
## 315         HIGH WIND   5270046295

Dataset with the top 10 events by crop damage in $USD:

# Crop damage in dollars (amount times its multiplier), summed per event type
# and stored under the column name VAL.
cropDF <- setNames(
  aggregate((CROPDMG * CRPEXPVAL) ~ EVTYPE, data = eventsDF, FUN = sum),
  c("EVTYPE", "VAL")
)
# Ignore event types whose total is below one dollar.
cropDF <- cropDF[which(cropDF$VAL >= 1), ]
# Costliest first; keep the ten leading event types.
cropDF <- cropDF[order(cropDF$VAL, decreasing = TRUE), ]
cropDF <- head(cropDF, n = 10)
cropDF
##                EVTYPE         VAL
## 77            DROUGHT 13972566000
## 148             FLOOD  5661968450
## 525       RIVER FLOOD  5029459000
## 383         ICE STORM  5022113500
## 206              HAIL  3025954473
## 358         HURRICANE  2741910000
## 367 HURRICANE/TYPHOON  2607872800
## 132       FLASH FLOOD  1421317100
## 118      EXTREME COLD  1312973000
## 181      FROST/FREEZE  1094186000

Combining property and crop damage:

# Label each top-ten table with the kind of damage it holds.
dmgDF[["DMG"]] <- "PROPERTY DAMAGE"
cropDF[["DMG"]] <- "CROP DAMAGE"

# Stack both tables and express the totals in whole billions of dollars.
ecomDF <- rbind(dmgDF, cropDF)
ecomDF[["VAL"]] <- round(ecomDF[["VAL"]] / 1e9)

head(ecomDF, n = 6L)
##                EVTYPE VAL             DMG
## 148             FLOOD 145 PROPERTY DAMAGE
## 367 HURRICANE/TYPHOON  69 PROPERTY DAMAGE
## 754           TORNADO  57 PROPERTY DAMAGE
## 595       STORM SURGE  43 PROPERTY DAMAGE
## 132       FLASH FLOOD  16 PROPERTY DAMAGE
## 206              HAIL  16 PROPERTY DAMAGE

Plotting result of economic damage:

# Bar chart of damage per event type in billions of dollars; the fill colour
# distinguishes property damage from crop damage.
g3 <- ggplot(data = ecomDF, mapping = aes(x = EVTYPE, y = VAL, fill = DMG))
g3 +
  geom_bar(stat = "identity") +
  labs(
    title = "Higher Economic Damage of Weather Events",
    x = "EVENT TYPE",
    y = "BILLIONS (USD)"
  ) +
  # Rotate the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90))