The U.S. National Oceanic and Atmospheric Administration (NOAA) provides a database that tracks characteristics of major storms and weather events including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
It’s important to know what kinds of events can cause both public health and economic problems for communities and municipalities, because many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
The goal of this document is to explore the dataset and answer two basic questions:
1. Across the United States, which types of events are most harmful with respect to population health?
2. Across the United States, which types of events have the greatest economic consequences?
Loading libraries:
require(ggplot2)
## Loading required package: ggplot2
# Download the compressed NOAA storm data once; later runs reuse the local copy.
filename <- "repdata_data_StormData.csv.bz2"
if (!file.exists(filename)) {
  fileURL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # Use R's default download method rather than shelling out to the external
  # `curl` command-line tool, which is not installed on every system.
  # mode = "wb" writes the bz2 archive as binary so it is not altered on
  # platforms that distinguish text and binary file modes.
  download.file(fileURL, filename, mode = "wb")
}
Creating a dataframe with the file:
# Parse the comma-separated storm file (its first row holds the column
# names) into a data frame, then preview the first six rows.
eventsDF <- read.table(file = filename, sep = ",", header = TRUE)
head(eventsDF, n = 6L)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
Number of rows and columns of the dataset:
# Row count followed by column count of the loaded data.
c(nrow(eventsDF), ncol(eventsDF))
## [1] 902297 37
Some of the values in the EVTYPE column mix lower- and upper-case letters, so they are all converted to upper case:
# Normalize event type labels to upper case so case variants group together.
eventsDF[["EVTYPE"]] <- toupper(eventsDF[["EVTYPE"]])
There are two variables that measure population health problems: Fatalities and Injuries.
Fatalities
Creating a dataset with the total number of fatalities by event type, and selecting the top 10 events.
# Total fatalities per event type.
fatalDF <- aggregate(FATALITIES ~ EVTYPE, data = eventsDF, FUN = sum)
# Keep only event types with at least one fatality.
fatalDF <- fatalDF[which(fatalDF$FATALITIES >= 1), ]
# Sort from most to fewest fatalities and keep the ten highest.
fatalDF <- fatalDF[order(fatalDF$FATALITIES, decreasing = TRUE), ]
fatalDF <- head(fatalDF, n = 10)
fatalDF
## EVTYPE FATALITIES
## 754 TORNADO 5633
## 109 EXCESSIVE HEAT 1903
## 132 FLASH FLOOD 978
## 237 HEAT 937
## 408 LIGHTNING 816
## 777 TSTM WIND 504
## 148 FLOOD 470
## 520 RIP CURRENT 368
## 315 HIGH WIND 248
## 11 AVALANCHE 224
Plotting the results:
# Bar chart of the ten event types with the most fatalities; bar height and
# fill colour both encode the fatality count.
g1 <- ggplot(fatalDF, aes(x = EVTYPE, y = FATALITIES, fill = FATALITIES))
g1 +
  geom_bar(stat = "identity") +
  labs(title = "Weather Events with Most Fatalities", x = "EVENT TYPE") +
  # Rotate the event labels so the long names do not overlap.
  theme(axis.text.x = element_text(angle = 90))
Injuries
Creating a dataset with the total number of injuries by event type, and selecting the top 10 events.
# Total injuries per event type.
injuryDF <- aggregate(INJURIES ~ EVTYPE, data = eventsDF, FUN = sum)
# Keep only event types with at least one injury.
injuryDF <- injuryDF[which(injuryDF$INJURIES >= 1), ]
# Sort from most to fewest injuries and keep the ten highest.
injuryDF <- injuryDF[order(injuryDF$INJURIES, decreasing = TRUE), ]
injuryDF <- head(injuryDF, n = 10)
injuryDF
## EVTYPE INJURIES
## 754 TORNADO 91346
## 777 TSTM WIND 6957
## 148 FLOOD 6789
## 109 EXCESSIVE HEAT 6525
## 408 LIGHTNING 5230
## 237 HEAT 2100
## 383 ICE STORM 1975
## 132 FLASH FLOOD 1777
## 684 THUNDERSTORM WIND 1488
## 206 HAIL 1361
Plotting the results:
# Bar chart of the ten event types with the most injuries; bar height and
# fill colour both encode the injury count.
g2 <- ggplot(injuryDF, aes(x = EVTYPE, y = INJURIES, fill = INJURIES))
g2 +
  geom_bar(stat = "identity") +
  labs(title = "Weather Events with Most Injuries", x = "EVENT TYPE") +
  # Rotate the event labels so the long names do not overlap.
  theme(axis.text.x = element_text(angle = 90))
The event types that appear in both top-10 lists (injuries and fatalities) are:
# Event types present in both the injury and the fatality top-10 tables
# (EVTYPE is the first column of each).
intersect(injuryDF[["EVTYPE"]], fatalDF[["EVTYPE"]])
## [1] "TORNADO" "TSTM WIND" "FLOOD" "EXCESSIVE HEAT"
## [5] "LIGHTNING" "HEAT" "FLASH FLOOD"
The codebook of the dataset describes the exponent codes stored in PROPDMGEXP and CROPDMGEXP. To calculate the correct amounts, it is necessary to multiply the property damage and crop damage values by their corresponding exponents:
# Translate a vector of damage exponent codes into numeric multipliers.
#
# Codes are matched case-insensitively: H = 1e2, K = 1e3, M = 1e6, B = 1e9.
# Every other code (blank, digits, symbols, NA) gets a multiplier of 1.
#
# expCode: vector of exponent codes (character or factor).
# Returns a numeric vector of the same length as expCode.
expMultiplier <- function(expCode) {
  knownCodes <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  # as.character() guards against the column having been read as a factor;
  # toupper() lets one table entry cover both the lower- and upper-case code.
  multiplier <- unname(knownCodes[toupper(as.character(expCode))])
  # Codes without a table entry come back as NA; they leave the value unscaled.
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Multipliers for property damage (PROPDMG) and crop damage (CROPDMG).
eventsDF$DMGEXPVAL <- expMultiplier(eventsDF$PROPDMGEXP)
eventsDF$CRPEXPVAL <- expMultiplier(eventsDF$CROPDMGEXP)
Dataset with the top 10 events by damage of property in $USD:
# Total property damage (amount times its exponent multiplier) per event type,
# with the summed column renamed to VAL.
dmgDF <- setNames(
  aggregate((PROPDMG * DMGEXPVAL) ~ EVTYPE, data = eventsDF, FUN = sum),
  c("EVTYPE", "VAL")
)
# Drop event types whose total damage is below 1.
dmgDF <- dmgDF[which(dmgDF$VAL >= 1), ]
# Sort from largest to smallest total and keep the ten highest.
dmgDF <- dmgDF[order(dmgDF$VAL, decreasing = TRUE), ]
dmgDF <- head(dmgDF, n = 10)
dmgDF
## EVTYPE VAL
## 148 FLOOD 144657709807
## 367 HURRICANE/TYPHOON 69305840000
## 754 TORNADO 56937160779
## 595 STORM SURGE 43323536000
## 132 FLASH FLOOD 16140812067
## 206 HAIL 15732267543
## 358 HURRICANE 11868319010
## 768 TROPICAL STORM 7703890550
## 888 WINTER STORM 6688497251
## 315 HIGH WIND 5270046295
Dataset with the top 10 events by crop damage in $USD:
# Total crop damage (amount times its exponent multiplier) per event type,
# with the summed column renamed to VAL.
cropDF <- setNames(
  aggregate((CROPDMG * CRPEXPVAL) ~ EVTYPE, data = eventsDF, FUN = sum),
  c("EVTYPE", "VAL")
)
# Drop event types whose total damage is below 1.
cropDF <- cropDF[which(cropDF$VAL >= 1), ]
# Sort from largest to smallest total and keep the ten highest.
cropDF <- cropDF[order(cropDF$VAL, decreasing = TRUE), ]
cropDF <- head(cropDF, n = 10)
cropDF
## EVTYPE VAL
## 77 DROUGHT 13972566000
## 148 FLOOD 5661968450
## 525 RIVER FLOOD 5029459000
## 383 ICE STORM 5022113500
## 206 HAIL 3025954473
## 358 HURRICANE 2741910000
## 367 HURRICANE/TYPHOON 2607872800
## 132 FLASH FLOOD 1421317100
## 118 EXTREME COLD 1312973000
## 181 FROST/FREEZE 1094186000
Combining property and crop damage into one dataset (values converted to billions of USD):
# Label each top-10 table with its damage category before stacking them.
dmgDF[["DMG"]] <- "PROPERTY DAMAGE"
cropDF[["DMG"]] <- "CROP DAMAGE"
# Stack property rows above crop rows.
ecomDF <- rbind(dmgDF, cropDF)
# Express the totals in whole billions of USD.
ecomDF[["VAL"]] <- round(ecomDF[["VAL"]] / 1e9)
head(ecomDF, n = 6L)
## EVTYPE VAL DMG
## 148 FLOOD 145 PROPERTY DAMAGE
## 367 HURRICANE/TYPHOON 69 PROPERTY DAMAGE
## 754 TORNADO 57 PROPERTY DAMAGE
## 595 STORM SURGE 43 PROPERTY DAMAGE
## 132 FLASH FLOOD 16 PROPERTY DAMAGE
## 206 HAIL 16 PROPERTY DAMAGE
Plotting result of economic damage:
# Bar chart of economic damage per event type, with bars filled by damage
# category (property vs. crop).
g3 <- ggplot(ecomDF, aes(x = EVTYPE, y = VAL, fill = DMG))
g3 +
  geom_bar(stat = "identity") +
  labs(
    title = "Higher Economic Damage of Weather Events",
    x = "EVENT TYPE",
    y = "BILLIONS (USD)"
  ) +
  # Rotate the event labels so the long names do not overlap.
  theme(axis.text.x = element_text(angle = 90))