This document analyzes the impact of storms and other severe weather events on public health and the economy. The data analysis mainly addresses the following questions: (1) Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? (2) Across the United States, which types of events have the greatest economic consequences?
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(reshape2)
library(ggplot2)
library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.0 (2015-02-19) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.19.0 (2015-02-27) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
##
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
##
## The following objects are masked from 'package:base':
##
## attach, detach, gc, load, save
##
## R.utils v2.1.0 (2015-05-27) successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
##
## The following object is masked from 'package:utils':
##
## timestamp
##
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, inherits, isOpen, parse, warnings
require(gridExtra)
## Loading required package: gridExtra
Download data, unzip data, and read data
## Download the compressed storm data once; later runs reuse the local copy.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
file <- "stormdata.csv.bz2"
if (!file.exists(file)) {
  ## mode = "wb" keeps the bzip2 archive byte-exact on every platform
  download.file(url, file, mode = "wb")
}
## Read the CSV straight out of the bzip2 archive. The connection is opened on
## `file`, i.e. exactly the path that was checked/downloaded above, because
## file names are case-sensitive on many file systems.
storm_con <- bzfile(file, "r")
stormData <- read.csv(storm_con, stringsAsFactors = FALSE)
close(storm_con)
## First look at the raw data: size, column names and the leading rows
c(nrow(stormData), ncol(stormData))
## [1] 902297 37
colnames(stormData)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
head(stormData, n = 6L)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
Add the year of each event to stormData and plot a histogram of the number of records per year
## Derive the calendar year of each event from BGN_DATE
## (values look like "4/18/1950 0:00:00").
## The guard checks for the column itself instead of a hard-coded column count,
## so the year is still added when the input has a different number of columns
## and is not recomputed when this chunk is run again.
if (!"year" %in% names(stormData)) {
  stormData$year <- as.numeric(format(
    as.Date(stormData$BGN_DATE, format = "%m/%d/%Y %H:%M:%S"),
    "%Y"
  ))
}
## Histogram of the number of records per year
hist(stormData$year, breaks = 50)
We select the data after 1980 because “In the earlier years of the database there are generally fewer events recorded, most likely due to a lack of good records. More recent years should be considered more complete.”
## Restrict the analysis to events whose year is strictly greater than 1980
## (records from 1980 itself are excluded).
storm <- stormData[1980 < stormData$year, ]
## number of rows and columns kept
c(nrow(storm), ncol(storm))
## [1] 820785 38
Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
## Total fatalities per event type, ranked from highest to lowest
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = storm, FUN = sum)
sort.fatalities <- fatalities[with(fatalities, order(-FATALITIES)), ]
## keep the first 30 rows of the ranking
fatalities30 <- sort.fatalities[seq_len(30), ]
Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health? Here the same ranking is computed for injuries.
## Total injuries per event type, ranked from highest to lowest
injuries <- aggregate(INJURIES ~ EVTYPE, data = storm, FUN = sum)
sort.injuries <- injuries[with(injuries, order(-INJURIES)), ]
## keep the first 30 rows of the ranking
injuries30 <- sort.injuries[seq_len(30), ]
## First six rows of the fatalities ranking
head(fatalities30, n = 6L)
## EVTYPE FATALITIES
## 834 TORNADO 2246
## 130 EXCESSIVE HEAT 1903
## 153 FLASH FLOOD 978
## 275 HEAT 937
## 464 LIGHTNING 816
## 856 TSTM WIND 504
## First six rows of the injuries ranking
head(injuries30, n = 6L)
## EVTYPE INJURIES
## 834 TORNADO 36814
## 856 TSTM WIND 6957
## 170 FLOOD 6789
## 130 EXCESSIVE HEAT 6525
## 464 LIGHTNING 5230
## 275 HEAT 2100
Across the United States, which types of events have the greatest economic consequences? We focus on the property damage and crop damage caused by each type of event in order to identify the most costly ones. First, each property damage figure is multiplied by the multiplier encoded in PROPDMGEXP, the results are summed by event type, and the totals are sorted.
## Distinct magnitude codes recorded for property damage
unique(storm$PROPDMGEXP)
## [1] "" "M" "K" "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-"
## [18] "1" "8"
## Translate every PROPDMGEXP code into a numeric multiplier with one lookup.
## match() is used rather than indexing by name because the empty string ""
## is one of the codes and can never be matched as a vector name.
## Codes that are not listed end up as NA.
## NOTE(review): "1" maps to 1 although "2".."8" map to 10^n - confirm intent.
storm$PROPEXPN <- local({
  codes <- c(
    "", "-", "?", "+", "0", "1", "2", "3", "4", "5",
    "6", "7", "8", "B", "h", "H", "K", "m", "M"
  )
  multipliers <- c(
    1, 0, 0, 0, 1, 1, 100, 1000, 10000, 100000,
    1e+06, 1e+07, 1e+08, 1e+09, 100, 100, 1000, 1e+06, 1e+06
  )
  multipliers[match(storm$PROPDMGEXP, codes)]
})
## Property damage expressed in plain dollars
storm$PROPDMGWITHEXP <- storm$PROPDMG * storm$PROPEXPN
## Total per event type, ranked from highest to lowest; keep the first 30 rows
pdmg <- aggregate(PROPDMGWITHEXP ~ EVTYPE, data = storm, FUN = sum)
sort.pdmg <- pdmg[with(pdmg, order(-PROPDMGWITHEXP)), ]
pdmg30 <- sort.pdmg[seq_len(30), ]
Next, each crop damage figure is multiplied by the multiplier encoded in CROPDMGEXP, the results are summed by event type, and the totals are sorted.
unique(storm$CROPDMGEXP) ## distinct magnitude codes recorded for crop damage
## [1] "" "M" "K" "m" "B" "?" "0" "k" "2"
## Translate every CROPDMGEXP code into a numeric multiplier with one lookup.
## match() is used rather than indexing by name because the empty string ""
## is one of the codes and can never be matched as a vector name.
## Codes that are not listed end up as NA.
## "2" is 10^2 = 100, the same multiplier the property-damage mapping in this
## file assigns to "2" (it was 10 here, which made the two mappings disagree).
storm$CROPEXPN <- local({
  codes <- c("", "?", "0", "2", "B", "k", "K", "m", "M")
  multipliers <- c(1, 0, 1, 100, 1e+09, 1000, 1000, 1e+06, 1e+06)
  multipliers[match(storm$CROPDMGEXP, codes)]
})
## Crop damage expressed in plain dollars
storm$CROPDMGWITHEXP <- storm$CROPDMG * storm$CROPEXPN
## Total per event type, ranked from highest to lowest; keep the first 30 rows
cdmg <- aggregate(CROPDMGWITHEXP ~ EVTYPE, data = storm, FUN = sum)
sort.cdmg <- cdmg[order(-cdmg$CROPDMGWITHEXP), ]
cdmg30 <- sort.cdmg[seq_len(30), ]
## List of the 30 event types with the largest number of fatalities, highest first.
fatalities30
## EVTYPE FATALITIES
## 834 TORNADO 2246
## 130 EXCESSIVE HEAT 1903
## 153 FLASH FLOOD 978
## 275 HEAT 937
## 464 LIGHTNING 816
## 856 TSTM WIND 504
## 170 FLOOD 470
## 585 RIP CURRENT 368
## 359 HIGH WIND 248
## 19 AVALANCHE 224
## 972 WINTER STORM 206
## 586 RIP CURRENTS 204
## 278 HEAT WAVE 172
## 140 EXTREME COLD 160
## 760 THUNDERSTORM WIND 133
## 310 HEAVY SNOW 127
## 141 EXTREME COLD/WIND CHILL 125
## 676 STRONG WIND 103
## 30 BLIZZARD 101
## 350 HIGH SURF 101
## 290 HEAVY RAIN 98
## 142 EXTREME HEAT 96
## 79 COLD/WIND CHILL 95
## 427 ICE STORM 89
## 957 WILDFIRE 75
## 411 HURRICANE/TYPHOON 64
## 786 THUNDERSTORM WINDS 64
## 188 FOG 62
## 402 HURRICANE 61
## 848 TROPICAL STORM 58
## List of the 30 event types with the largest number of injuries, highest first.
injuries30
## EVTYPE INJURIES
## 834 TORNADO 36814
## 856 TSTM WIND 6957
## 170 FLOOD 6789
## 130 EXCESSIVE HEAT 6525
## 464 LIGHTNING 5230
## 275 HEAT 2100
## 427 ICE STORM 1975
## 153 FLASH FLOOD 1777
## 760 THUNDERSTORM WIND 1488
## 244 HAIL 1361
## 972 WINTER STORM 1321
## 411 HURRICANE/TYPHOON 1275
## 359 HIGH WIND 1137
## 310 HEAVY SNOW 1021
## 957 WILDFIRE 911
## 786 THUNDERSTORM WINDS 908
## 30 BLIZZARD 805
## 188 FOG 734
## 955 WILD/FOREST FIRE 545
## 117 DUST STORM 440
## 978 WINTER WEATHER 398
## 89 DENSE FOG 342
## 848 TROPICAL STORM 340
## 278 HEAT WAVE 309
## 376 HIGH WINDS 302
## 586 RIP CURRENTS 297
## 676 STRONG WIND 280
## 290 HEAVY RAIN 251
## 585 RIP CURRENT 232
## 140 EXTREME COLD 231
Graphs of the fatalities and injuries by event.
## Horizontal bar charts of the 30 event types with the most fatalities and the
## 30 with the most injuries. EVTYPE is a character column, which ggplot would
## otherwise lay out alphabetically; reorder() sorts the bars by the plotted
## total so the ranking can be read directly off the chart.
library(ggplot2)
fatalitiesPlot <- ggplot(
  fatalities30,
  aes(reorder(EVTYPE, FATALITIES), FATALITIES)
) +
  geom_bar(stat = "identity", fill = "purple") +
  coord_flip() +
  xlab("Weather Event Type") +
  ylab("Fatalities") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
injuriesPlot <- ggplot(
  injuries30,
  aes(reorder(EVTYPE, INJURIES), INJURIES)
) +
  geom_bar(stat = "identity", fill = "red") +
  coord_flip() +
  xlab("Weather Event Type") +
  ylab("Injuries") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
## putting the plots in a single row
library(gridExtra)
## Loading required package: grid (some problem)
grid.arrange(fatalitiesPlot, injuriesPlot, ncol = 2)
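The figure shows that tornadoes have by far the largest impact on public health, causing the most fatalities and the most injuries. Excessive heat is the second leading cause of fatalities, while thunderstorm wind (TSTM WIND), floods and excessive heat follow tornadoes for injuries.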
# List of the 30 event types with the highest economic impact (considering property damage), highest first.
pdmg30
## EVTYPE PROPDMGWITHEXP
## 170 FLOOD 144657709807
## 411 HURRICANE/TYPHOON 69305840000
## 670 STORM SURGE 43323536000
## 834 TORNADO 41495981937
## 153 FLASH FLOOD 16822673979
## 244 HAIL 15735267513
## 402 HURRICANE 11868319010
## 848 TROPICAL STORM 7703890550
## 972 WINTER STORM 6688497251
## 359 HIGH WIND 5270046260
## 590 RIVER FLOOD 5118945500
## 957 WILDFIRE 4765114000
## 671 STORM SURGE/TIDE 4641188000
## 856 TSTM WIND 4484928495
## 427 ICE STORM 3944927860
## 760 THUNDERSTORM WIND 3483122472
## 409 HURRICANE OPAL 3172846000
## 955 WILD/FOREST FIRE 3001829500
## 298 HEAVY RAIN/SEVERE WEATHER 2500000000
## 786 THUNDERSTORM WINDS 1944590859
## 842 TORNADOES, TSTM WIND, HAIL 1600000000
## 604 SEVERE THUNDERSTORM 1205360000
## 95 DROUGHT 1046106000
## 310 HEAVY SNOW 932759140
## 464 LIGHTNING 930379430
## 290 HEAVY RAIN 694248090
## 30 BLIZZARD 659213950
## 954 WILD FIRES 624100000
## 376 HIGH WINDS 608323733
## 879 TYPHOON 600230000
# List of the 30 event types with the highest economic impact (considering crop damage), highest first.
cdmg30
## EVTYPE CROPDMGWITHEXP
## 95 DROUGHT 13972566000
## 170 FLOOD 5661968450
## 590 RIVER FLOOD 5029459000
## 427 ICE STORM 5022113500
## 244 HAIL 3025954473
## 402 HURRICANE 2741910000
## 411 HURRICANE/TYPHOON 2607872800
## 153 FLASH FLOOD 1421317100
## 140 EXTREME COLD 1292973000
## 212 FROST/FREEZE 1094086000
## 290 HEAVY RAIN 733399800
## 848 TROPICAL STORM 678346000
## 359 HIGH WIND 638571300
## 856 TSTM WIND 554007350
## 130 EXCESSIVE HEAT 492402000
## 192 FREEZE 446225000
## 834 TORNADO 414953270
## 760 THUNDERSTORM WIND 414843050
## 275 HEAT 401461500
## 957 WILDFIRE 295472800
## 87 DAMAGING FREEZE 262100000
## 786 THUNDERSTORM WINDS 190654788
## 136 EXCESSIVE WETNESS 142000000
## 406 HURRICANE ERIN 136010000
## 310 HEAVY SNOW 134653100
## 182 FLOOD/RAIN/WINDS 112800000
## 30 BLIZZARD 112060000
## 955 WILD/FOREST FIRE 106796830
## 177 FLOOD/FLASH FLOOD 95034000
## 73 COLD AND WET CONDITIONS 66000000
Graphs of the property and crop damages by event.
## Horizontal bar charts of the 30 event types with the highest property damage
## and the 30 with the highest crop damage. EVTYPE is a character column, which
## ggplot would otherwise lay out alphabetically; reorder() sorts the bars by
## the plotted total so the ranking can be read directly off the chart.
propertyPlot <- ggplot(
  pdmg30,
  aes(reorder(EVTYPE, PROPDMGWITHEXP), PROPDMGWITHEXP)
) +
  geom_bar(stat = "identity", fill = "purple") +
  coord_flip() +
  xlab("Weather Event Type") +
  ylab("Property Damage") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
cropPlot <- ggplot(
  cdmg30,
  aes(reorder(EVTYPE, CROPDMGWITHEXP), CROPDMGWITHEXP)
) +
  geom_bar(stat = "identity", fill = "red") +
  coord_flip() +
  xlab("Weather Event Type") +
  ylab("Crop Damage") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
##putting the plots in a single row
library(gridExtra)
##putting the plots in a single row (some problem)
grid.arrange(propertyPlot, cropPlot, ncol = 2)
The figure shows that floods are the leading cause of property damage, followed by hurricanes/typhoons and storm surges. The leading causes of crop damage are droughts and floods.
The data show that (1) tornadoes and excessive heat are the top 2 most harmful weather events for public health in terms of fatalities, and tornadoes also cause the most injuries. (2) Floods, followed by hurricanes/typhoons and storm surges, are the costliest severe weather events considering property damage. (3) Droughts and floods are the costliest considering crop damage.