We examined the impact of severe US weather events from 1950 to November 2011 on human health and property using the NOAA National Weather Service data set.
Across the United States, the following five types of events are most harmful with respect to population health as measured in human fatalities (in descending order): Tornado, Excessive Heat, Flash Flood, Heat, and Lightning. The most harmful events in terms of human injuries are: Tornado, Thunderstorm Winds, Flood, Excessive Heat, and Lightning. The number of injuries caused by these events is much higher than the number of fatalities.
The following five types of events have the greatest economic consequences in terms of total damages as measured in billions of dollars: Flood, Hurricane/Typhoon, Tornado, Storm Surge, and Hail. In general, the property damages are much higher than the crop damages.
# Attach data.table and read the raw storm CSV into `df`.
library(data.table)
df <- fread("./Project2/StormData.csv")
# Report how many rows and columns were read.
dim(df)
## [1] 902297 37
The data set has 37 variables and 902297 observations.
# Inspect the structure of the raw data: column names, types, leading values.
str(df)
## Classes 'data.table' and 'data.frame': 902297 obs. of 37 variables:
## $ STATE__ : num 1 1 1 1 1 1 1 1 1 1 ...
## $ BGN_DATE : chr "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
## $ BGN_TIME : chr "0130" "0145" "1600" "0900" ...
## $ TIME_ZONE : chr "CST" "CST" "CST" "CST" ...
## $ COUNTY : num 97 3 57 89 43 77 9 123 125 57 ...
## $ COUNTYNAME: chr "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
## $ STATE : chr "AL" "AL" "AL" "AL" ...
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ BGN_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ BGN_AZI : chr "" "" "" "" ...
## $ BGN_LOCATI: chr "" "" "" "" ...
## $ END_DATE : chr "" "" "" "" ...
## $ END_TIME : chr "" "" "" "" ...
## $ COUNTY_END: num 0 0 0 0 0 0 0 0 0 0 ...
## $ COUNTYENDN: logi NA NA NA NA NA NA ...
## $ END_RANGE : num 0 0 0 0 0 0 0 0 0 0 ...
## $ END_AZI : chr "" "" "" "" ...
## $ END_LOCATI: chr "" "" "" "" ...
## $ LENGTH : num 14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
## $ WIDTH : num 100 150 123 100 150 177 33 33 100 100 ...
## $ F : int 3 2 2 2 2 2 2 1 3 3 ...
## $ MAG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: chr "K" "K" "K" "K" ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: chr "" "" "" "" ...
## $ WFO : chr "" "" "" "" ...
## $ STATEOFFIC: chr "" "" "" "" ...
## $ ZONENAMES : chr "" "" "" "" ...
## $ LATITUDE : num 3040 3042 3340 3458 3412 ...
## $ LONGITUDE : num 8812 8755 8742 8626 8642 ...
## $ LATITUDE_E: num 3051 0 0 0 0 ...
## $ LONGITUDE_: num 8806 0 0 0 0 ...
## $ REMARKS : chr "" "" "" "" ...
## $ REFNUM : num 1 2 3 4 5 6 7 8 9 10 ...
## - attr(*, ".internal.selfref")=<externalptr>
#summary(df) # commented out to save space
There are 37 variables, but we need only Event types, Fatalities, Injuries, Property Damages, and Crop Damages. Damages have exponent codes associated with them.
# Keep only the event type, the health counts, and the damage amounts together
# with their exponent codes.
d1 <- df[, .(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG,
             CROPDMGEXP)]
dim(d1)
## [1] 902297 7
Next, we keep only those observations where the event type is known and some loss was incurred.
# Drop rows whose event type is recorded as the placeholder "?".
d1 <- d1[EVTYPE != "?"]
dim(d1)
## [1] 902296 7
# Keep rows reporting at least one injury, fatality, or non-zero damage figure.
d <- d1[(INJURIES > 0) | (FATALITIES > 0) | (PROPDMG > 0) | (CROPDMG > 0)]
dim(d)
## [1] 254632 7
# Collect the rows of d that contain at least one NA and count them.
d_NA <- d[!complete.cases(d)]
dim(d_NA)
## [1] 0 7
# Preview d_NA, the rows of d that contain at least one NA.
head(d_NA)
## Empty data.table (0 rows) of 7 cols: EVTYPE,FATALITIES,INJURIES,PROPDMG,PROPDMGEXP,CROPDMG...
There are no NA values.
Damages have associated exponent codes and need to be translated.
# Convert a vector of damage exponent codes into integer powers of ten.
#   NA, "", "+", "-", "?"          -> 0
#   "H" -> 2, "K" -> 3, "M" -> 6, "B" -> 9 (matched case-insensitively)
# Every other code is handed to as.integer() unchanged, so digit codes keep
# their numeric value.
exp_code_to_power <- function(codes) {
  # make NA values to be 0
  codes[is.na(codes)] <- 0
  # convert exponents to uppercase for translation
  codes <- toupper(as.character(codes))
  # translate the exponent codes
  codes[codes %in% c("+", "-", "?", "")] <- 0
  letter_power <- c(H = 2, K = 3, M = 6, B = 9)
  is_letter <- codes %in% names(letter_power)
  codes[is_letter] <- unname(letter_power[codes[is_letter]])
  # reset the exponents to be integer for computing damage values
  as.integer(codes)
}

d$PROPDMGEXP <- exp_code_to_power(d$PROPDMGEXP)
d$CROPDMGEXP <- exp_code_to_power(d$CROPDMGEXP)

# Damage in dollars = reported amount scaled by ten to the decoded exponent.
d$PropDamage <- d$PROPDMG * 10^d$PROPDMGEXP
d$CropDamage <- d$CROPDMG * 10^d$CROPDMGEXP
head(d)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1: TORNADO 0 15 25.0 3 0 0
## 2: TORNADO 0 0 2.5 3 0 0
## 3: TORNADO 0 2 25.0 3 0 0
## 4: TORNADO 0 2 2.5 3 0 0
## 5: TORNADO 0 2 2.5 3 0 0
## 6: TORNADO 0 6 2.5 3 0 0
## PropDamage CropDamage
## 1: 25000 0
## 2: 2500 0
## 3: 25000 0
## 4: 2500 0
## 5: 2500 0
## 6: 2500 0
# Inspect the structure of d after the exponent and damage columns were set.
str(d)
## Classes 'data.table' and 'data.frame': 254632 obs. of 9 variables:
## $ EVTYPE : chr "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
## $ FATALITIES: num 0 0 0 0 0 0 0 0 1 0 ...
## $ INJURIES : num 15 0 2 2 2 6 1 0 14 0 ...
## $ PROPDMG : num 25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
## $ PROPDMGEXP: int 3 3 3 3 3 3 3 3 3 3 ...
## $ CROPDMG : num 0 0 0 0 0 0 0 0 0 0 ...
## $ CROPDMGEXP: int 0 0 0 0 0 0 0 0 0 0 ...
## $ PropDamage: num 25000 2500 25000 2500 2500 2500 2500 2500 25000 25000 ...
## $ CropDamage: num 0 0 0 0 0 0 0 0 0 0 ...
## - attr(*, ".internal.selfref")=<externalptr>
Since fatalities are more serious, we sort the health effects by fatalities, rather than the total of fatalities and injuries.
# Sum fatalities and injuries per event type; Totals adds the two sums.
tHarm <- d[,
  .(
    FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES),
    Totals = sum(FATALITIES) + sum(INJURIES)
  ),
  by = EVTYPE
]
# Rank event types from most to fewest fatalities and show the first five.
tHarm <- tHarm[order(-FATALITIES)]
head(tHarm, n = 5)
## EVTYPE FATALITIES INJURIES Totals
## 1: TORNADO 5633 91346 96979
## 2: EXCESSIVE HEAT 1903 6525 8428
## 3: FLASH FLOOD 978 1777 2755
## 4: HEAT 937 2100 3037
## 5: LIGHTNING 816 5230 6046
# Re-rank the per-event totals by injuries (descending) and show the first five.
tHarmInjuries <- tHarm[order(-INJURIES)]
head(tHarmInjuries, n = 5)
## EVTYPE FATALITIES INJURIES Totals
## 1: TORNADO 5633 91346 96979
## 2: TSTM WIND 504 6957 7461
## 3: FLOOD 470 6789 7259
## 4: EXCESSIVE HEAT 1903 6525 8428
## 5: LIGHTNING 816 5230 6046
Across the United States, the following five types of events are most harmful with respect to population health as measured in human fatalities (in descending order): Tornado, Excessive Heat, Flash Flood, Heat, and Lightning. The most harmful events in terms of human injuries are: Tornado, Thunderstorm Winds, Flood, Excessive Heat, and Lightning. The number of injuries caused by these events is much higher than the number of fatalities.
# Sum property and crop damage per event type; Totals adds the two sums.
tDamage <- d[,
  .(
    PropDamage = sum(PropDamage),
    CropDamage = sum(CropDamage),
    Totals = sum(PropDamage) + sum(CropDamage)
  ),
  by = EVTYPE
]
# Rank event types by total damage (descending) and show the first ten.
tDamage <- tDamage[order(-Totals)]
head(tDamage, n = 10)
## EVTYPE PropDamage CropDamage Totals
## 1: FLOOD 144657709807 5661968450 150319678257
## 2: HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 3: TORNADO 56947380677 414953270 57362333947
## 4: STORM SURGE 43323536000 5000 43323541000
## 5: HAIL 15735267513 3025954473 18761221986
## 6: FLASH FLOOD 16822673979 1421317100 18243991079
## 7: DROUGHT 1046106000 13972566000 15018672000
## 8: HURRICANE 11868319010 2741910000 14610229010
## 9: RIVER FLOOD 5118945500 5029459000 10148404500
## 10: ICE STORM 3944927860 5022113500 8967041360
The following five types of events have the greatest economic consequences in terms of total damages: Flood, Hurricane/Typhoon, Tornado, Storm Surge, and Hail.
# Number of top-ranked event types to chart.
selHarm <- 5
# Take the first selHarm rows of the fatality-ranked table. head() returns at
# most selHarm rows, whereas indexing with 1:selHarm pads the result with
# all-NA rows whenever the table has fewer rows than selHarm.
topHarm <- head(tHarm, selHarm)
We transform the data to make it easier to visualize (following the suggestion by Hadley Wickham).
# Reshape to long form: one row per (event type, harm measure) pair.
harm <- melt(
  topHarm,
  id.vars = "EVTYPE",
  measure.vars = c("FATALITIES", "INJURIES", "Totals"),
  variable.name = "harm_type"
)
harm
## EVTYPE harm_type value
## 1: TORNADO FATALITIES 5633
## 2: EXCESSIVE HEAT FATALITIES 1903
## 3: FLASH FLOOD FATALITIES 978
## 4: HEAT FATALITIES 937
## 5: LIGHTNING FATALITIES 816
## 6: TORNADO INJURIES 91346
## 7: EXCESSIVE HEAT INJURIES 6525
## 8: FLASH FLOOD INJURIES 1777
## 9: HEAT INJURIES 2100
## 10: LIGHTNING INJURIES 5230
## 11: TORNADO Totals 96979
## 12: EXCESSIVE HEAT Totals 8428
## 13: FLASH FLOOD Totals 2755
## 14: HEAT Totals 3037
## 15: LIGHTNING Totals 6046
# Create chart: grouped bars of fatalities, injuries, and their total for the
# selected event types. The plotted values are counts of people (not events),
# scaled to thousands, so the y-axis label says so.
library(ggplot2)
harmChart <- ggplot(harm, aes(x = reorder(EVTYPE, -value), y = value / 1000))
harmChart <- harmChart +
  geom_bar(stat = "identity", aes(fill = harm_type), position = "dodge") +
  ylab("Number of people (in thousands)") +
  xlab("Weather Event Type") +
  ggtitle("Top Harmful Weather Events") +
  theme(
    # Slant the event names so long labels do not overlap.
    axis.text.x = element_text(angle = 30, hjust = 1),
    # Centre the title.
    plot.title = element_text(hjust = 0.5)
  )
harmChart
# Number of top-ranked event types to chart.
selDamage <- 5
# Take the first selDamage rows of the damage-ranked table. head() returns at
# most selDamage rows, whereas indexing with 1:selDamage pads the result with
# all-NA rows whenever the table has fewer rows than selDamage.
topDamage <- head(tDamage, selDamage)
head(topDamage, 5)
## EVTYPE PropDamage CropDamage Totals
## 1: FLOOD 144657709807 5661968450 150319678257
## 2: HURRICANE/TYPHOON 69305840000 2607872800 71913712800
## 3: TORNADO 56947380677 414953270 57362333947
## 4: STORM SURGE 43323536000 5000 43323541000
## 5: HAIL 15735267513 3025954473 18761221986
We transform the data to make it easier to visualize (following the suggestion by Hadley Wickham).
# Reshape to long form: one row per (event type, damage measure) pair.
Damage <- melt(
  topDamage,
  id.vars = "EVTYPE",
  measure.vars = c("PropDamage", "CropDamage", "Totals"),
  variable.name = "Damage_type"
)
Damage
## EVTYPE Damage_type value
## 1: FLOOD PropDamage 144657709807
## 2: HURRICANE/TYPHOON PropDamage 69305840000
## 3: TORNADO PropDamage 56947380677
## 4: STORM SURGE PropDamage 43323536000
## 5: HAIL PropDamage 15735267513
## 6: FLOOD CropDamage 5661968450
## 7: HURRICANE/TYPHOON CropDamage 2607872800
## 8: TORNADO CropDamage 414953270
## 9: STORM SURGE CropDamage 5000
## 10: HAIL CropDamage 3025954473
## 11: FLOOD Totals 150319678257
## 12: HURRICANE/TYPHOON Totals 71913712800
## 13: TORNADO Totals 57362333947
## 14: STORM SURGE Totals 43323541000
## 15: HAIL Totals 18761221986
# Create chart: grouped bars of property, crop, and total damage for the
# selected event types, with values scaled to billions of dollars.
DamageChart <- ggplot(
  Damage,
  aes(x = reorder(EVTYPE, -value), y = value / 10^9)
) +
  geom_bar(aes(fill = Damage_type), stat = "identity", position = "dodge") +
  labs(
    x = "Event Type",
    y = "Damage (in billion US dollars)",
    title = "Top Damaging Weather Events"
  ) +
  theme(
    axis.text.x = element_text(angle = 30, hjust = 1),
    plot.title = element_text(hjust = 0.5)
  )
DamageChart