Synopsis:

The NOAA Storm Database for the period 1950-2011 was analyzed to study the risk to population health (fatalities and injuries) and economic damage (property and crop).

The most damaging event type for human casualties is the tornado, which caused an estimated 5,633 fatalities and 91,346 injuries. The worst event type for property damage is flood, at $144.6 billion USD, while the worst crop damage, $13.9 billion USD, was caused by drought. Total crop and property damage from floods reached $150.3 billion USD.

Therefore, we conclude that TORNADOES are the most harmful events with respect to population health, while FLOODS have the greatest economic consequences.

Data Processing

The data for this analysis come in the form of a comma-separated-value file compressed via the bzip2 algorithm to reduce its size. The file can be downloaded from the course web site and is then read directly in its compressed form:

# Load the raw storm data; bzfile() opens the bzip2-compressed CSV so that
# read.csv() can parse it without a separate decompression step.
S_data <- read.csv(bzfile("repdata-data-StormData.csv.bz2"), header = TRUE)

All the code required to reproduce the analysis is included in this report. The analysis focuses on the physical harm to people caused by, and the economic consequences of, storms and severe weather in the United States. Initially, the data are trimmed to contain only the necessary variables.

# Keep only the event type, casualty and damage columns, then discard every
# row that has a missing value in any of them.
S_data <- S_data[
  ,
  c(
    "EVTYPE",
    "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP",
    "CROPDMG", "CROPDMGEXP"
  )
]
S_data <- S_data[complete.cases(S_data), ]

population health (fatalities and injuries)

# Sum fatalities and injuries per event type.
Fatalities <- aggregate(FATALITIES ~ EVTYPE, data = S_data, FUN = sum)
Injuries <- aggregate(INJURIES ~ EVTYPE, data = S_data, FUN = sum)

# Rank both tables so the event types with the largest totals come first.
Fatalities <- Fatalities[
  order(Fatalities$FATALITIES, decreasing = TRUE), ,
  drop = FALSE
]
Injuries <- Injuries[
  order(Injuries$INJURIES, decreasing = TRUE), ,
  drop = FALSE
]

economic damage(property and crop damage)

# Scale a vector of damage amounts by its magnitude codes.
#
# amount: numeric damage figures (PROPDMG / CROPDMG).
# code:   magnitude codes for each amount (PROPDMGEXP / CROPDMGEXP).
#
# "K" multiplies by 1e3, "M" by 1e6 and "B" by 1e9. Codes are compared
# case-insensitively so that lowercase entries ("k", "m", "b") are scaled as
# well instead of being silently left at their raw value. Any other code
# (including empty or missing) leaves the amount unchanged.
scale_damage <- function(amount, code) {
  multiplier <- c(K = 1e3, M = 1e6, B = 1e9)[toupper(as.character(code))]
  multiplier[is.na(multiplier)] <- 1
  amount * unname(multiplier)
}

# Convert property and crop damage to dollars and add the combined total.
S_data$PROPDMG <- scale_damage(S_data$PROPDMG, S_data$PROPDMGEXP)
S_data$CROPDMG <- scale_damage(S_data$CROPDMG, S_data$CROPDMGEXP)
S_data$TOTAL <- S_data$CROPDMG + S_data$PROPDMG

# Sum each damage measure per event type, largest totals first.
damage_total <- aggregate(TOTAL ~ EVTYPE, data = S_data, FUN = sum)
damage_total <- damage_total[order(-damage_total$TOTAL), , drop = FALSE]

damage_prop <- aggregate(PROPDMG ~ EVTYPE, data = S_data, FUN = sum)
damage_prop <- damage_prop[order(-damage_prop$PROPDMG), , drop = FALSE]

damage_crop <- aggregate(CROPDMG ~ EVTYPE, data = S_data, FUN = sum)
damage_crop <- damage_crop[order(-damage_crop$CROPDMG), , drop = FALSE]

Results

In the first part, we consider both injuries and fatalities. We sum them by EVTYPE and draw a bar plot of the top 10 events to identify the most damaging events with respect to population health.

In the second part, we consider both property and crop damage. We sum them by EVTYPE and draw a bar plot of the top 10 events to identify the most damaging events with respect to economic impact.

population health (fatalities and injuries)

Top 5 fatalities and injuries

# First five rows of the ranked fatalities table, centre-aligned.
knitr::kable(head(Fatalities, n = 5), align = "c")
EVTYPE FATALITIES
834 TORNADO 5633
130 EXCESSIVE HEAT 1903
153 FLASH FLOOD 978
275 HEAT 937
464 LIGHTNING 816
# First five rows of the ranked injuries table, centre-aligned.
knitr::kable(head(Injuries, n = 5), align = "c")
EVTYPE INJURIES
834 TORNADO 91346
856 TSTM WIND 6957
170 FLOOD 6789
130 EXCESSIVE HEAT 6525
464 LIGHTNING 5230

The top 10 most harmful severe weather events to population health in the United States (1950-2011) are represented in the figure below. As can be seen, the most harmful events are tornadoes, followed by excessive heat.

# Two panels side by side: the first ten rows of the fatalities and injuries
# tables. Both panels use the same y-range so bar heights are comparable.
par(mfrow = c(1, 2))
# par(mai = c(2.5, 0.82, 1.0, 0.42))
barplot(
  head(Fatalities$FATALITIES, 10),
  names.arg = head(Fatalities$EVTYPE, 10),
  main = "Fatalities",
  ylim = c(0, 95000),
  col = terrain.colors(10),
  las = 3,
  cex.names = 0.7
)
barplot(
  head(Injuries$INJURIES, 10),
  names.arg = head(Injuries$EVTYPE, 10),
  main = "Injuries",
  ylim = c(0, 95000),
  col = terrain.colors(10),
  las = 3,
  cex.names = 0.7
)

economic damage(property and crop damage)

Top 5 property, crop and total damages

# First five rows of the ranked property-damage table, centre-aligned.
knitr::kable(head(damage_prop, n = 5), align = "c")
EVTYPE PROPDMG
170 FLOOD 144657709807
411 HURRICANE/TYPHOON 69305840000
834 TORNADO 56925660790
670 STORM SURGE 43323536000
153 FLASH FLOOD 16140812067
# First five rows of the ranked crop-damage table, centre-aligned.
knitr::kable(head(damage_crop, n = 5), align = "c")
EVTYPE CROPDMG
95 DROUGHT 13972566000
170 FLOOD 5661968450
590 RIVER FLOOD 5029459000
427 ICE STORM 5022113500
244 HAIL 3025537890
# First five rows of the ranked total-damage table, centre-aligned.
knitr::kable(head(damage_total, n = 5), align = "c")
EVTYPE TOTAL
170 FLOOD 150319678257
411 HURRICANE/TYPHOON 71913712800
834 TORNADO 57340614060
670 STORM SURGE 43323541000
244 HAIL 18752904943

The top 10 severe weather events by economic damage in the United States (1950-2011) are represented in the figure below. As can be seen, the most harmful event type is FLOOD.

# Three panels side by side: the ten largest per-event-type sums of total,
# property and crop damage.
par(mfrow = c(1, 3))

# Per-event-type sums, sorted so the largest comes first.
total_by_event <- sort(
  tapply(S_data$TOTAL, S_data$EVTYPE, sum),
  decreasing = TRUE
)
prop_by_event <- sort(
  tapply(S_data$PROPDMG, S_data$EVTYPE, sum),
  decreasing = TRUE
)
crop_by_event <- sort(
  tapply(S_data$CROPDMG, S_data$EVTYPE, sum),
  decreasing = TRUE
)

# All panels share one y-range so they can be compared directly. The upper
# bound is taken from the largest combined total rather than a hard-coded
# figure, so it stays correct when the underlying data changes.
damage_ylim <- c(0, max(total_by_event))

barplot(
  total_by_event[1:10],
  las = 2, cex.names = 0.7, col = terrain.colors(10),
  main = "Property and Crop Damage", ylab = "$", ylim = damage_ylim
)
barplot(
  prop_by_event[1:10],
  las = 2, cex.names = 0.7, col = terrain.colors(10),
  main = "Property Damage", ylab = "$", ylim = damage_ylim
)
barplot(
  crop_by_event[1:10],
  las = 2, cex.names = 0.7, col = terrain.colors(10),
  main = "Crop Damage", ylab = "$", ylim = damage_ylim
)