Synopsis

This report analyzes the NOAA storm database, which contains data on extreme weather events. The data were collected from 1950 through 2011. The purpose of this analysis is to answer the following two questions:

  1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?
  2. Across the United States, which types of events have the greatest economic consequences?

For population health, fatalities and injuries were used as measurements. For economic consequences, property and crop damage were used as measurements.

Data Processing

Use the ggplot2 library and the gridExtra package (after installing both packages, if not already installed)

library(ggplot2)
require(gridExtra)
## Loading required package: gridExtra
## Loading required package: grid

Load the data (csv format) into R

# Read the raw storm events table from the CSV file (path is relative to the
# working directory).
wea <- read.csv(file = "storm.csv")

Add a year column

# Derive the calendar year of each event from its begin-date string.
wea$YEAR <- local({
  begin_date <- as.Date(wea$BGN_DATE, format = "%m/%d/%Y %H:%M:%S")
  as.numeric(format(begin_date, "%Y"))
})

Create a histogram of the number of storms per year, using the data from all years.

# Use one bin per calendar year so that each bar really is "storms per year";
# hist()'s default breaks would lump several years into a single bar.
# Rows whose YEAR could not be parsed (NA) are ignored when finding the range.
hist(wea$YEAR,
     breaks = seq(min(wea$YEAR, na.rm = TRUE) - 0.5,
                  max(wea$YEAR, na.rm = TRUE) + 0.5, by = 1),
     xlab = "Year", ylab = "Number of Storms",
     main = "Number of storms per year")

Population Health

Fatalities

Subset the data for fatalities per event type

# Keep only the event type and the fatality count of every record.
fatal <- wea[c("EVTYPE", "FATALITIES")]

Aggregate the fatalities by event type, sum the fatalities by event type

# Total fatalities for each event type (one output row per EVTYPE).
fatal <- aggregate(FATALITIES ~ EVTYPE, fatal, sum)

Order the fatalities in descending order

# Sort event types from the highest to the lowest fatality total.
fatal <- fatal[order(fatal[["FATALITIES"]], decreasing = TRUE), , drop = FALSE]

Get the five event types with the most fatalities

# Keep the (at most) five deadliest event types. head() never pads the result
# with all-NA rows when fewer than five event types exist, unlike fatal[1:5, ].
fatalsum <- head(fatal, 5)
fatalsum
##             EVTYPE FATALITIES
## 834        TORNADO       5633
## 130 EXCESSIVE HEAT       1903
## 153    FLASH FLOOD        978
## 275           HEAT        937
## 464      LIGHTNING        816

Plot the five event types with the most fatalities

# Bar chart of total fatalities for the top five event types.
# The aesthetics name the columns of `data` directly instead of fatalsum$...;
# mapping with `$` bypasses the data argument and is discouraged by ggplot2.
# The plot object is only stored here, not drawn.
fplot <- ggplot(data = fatalsum, aes(x = EVTYPE, y = FATALITIES)) +
    geom_bar(colour = "black", fill = "grey", stat = "identity") +
    xlab("Event Type") +
    ylab("Number of Fatalities") +
    ggtitle("Total Number of Fatalities \n in the U.S. \n from 1950 - 2011") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

Injuries

Subset the data for injuries per event type

# Keep only the event type and the injury count of every record.
inj <- wea[c("EVTYPE", "INJURIES")]

Aggregate the injuries by event type, sum the injuries by event type

# Total injuries for each event type (one output row per EVTYPE).
injury <- aggregate(INJURIES ~ EVTYPE, inj, sum)

Order the injuries in descending order

# Sort event types from the highest to the lowest injury total.
injury <- injury[order(injury[["INJURIES"]], decreasing = TRUE), , drop = FALSE]

Get the five event types with the most injuries

# Keep the (at most) five event types with the most injuries. head() never
# pads the result with all-NA rows when fewer than five event types exist,
# unlike injury[1:5, ].
injurysum <- head(injury, 5)
injurysum
##             EVTYPE INJURIES
## 834        TORNADO    91346
## 856      TSTM WIND     6957
## 170          FLOOD     6789
## 130 EXCESSIVE HEAT     6525
## 464      LIGHTNING     5230

Plot the five event types with the most injuries

# Bar chart of total injuries for the top five event types.
# The aesthetics name the columns of `data` directly instead of injurysum$...;
# mapping with `$` bypasses the data argument and is discouraged by ggplot2.
# The plot object is only stored here, not drawn.
iplot <- ggplot(data = injurysum, aes(x = EVTYPE, y = INJURIES)) +
    geom_bar(colour = "black", fill = "grey", stat = "identity") +
    xlab("Event Type") +
    ylab("Number of Injuries") +
    ggtitle("Total Number of Injuries \n in the U.S. \n from 1950 - 2011") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

Economic Consequences

Subset damage columns

# Keep the event type plus the damage amount and exponent columns for both
# property and crops.
dam <- wea[c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

Property Damage

Convert exponent to character for Property Damage Exponent column

# Work on plain strings so the exponent codes can be rewritten with gsub().
dam[["PROPDMGEXP"]] <- as.character(dam[["PROPDMGEXP"]])

Convert letters to numbers for Property Damage Exponent column

# Rewrite the exponent codes as power-of-ten digits by applying each
# (pattern, replacement) rule in turn: H/h -> 2, K/k -> 3, M/m -> 6, B/b -> 9,
# and the symbols -, + and ? -> 0.
dam$PROPDMGEXP <- Reduce(
  function(codes, rule) gsub(rule[1], rule[2], codes),
  list(
    c("[Hh]", "2"),
    c("[Kk]", "3"),
    c("[Mm]", "6"),
    c("[Bb]", "9"),
    c("\\-|\\+|\\?", "0")
  ),
  dam$PROPDMGEXP
)

Convert character to number for Property Damage Exponent column

# Turn the digit strings into numbers; anything non-numeric becomes NA.
dam$PROPDMGEXP <- as.double(dam$PROPDMGEXP)

Change missing values to zero for Property Damage Exponent column

# A missing exponent is treated as 0, i.e. a multiplier of 10^0 = 1.
dam$PROPDMGEXP <- replace(dam$PROPDMGEXP, is.na(dam$PROPDMGEXP), 0)

Add calculated Property column to the data

# Property damage per record = amount scaled by ten to the exponent.
dam[["PROP"]] <- with(dam, PROPDMG * 10^PROPDMGEXP)

Subset for property damage per event type

# Keep only the event type and the computed property damage.
prop <- dam[c("EVTYPE", "PROP")]

Aggregate the property damage by event type, sum the property damage by event type

# Total property damage for each event type (one output row per EVTYPE).
prop <- aggregate(PROP ~ EVTYPE, prop, sum)

Order the property damage in descending order

# Sort event types from the highest to the lowest property damage total.
prop <- prop[order(prop[["PROP"]], decreasing = TRUE), , drop = FALSE]

Get the five event types with the most property damage

# Keep the (at most) five event types with the most property damage. head()
# never pads the result with all-NA rows when fewer than five event types
# exist, unlike prop[1:5, ].
propsum <- head(prop, 5)
propsum
##                EVTYPE         PROP
## 170             FLOOD 144657709807
## 411 HURRICANE/TYPHOON  69305840000
## 834           TORNADO  56947380676
## 670       STORM SURGE  43323536000
## 153       FLASH FLOOD  16822673978

Plot the five event types with the most property damage

# Bar chart of total property damage for the top five event types.
# The aesthetics name the columns of `data` directly instead of propsum$...;
# mapping with `$` bypasses the data argument and is discouraged by ggplot2.
# PROP is a summed damage amount (PROPDMG * 10^PROPDMGEXP), not a count of
# events, so the labels no longer say "Instances".
# NOTE(review): the unit is assumed to be US dollars per the NOAA storm data
# documentation -- confirm.
# The plot object is only stored here, not drawn.
pplot <- ggplot(data = propsum, aes(x = EVTYPE, y = PROP)) +
    geom_bar(colour = "black", fill = "grey", stat = "identity") +
    xlab("Event Type") +
    ylab("Property Damage (USD)") +
    ggtitle("Total Property Damage \n in the U.S. \n from 1950 - 2011") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

Crop Damage

Convert exponent to character for Crop Damage Exponent column

# Work on plain strings so the exponent codes can be rewritten with gsub().
dam[["CROPDMGEXP"]] <- as.character(dam[["CROPDMGEXP"]])

Convert letters to numbers for Crop Damage Exponent column

# Rewrite the exponent codes as power-of-ten digits by applying each
# (pattern, replacement) rule in turn: H/h -> 2, K/k -> 3, M/m -> 6, B/b -> 9,
# and the symbols -, + and ? -> 0.
dam$CROPDMGEXP <- Reduce(
  function(codes, rule) gsub(rule[1], rule[2], codes),
  list(
    c("[Hh]", "2"),
    c("[Kk]", "3"),
    c("[Mm]", "6"),
    c("[Bb]", "9"),
    c("\\-|\\+|\\?", "0")
  ),
  dam$CROPDMGEXP
)

Convert character to number for Crop Damage Exponent column

# Turn the digit strings into numbers; anything non-numeric becomes NA.
dam$CROPDMGEXP <- as.double(dam$CROPDMGEXP)

Change missing values to zero for Crop Damage Exponent column

# A missing exponent is treated as 0, i.e. a multiplier of 10^0 = 1.
dam$CROPDMGEXP <- replace(dam$CROPDMGEXP, is.na(dam$CROPDMGEXP), 0)

Add calculated Crop column to the data

# Crop damage per record = amount scaled by ten to the exponent.
dam[["CROP"]] <- with(dam, CROPDMG * 10^CROPDMGEXP)

Subset the data for crop damage per event type

# Keep only the event type and the computed crop damage.
crop <- dam[c("EVTYPE", "CROP")]

Aggregate the crop damage by event type, sum the crop damage by event type

# Total crop damage for each event type (one output row per EVTYPE).
crop <- aggregate(CROP ~ EVTYPE, crop, sum)

Order the crop damage in descending order

# Sort event types from the highest to the lowest crop damage total.
crop <- crop[order(crop[["CROP"]], decreasing = TRUE), , drop = FALSE]

Get the five event types with the most crop damage

# Keep the (at most) five event types with the most crop damage. head() never
# pads the result with all-NA rows when fewer than five event types exist,
# unlike crop[1:5, ].
cropsum <- head(crop, 5)
cropsum
##          EVTYPE        CROP
## 95      DROUGHT 13972566000
## 170       FLOOD  5661968450
## 590 RIVER FLOOD  5029459000
## 427   ICE STORM  5022113500
## 244        HAIL  3025954473

Plot the five event types with the most crop damage

# Bar chart of total crop damage for the top five event types.
# The aesthetics name the columns of `data` directly instead of cropsum$...;
# mapping with `$` bypasses the data argument and is discouraged by ggplot2.
# CROP is a summed damage amount (CROPDMG * 10^CROPDMGEXP), not a count of
# events, so the labels no longer say "Instances".
# NOTE(review): the unit is assumed to be US dollars per the NOAA storm data
# documentation -- confirm.
# The plot object is only stored here, not drawn.
cplot <- ggplot(data = cropsum, aes(x = EVTYPE, y = CROP)) +
    geom_bar(colour = "black", fill = "grey", stat = "identity") +
    xlab("Event Type") +
    ylab("Crop Damage (USD)") +
    ggtitle("Total Crop Damage \n in the U.S. \n from 1950 - 2011") +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

Results

Question 1: Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

These graphs show the top five event types that led to the most fatalities and injuries over the years of 1950 to 2011 in the United States. Tornadoes led to the most fatalities as well as the most injuries.

Question 2: Across the United States, which types of events have the greatest economic consequences?

These graphs show the top five event types that led to the most property and crop damage over the years of 1950 to 2011 in the United States. Floods led to the most property damage while droughts led to the most crop damage.

Conclusion

Population Health

Tornadoes.

Economic Consequences

Floods and droughts.