Synopsis

The purpose of this analysis is to identify which types of storm events are most harmful to population health and which types of storm events have the greatest economic consequences. The raw data were taken from the NOAA Storm Database. The codebook for the data was taken from the Investigative Reporters & Editors web site. The event types are described on the NOAA web site. The following columns were used to answer these two questions: EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP (BGN_DATE was used only to select the period analysed).

Because the most recent years can be considered the most complete, the analysis covers only events that began in 2011.

Data Processing

Loading the data

  • The data file was downloaded from the NOAA Storm Database.
  • The CSV file was extracted to the working directory.
  • The data were loaded into R:
# Switch every locale category to English before the data are loaded.
Sys.setlocale(category = "LC_ALL", locale = "English")
## [1] "LC_COLLATE=English_United States.1252;LC_CTYPE=English_United States.1252;LC_MONETARY=English_United States.1252;LC_NUMERIC=C;LC_TIME=English_United States.1252"
# Read the extracted storm CSV from the working directory; text columns stay
# as character vectors rather than being converted to factors.
storms_full <- read.csv(
  file = "repdata-data-StormData.csv",
  stringsAsFactors = FALSE
)

Cleaning the data

  • A new data frame was created containing only the desired rows and columns:
# Parse the begin date. Only the calendar day is needed, so a date-only format
# is used; as.Date() ignores whatever follows the part matched by the format
# (here the " H:M:S" suffix), which removes the strptime() round trip.
storms_full$BGN_DATE <- as.Date(storms_full$BGN_DATE, format = "%m/%d/%Y")

# Keep only events that began in calendar year 2011, and only the columns the
# analysis uses. The explicit upper bound keeps the subset aligned with the
# stated scope ("only 2011") even if the file contains later years.
# subset() drops rows whose date could not be parsed (NA condition).
storms <- subset(
  storms_full,
  BGN_DATE >= as.Date("2011-01-01") & BGN_DATE <= as.Date("2011-12-31"),
  select = c(
    "EVTYPE", "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
  )
)
  • Number of observations:
# Report how many events remain after filtering.
nrow(x = storms)
## [1] 62174
  • The event types were cleaned and replaced by the proper ones:
# Upper-case the labels so case variants of one event type are grouped together.
storms$EVTYPE <- toupper(storms$EVTYPE)

# Map the hurricane labels onto the single name "HURRICANE (TYPHOON)".
# The pattern is anchored to the whole label: a plain substring replacement
# would expand labels that already mention TYPHOON a second time (for example
# "HURRICANE/TYPHOON" -> "HURRICANE (TYPHOON)/TYPHOON") and would not be safe
# to run twice. Labels with any other trailing text are left untouched.
storms$EVTYPE <- sub(
  "^HURRICANE( *[/(] *TYPHOON\\)?)?$",
  "HURRICANE (TYPHOON)",
  storms$EVTYPE
)

# Landslide events are counted under AVALANCHE.
# NOTE(review): this grouping is the analyst's choice - confirm it against the
# NOAA event-type list.
storms$EVTYPE <- gsub("LANDSLIDE", "AVALANCHE", storms$EVTYPE)
  • NAs were replaced by zeros on FATALITIES and INJURIES columns:
# Treat a missing casualty count as zero so it does not turn sums into NA.
storms$FATALITIES <- replace(storms$FATALITIES, is.na(storms$FATALITIES), 0)
storms$INJURIES <- replace(storms$INJURIES, is.na(storms$INJURIES), 0)
  • Adding FATALITIES and INJURIES columns and saving in a separate column:
# Per-event health impact: fatalities and injuries added together.
storms$HEALTH_TOTAL <- with(storms, FATALITIES + INJURIES)
  • Processing PROPDMG and PROPDMGEXP columns in a separate column:
# Property damage per event: PROPDMG scaled by the magnitude letter in
# PROPDMGEXP (H = 1e2, K = 1e3, M = 1e6, B = 1e9).
# A missing, empty or unrecognised letter leaves the amount unscaled, and a
# missing amount counts as 0 - the same rules as the former row-by-row loop.
# The column is now computed in one vectorised pass: the loop grew its result
# with c() on every row (quadratic copying) and failed on an empty data frame
# because of 1:nrow(). The letter is upper-cased first so that lower-case
# codes ("k", "m", ...) are scaled as well instead of silently counting as 1.
prop_multiplier <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
prop_scale <- unname(
  prop_multiplier[match(toupper(storms$PROPDMGEXP), names(prop_multiplier))]
)
prop_scale[is.na(prop_scale)] <- 1
PROPTOTAL <- ifelse(is.na(storms$PROPDMG), 0, storms$PROPDMG * prop_scale)
storms$PROPTOTAL <- PROPTOTAL
  • Processing CROPDMG and CROPDMGEXP columns in a separate column:
# Crop damage per event: CROPDMG scaled by the magnitude letter in
# CROPDMGEXP (H = 1e2, K = 1e3, M = 1e6, B = 1e9).
# A missing, empty or unrecognised letter leaves the amount unscaled, and a
# missing amount counts as 0 - the same rules as the former row-by-row loop.
# The column is now computed in one vectorised pass: the loop grew its result
# with c() on every row (quadratic copying) and failed on an empty data frame
# because of 1:nrow(). The letter is upper-cased first so that lower-case
# codes ("k", "m", ...) are scaled as well instead of silently counting as 1.
crop_multiplier <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
crop_scale <- unname(
  crop_multiplier[match(toupper(storms$CROPDMGEXP), names(crop_multiplier))]
)
crop_scale[is.na(crop_scale)] <- 1
CROPTOTAL <- ifelse(is.na(storms$CROPDMG), 0, storms$CROPDMG * crop_scale)
storms$CROPTOTAL <- CROPTOTAL
  • Adding PROPTOTAL and CROPTOTAL columns and saving in a separate column:
# Per-event economic impact: property damage plus crop damage.
storms$ECONOMIC_TOTAL <- with(storms, PROPTOTAL + CROPTOTAL)

Results

Which types of storm events are most harmful with respect to population health?

This result was obtained by summing HEALTH_TOTAL for each EVTYPE in the clean dataset and sorting the event types by that total in decreasing order.

# Total health impact per event type.
storms_1 <- aggregate(HEALTH_TOTAL ~ EVTYPE, data = storms, FUN = sum)
# Largest total first; negating the key sorts descending while tied event
# types keep the order aggregate() produced.
storms_1 <- storms_1[with(storms_1, order(-HEALTH_TOTAL)), ]
# Print the ranked event types.
storms_1[["EVTYPE"]]
##  [1] "TORNADO"                  "HEAT"                    
##  [3] "THUNDERSTORM WIND"        "LIGHTNING"               
##  [5] "EXCESSIVE HEAT"           "WILDFIRE"                
##  [7] "FLASH FLOOD"              "FLOOD"                   
##  [9] "RIP CURRENT"              "STRONG WIND"             
## [11] "HAIL"                     "COLD/WIND CHILL"         
## [13] "HIGH SURF"                "AVALANCHE"               
## [15] "MARINE THUNDERSTORM WIND" "HIGH WIND"               
## [17] "DUST DEVIL"               "MARINE STRONG WIND"      
## [19] "TROPICAL STORM"           "DUST STORM"              
## [21] "EXTREME COLD/WIND CHILL"  "BLIZZARD"                
## [23] "COASTAL FLOOD"            "HEAVY RAIN"              
## [25] "WINTER WEATHER"           "TSUNAMI"                 
## [27] "WINTER STORM"             "ASTRONOMICAL LOW TIDE"   
## [29] "DENSE FOG"                "DENSE SMOKE"             
## [31] "DROUGHT"                  "FREEZING FOG"            
## [33] "FROST/FREEZE"             "FUNNEL CLOUD"            
## [35] "HEAVY SNOW"               "HURRICANE (TYPHOON)"     
## [37] "ICE STORM"                "LAKE-EFFECT SNOW"        
## [39] "LAKESHORE FLOOD"          "MARINE HAIL"             
## [41] "MARINE HIGH WIND"         "SEICHE"                  
## [43] "SLEET"                    "STORM SURGE/TIDE"        
## [45] "WATERSPOUT"

The top 5 types of storm events are shown in the plot below.

# Keep the first five rows of the ranked table and plot their totals.
storms_1 <- head(storms_1, 5)
barplot(
  storms_1$HEALTH_TOTAL,
  main = "Most harmful storm events (population health)",
  xlab = "Type",
  # The bar height is HEALTH_TOTAL (fatalities plus injuries); the previous
  # label "Factor" did not describe the plotted quantity.
  ylab = "Fatalities + injuries",
  names.arg = storms_1$EVTYPE,
  cex.names = 0.6
)

Which types of storm events have the greatest economic consequences?

This result was obtained by summing ECONOMIC_TOTAL for each EVTYPE in the clean dataset and sorting the event types by that total in decreasing order.

# Total economic impact per event type.
storms_2 <- aggregate(ECONOMIC_TOTAL ~ EVTYPE, data = storms, FUN = sum)
# Largest total first; negating the key sorts descending while tied event
# types keep the order aggregate() produced.
storms_2 <- storms_2[with(storms_2, order(-ECONOMIC_TOTAL)), ]
# Print the ranked event types.
storms_2[["EVTYPE"]]
##  [1] "TORNADO"                  "FLOOD"                   
##  [3] "FLASH FLOOD"              "WILDFIRE"                
##  [5] "HAIL"                     "THUNDERSTORM WIND"       
##  [7] "TROPICAL STORM"           "HIGH WIND"               
##  [9] "TSUNAMI"                  "LIGHTNING"               
## [11] "STORM SURGE/TIDE"         "HEAVY RAIN"              
## [13] "STRONG WIND"              "DROUGHT"                 
## [15] "COASTAL FLOOD"            "AVALANCHE"               
## [17] "HURRICANE (TYPHOON)"      "FROST/FREEZE"            
## [19] "WINTER STORM"             "HEAVY SNOW"              
## [21] "ICE STORM"                "LAKESHORE FLOOD"         
## [23] "EXTREME COLD/WIND CHILL"  "WATERSPOUT"              
## [25] "BLIZZARD"                 "WINTER WEATHER"          
## [27] "EXCESSIVE HEAT"           "MARINE HIGH WIND"        
## [29] "LAKE-EFFECT SNOW"         "DUST STORM"              
## [31] "MARINE STRONG WIND"       "HIGH SURF"               
## [33] "DENSE FOG"                "MARINE THUNDERSTORM WIND"
## [35] "COLD/WIND CHILL"          "FUNNEL CLOUD"            
## [37] "DUST DEVIL"               "ASTRONOMICAL LOW TIDE"   
## [39] "DENSE SMOKE"              "FREEZING FOG"            
## [41] "HEAT"                     "MARINE HAIL"             
## [43] "RIP CURRENT"              "SEICHE"                  
## [45] "SLEET"

The top 5 types of storm events are shown in the plot below.

# Keep the first five rows of the ranked table.
storms_2 <- head(storms_2, n = 5)
# Express the totals in billions so the axis matches its label.
storms_2$ECONOMIC_TOTAL <- storms_2$ECONOMIC_TOTAL / 1e9
barplot(
  height = storms_2$ECONOMIC_TOTAL,
  main = "Most harmful storm events (economics)",
  xlab = "Type",
  ylab = "Dollars (billions)",
  names.arg = storms_2$EVTYPE,
  cex.names = 0.6
)