Synopsis
This report examines how severe weather events, such as storms, affect the population of the United States. These events can cause both public health and economic problems for communities and municipalities. To carry out this work, we explore the U.S. National Oceanic and Atmospheric Administration (NOAA) storm database, which tracks the characteristics of major storms and weather events in the United States from 1950 to 2011; records from more recent years should be considered more complete. The data include when and where each event occurred, as well as estimates of any fatalities, injuries, and property damage. The report aims to answer two questions: which types of events are most harmful to population health, and which types of events have the greatest economic consequences?

Loading and Processing the Raw Data

require(dplyr)
## Loading required package: dplyr
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
require(ggplot2)
## Loading required package: ggplot2
library(tidyr)

The dataset was obtained from the address below:

# Location of the bzip2-compressed CSV used in this report.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

# Local copy, stored in the working directory. The same path is used for both
# the download and the read so the two cannot drift apart.
dataFile <- "repdata%2Fdata%2FStormData.csv.bz2"

# Fetch the archive only when it is not already present, so re-running the
# report does not download it again. The default download method is used
# instead of forcing an external curl binary; mode = "wb" keeps the
# compressed file intact on Windows.
if (!file.exists(dataFile)) {
  download.file(fileUrl, destfile = dataFile, mode = "wb")
}

# read.csv() reads the compressed file directly; no manual decompression.
stormdata <- read.csv(dataFile, header = TRUE, sep = ",")

# Check the size of the loaded table (number of rows, number of columns).
dim(stormdata)
## [1] 902297     37
# Preview the first rows to see which columns are available.
head(stormdata)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
## 4 TORNADO         0                                               0
## 5 TORNADO         0                                               0
## 6 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
## 4         NA         0                       0.0   100 2   0          0
## 5         NA         0                       0.0   150 2   0          0
## 6         NA         0                       1.5   177 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
## 4        2     2.5          K       0                                    
## 5        2     2.5          K       0                                    
## 6        6     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3
## 4     3458      8626          0          0              4
## 5     3412      8642          0          0              5
## 6     3450      8748          0          0              6

The damage estimates are stored together with a unit code given as a letter (H, K, M or B). We will convert these letters into numerical multipliers (hundreds, thousands, millions and billions).

# Translate damage-exponent codes into the numeric multipliers they stand for.
#
# codes: vector of exponent codes (character or factor).
# Returns a numeric vector of the same length as `codes`:
#   H/h -> 100, K/k -> 1e3, M/m -> 1e6, B/b -> 1e9.
# Every other value (blank, digits, "+", "-", "?", NA) maps to 1, i.e. the
# damage figure is taken at face value.
exp_to_multiplier <- function(codes) {
  lookup <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  # Indexing by name yields NA for anything that is not a known letter,
  # including the empty string and NA itself.
  multiplier <- unname(lookup[toupper(as.character(codes))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Apply the same conversion to both exponent columns; after this step they
# hold numeric multipliers instead of unit codes.
stormdata$PROPDMGEXP <- exp_to_multiplier(stormdata$PROPDMGEXP)
stormdata$CROPDMGEXP <- exp_to_multiplier(stormdata$CROPDMGEXP)

We will select the variables related to population health (fatalities and injuries) and total them by event type.

library(dplyr, warn.conflicts = FALSE, quietly=TRUE)
library(reshape2)
## 
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
## 
##     smiths
# Per event type: number of records, total fatalities and total injuries,
# plus their combined count (TOTINJ), ordered from most to least harmful.
health <- stormdata %>%
  group_by(EVTYPE) %>%
  summarize(
    EVENTS = n(),
    FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES)
  ) %>%
  mutate(TOTINJ = FATALITIES + INJURIES) %>%
  arrange(desc(TOTINJ))

We will keep only the top 5 events for better visualization.

# Keep the five event types with the highest combined casualties (`health`
# is sorted by TOTINJ, descending). head() returns fewer rows when fewer than
# five exist, instead of padding the result with NA rows.
Red_Health <- head(health, 5)

# Fix the factor levels in ranking order so the plot keeps the bars sorted by
# impact rather than alphabetically.
top_health_events <- as.character(Red_Health$EVTYPE)
Red_Health$EVTYPE <- factor(top_health_events, levels = unique(top_health_events))

# Reshape to long format: one row per event type and measure (FATALITIES or
# INJURIES), as needed for a stacked bar chart. `id.vars` is spelled out in
# full rather than relying on partial argument matching.
Red_Health <- melt(
  Red_Health,
  id.vars = c("EVTYPE", "EVENTS"),
  measure.vars = c("FATALITIES", "INJURIES")
)

Events that have the greatest economic consequences

# Per event type: number of records and total property / crop damage in
# billions of USD, plus their sum (TOTDMG), ordered from most to least costly.
# Each damage figure is scaled by its multiplier column before summing;
# PROPDMGEXP and CROPDMGEXP are expected to hold numeric multipliers here.
economy <- stormdata %>%
  group_by(EVTYPE) %>%
  summarize(
    EVENTS = n(),
    PROPDMG = sum(PROPDMG * PROPDMGEXP / 1e9),
    CROPDMG = sum(CROPDMG * CROPDMGEXP / 1e9)
  ) %>%
  mutate(TOTDMG = PROPDMG + CROPDMG) %>%
  arrange(desc(TOTDMG))

# Keep the five event types with the highest total damage (`economy` is
# sorted by TOTDMG, descending). head() returns fewer rows when fewer than
# five exist, instead of padding the result with NA rows.
Red_economy <- head(economy, 5)

# Fix the factor levels in ranking order so the plot keeps the bars sorted by
# cost rather than alphabetically.
top_cost_events <- as.character(Red_economy$EVTYPE)
Red_economy$EVTYPE <- factor(top_cost_events, levels = unique(top_cost_events))

# Reshape to long format: one row per event type and damage kind (PROPDMG or
# CROPDMG), as needed for a stacked bar chart. `id.vars` is spelled out in
# full rather than relying on partial argument matching.
Red_economy <- melt(
  Red_economy,
  id.vars = c("EVTYPE", "EVENTS"),
  measure.vars = c("PROPDMG", "CROPDMG")
)

Results

The graph below shows that tornadoes are the storm-related event most damaging to people’s health.

# Stacked bar chart of the selected event types: one bar per event type,
# split into fatalities and injuries, with the legend placed inside the panel.
ggplot(Red_Health, aes(x = EVTYPE, y = value, fill = variable)) +
  geom_bar(stat = "Identity") +
  labs(
    title = "Events with Largest Health Impact",
    x = "Event Type",
    y = "Injuries & Fatalities"
  ) +
  theme(
    legend.title = element_blank(),
    legend.position = c(0.9, 0.8)
  )

The chart below shows that floods and hurricanes were the most economically damaging events.

# Stacked bar chart of the selected event types: one bar per event type,
# split into property and crop damage, with readable legend labels and the
# legend placed inside the panel.
ggplot(Red_economy, aes(x = EVTYPE, y = value, fill = variable)) +
  geom_bar(stat = "Identity") +
  labs(
    title = "Events with Largest Economic Impact",
    x = "Event Type",
    y = "Total Cost ($Billion USD)"
  ) +
  scale_fill_discrete(labels = c("Property Damage", "Crop Damage")) +
  theme(
    legend.title = element_blank(),
    legend.position = c(0.9, 0.8)
  )