Synopsis

This analysis uses the U.S. National Oceanic and Atmospheric Administration (NOAA) storm database to assess the damage caused by severe weather events. Four variables were considered: fatalities, injuries, property damage and crop damage. Judging from the graphs, tornadoes and heat caused the greatest harm to population health (fatalities and injuries) and were therefore the most harmful to the population. Thunderstorms, rain and other storms, in turn, caused the greatest damage to property and crops and therefore had the greatest economic consequences.

Data Processing

Setting the working directory and loading the data

# NOTE(review): setwd() ties this script to one machine's directory layout.
# It is kept so the relative file names below keep resolving as before;
# consider project-relative paths instead.
setwd("~/Coursera_directory/RepData_project2")

# Download the compressed storm data only when no local copy exists.
if (!file.exists("repdata-data-StormData.csv.bz2")) {
  URL <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  # mode = "wb": the archive is binary, and a text-mode transfer can corrupt
  # it on Windows.
  download.file(
    url = URL,
    destfile = "repdata-data-StormData.csv.bz2",
    mode = "wb"
  )
}

# The compressed file is handed to read.csv() directly; the first row holds
# the column names.
data <- read.csv("repdata-data-StormData.csv.bz2", header = TRUE)

# Preview the first five records (head() never pads with NA rows).
head(data, 5)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5

Packages

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(reshape2)
library(ggplot2)
library(ggthemes)

Target variables

# Derive the calendar year of each event from its begin date, which is stored
# as text such as "4/18/1950 0:00:00". BGN_DATE is referenced bare so that
# mutate() resolves it through data masking rather than via the global `data`.
data <- mutate(data, year = year(mdy_hms(BGN_DATE)))

# Sum the health and the economic variables per event type and year.
# NOTE(review): PROPDMG and CROPDMG are summed as stored; the PROPDMGEXP /
# CROPDMGEXP columns (e.g. "K") are not applied here - confirm this is intended.
health_total <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE + year,
  data = data,
  FUN = sum
)
economic_total <- aggregate(
  cbind(PROPDMG, CROPDMG) ~ EVTYPE + year,
  data = data,
  FUN = sum
)

Variables of fatality and injury

# Rank the event/year totals: most fatalities first, ties broken by injuries.
health_total_arranged <- health_total %>%
  arrange(desc(FATALITIES), desc(INJURIES))

# Keep and display the five highest-ranked rows.
TopRank <- health_total_arranged[seq_len(5), ]
TopRank
##           EVTYPE year FATALITIES INJURIES
## 1           HEAT 1995        687      808
## 2        TORNADO 2011        587     6163
## 3        TORNADO 1953        519     5131
## 4 EXCESSIVE HEAT 1999        500     1461
## 5        TORNADO 1974        366     6824
# Relabel "EXCESSIVE HEAT" as "HEAT" so both share one event name.
# The pattern is plain text, so it is matched literally.
health_total_arranged$EVTYPE <- gsub(
  pattern = "EXCESSIVE HEAT",
  replacement = "HEAT",
  x = health_total_arranged$EVTYPE,
  fixed = TRUE
)

Unification of property and crop variables in a single column

# Stack PROPDMG and CROPDMG into one `value` column, with `variable` recording
# which of the two each row came from. `id.vars` is spelled out in full rather
# than relying on partial matching of `id`.
economic_total_long <- melt(economic_total, id.vars = c("year", "EVTYPE"))

# Preview the first five rows (head() never pads with NA rows).
head(economic_total_long, 5)
##   year  EVTYPE variable    value
## 1 1950 TORNADO  PROPDMG 16999.15
## 2 1951 TORNADO  PROPDMG 10560.99
## 3 1952 TORNADO  PROPDMG 16679.74
## 4 1953 TORNADO  PROPDMG 19182.20
## 5 1954 TORNADO  PROPDMG 23367.82

Reassigning the event names to enable the grouping of the events (EVTYPE) for comparison

Grouping each event by total sum

# Total the damage values for every year / event type / damage variable
# combination.
economic_total_plot <- aggregate(
  value ~ year + EVTYPE + variable,
  data = economic_total_long,
  FUN = sum
)

# Show the first five rows of the result.
economic_total_plot[seq_len(5), ]
##   year         EVTYPE variable    value
## 1 1950 COASTAL ISSUES  PROPDMG 16999.15
## 2 1951 COASTAL ISSUES  PROPDMG 10560.99
## 3 1952 COASTAL ISSUES  PROPDMG 16679.74
## 4 1953 COASTAL ISSUES  PROPDMG 19182.20
## 5 1954 COASTAL ISSUES  PROPDMG 23367.82

The economic damage variables were renamed

# Replace the damage codes with readable labels.
# Work on character values so the new labels can be assigned freely.
economic_total_plot$variable <- as.character(economic_total_plot$variable)
economic_total_plot$variable[
  grepl("PROPDMG", economic_total_plot$variable, ignore.case = TRUE)
] <- "PROPERTY DAMAGE"
economic_total_plot$variable[
  grepl("CROPDMG", economic_total_plot$variable, ignore.case = TRUE)
] <- "CROP DAMAGE"

# Rename the event column by name rather than by position, so the rename does
# not depend on column order.
names(economic_total_plot)[names(economic_total_plot) == "EVTYPE"] <-
  "Type of event"

Results

Types of harmful events to the population

# Fatalities by year for the first ten rows of the ranked health table,
# one facet per event type.
g1 <- ggplot(
  data = health_total_arranged[seq_len(10), ],
  mapping = aes(x = year, y = FATALITIES)
)
g1 +
  geom_col(colour = "black", size = 1) +
  facet_grid(. ~ EVTYPE) +
  labs(title = "Fatalities per event", x = "Year", y = "Occurrences") +
  theme_igray()

# Injuries by year for the first ten rows of the ranked health table,
# one facet per event type.
# NOTE(review): the table is ordered by fatalities first, so these are not
# necessarily the ten rows with the most injuries - confirm this is intended.
g2 <- ggplot(
  data = health_total_arranged[seq_len(10), ],
  mapping = aes(x = year, y = INJURIES)
)
g2 +
  geom_col(colour = "black", size = 1) +
  facet_grid(. ~ EVTYPE) +
  labs(title = "Injuries per event", x = "Year", y = "Occurrences") +
  theme_igray()

Considering both fatalities and injuries, tornadoes and heat account for the highest counts. Thus, it can be concluded that, among the event types, tornadoes and heat were the most harmful to the population.

Types of events with greatest economic consequences

# Yearly damage totals as one line per event type, with a separate panel for
# each damage variable.
g3 <- ggplot(
  economic_total_plot,
  aes(x = year, y = value, colour = `Type of event`)
) +
  geom_line(lwd = 1) +
  facet_grid(. ~ variable) +
  labs(title = "Economical damage per event", x = "Year", y = "Occurrences") +
  theme_igray()
g3

As the plot shows, thunderstorms, rain and other storms were the main causes of damage to property and crops. Thus, it can be concluded that storms have the greatest economic consequences.