library(knitr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
library(ggplot2)
library(tidyr)

# Remember the working directory the report is run from.
directory <- getwd()

# Path of the ".RData" file located in that same working directory.
workspace <- file.path(getwd(), ".RData")

1.- Introduction

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

The purpose of this analysis is to identify which types of events cause the most harm, so that such negative outcomes can be prevented to the extent possible.

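The two questions to be answered are:

1. Which event types are most harmful to population health?
2. Which event types have the greatest economic consequences?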

2.- Data Processing

2.1 Data Loading

2.1.- Code for reading in the dataset and/or processing the data.

# Read the bzip2-compressed storm data CSV (looked up relative to the working
# directory) into a data frame via a bzfile connection.
dataset <- read.csv(
  file = bzfile(description = "repdata_data_StormData.csv.bz2")
)

# Show the first rows to check the columns that were read.
head(x = dataset)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6

2.2.- The events in the database start in the year 1950 and end in November 2011. In the earlier years of the database there are generally fewer events recorded.

# Replace the begin-date strings (month/day/year hour:minute:second) with Date
# values; as.Date keeps only the calendar date.
dataset[["BGN_DATE"]] <- as.Date(
  dataset[["BGN_DATE"]],
  format = "%m/%d/%Y %H:%M:%S"
)

# Summarise the range and quartiles of the parsed dates.
summary(dataset[["BGN_DATE"]])
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "1950-01-03" "1995-04-20" "2002-03-18" "1998-12-27" "2007-07-28" "2011-11-30"

2.3.- Prepare data for question 1:

Which event types are most harmful to population health?

Group the data by the variable “EVTYPE” and compute the total FATALITIES and total INJURIES for each event type.

# Total fatalities and injuries for every event type.
health_by_event <- summarise(
  group_by(dataset, EVTYPE),
  FATALITIES = sum(FATALITIES),
  INJURIES = sum(INJURIES)
)

# Rank event types by combined fatalities + injuries, largest first, and store
# the result as a data.table.
healthdata <- as.data.table(
  arrange(health_by_event, desc(FATALITIES + INJURIES))
)

# Keep the ten highest-ranked event types and display them.
mostHarm <- healthdata[seq_len(10), ]
print(mostHarm)
##                EVTYPE FATALITIES INJURIES
##  1:           TORNADO       5633    91346
##  2:    EXCESSIVE HEAT       1903     6525
##  3:         TSTM WIND        504     6957
##  4:             FLOOD        470     6789
##  5:         LIGHTNING        816     5230
##  6:              HEAT        937     2100
##  7:       FLASH FLOOD        978     1777
##  8:         ICE STORM         89     1975
##  9: THUNDERSTORM WIND        133     1488
## 10:      WINTER STORM        206     1321

2.4.- Prepare data for question 2:

Which event types have the greatest economic consequences?

Property damage is recorded in two variables, PROPDMG and PROPDMGEXP; crop damage is recorded in the same way in CROPDMG and CROPDMGEXP. This step first converts the magnitude characters in PROPDMGEXP and CROPDMGEXP to numeric multipliers and then multiplies them by PROPDMG and CROPDMG respectively.

# Work on a separate object so `dataset` keeps its original exponent codes.
dataset1 <- dataset

# Translate damage-exponent codes into numeric multipliers.
#
# exp_code: vector of magnitude codes ("K" = thousands, "M" = millions,
#           "B" = billions).
# Returns a numeric vector of the same length. Codes are upper-cased first so
# that lower-case entries ("k", "m", "b") are scaled like their upper-case
# counterparts instead of silently falling through to the default; any other
# code (including the empty string) gets a multiplier of 1.
exp_multiplier <- function(exp_code) {
  recode(
    toupper(as.character(exp_code)),
    "K" = 1e3,
    "M" = 1e6,
    "B" = 1e9,
    .default = 1
  )
}

# Overwrite the exponent columns with their numeric multipliers.
dataset1$PROPDMGEXP <- exp_multiplier(dataset1$PROPDMGEXP)
dataset1$CROPDMGEXP <- exp_multiplier(dataset1$CROPDMGEXP)

# Damage value = reported amount x multiplier.
dataset1$PROPDMGVALUE <- dataset1$PROPDMG * dataset1$PROPDMGEXP
dataset1$CROPDMGVALUE <- dataset1$CROPDMG * dataset1$CROPDMGEXP

The greatest economic consequences are evaluated by property and crop damage collectively. The top 10 event types with greatest economic consequences are shown below.

# Total property and crop damage per event type, ranked by their combined
# value (largest first) and stored as a data.table.
econdata <- dataset1 %>%
  group_by(EVTYPE) %>%
  summarise(
    PROPDMGVALUE = sum(PROPDMGVALUE),
    CROPDMGVALUE = sum(CROPDMGVALUE)
  ) %>%
  arrange(desc(PROPDMGVALUE + CROPDMGVALUE)) %>%
  as.data.table()

# Keep the ten highest-ranked event types and display them.
mostEcon <- econdata[seq_len(10), ]
print(mostEcon)
##                EVTYPE PROPDMGVALUE CROPDMGVALUE
##  1:             FLOOD 144657709807   5661968450
##  2: HURRICANE/TYPHOON  69305840000   2607872800
##  3:           TORNADO  56925660790    414953270
##  4:       STORM SURGE  43323536000         5000
##  5:              HAIL  15727367053   3025537890
##  6:       FLASH FLOOD  16140812067   1421317100
##  7:           DROUGHT   1046106000  13972566000
##  8:         HURRICANE  11868319010   2741910000
##  9:       RIVER FLOOD   5118945500   5029459000
## 10:         ICE STORM   3944927860   5022113500

3.- Results.

3.1.- The harm of events to population health is evaluated by fatalities and injuries caused collectively. The top 10 most harmful event types are illustrated below.

# Reshape the top-10 table to long form: one row per event type and harm type
# (TYPE is "FATALITIES" or "INJURIES", VALUE is the corresponding total).
# pivot_longer() replaces the superseded gather().
plotdata <- pivot_longer(
  mostHarm,
  cols = FATALITIES:INJURIES,
  names_to = "TYPE",
  values_to = "VALUE"
)

# Stacked bars per event type. reorder() sorts the x axis by the mean of -VALUE
# within each event type, i.e. from most to least harmful.
ggplot(plotdata, aes(x = reorder(EVTYPE, -VALUE), y = VALUE, fill = TYPE)) +
  geom_col() +
  labs(
    title = "Harmful Events to Population Health",
    x = "Event Type",
    y = "Count"
  ) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1.0, size = 5.5)
  )

The plot reveals that tornadoes are the most harmful event type for population health, followed by excessive heat.

3.2.- The greatest economic consequences are evaluated by property and crop damage collectively. The top 10 event types with greatest economic consequences are shown below.

# Reshape the top-10 table to long form: one row per event type and damage type
# (TYPE is "PROPDMGVALUE" or "CROPDMGVALUE", VALUE is the corresponding total).
# pivot_longer() replaces the superseded gather().
plotdata2 <- pivot_longer(
  mostEcon,
  cols = PROPDMGVALUE:CROPDMGVALUE,
  names_to = "TYPE",
  values_to = "VALUE"
)

# Stacked bars per event type, ordered from largest to smallest damage.
# The factor levels are spelled out so each legend label is tied to its column
# name rather than to the implicit alphabetical ordering of TYPE.
ggplot(
  plotdata2,
  aes(
    x = reorder(EVTYPE, -VALUE),
    y = VALUE,
    fill = factor(
      TYPE,
      levels = c("CROPDMGVALUE", "PROPDMGVALUE"),
      labels = c("crop damage", "property damage")
    )
  )
) +
  geom_col() +
  # The y axis shows damage amounts, not event counts.
  labs(
    title = "Economically Harmful Events",
    x = "Event Type",
    y = "Damage (USD)"
  ) +
  guides(fill = guide_legend(title = "Type of damage")) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1.0, size = 5.5)
  )

It can be seen that floods have the greatest economic consequences.