The Project

The basic goal is to explore the NOAA Storm Database in relation to severe weather events and answer the following questions:

1. Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

2. Across the United States, which types of events have the greatest economic consequences?

Data Processing

The NOAA database was provided as StormData.csv.bz2; the file contains 902,297 observations and 37 variables.

library(readr)
## Warning: package 'readr' was built under R version 3.3.3
# Read the storm data file, given by a relative path, into a data frame
stormdata <- read.csv(file = "StormData.csv.bz2")
# Number of rows and columns that were read
dim(stormdata)
## [1] 902297     37
# Names of the columns that are available
names(stormdata)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

Only the following variables are of interest for the present analysis:

    EVTYPE: Type of weather event.
    FATALITIES: Number of fatalities.
    INJURIES: Number of injuries.
    PROPDMG: Amount of property damage.
    PROPDMGEXP: Exponent (magnitude code) applied to the property damage amount.
    CROPDMG: Amount of crop damage.
    CROPDMGEXP: Exponent (magnitude code) applied to the crop damage amount.
    
library(dplyr)

# Keep only the event type, casualty and damage columns used in the analysis
data <- select(
  stormdata,
  EVTYPE, FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP,
  CROPDMG, CROPDMGEXP
)


# Getting the exponents in numeric format

# Numeric strings that the letter codes stand for
exponent_codes <- c(H = "1e2", K = "1e3", M = "1e6", B = "1e9")

# Swap the letter codes (upper or lower case) for their numeric strings; every
# other value is returned unchanged, as a character string
recode_exponent <- function(exponent) {
  exponent <- as.character(exponent)
  position <- match(toupper(exponent), names(exponent_codes))
  is_letter <- !is.na(position)
  exponent[is_letter] <- unname(exponent_codes[position[is_letter]])
  exponent
}

# NOTE(review): only H/K/M/B are mapped to powers of ten. A digit code is used
# as the multiplier itself, and any value as.numeric() cannot parse becomes NA
# and then 0 below, which zeroes the damage amount. Confirm that this is the
# intended reading of the NOAA exponent codes.

# for properties
data$PROPDMGEXP <- as.numeric(recode_exponent(data$PROPDMGEXP))
## Warning: NAs introduced by coercion
# for crops
data$CROPDMGEXP <- as.numeric(recode_exponent(data$CROPDMGEXP))
## Warning: NAs introduced by coercion
# Incorporating the exponent into the original data and eliminating NA's

# for properties
data$PROPDMGEXP <- replace(data$PROPDMGEXP, is.na(data$PROPDMGEXP), 0)
data$PROPDMG <- data$PROPDMG * data$PROPDMGEXP
# for crops
data$CROPDMGEXP <- replace(data$CROPDMGEXP, is.na(data$CROPDMGEXP), 0)
data$CROPDMG <- data$CROPDMG * data$CROPDMGEXP

Results

Events most harmful for population health

library(dplyr)

# Total fatalities and number of recorded events for each event type, sorted
# from the highest fatality total down
Fatalities <- data %>%
  group_by(EVENT_TYPE = EVTYPE) %>%
  summarise(
    Total_Fatalities = sum(FATALITIES),
    Number_of_Events = n()
  ) %>%
  arrange(desc(Total_Fatalities))
library(xtable)
# HTML table of the first 20 rows. xtable() expects one alignment code per
# column plus one for the row-name column, hence ncol() + 1 centred columns.
xtf <- xtable(head(Fatalities, 20), align = rep("c", ncol(Fatalities) + 1))

print(xtf, type = "html")
EVENT_TYPE Total_Fatalities Number_of_Events
1 TORNADO 5633.00 60652
2 EXCESSIVE HEAT 1903.00 1678
3 FLASH FLOOD 978.00 54277
4 HEAT 937.00 767
5 LIGHTNING 816.00 15754
6 TSTM WIND 504.00 219940
7 FLOOD 470.00 25326
8 RIP CURRENT 368.00 470
9 HIGH WIND 248.00 20212
10 AVALANCHE 224.00 386
11 WINTER STORM 206.00 11433
12 RIP CURRENTS 204.00 304
13 HEAT WAVE 172.00 74
14 EXTREME COLD 160.00 655
15 THUNDERSTORM WIND 133.00 82563
16 HEAVY SNOW 127.00 15708
17 EXTREME COLD/WIND CHILL 125.00 1002
18 STRONG WIND 103.00 3566
19 BLIZZARD 101.00 2719
20 HIGH SURF 101.00 725
# Total injuries and number of recorded events for each event type, sorted
# from the highest injury total down
Injuries <- data %>%
  group_by(EVENT_TYPE = EVTYPE) %>%
  summarise(
    Total_Injuries = sum(INJURIES),
    Number_of_Events = n()
  ) %>%
  arrange(desc(Total_Injuries))

# HTML table of the first 20 rows. xtable() expects one alignment code per
# column plus one for the row-name column, hence ncol() + 1 centred columns.
xti <- xtable(head(Injuries, 20), align = rep("c", ncol(Injuries) + 1))
print(xti, type = "html")
EVENT_TYPE Total_Injuries Number_of_Events
1 TORNADO 91346.00 60652
2 TSTM WIND 6957.00 219940
3 FLOOD 6789.00 25326
4 EXCESSIVE HEAT 6525.00 1678
5 LIGHTNING 5230.00 15754
6 HEAT 2100.00 767
7 ICE STORM 1975.00 2006
8 FLASH FLOOD 1777.00 54277
9 THUNDERSTORM WIND 1488.00 82563
10 HAIL 1361.00 288661
11 WINTER STORM 1321.00 11433
12 HURRICANE/TYPHOON 1275.00 88
13 HIGH WIND 1137.00 20212
14 HEAVY SNOW 1021.00 15708
15 WILDFIRE 911.00 2761
16 THUNDERSTORM WINDS 908.00 20843
17 BLIZZARD 805.00 2719
18 FOG 734.00 538
19 WILD/FOREST FIRE 545.00 1457
20 DUST STORM 440.00 427
library(ggplot2)

# Bar chart of the first 20 rows of Fatalities. Bars are ordered by total
# (largest first) instead of alphabetically so the ranking can be read off the
# chart; head() avoids NA-padded rows when fewer than 20 rows exist.
ggplot(
  data = head(Fatalities, 20),
  aes(
    x = reorder(EVENT_TYPE, -Total_Fatalities),
    y = Total_Fatalities,
    fill = EVENT_TYPE
  )
) +
  geom_bar(stat = "identity") +
  labs(x = "Event type", y = "Total fatalities") +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

# Bar chart of the first 20 rows of Injuries. Bars are ordered by total
# (largest first) instead of alphabetically so the ranking can be read off the
# chart; head() avoids NA-padded rows when fewer than 20 rows exist.
ggplot(
  data = head(Injuries, 20),
  aes(
    x = reorder(EVENT_TYPE, -Total_Injuries),
    y = Total_Injuries,
    fill = EVENT_TYPE
  )
) +
  geom_bar(stat = "identity") +
  labs(x = "Event type", y = "Total injuries") +
  theme(
    text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

Events with the greatest economic consequences

Property damage per event type (in US$)

library(dplyr)
library(xtable)
# Total property damage and number of recorded events for each event type,
# sorted from the highest damage total down
Property <- data %>%
  group_by(EVENT_TYPE = EVTYPE) %>%
  summarise(
    Total_Property_Damage = sum(PROPDMG),
    Number_of_Events = n()
  ) %>%
  arrange(desc(Total_Property_Damage))
# HTML table of the first 20 rows. xtable() expects one alignment code per
# column plus one for the row-name column, hence ncol() + 1 centred columns.
xtp <- xtable(head(Property, 20), align = rep("c", ncol(Property) + 1))
print(xtp, type = "html")
EVENT_TYPE Total_Property_Damage Number_of_Events
1 FLOOD 144657709800.00 25326
2 HURRICANE/TYPHOON 69305840000.00 88
3 TORNADO 56937160991.00 60652
4 STORM SURGE 43323536000.00 261
5 FLASH FLOOD 16140812086.80 54277
6 HAIL 15732267370.00 288661
7 HURRICANE 11868319010.00 174
8 TROPICAL STORM 7703890550.00 690
9 WINTER STORM 6688497250.00 11433
10 HIGH WIND 5270046260.00 20212
11 RIVER FLOOD 5118945500.00 173
12 WILDFIRE 4765114000.00 2761
13 STORM SURGE/TIDE 4641188000.00 148
14 TSTM WIND 4484928440.00 219940
15 ICE STORM 3944927810.00 2006
16 THUNDERSTORM WIND 3483121164.00 82563
17 HURRICANE OPAL 3172846000.00 9
18 WILD/FOREST FIRE 3001829500.00 1457
19 HEAVY RAIN/SEVERE WEATHER 2500000000.00 2
20 THUNDERSTORM WINDS 1735955613.50 20843

Crop damage per event type (in US$)

library(dplyr)
library(xtable)
# Total crop damage and number of recorded events for each event type, sorted
# from the highest damage total down
Crops <- data %>%
  group_by(EVENT_TYPE = EVTYPE) %>%
  summarise(
    Total_Crops_Damage = sum(CROPDMG),
    Number_of_Events = n()
  ) %>%
  arrange(desc(Total_Crops_Damage))
# HTML table of the first 20 rows. xtable() expects one alignment code per
# column plus one for the row-name column, hence ncol() + 1 centred columns.
xtc <- xtable(head(Crops, 20), align = rep("c", ncol(Crops) + 1))
print(xtc, type = "html")
EVENT_TYPE Total_Crops_Damage Number_of_Events
1 DROUGHT 13972566000.00 2488
2 FLOOD 5661968450.00 25326
3 RIVER FLOOD 5029459000.00 173
4 ICE STORM 5022113500.00 2006
5 HAIL 3025954450.00 288661
6 HURRICANE 2741910000.00 174
7 HURRICANE/TYPHOON 2607872800.00 88
8 FLASH FLOOD 1421317100.00 54277
9 EXTREME COLD 1292973000.00 655
10 FROST/FREEZE 1094086000.00 1342
11 HEAVY RAIN 733399800.00 11723
12 TROPICAL STORM 678346000.00 690
13 HIGH WIND 638571300.00 20212
14 TSTM WIND 554007350.00 219940
15 EXCESSIVE HEAT 492402000.00 1678
16 FREEZE 446225000.00 74
17 TORNADO 414953110.00 60652
18 THUNDERSTORM WIND 414843050.00 82563
19 HEAT 401461500.00 767
20 WILDFIRE 295472800.00 2761

Conclusions

Based on the tables and graphics shown previously we can conclude from the analysis of the NOAA Storm database that:

Tornadoes cause by far the most injuries and fatalities across the US; they are followed by excessive heat and flooding as causes of death and injury from meteorological events.

Floods cause the most economic damage to property in the US; for crops, however, droughts cause the most damage, followed by floods.