In this analysis we try to determine which event type is most harmful to population health and which event type has the greatest economic consequences. The analysis covers the whole USA in order to give a general overview of the situation.

1. Data Processing

# Loading the needed packages
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.0.0     v purrr   0.2.5
## v tibble  1.4.2     v dplyr   0.7.6
## v tidyr   0.8.1     v stringr 1.3.1
## v readr   1.1.1     v forcats 0.3.0
## -- Conflicts ------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
# Downloading and Reading the file
# The archive is fetched only when it is not already on disk, so re-running
# the analysis does not download it again. mode = "wb" asks for a binary
# transfer so the compressed file is written byte-for-byte.
stormDataUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
stormDataFile <- "stormData.csv.bz2"
if (!file.exists(stormDataFile)) {
        download.file(stormDataUrl, destfile = stormDataFile, mode = "wb")
}
# Reading and saving the file
stormData <- read_csv(stormDataFile)
## Parsed with column specification:
## cols(
##   .default = col_character(),
##   STATE__ = col_double(),
##   COUNTY = col_double(),
##   BGN_RANGE = col_double(),
##   COUNTY_END = col_double(),
##   END_RANGE = col_double(),
##   LENGTH = col_double(),
##   WIDTH = col_double(),
##   F = col_integer(),
##   MAG = col_double(),
##   FATALITIES = col_double(),
##   INJURIES = col_double(),
##   PROPDMG = col_double(),
##   CROPDMG = col_double(),
##   LATITUDE = col_double(),
##   LONGITUDE = col_double(),
##   LATITUDE_E = col_double(),
##   LONGITUDE_ = col_double(),
##   REFNUM = col_double()
## )
## See spec(...) for full column specifications.
# Per-column summary of the raw data, printed for a first look
print(summary(object = stormData))
##     STATE__       BGN_DATE           BGN_TIME          TIME_ZONE        
##  Min.   : 1.0   Length:902297      Length:902297      Length:902297     
##  1st Qu.:19.0   Class :character   Class :character   Class :character  
##  Median :30.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :31.2                                                           
##  3rd Qu.:45.0                                                           
##  Max.   :95.0                                                           
##                                                                         
##      COUNTY       COUNTYNAME           STATE              EVTYPE         
##  Min.   :  0.0   Length:902297      Length:902297      Length:902297     
##  1st Qu.: 31.0   Class :character   Class :character   Class :character  
##  Median : 75.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :100.6                                                           
##  3rd Qu.:131.0                                                           
##  Max.   :873.0                                                           
##                                                                          
##    BGN_RANGE          BGN_AZI           BGN_LOCATI       
##  Min.   :   0.000   Length:902297      Length:902297     
##  1st Qu.:   0.000   Class :character   Class :character  
##  Median :   0.000   Mode  :character   Mode  :character  
##  Mean   :   1.484                                        
##  3rd Qu.:   1.000                                        
##  Max.   :3749.000                                        
##                                                          
##    END_DATE           END_TIME           COUNTY_END  COUNTYENDN       
##  Length:902297      Length:902297      Min.   :0    Length:902297     
##  Class :character   Class :character   1st Qu.:0    Class :character  
##  Mode  :character   Mode  :character   Median :0    Mode  :character  
##                                        Mean   :0                      
##                                        3rd Qu.:0                      
##                                        Max.   :0                      
##                                                                       
##    END_RANGE          END_AZI           END_LOCATI       
##  Min.   :  0.0000   Length:902297      Length:902297     
##  1st Qu.:  0.0000   Class :character   Class :character  
##  Median :  0.0000   Mode  :character   Mode  :character  
##  Mean   :  0.9862                                        
##  3rd Qu.:  0.0000                                        
##  Max.   :925.0000                                        
##                                                          
##      LENGTH              WIDTH                F               MAG         
##  Min.   :   0.0000   Min.   :   0.000   Min.   :0.0      Min.   :    0.0  
##  1st Qu.:   0.0000   1st Qu.:   0.000   1st Qu.:0.0      1st Qu.:    0.0  
##  Median :   0.0000   Median :   0.000   Median :1.0      Median :   50.0  
##  Mean   :   0.2301   Mean   :   7.503   Mean   :0.9      Mean   :   46.9  
##  3rd Qu.:   0.0000   3rd Qu.:   0.000   3rd Qu.:1.0      3rd Qu.:   75.0  
##  Max.   :2315.0000   Max.   :4400.000   Max.   :5.0      Max.   :22000.0  
##                                         NA's   :843563                    
##    FATALITIES          INJURIES            PROPDMG       
##  Min.   :  0.0000   Min.   :   0.0000   Min.   :   0.00  
##  1st Qu.:  0.0000   1st Qu.:   0.0000   1st Qu.:   0.00  
##  Median :  0.0000   Median :   0.0000   Median :   0.00  
##  Mean   :  0.0168   Mean   :   0.1557   Mean   :  12.06  
##  3rd Qu.:  0.0000   3rd Qu.:   0.0000   3rd Qu.:   0.50  
##  Max.   :583.0000   Max.   :1700.0000   Max.   :5000.00  
##                                                          
##   PROPDMGEXP           CROPDMG         CROPDMGEXP       
##  Length:902297      Min.   :  0.000   Length:902297     
##  Class :character   1st Qu.:  0.000   Class :character  
##  Mode  :character   Median :  0.000   Mode  :character  
##                     Mean   :  1.527                     
##                     3rd Qu.:  0.000                     
##                     Max.   :990.000                     
##                                                         
##      WFO             STATEOFFIC         ZONENAMES            LATITUDE   
##  Length:902297      Length:902297      Length:902297      Min.   :   0  
##  Class :character   Class :character   Class :character   1st Qu.:2802  
##  Mode  :character   Mode  :character   Mode  :character   Median :3540  
##                                                           Mean   :2875  
##                                                           3rd Qu.:4019  
##                                                           Max.   :9706  
##                                                           NA's   :47    
##    LONGITUDE        LATITUDE_E     LONGITUDE_       REMARKS         
##  Min.   :-14451   Min.   :   0   Min.   :-14455   Length:902297     
##  1st Qu.:  7247   1st Qu.:   0   1st Qu.:     0   Class :character  
##  Median :  8707   Median :   0   Median :     0   Mode  :character  
##  Mean   :  6940   Mean   :1452   Mean   :  3509                     
##  3rd Qu.:  9605   3rd Qu.:3549   3rd Qu.:  8735                     
##  Max.   : 17124   Max.   :9706   Max.   :106220                     
##                   NA's   :40                                        
##      REFNUM      
##  Min.   :     1  
##  1st Qu.:225575  
##  Median :451149  
##  Mean   :451149  
##  3rd Qu.:676723  
##  Max.   :902297  
## 
# Work on a copy so the raw import (stormData) stays untouched.
# BGN_DATE / END_DATE are parsed from character using the "mdy HMS" order,
# and STATE / EVTYPE are turned into factors.
stormDataProcess <- stormData %>%
  mutate(
    BGN_DATE = parse_date_time(BGN_DATE, orders = "mdy HMS"),
    END_DATE = parse_date_time(END_DATE, orders = "mdy HMS"),
    STATE = as.factor(STATE),
    EVTYPE = as.factor(EVTYPE)
  )
# Turning all damage into number
#
# PROPDMG / CROPDMG are multiplied by the magnitude encoded in
# PROPDMGEXP / CROPDMGEXP to obtain plain numeric damage amounts.

# Translate a vector of exponent codes into numeric multipliers.
# H = 1e2, K = 1e3, M = 1e6, B = 1e9, matched in either letter case (the
# earlier version accepted lower case only for "m" and "h", so lower-case
# "k" / "b" rows were turned into NA). Missing or unrecognised codes still
# give NA, so those rows are excluded by any later sum(..., na.rm = TRUE).
damageMultiplier <- function(expCode) {
        case_when(expCode %in% c("H", "h") ~ 1e2,
                  expCode %in% c("K", "k") ~ 1e3,
                  expCode %in% c("M", "m") ~ 1e6,
                  expCode %in% c("B", "b") ~ 1e9,
                  TRUE ~ NA_real_)
}

stormDataProcess <- stormDataProcess %>%
        mutate(propDamage = PROPDMG * damageMultiplier(PROPDMGEXP),
               cropDamage = CROPDMG * damageMultiplier(CROPDMGEXP)) %>%
        # Keep only the columns used by the rest of the analysis
        select(EVTYPE, STATE, FATALITIES, INJURIES, propDamage, cropDamage)
  2. Finding all the fatalities and injuries in the USA
# Sum fatalities and injuries per event type, then keep the five event
# types with the most fatalities (injury totals are carried along)
stormDataProcess_evtype <- stormDataProcess %>%
  group_by(EVTYPE) %>%
  summarise(
    totalFatalities = sum(FATALITIES),
    totalInjuries = sum(INJURIES)
  ) %>%
  arrange(desc(totalFatalities)) %>%
  head(n = 5)
# Show the resulting table
stormDataProcess_evtype
## # A tibble: 5 x 3
##   EVTYPE         totalFatalities totalInjuries
##   <fct>                    <dbl>         <dbl>
## 1 TORNADO                   5633         91346
## 2 EXCESSIVE HEAT            1903          6525
## 3 FLASH FLOOD                978          1777
## 4 HEAT                       937          2100
## 5 LIGHTNING                  816          5230
 # Sum fatalities and injuries per (event type, state) pair, then keep the
# ten pairs with the most fatalities. The result remains grouped by EVTYPE
# because summarise() only drops the last grouping variable.
stormDataProcess_evtype_state <- stormDataProcess %>%
  group_by(EVTYPE, STATE) %>%
  summarise(
    totalFatalities = sum(FATALITIES),
    totalInjuries = sum(INJURIES)
  ) %>%
  arrange(desc(totalFatalities)) %>%
  head(n = 10)
# Show the resulting table
stormDataProcess_evtype_state
## # A tibble: 10 x 4
## # Groups:   EVTYPE [3]
##    EVTYPE         STATE totalFatalities totalInjuries
##    <fct>          <fct>           <dbl>         <dbl>
##  1 HEAT           IL                653           241
##  2 TORNADO        AL                617          7929
##  3 TORNADO        TX                538          8207
##  4 TORNADO        MS                450          6244
##  5 TORNADO        MO                388          4330
##  6 TORNADO        AR                379          5116
##  7 TORNADO        TN                368          4748
##  8 EXCESSIVE HEAT PA                359           320
##  9 EXCESSIVE HEAT IL                330           352
## 10 TORNADO        OK                296          4829
# Same ten (event type, state) rows as stormDataProcess_evtype_state,
# re-sorted by injuries. Note: the rows were selected by fatalities.
stormDataProcess_evtype_state_inj <- arrange(
  stormDataProcess_evtype_state,
  desc(totalInjuries)
)
# Same five event-type rows as stormDataProcess_evtype, re-sorted by
# injuries. Note: the rows were selected by fatalities.
stormDataProcess_evtype_inj <- arrange(
  stormDataProcess_evtype,
  desc(totalInjuries)
)
  3. Finding all the economic damages
# Total damage (property + crop) per event type, keeping the five costliest.
# NA damage values are ignored in each sum.
stormDataProcess_dmg <- stormDataProcess %>%
  group_by(EVTYPE) %>%
  summarise(
    propertyTotal = sum(propDamage, na.rm = TRUE),
    cropTotal = sum(cropDamage, na.rm = TRUE)
  ) %>%
  transmute(EVTYPE, totalDamage = propertyTotal + cropTotal) %>%
  arrange(desc(totalDamage)) %>%
  head(n = 5)
  4. Results

4.1 Most harmful event
# Bar chart of total fatalities for the event types in
# stormDataProcess_evtype, bars ordered from most to fewest fatalities
ggplot(
  data = stormDataProcess_evtype,
  mapping = aes(
    x = reorder(EVTYPE, -totalFatalities),
    y = totalFatalities,
    fill = EVTYPE
  )
) +
  geom_col() +
  labs(
    y = "Total Fatalities",
    x = "Event Type",
    title = "Total fatalities by Event type in USA"
  ) +
  # Tilt the x labels so long event names stay readable; hide the legend
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    legend.position = "none"
  )

# Fatalities per state for the rows in stormDataProcess_evtype_state,
# one panel per event type, states ordered by decreasing fatalities
ggplot(
  data = stormDataProcess_evtype_state,
  mapping = aes(
    x = reorder(STATE, -totalFatalities),
    y = totalFatalities,
    fill = STATE
  )
) +
  geom_col() +
  facet_grid(. ~ EVTYPE) +
  labs(
    y = "Total Fatalities",
    x = "State",
    title = "Total fatalities by top 10 Events type in USA"
  ) +
  # Tilt the x labels and hide the legend
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    legend.position = "none"
  )

# Injuries per state for the rows in stormDataProcess_evtype_state_inj,
# one panel per event type, states ordered by decreasing injuries
ggplot(
  data = stormDataProcess_evtype_state_inj,
  mapping = aes(
    x = reorder(STATE, -totalInjuries),
    y = totalInjuries,
    fill = STATE
  )
) +
  geom_col() +
  facet_grid(. ~ EVTYPE) +
  labs(
    y = "Total Injuries",
    x = "State",
    title = "Total Injuries by top 10 Events type in USA"
  ) +
  # Tilt the x labels and hide the legend
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    legend.position = "none"
  )

# Injuries per event type for the rows in stormDataProcess_evtype_inj,
# bars ordered from most to fewest injuries
ggplot(
  data = stormDataProcess_evtype_inj,
  mapping = aes(
    x = reorder(EVTYPE, -totalInjuries),
    y = totalInjuries,
    fill = EVTYPE
  )
) +
  geom_col() +
  labs(
    y = "Total Injuries",
    x = "Events",
    title = "Total Injuries by top 5 Events type in USA"
  ) +
  # Tilt the x labels and hide the legend
  theme(
    axis.text.x = element_text(angle = 60, hjust = 1),
    legend.position = "none"
  )

Looking at these plots, we can say that across the USA the most harmful event type is TORNADO, with a total of 5,633 fatalities and 91,346 injuries over the years. Looking at each state separately, however, we can see that in Illinois the most harmful event type is HEAT.

4.2 Event with the greatest economic consequences

        # Total damage for the event types in stormDataProcess_dmg,
        # bars ordered from highest to lowest total damage
        ggplot(
          data = stormDataProcess_dmg,
          mapping = aes(
            x = reorder(EVTYPE, -totalDamage),
            y = totalDamage,
            fill = EVTYPE
          )
        ) +
          geom_col() +
          labs(
            y = "Total Damages in $",
            x = "Event Type",
            title = "Total Damages by Weather Events in USA"
          ) +
          # Tilt the x labels and hide the legend
          theme(
            axis.text.x = element_text(angle = 60, hjust = 1),
            legend.position = "none"
          )

This plot shows that the event type with the greatest economic consequences is FLOOD, with $150.3196783 billion in total damages over the years.