Statistical analysis of Storm Events Database (1950 – 2024)

The database to be analysed currently contains data from January 1950 to December 2024, as entered by NOAA’s National Weather Service (NWS). Bulk data are available as comma-separated values (CSV) files. In the following, the data are analysed to address two research questions:

  1. Across the United States, which types of events are most harmful with respect to population health?
  2. Across the United States, which types of events have the greatest economic consequences?

In the earlier years of the database there are generally fewer events recorded, most likely due to a lack of good records. More recent years can be considered more complete.

Data Processing

The data were downloaded from the NOAA website via the provided link and then processed in R using RStudio.

# Load libraries

  library(knitr)
## Warning: Paket 'knitr' wurde unter R Version 4.4.3 erstellt
  library(dplyr)
## 
## Attache Paket: 'dplyr'
## Die folgenden Objekte sind maskiert von 'package:stats':
## 
##     filter, lag
## Die folgenden Objekte sind maskiert von 'package:base':
## 
##     intersect, setdiff, setequal, union
  library(ggplot2)
  library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ✔ readr     2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the raw storm data CSV into a tibble (path is relative to the working directory)

  StormData <- readr::read_csv(
    file = "Reproducible Research/repdata_data_StormData.csv"
  )
## Rows: 902297 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (18): BGN_DATE, BGN_TIME, TIME_ZONE, COUNTYNAME, STATE, EVTYPE, BGN_AZI,...
## dbl (18): STATE__, COUNTY, BGN_RANGE, COUNTY_END, END_RANGE, LENGTH, WIDTH, ...
## lgl  (1): COUNTYENDN
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print per-column summary statistics for a first overview of the data

  StormData %>%
    summary()
##     STATE__       BGN_DATE           BGN_TIME          TIME_ZONE        
##  Min.   : 1.0   Length:902297      Length:902297      Length:902297     
##  1st Qu.:19.0   Class :character   Class :character   Class :character  
##  Median :30.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :31.2                                                           
##  3rd Qu.:45.0                                                           
##  Max.   :95.0                                                           
##                                                                         
##      COUNTY       COUNTYNAME           STATE              EVTYPE         
##  Min.   :  0.0   Length:902297      Length:902297      Length:902297     
##  1st Qu.: 31.0   Class :character   Class :character   Class :character  
##  Median : 75.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :100.6                                                           
##  3rd Qu.:131.0                                                           
##  Max.   :873.0                                                           
##                                                                          
##    BGN_RANGE          BGN_AZI           BGN_LOCATI          END_DATE        
##  Min.   :   0.000   Length:902297      Length:902297      Length:902297     
##  1st Qu.:   0.000   Class :character   Class :character   Class :character  
##  Median :   0.000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :   1.484                                                           
##  3rd Qu.:   1.000                                                           
##  Max.   :3749.000                                                           
##                                                                             
##    END_TIME           COUNTY_END COUNTYENDN       END_RANGE       
##  Length:902297      Min.   :0    Mode:logical   Min.   :  0.0000  
##  Class :character   1st Qu.:0    NA's:902297    1st Qu.:  0.0000  
##  Mode  :character   Median :0                   Median :  0.0000  
##                     Mean   :0                   Mean   :  0.9862  
##                     3rd Qu.:0                   3rd Qu.:  0.0000  
##                     Max.   :0                   Max.   :925.0000  
##                                                                   
##    END_AZI           END_LOCATI            LENGTH              WIDTH         
##  Length:902297      Length:902297      Min.   :   0.0000   Min.   :   0.000  
##  Class :character   Class :character   1st Qu.:   0.0000   1st Qu.:   0.000  
##  Mode  :character   Mode  :character   Median :   0.0000   Median :   0.000  
##                                        Mean   :   0.2301   Mean   :   7.503  
##                                        3rd Qu.:   0.0000   3rd Qu.:   0.000  
##                                        Max.   :2315.0000   Max.   :4400.000  
##                                                                              
##        F               MAG            FATALITIES          INJURIES        
##  Min.   :0.0      Min.   :    0.0   Min.   :  0.0000   Min.   :   0.0000  
##  1st Qu.:0.0      1st Qu.:    0.0   1st Qu.:  0.0000   1st Qu.:   0.0000  
##  Median :1.0      Median :   50.0   Median :  0.0000   Median :   0.0000  
##  Mean   :0.9      Mean   :   46.9   Mean   :  0.0168   Mean   :   0.1557  
##  3rd Qu.:1.0      3rd Qu.:   75.0   3rd Qu.:  0.0000   3rd Qu.:   0.0000  
##  Max.   :5.0      Max.   :22000.0   Max.   :583.0000   Max.   :1700.0000  
##  NA's   :843563                                                           
##     PROPDMG         PROPDMGEXP           CROPDMG         CROPDMGEXP       
##  Min.   :   0.00   Length:902297      Min.   :  0.000   Length:902297     
##  1st Qu.:   0.00   Class :character   1st Qu.:  0.000   Class :character  
##  Median :   0.00   Mode  :character   Median :  0.000   Mode  :character  
##  Mean   :  12.06                      Mean   :  1.527                     
##  3rd Qu.:   0.50                      3rd Qu.:  0.000                     
##  Max.   :5000.00                      Max.   :990.000                     
##                                                                           
##      WFO             STATEOFFIC         ZONENAMES            LATITUDE   
##  Length:902297      Length:902297      Length:902297      Min.   :   0  
##  Class :character   Class :character   Class :character   1st Qu.:2802  
##  Mode  :character   Mode  :character   Mode  :character   Median :3540  
##                                                           Mean   :2875  
##                                                           3rd Qu.:4019  
##                                                           Max.   :9706  
##                                                           NA's   :47    
##    LONGITUDE        LATITUDE_E     LONGITUDE_       REMARKS         
##  Min.   :-14451   Min.   :   0   Min.   :-14455   Length:902297     
##  1st Qu.:  7247   1st Qu.:   0   1st Qu.:     0   Class :character  
##  Median :  8707   Median :   0   Median :     0   Mode  :character  
##  Mean   :  6940   Mean   :1452   Mean   :  3509                     
##  3rd Qu.:  9605   3rd Qu.:3549   3rd Qu.:  8735                     
##  Max.   : 17124   Max.   :9706   Max.   :106220                     
##                   NA's   :40                                        
##      REFNUM      
##  Min.   :     1  
##  1st Qu.:225575  
##  Median :451149  
##  Mean   :451149  
##  3rd Qu.:676723  
##  Max.   :902297  
## 
# Convert the event type column to a factor

    StormData[["EVTYPE"]] <- factor(StormData[["EVTYPE"]])

Results

Impact of storm events on population health

First, the consequences of storm events for population health are analysed. The following two plots show the ten event types with the greatest health impact, measured by the number of injuries and the number of fatalities they cause. The exact values can be read from the corresponding summary tables.

# Impact on population health

  # Events causing the most injuries:
  # total injuries per event type, sorted from most to fewest

   injuries <- StormData %>%
      group_by(EVTYPE) %>%
      summarise(INJURIES = sum(INJURIES, na.rm = TRUE)) %>%
      arrange(desc(INJURIES))

  # Bar plot of the ten event types with the most injuries.
  # Bar heights are the natural logarithm of the injury totals,
  # so the y axis is labelled as injuries (log), not events.

   top_injuries <- head(injuries, 10)

   barplot(log(top_injuries$INJURIES),
           names.arg = as.character(top_injuries$EVTYPE),
           main = "Sum of Injuries per Event",
           xlab = "", ylab = "Number of Injuries (log)",
           cex.names = 0.8,
           las = 2,
           col = "lightgreen")

  # Events causing the most fatalities:
  # total fatalities per event type, sorted from most to fewest

  fatalities <- StormData %>%
      group_by(EVTYPE) %>%
      summarise(FATALITIES = sum(FATALITIES, na.rm = TRUE)) %>%
      arrange(desc(FATALITIES))

  # Bar plot of the ten event types with the most fatalities.
  # Bar heights are the raw fatality totals, so the y axis is labelled
  # as fatalities, not events.

    top_fatalities <- head(fatalities, 10)

    barplot(top_fatalities$FATALITIES,
            names.arg = as.character(top_fatalities$EVTYPE),
            main = "Sum of Fatalities per Event",
            xlab = "", ylab = "Number of Fatalities",
            cex.names = 0.8,
            las = 2,
            col = "orange")


Impact of storm events on economy

In a second step, the consequences of storm events for the economy are reported. Both property damage and crop damage are considered. The ten event types with the highest total damage are listed in the table below.

  # Transform variables indicating property and crop damage

    # Scale a damage figure by its magnitude code.
    #   value:    numeric damage figure (PROPDMG or CROPDMG)
    #   exponent: character magnitude code (PROPDMGEXP or CROPDMGEXP)
    # "K", "M" and "B" multiply by 1e3, 1e6 and 1e9 respectively; the codes are
    # matched case-insensitively (NOTE(review): assumes lower-case "k"/"m" mean
    # the same as "K"/"M" - confirm against the data documentation).
    # Any other code, including NA, leaves the value unscaled.
    scale_damage <- function(value, exponent) {
      exponent <- toupper(exponent)
      case_when(
        exponent == "K" ~ value * 1e3,
        exponent == "M" ~ value * 1e6,
        exponent == "B" ~ value * 1e9,
        TRUE ~ value
      )
    }

    # Both columns go through the same helper so that crop damage is scaled by
    # its own code (CROPDMGEXP) and never by the property columns.
    # NOTE: this changes the crop and total damage figures; any previously
    # rendered damage table must be regenerated.
    StormData_new <- StormData %>%
      mutate(
        PROPDMG2 = scale_damage(PROPDMG, PROPDMGEXP),
        CROPDMG2 = scale_damage(CROPDMG, CROPDMGEXP)
      )

  # Total economic damage per record (property damage plus crop damage)

    StormData_new <- mutate(StormData_new, DMG = PROPDMG2 + CROPDMG2)
    
  # Aggregate data per event type

    # A single grouped summary yields total, property and crop damage per
    # event type as plain numeric columns, so the three sums are guaranteed
    # to line up row by row.
    events <- StormData_new %>%
      group_by(EVTYPE) %>%
      summarise(
        DMG = sum(DMG, na.rm = TRUE),
        PROPDMG = sum(PROPDMG2, na.rm = TRUE),
        CROPDMG = sum(CROPDMG2, na.rm = TRUE),
        .groups = "drop"
      )

  # Create table with top 10 damages

    # Rename to the display names and sort by total damage, largest first
    table_events <- events %>%
      rename(
        Events = EVTYPE,
        Total_Damage = DMG,
        Property = PROPDMG,
        Crop = CROPDMG
      ) %>%
      arrange(desc(Total_Damage))

    top_10_eco <- head(table_events, 10)

    top_10_eco %>%
      select(Events, Total_Damage, Property, Crop) %>%
      kable()
Events Total_Damage Property Crop
FLOOD 153319678257 144657709807 8661968450
HURRICANE/TYPHOON 117653712800 69305840000 48347872800
STORM SURGE 85883541000 43323536000 42560005000
TORNADO 57340614060 56925660790 414953270
HAIL 18752904943 15727367053 3025537890
FLASH FLOOD 17562179167 16140862067 1421317100
HURRICANE 14610229010 11868319010 2741910000
TROPICAL STORM 13532236550 7703890550 5828346000
DROUGHT 13518672002 1046106000 12472566002
WINTER STORM 11715441251 6688497251 5026944000