Summary of Analysis

This analysis used the U.S. National Oceanic and Atmospheric Administration (NOAA) Storm Database to examine the effects of severe weather events on population health and the economy. The impact on population health was measured as the combined total of injuries and fatalities across all recorded storm events. The analysis shows that tornadoes are the most harmful event type to population health, causing the highest number of injuries and fatalities. A histogram and line plot of the EVTYPE variable reveal that storm events are unevenly distributed, with a small number of event types occurring very frequently. Economic consequences were measured using the combined property and crop damage values. Floods were found to have the greatest economic impact, resulting in the highest total economic losses among all event types. At the county level, Cook County suffered the highest combined number of injuries and fatalities, indicating significant vulnerability to severe weather events.

Downloading the dataset

Creating the directory

# Ensure the local data folder exists; nothing happens when it is already there
if (isFALSE(dir.exists("rpubassigndata"))) {
  dir.create(path = "rpubassigndata")
}

Define URL and destination file

# Remote location of the compressed storm data and the local path it is cached
# under. HTTPS is used so the archive cannot be altered in transit.
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
destfile <- file.path("rpubassigndata", "StormData.csv.bz2")

Downloading the data if it is not already present

# Fetch the archive only when no cached copy exists.
# The transfer goes to a temporary ".part" file that is renamed on success, so
# an interrupted download can never be mistaken for a complete cached file on
# the next run.
if (!file.exists(destfile)) {
  partial_file <- paste0(destfile, ".part")
  # mode = "wb" keeps the compressed bytes intact on every platform
  status <- download.file(url, partial_file, mode = "wb")
  # download.file() returns 0 on success
  if (status != 0 || !file.rename(partial_file, destfile)) {
    unlink(partial_file)
    stop("Download of ", url, " failed", call. = FALSE)
  }
}

Loading the Data

# Read the compressed CSV straight from the cached download. The path comes
# from `destfile` so the download and load steps cannot drift apart, and text
# columns are kept as character regardless of the R version's default.
stormdata <- read.csv(destfile, stringsAsFactors = FALSE)

Data Preprocessing

Inspecting the data

# Peek at the first six records to see the column layout
head(stormdata, n = 6L)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE  EVTYPE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL TORNADO
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL TORNADO
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL TORNADO
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL TORNADO
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL TORNADO
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL TORNADO
##   BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END COUNTYENDN
## 1         0                                               0         NA
## 2         0                                               0         NA
## 3         0                                               0         NA
## 4         0                                               0         NA
## 5         0                                               0         NA
## 6         0                                               0         NA
##   END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES INJURIES PROPDMG
## 1         0                      14.0   100 3   0          0       15    25.0
## 2         0                       2.0   150 2   0          0        0     2.5
## 3         0                       0.1   123 2   0          0        2    25.0
## 4         0                       0.0   100 2   0          0        2     2.5
## 5         0                       0.0   150 2   0          0        2     2.5
## 6         0                       1.5   177 2   0          0        6     2.5
##   PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES LATITUDE LONGITUDE
## 1          K       0                                         3040      8812
## 2          K       0                                         3042      8755
## 3          K       0                                         3340      8742
## 4          K       0                                         3458      8626
## 5          K       0                                         3412      8642
## 6          K       0                                         3450      8748
##   LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1       3051       8806              1
## 2          0          0              2
## 3          0          0              3
## 4          0          0              4
## 5          0          0              5
## 6          0          0              6
# Compact overview of every column: its type and first few values
str(object = stormdata)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : chr  "4/18/1950 0:00:00" "4/18/1950 0:00:00" "2/20/1951 0:00:00" "6/8/1951 0:00:00" ...
##  $ BGN_TIME  : chr  "0130" "0145" "1600" "0900" ...
##  $ TIME_ZONE : chr  "CST" "CST" "CST" "CST" ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: chr  "MOBILE" "BALDWIN" "FAYETTE" "MADISON" ...
##  $ STATE     : chr  "AL" "AL" "AL" "AL" ...
##  $ EVTYPE    : chr  "TORNADO" "TORNADO" "TORNADO" "TORNADO" ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : chr  "" "" "" "" ...
##  $ BGN_LOCATI: chr  "" "" "" "" ...
##  $ END_DATE  : chr  "" "" "" "" ...
##  $ END_TIME  : chr  "" "" "" "" ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : chr  "" "" "" "" ...
##  $ END_LOCATI: chr  "" "" "" "" ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: chr  "K" "K" "K" "K" ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: chr  "" "" "" "" ...
##  $ WFO       : chr  "" "" "" "" ...
##  $ STATEOFFIC: chr  "" "" "" "" ...
##  $ ZONENAMES : chr  "" "" "" "" ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : chr  "" "" "" "" ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...

Summary of data

# Per-column summary: quantiles for numeric columns, length/class for text
summary(object = stormdata)
##     STATE__       BGN_DATE           BGN_TIME          TIME_ZONE        
##  Min.   : 1.0   Length:902297      Length:902297      Length:902297     
##  1st Qu.:19.0   Class :character   Class :character   Class :character  
##  Median :30.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :31.2                                                           
##  3rd Qu.:45.0                                                           
##  Max.   :95.0                                                           
##                                                                         
##      COUNTY       COUNTYNAME           STATE              EVTYPE         
##  Min.   :  0.0   Length:902297      Length:902297      Length:902297     
##  1st Qu.: 31.0   Class :character   Class :character   Class :character  
##  Median : 75.0   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :100.6                                                           
##  3rd Qu.:131.0                                                           
##  Max.   :873.0                                                           
##                                                                          
##    BGN_RANGE          BGN_AZI           BGN_LOCATI          END_DATE        
##  Min.   :   0.000   Length:902297      Length:902297      Length:902297     
##  1st Qu.:   0.000   Class :character   Class :character   Class :character  
##  Median :   0.000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :   1.484                                                           
##  3rd Qu.:   1.000                                                           
##  Max.   :3749.000                                                           
##                                                                             
##    END_TIME           COUNTY_END COUNTYENDN       END_RANGE       
##  Length:902297      Min.   :0    Mode:logical   Min.   :  0.0000  
##  Class :character   1st Qu.:0    NA's:902297    1st Qu.:  0.0000  
##  Mode  :character   Median :0                   Median :  0.0000  
##                     Mean   :0                   Mean   :  0.9862  
##                     3rd Qu.:0                   3rd Qu.:  0.0000  
##                     Max.   :0                   Max.   :925.0000  
##                                                                   
##    END_AZI           END_LOCATI            LENGTH              WIDTH         
##  Length:902297      Length:902297      Min.   :   0.0000   Min.   :   0.000  
##  Class :character   Class :character   1st Qu.:   0.0000   1st Qu.:   0.000  
##  Mode  :character   Mode  :character   Median :   0.0000   Median :   0.000  
##                                        Mean   :   0.2301   Mean   :   7.503  
##                                        3rd Qu.:   0.0000   3rd Qu.:   0.000  
##                                        Max.   :2315.0000   Max.   :4400.000  
##                                                                              
##        F               MAG            FATALITIES           INJURIES        
##  Min.   :0.00     Min.   :    0.0   Min.   :  0.00000   Min.   :   0.0000  
##  1st Qu.:0.00     1st Qu.:    0.0   1st Qu.:  0.00000   1st Qu.:   0.0000  
##  Median :1.00     Median :   50.0   Median :  0.00000   Median :   0.0000  
##  Mean   :0.91     Mean   :   46.9   Mean   :  0.01678   Mean   :   0.1557  
##  3rd Qu.:1.00     3rd Qu.:   75.0   3rd Qu.:  0.00000   3rd Qu.:   0.0000  
##  Max.   :5.00     Max.   :22000.0   Max.   :583.00000   Max.   :1700.0000  
##  NA's   :843563                                                            
##     PROPDMG         PROPDMGEXP           CROPDMG         CROPDMGEXP       
##  Min.   :   0.00   Length:902297      Min.   :  0.000   Length:902297     
##  1st Qu.:   0.00   Class :character   1st Qu.:  0.000   Class :character  
##  Median :   0.00   Mode  :character   Median :  0.000   Mode  :character  
##  Mean   :  12.06                      Mean   :  1.527                     
##  3rd Qu.:   0.50                      3rd Qu.:  0.000                     
##  Max.   :5000.00                      Max.   :990.000                     
##                                                                           
##      WFO             STATEOFFIC         ZONENAMES            LATITUDE   
##  Length:902297      Length:902297      Length:902297      Min.   :   0  
##  Class :character   Class :character   Class :character   1st Qu.:2802  
##  Mode  :character   Mode  :character   Mode  :character   Median :3540  
##                                                           Mean   :2875  
##                                                           3rd Qu.:4019  
##                                                           Max.   :9706  
##                                                           NA's   :47    
##    LONGITUDE        LATITUDE_E     LONGITUDE_       REMARKS         
##  Min.   :-14451   Min.   :   0   Min.   :-14455   Length:902297     
##  1st Qu.:  7247   1st Qu.:   0   1st Qu.:     0   Class :character  
##  Median :  8707   Median :   0   Median :     0   Mode  :character  
##  Mean   :  6940   Mean   :1452   Mean   :  3509                     
##  3rd Qu.:  9605   3rd Qu.:3549   3rd Qu.:  8735                     
##  Max.   : 17124   Max.   :9706   Max.   :106220                     
##                   NA's   :40                                        
##      REFNUM      
##  Min.   :     1  
##  1st Qu.:225575  
##  Median :451149  
##  Mean   :451149  
##  3rd Qu.:676723  
##  Max.   :902297  
## 

Number of rows

# Number of recorded events (first element of the data frame's dimensions)
dim(stormdata)[1L]
## [1] 902297

Checking for missing data

# Rows containing at least one NA. A column that is NA throughout (COUNTYENDN
# in the summary above) makes every row count as incomplete.
nrow(stormdata) - sum(complete.cases(stormdata))
## [1] 902297
# Total number of NA cells, added up column by column. Empty strings in the
# character columns are not NA and therefore are not counted.
sum(colSums(is.na(stormdata)))
## [1] 1745947
# TRUE when at least one row is an exact copy of an earlier row
anyDuplicated(stormdata) > 0L
## [1] FALSE

Analysis

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

Count frequency of each EVTYPE

# Tally the records per event type, most frequent first (count column is `n`)
evtype_freq <- count(stormdata, EVTYPE, sort = TRUE)

Bar plot (top 10 most frequent event types for clarity)

# Horizontal bar chart of the first ten rows of the frequency table; bars are
# ordered by count so the most frequent event type ends up at the top.
ggplot(
  data = evtype_freq[seq_len(10), ],
  mapping = aes(x = reorder(EVTYPE, n), y = n)
) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  labs(
    y = "Frequency",
    x = "Event Type",
    title = "Top 10 Most Frequent Storm Event Types"
  ) +
  theme_minimal()

Keep only relevant variables

# Keep only the columns the analysis needs. PROPDMGEXP and CROPDMGEXP are kept
# too, because they carry the magnitude of the PROPDMG / CROPDMG figures.
storm <- stormdata[, c("EVTYPE", "FATALITIES", "INJURIES",
                       "PROPDMG", "PROPDMGEXP",
                       "CROPDMG", "CROPDMGEXP", "COUNTYNAME")]

# Total health impact: fatalities and injuries counted together per record
storm$HEALTH_IMPACT <- storm$FATALITIES + storm$INJURIES

# Translate damage magnitude codes into numeric multipliers.
#
# @param exp_code Vector of PROPDMGEXP / CROPDMGEXP codes.
# @return Numeric vector of the same length as `exp_code`: 1e3 for "K",
#   1e6 for "M", 1e9 for "B" (case-insensitive), and 1 for blank or
#   unrecognised codes so those amounts are kept at face value.
damage_multiplier <- function(exp_code) {
  # NOTE(review): "H" is not among the documented K/M/B codes; presumably it
  # means hundreds -- confirm against the data documentation.
  known <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
  multiplier <- unname(known[toupper(trimws(exp_code))])
  # Blank, NA and unknown codes do not match any name and come back as NA
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

# Total economic impact in US dollars: each amount is scaled by its own
# magnitude code before property and crop damage are added together.
# NOTE: output rendered before this scaling was introduced must be regenerated.
storm$ECONOMIC_DAMAGE <-
  storm$PROPDMG * damage_multiplier(storm$PROPDMGEXP) +
  storm$CROPDMG * damage_multiplier(storm$CROPDMGEXP)

The Summary of harm to population health by EVTYPE

# Add up the combined casualty figure for each event type
health_by_event <- aggregate(
  HEALTH_IMPACT ~ EVTYPE,
  data = storm,
  FUN = sum
)
# Re-order the rows from the largest total to the smallest
health_by_event <- health_by_event[
  with(health_by_event, order(-HEALTH_IMPACT)),
]

# Ten event types with the highest totals
head(health_by_event, n = 10)
##                EVTYPE HEALTH_IMPACT
## 834           TORNADO         96979
## 130    EXCESSIVE HEAT          8428
## 856         TSTM WIND          7461
## 170             FLOOD          7259
## 464         LIGHTNING          6046
## 275              HEAT          3037
## 153       FLASH FLOOD          2755
## 427         ICE STORM          2064
## 760 THUNDERSTORM WIND          1621
## 972      WINTER STORM          1527

The Histogram of EVTYPE (frequency)

# Histogram over the integer codes of the event types: each EVTYPE is turned
# into a factor and its level index is what gets binned.
hist(
  x = as.numeric(factor(storm[["EVTYPE"]])),
  col = "lightblue",
  xlab = "Event Type Index",
  main = "Histogram of Storm Event Types (EVTYPE)",
  breaks = 50
)

Event type with greatest economic impact

# Add up the ECONOMIC_DAMAGE column for each event type
economic_by_event <- aggregate(
  ECONOMIC_DAMAGE ~ EVTYPE,
  data = storm,
  FUN = sum
)
# Largest total first
damage_order <- order(-economic_by_event[["ECONOMIC_DAMAGE"]])
economic_by_event <- economic_by_event[damage_order, ]

# Ten event types with the highest totals
head(economic_by_event, n = 10)
##                 EVTYPE ECONOMIC_DAMAGE
## 834            TORNADO       3312276.7
## 153        FLASH FLOOD       1599325.1
## 856          TSTM WIND       1445168.2
## 244               HAIL       1268289.7
## 170              FLOOD       1067976.4
## 760  THUNDERSTORM WIND        943635.6
## 464          LIGHTNING        606932.4
## 786 THUNDERSTORM WINDS        464978.1
## 359          HIGH WIND        342014.8
## 972       WINTER STORM        134699.6

County with the most injuries and fatalities

# Casualties per county. County names are not unique across states, so the
# totals are grouped by STATE and COUNTYNAME together; grouping by name alone
# would merge unrelated counties that happen to share a name.
# NOTE: output rendered before this grouping was introduced must be regenerated.
county_rows <- stormdata[, c("STATE", "COUNTYNAME", "FATALITIES", "INJURIES")]
county_rows$HEALTH_IMPACT <- county_rows$FATALITIES + county_rows$INJURIES

county_impact <- aggregate(HEALTH_IMPACT ~ STATE + COUNTYNAME,
                           data = county_rows, FUN = sum)
# Largest combined injuries + fatalities first
county_impact <- county_impact[order(-county_impact$HEALTH_IMPACT), ]

head(county_impact, 10)
##       COUNTYNAME HEALTH_IMPACT
## 8485   JEFFERSON          2728
## 5653      GREENE          1952
## 27694    WICHITA          1907
## 10592    MADISON          1771
## 20322  OHZ42>088          1569
## 8480      JASPER          1524
## 28529  WORCESTER          1386
## 24418 TUSCALOOSA          1163
## 10614     MARION          1138
## 22148       POLK          1058

Result