Synopsis: This analysis explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database to determine the underlying trends in the data. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage, from 1950 to 2011. Based on the current analysis, Tornado, Heat, Flood and Lightning caused the most fatalities across the USA. Tornado, Flood, Thunderstorm Wind and Heat caused more injuries than other event types. Flood, Hurricane, Tornado and Hail caused the most property damage, whereas Drought, Flood, Ice Storm and Hurricane had a significant impact on crop damage across the USA. We can therefore draw the following conclusions from these observations:

Across the United States, the event types Tornado, Flood, Heat, Thunderstorm Wind and Lightning are the most harmful with respect to population health. Across the United States, the event types Flood, Hurricane, Tornado, Hail, Ice Storm and Drought have the greatest economic consequences.

library(R.utils)
library(dplyr)
library(ggplot2)
  1. Data Processing: This section contains the R source code used to prepare the data for the analysis stage. The raw data was downloaded from the NOAA website and then loaded into RStudio for further analysis.
# Move into the project folder that holds the downloaded archive; the relative
# file names used below are resolved against this directory.
# NOTE(review): this absolute path is machine-specific.
setwd("C:/Users/soudey/Documents/Data Science/NOAA storm data research")

# Decompress the bzip2 archive next to itself. `remove = FALSE` keeps the
# original archive; `skip = TRUE` is presumably what lets repeated runs reuse
# an already-extracted CSV (see the R.utils documentation).
bunzip2(
  filename = "repdata_data_StormData.csv.bz2",
  destname = "repdata_data_StormData.csv",
  skip = TRUE,
  remove = FALSE
)
## [1] "repdata_data_StormData.csv"
## attr(,"temporary")
## [1] FALSE
# Load the decompressed CSV into a data frame, then report its size as
# (number of rows, number of columns).
storm_data <- read.csv(file = "repdata_data_StormData.csv")
dim(x = storm_data)
## [1] 902297     37
# Preview the first six records to check the column layout.
head(storm_data, n = 6L)
##   STATE__           BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1       1  4/18/1950 0:00:00     0130       CST     97     MOBILE    AL
## 2       1  4/18/1950 0:00:00     0145       CST      3    BALDWIN    AL
## 3       1  2/20/1951 0:00:00     1600       CST     57    FAYETTE    AL
## 4       1   6/8/1951 0:00:00     0900       CST     89    MADISON    AL
## 5       1 11/15/1951 0:00:00     1500       CST     43    CULLMAN    AL
## 6       1 11/15/1951 0:00:00     2000       CST     77 LAUDERDALE    AL
##    EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO         0                                               0
## 2 TORNADO         0                                               0
## 3 TORNADO         0                                               0
## 4 TORNADO         0                                               0
## 5 TORNADO         0                                               0
## 6 TORNADO         0                                               0
##   COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1         NA         0                      14.0   100 3   0          0
## 2         NA         0                       2.0   150 2   0          0
## 3         NA         0                       0.1   123 2   0          0
## 4         NA         0                       0.0   100 2   0          0
## 5         NA         0                       0.0   150 2   0          0
## 6         NA         0                       1.5   177 2   0          0
##   INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1       15    25.0          K       0                                    
## 2        0     2.5          K       0                                    
## 3        2    25.0          K       0                                    
## 4        2     2.5          K       0                                    
## 5        2     2.5          K       0                                    
## 6        6     2.5          K       0                                    
##   LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1     3040      8812       3051       8806              1
## 2     3042      8755          0          0              2
## 3     3340      8742          0          0              3
## 4     3458      8626          0          0              4
## 5     3412      8642          0          0              5
## 6     3450      8748          0          0              6

After loading the data, the next step is to consolidate the different catastrophic events by event type in order to remove ambiguity in the data.

  # Collapse spelling/format variants of the same event into one canonical
  # label so that their totals are aggregated together.
  # Work on a character vector: assigning a label that is not an existing
  # factor level would otherwise produce NA.
  storm_data$EVTYPE <- as.character(storm_data$EVTYPE)

  # variant label -> canonical label
  evtype_aliases <- c(
    "TSTM WIND"          = "THUNDERSTORM WIND",
    "THUNDERTORM WINDS"  = "THUNDERSTORM WIND",
    "THUNDERSTORM WINDS" = "THUNDERSTORM WIND",
    # Misspelled label previously used as the merge target; folded into the
    # correctly spelled one so nothing that used to be merged is split off.
    "TUNDERSTORM WIND"   = "THUNDERSTORM WIND",
    "FLASH FLOOD"        = "FLOOD",
    "RIVER FLOOD"        = "FLOOD",
    "HURRICANE/TYPHOON"  = "HURRICANE",
    "EXCESSIVE HEAT"     = "HEAT"
  )

  # Rewrite only the rows whose label is a known variant; %in% never yields NA.
  is_alias <- storm_data$EVTYPE %in% names(evtype_aliases)
  storm_data$EVTYPE[is_alias] <-
    unname(evtype_aliases[storm_data$EVTYPE[is_alias]])

  head(unique(storm_data$EVTYPE))
## [1] TORNADO               TUNDERSTORM WIND      HAIL                 
## [4] FREEZING RAIN         SNOW                  ICE STORM/FLASH FLOOD
## 985 Levels:    HIGH SURF ADVISORY  COASTAL FLOOD ... WND
  2. Results:

This section shows the number of fatalities caused by natural calamities.

library(dplyr)
# Total fatalities per event type, keeping only event types with at least one
# fatality, ordered from most to fewest.
fatal_accidents <- aggregate(FATALITIES ~ EVTYPE, data = storm_data, FUN = sum) %>%
  filter(FATALITIES > 0) %>%
  arrange(desc(FATALITIES))

# Ten deadliest event types.
head(fatal_accidents, n = 10)
##              EVTYPE FATALITIES
## 1           TORNADO       5633
## 2              HEAT       2840
## 3             FLOOD       1450
## 4         LIGHTNING        816
## 5  TUNDERSTORM WIND        569
## 6       RIP CURRENT        368
## 7         HIGH WIND        248
## 8         AVALANCHE        224
## 9      WINTER STORM        206
## 10     RIP CURRENTS        204

This section shows the number of people injured by natural calamities.

# Total injuries per event type, keeping only event types with at least one
# injury, ordered from most to fewest.
injuries_accidents <- aggregate(INJURIES ~ EVTYPE, data = storm_data, FUN = sum) %>%
  filter(INJURIES > 0) %>%
  arrange(desc(INJURIES))

# Ten event types with the most injuries.
head(injuries_accidents, n = 10)
##               EVTYPE INJURIES
## 1            TORNADO    91346
## 2               HEAT     8625
## 3              FLOOD     8568
## 4   TUNDERSTORM WIND     7865
## 5          LIGHTNING     5230
## 6          ICE STORM     1975
## 7  THUNDERSTORM WIND     1488
## 8               HAIL     1361
## 9          HURRICANE     1321
## 10      WINTER STORM     1321

Based on the NOAA documentation, we need to transform the property and crop damage quantities into dollar amounts. According to the documentation, estimates should be rounded to three significant digits, followed by an alphabetical character signifying the magnitude of the number, i.e., 1.55B for $1,550,000,000. Alphabetical characters used to signify magnitude include “K” for thousands, “M” for millions, and “B” for billions.

# Show the distinct magnitude codes present in the data. They are printed
# rather than opened with View(), which is meant for interactive sessions.
print(unique(storm_data$PROPDMGEXP))
print(unique(storm_data$CROPDMGEXP))

# Dollar multiplier for each magnitude code (thousands, millions, billions).
damage_multiplier <- c(K = 1e3, M = 1e6, B = 1e9)

# Convert damage amounts to dollars using their magnitude codes.
#   amount   - numeric vector of damage quantities
#   exp_code - vector (character or factor) of magnitude codes, same length
# Codes are matched case-insensitively so property and crop damage are treated
# the same way; any other code (blank, digit, symbol, NA) leaves the amount
# unchanged.
to_dollars <- function(amount, exp_code) {
  scale <- unname(damage_multiplier[toupper(as.character(exp_code))])
  scale[is.na(scale)] <- 1
  amount * scale
}

# The columns are overwritten in place, so this must run exactly once per
# freshly loaded `storm_data`; re-running would scale the amounts again.
storm_data$PROPDMG <- to_dollars(storm_data$PROPDMG, storm_data$PROPDMGEXP)
storm_data$CROPDMG <- to_dollars(storm_data$CROPDMG, storm_data$CROPDMGEXP)

This section shows the total amount of property damage caused by natural calamities.

# Total property damage per event type, keeping only event types with
# non-zero damage, ordered from largest to smallest.
property_damage <- aggregate(PROPDMG ~ EVTYPE, data = storm_data, FUN = sum) %>%
  filter(PROPDMG > 0) %>%
  arrange(desc(PROPDMG))

# Ten event types with the highest property damage.
head(property_damage, n = 10)
##              EVTYPE      PROPDMG
## 1             FLOOD 165917467374
## 2         HURRICANE  81174159010
## 3           TORNADO  56937160779
## 4       STORM SURGE  43323536000
## 5              HAIL  15732267048
## 6    TROPICAL STORM   7703890550
## 7      WINTER STORM   6688497251
## 8  TUNDERSTORM WIND   6221568518
## 9         HIGH WIND   5270046295
## 10         WILDFIRE   4765114000

This section shows the total amount of crop damage caused by natural calamities.

# Total crop damage per event type, keeping only event types with non-zero
# damage, ordered from largest to smallest.
crop_damage <- aggregate(CROPDMG ~ EVTYPE, data = storm_data, FUN = sum) %>%
  filter(CROPDMG > 0) %>%
  arrange(desc(CROPDMG))

# Ten event types with the highest crop damage.
head(crop_damage, n = 10)
##              EVTYPE     CROPDMG
## 1           DROUGHT 13972566000
## 2             FLOOD 12112744550
## 3         HURRICANE  5349782800
## 4         ICE STORM  5022113500
## 5              HAIL  3025954473
## 6      EXTREME COLD  1292973000
## 7      FROST/FREEZE  1094086000
## 8              HEAT   893863500
## 9  TUNDERSTORM WIND   744662138
## 10       HEAVY RAIN   733399800
  3. Plots:

Fatalities per event type

library(ggplot2)
# Horizontal bar chart of the (up to) ten event types with the most fatalities.
# head() is used instead of [1:10, ] so a shorter table does not yield NA rows.
# Bars are ordered by fatality count rather than alphabetically.
ggplot(
  data = head(fatal_accidents, 10),
  aes(x = reorder(EVTYPE, FATALITIES), y = FATALITIES, fill = EVTYPE)
) +
  geom_bar(stat = "identity", width = 0.3) +
  coord_flip() +
  # Axis titles follow the aesthetics, not the flipped screen position:
  # x is the event type, y is the fatality count.
  xlab("Natural Calamities") +
  ylab("No. of Fatalities") +
  ggtitle("Total no. of fatalities per event type across USA")

Injuries per event type

library(ggplot2)
# Horizontal bar chart of the (up to) ten event types with the most injuries.
# head() is used instead of [1:10, ] so a shorter table does not yield NA rows.
# Bars are ordered by injury count rather than alphabetically.
ggplot(
  data = head(injuries_accidents, 10),
  aes(x = reorder(EVTYPE, INJURIES), y = INJURIES, fill = EVTYPE)
) +
  geom_bar(stat = "identity", width = 0.3) +
  coord_flip() +
  # Axis titles follow the aesthetics, not the flipped screen position:
  # x is the event type, y is the injury count.
  xlab("Natural Calamities") +
  ylab("No. of injuries") +
  ggtitle("Total no. of injuries per event type across USA")

Property Damage per event type

library(ggplot2)
# Horizontal bar chart of the (up to) ten event types with the highest total
# property damage.
# head() is used instead of [1:10, ] so a shorter table does not yield NA rows.
# Bars are ordered by damage amount rather than alphabetically.
ggplot(
  data = head(property_damage, 10),
  aes(x = reorder(EVTYPE, PROPDMG), y = PROPDMG, fill = EVTYPE)
) +
  geom_bar(stat = "identity", width = 0.3) +
  coord_flip() +
  # Axis titles follow the aesthetics, not the flipped screen position:
  # x is the event type, y is the damage amount.
  xlab("Natural Calamities") +
  ylab("Total amount of property damage") +
  ggtitle("Total amount of property damage per event type across USA")

Crop Damage per event type

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.