Analysis of Severe Natural Events That Impacted Life and the Economy in US History

Synopsis

Storm Data is an official publication of the National Oceanic and Atmospheric Administration (NOAA). This report summarizes the impact of rare and unusual weather phenomena that have caused loss of life and property. The summary covers total injuries, fatalities, property damage, and crop damage estimates for all such events in the period 1950-2011. The analysis starts by preprocessing the raw data to compute the monetary damage caused by each event. Next, the Event Types are cleaned and grouped into 9 broad Event Categories. The dates are then processed to extract the year in which each event occurred. Finally, the data is rolled up to the Event Category - Year level and displayed as an intuitive plot.

Data Processing

1. Downloading Data [NOAA](https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2) 

# Decompress the downloaded archive into NOAA.csv, keeping the .bz2 file.
# The extraction is skipped when NOAA.csv is already present: without this
# guard bunzip2() stops with "File already exists: NOAA.csv" on a re-run.
library(R.utils)
if (!file.exists("NOAA.csv")) {
  bunzip2("NOAA.bz2", "NOAA.csv", remove = FALSE)
}
2. Installing and loading the "R.utils" package

3. Extracting the file using bunzip2 function

4. Reading the data into R

# Load the decompressed storm data. read.csv() already treats the first row
# as a header and splits fields on commas, so its defaults are relied on.
df1 <- read.csv(file = "NOAA.csv")
5. Calculating the monetary damage to property and crops

  # Work on a copy so the data as read into df1 stays untouched.
  df2 <- df1

  # Convert a damage magnitude and its exponent code into a dollar amount.
  #
  # amount:   numeric vector of damage magnitudes (e.g. df2$PROPDMG).
  # exponent: vector of exponent codes, same length as `amount`
  #           (e.g. df2$PROPDMGEXP).
  # Returns `amount` multiplied by the factor of the first rule whose pattern
  # is found in the exponent code; amounts whose code matches no rule are
  # returned unchanged.
  scale_damage <- function(amount, exponent) {
    # Rules in priority order: letter codes first, then digit codes (10^digit).
    patterns <- c("[Hh]", "[Kk]", "[Mm]", "[Bb]",
                  "1", "2", "3", "4", "5", "6", "7", "8")
    factors <- c(100, 1000, 1000000, 1000000000,
                 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8)

    scaled <- amount
    # Apply the rules last-to-first so that an earlier (higher priority) rule
    # overwrites the result of any later rule that also matched.
    for (i in rev(seq_along(patterns))) {
      hit <- grepl(patterns[i], exponent)
      scaled[hit] <- amount[hit] * factors[i]
    }
    scaled
  }

  # Calculating dollar damage to property
  df2$PropDmgAmt <- scale_damage(df2$PROPDMG, df2$PROPDMGEXP)

  # Calculating dollar damage to crops
  df2$CropDmgAmt <- scale_damage(df2$CROPDMG, df2$CROPDMGEXP)

  # Calculating total dollar damage
  df2$TotalAmt <- df2$PropDmgAmt + df2$CropDmgAmt
6. Categorizing EVTYPE (event type), which has 985 unique entries, into 9 broad categories stored in EVCATGRY

      * COLD/ICE

      * HEAT/DROUGHT/FIRE

      * RAIN/FLOOD/HIGHSEAS

      * WIND/STORM/TORNADO

      * VOLCANIC ERUPTION/EARTHQUAKE/TSUNAMI

      * LIGHTNING

      * FOG

      * DUST

      * OTHERS

  options(width = 120)
  opts_chunk$set(warning = FALSE, message = FALSE, tidy = FALSE, size = "small")

  # Category rules in priority order: an event type is assigned to the first
  # category whose pattern is found in EVTYPE, and to "OTHERS" if none match.
  # The COLD/ICE pattern previously contained "HYPOTH    ERMIA" (with embedded
  # spaces), which could never match; it now reads "HYPOTHERMIA".
  evcat_rules <- c(
    "COLD/ICE" = "[Ww]inter|AVALANCHE|AVALANCE|BLIZZARD|[Bb]lizzard|CHILL|COLD|COOL|GLAZE|HYPOTHERMIA|[Hh]ypothermia|HYPERTHERMIA|ICE|[Ii]ce|ICY|FREEZ|[Ff]reezing|[Ff]reez|FROST|[Ff]rost|LOW TEMP|SLEET|SNOW|WINT|[Ff]reeze|[Ss]now|[Cc]old|[Ii]cy",
    "HEAT/DROUGHT/FIRE" = "BELOW NORMAL PRECIP|[Ww]armth|DRY|DRIE|DROUGHT|FIRE|HEAT|[Hh]eat|HIGH TEMP|HOT|WARM|[Dd]ry",
    "RAIN/FLOOD/HIGHSEAS" = "COAST|CSTL|CURRENT|DAM FAIL|DAM BREAK|DRIZZLE|DROWN|EROSION|[Ee]rosion|EROSIN|[Ff]lood|FLOOD|FLOOOD|FLD|HEAVY SHOWER|HIGH WATER|HIGH WAVES|LAKE|LANDSLUMP|[Ll]andslump|[Mm]udslide|MARINE|PRECIP|RAIN|RISING WATER|RIVER|[Pp]recipitation|ROGUE WAVE|SLIDE|STREAM|SEA|SEICHE|SURF|SWELL|TIDE|TIDAL|TORRENT|WET|[Ww]et|[Rr]ain|[Cc]oastal",
    "WIND/STORM/TORNADO" = "BURST|[Mm]icroburst|CLOUD|DEPRESSION|FLOYD|FUNNEL|GUST|HAIL|[Hh]ail|HURRICANE|LANDSPOUT|STORM|SOUTHEAST|THUNDERSTORM|THUNDERTSORM|THUNDESTORM|TORNADO|TORNDAO|TSTM|TSTM WIND|TURBULENCE|TYPHOON|WALL|WATERSPOUT|WATER SPOUT|WAYTERSPOUT|WIND|[Ww]ind|WND|[Hh]urricane|THUNDERSTORM WINDS|[Tt]stm",
    "VOLCANICERUPTION/EARTHQUAKE/TSUNAMI" = "TSUNAMI|VOLCAN|[Vv]olcanic",
    "LIGHTNING" = "LIGHTNING|LIGNTNING|LIGHTING",
    "FOG" = "FOG|VOG|SMOKE|[Ff]og",
    "DUST" = "DUST|[Dd]ust"
  )

  # Start with the fallback category, then apply the rules last-to-first so
  # that a higher-priority rule overwrites any lower-priority match.
  df2$EVCATGRY <- rep("OTHERS", nrow(df2))
  for (category in rev(names(evcat_rules))) {
    df2$EVCATGRY[grepl(evcat_rules[[category]], df2$EVTYPE)] <- category
  }
7. Aggregating the data to determine the effects at the Event Category - Year level
# Extract the year of each event from BGN_DATE, parsed as month/day/year.
df2$Year <- as.factor(format(as.Date(df2$BGN_DATE, "%m/%d/%Y"), "%Y"))

options(scipen = 100)

# Columns are addressed by name rather than by position, so the aggregation
# does not silently pick the wrong columns if the column layout changes.
metric_cols <- c("FATALITIES", "INJURIES", "PropDmgAmt", "CropDmgAmt", "TotalAmt")

# Event counts: one row per event category, one column per year.
agg_1 <- tapply(df2$EVCATGRY, df2[c("EVCATGRY", "Year")], length)

# Category/year combinations with no events come back as NA; count them as 0.
agg_1[is.na(agg_1)] <- 0

# Metric totals per year and event category.
agg_2 <- aggregate(df2[metric_cols], df2[c("Year", "EVCATGRY")], sum)

# Metric totals per year.
agg_3 <- aggregate(df2[metric_cols], df2["Year"], sum)

# Metric totals per event category.
agg_4 <- aggregate(df2[metric_cols], df2["EVCATGRY"], sum)

Results

Barplot to display the Count of Events per Year partitioned by Event Category

# Stacked bar chart of event counts: one bar per column of agg_1, with one
# coloured segment per row. Bars and legend share the same palette.
category_colours <- rainbow(15)
p <- barplot(
  agg_1,
  col = category_colours,
  main = "Count of Events by Year",
  xlab = "Year",
  ylab = "Count of Events"
)
legend(
  "topleft",
  legend = rownames(agg_1),
  col = category_colours,
  pch = "*",
  cex = 0.8
)

# Displaying data labels: each bar's total, written vertically just above it
ypos <- colSums(agg_1)
text(
  p, ypos + 2500,
  labels = as.integer(ypos),
  col = "Black", cex = 0.8, srt = 90, xpd = TRUE
)

plot of chunk barplot


**As the graph shows, the “WIND/STORM/TORNADO” category alone accounts for 79.0276% of all events that occurred in the time frame under analysis.**

* Displaying the Event Category summary table for the metrics

# Penalise scientific notation and allow 120-character output lines before
# printing the per-category totals.
options(scipen = 100, width = 120)
opts_chunk$set(
  comment = "Table",
  warning = FALSE,
  message = FALSE,
  tidy = FALSE,
  size = "small"
)
print(agg_4)
##                              EVCATGRY FATALITIES INJURIES   PropDmgAmt  CropDmgAmt     TotalAmt
## 1                            COLD/ICE       1362     6876  12718442261  8757127950  21475570211
## 2                                DUST          2       43       738630           0       738630
## 3                                 FOG         80     1076     22929500           0     22929500
## 4                   HEAT/DROUGHT/FIRE       3271    10884   9574792850 15280341910  24855134760
## 5                           LIGHTNING        817     5231    930419430    12092090    942511520
## 6                              OTHERS          9       11      1948050     1034400      2982450
## 7                 RAIN/FLOOD/HIGHSEAS       2517     9919 176589147165 13352657000 189941804165
## 8 VOLCANICERUPTION/EARTHQUAKE/TSUNAMI         33      129    144562000       20000    144582000
## 9                  WIND/STORM/TORNADO       7054   106359 228241888882 11700918831 239942807713

**The table above shows that the maximum economic damage was caused by “WIND/STORM/TORNADO”, with an estimated total of $239,942,807,713.**