Synopsis:

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

# Load dplyr, which supplies the %>% pipe, arrange() and desc() used by the
# ranking steps later in this report.
library(dplyr)

# Download the storm data only if it is not already present locally.
# The remote file is bzip2-compressed even though it is stored under a .csv
# name; read.csv() opens it through a file connection, which detects the
# compression automatically, so no manual decompression step is needed.
Url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
File <- "StormData.csv"
if (!file.exists(File)) {
  # Binary mode keeps the compressed bytes intact on every platform.
  download.file(Url, File, mode = "wb")
}

# Read from the same path that was checked/downloaded above rather than a
# second hard-coded copy of the file name. stringsAsFactors = TRUE pins the
# pre-R-4.0 default so text columns load as factors, matching the str()
# listing shown in this report.
SD <- read.csv(File, stringsAsFactors = TRUE)
str(SD)
## 'data.frame':    902297 obs. of  37 variables:
##  $ STATE__   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_DATE  : Factor w/ 16335 levels "1/1/1966 0:00:00",..: 6523 6523 4242 11116 2224 2224 2260 383 3980 3980 ...
##  $ BGN_TIME  : Factor w/ 3608 levels "00:00:00 AM",..: 272 287 2705 1683 2584 3186 242 1683 3186 3186 ...
##  $ TIME_ZONE : Factor w/ 22 levels "ADT","AKS","AST",..: 7 7 7 7 7 7 7 7 7 7 ...
##  $ COUNTY    : num  97 3 57 89 43 77 9 123 125 57 ...
##  $ COUNTYNAME: Factor w/ 29601 levels "","5NM E OF MACKINAC BRIDGE TO PRESQUE ISLE LT MI",..: 13513 1873 4598 10592 4372 10094 1973 23873 24418 4598 ...
##  $ STATE     : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ EVTYPE    : Factor w/ 985 levels "   HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
##  $ BGN_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ BGN_AZI   : Factor w/ 35 levels "","  N"," NW",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ BGN_LOCATI: Factor w/ 54429 levels "","- 1 N Albion",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ END_DATE  : Factor w/ 6663 levels "","1/1/1993 0:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ END_TIME  : Factor w/ 3647 levels ""," 0900CST",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ COUNTY_END: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ COUNTYENDN: logi  NA NA NA NA NA NA ...
##  $ END_RANGE : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ END_AZI   : Factor w/ 24 levels "","E","ENE","ESE",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ END_LOCATI: Factor w/ 34506 levels "","- .5 NNW",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ LENGTH    : num  14 2 0.1 0 0 1.5 1.5 0 3.3 2.3 ...
##  $ WIDTH     : num  100 150 123 100 150 177 33 33 100 100 ...
##  $ F         : int  3 2 2 2 2 2 2 1 3 3 ...
##  $ MAG       : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ PROPDMGEXP: Factor w/ 19 levels "","-","?","+",..: 17 17 17 17 17 17 17 17 17 17 ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ CROPDMGEXP: Factor w/ 9 levels "","?","0","2",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ WFO       : Factor w/ 542 levels ""," CI","$AC",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ STATEOFFIC: Factor w/ 250 levels "","ALABAMA, Central",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ ZONENAMES : Factor w/ 25112 levels "","                                                                                                               "| __truncated__,..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ LATITUDE  : num  3040 3042 3340 3458 3412 ...
##  $ LONGITUDE : num  8812 8755 8742 8626 8642 ...
##  $ LATITUDE_E: num  3051 0 0 0 0 ...
##  $ LONGITUDE_: num  8806 0 0 0 0 ...
##  $ REMARKS   : Factor w/ 436781 levels "","-2 at Deer Park\n",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ REFNUM    : num  1 2 3 4 5 6 7 8 9 10 ...

1. Address the question of which types of events are most harmful to population health.

Calculate the fatalities

# Total fatalities per event type, then the 20 event types with the highest
# totals, in descending order.
totFatalities <- setNames(
  aggregate(SD$FATALITIES, by = list(SD$EVTYPE), FUN = sum),
  c("Event", "Fatalities")
)
totFatalitiesSorted <- head(arrange(totFatalities, desc(Fatalities)), 20)
totFatalitiesSorted
##                      Event Fatalities
## 1                  TORNADO       5633
## 2           EXCESSIVE HEAT       1903
## 3              FLASH FLOOD        978
## 4                     HEAT        937
## 5                LIGHTNING        816
## 6                TSTM WIND        504
## 7                    FLOOD        470
## 8              RIP CURRENT        368
## 9                HIGH WIND        248
## 10               AVALANCHE        224
## 11            WINTER STORM        206
## 12            RIP CURRENTS        204
## 13               HEAT WAVE        172
## 14            EXTREME COLD        160
## 15       THUNDERSTORM WIND        133
## 16              HEAVY SNOW        127
## 17 EXTREME COLD/WIND CHILL        125
## 18             STRONG WIND        103
## 19                BLIZZARD        101
## 20               HIGH SURF        101

Calculate the injuries

# Total injuries per event type, then the 20 event types with the highest
# totals, in descending order.
totInjuries <- setNames(
  aggregate(SD$INJURIES, by = list(SD$EVTYPE), FUN = sum),
  c("Event", "Injuries")
)
totInjuriesSorted <- head(arrange(totInjuries, desc(Injuries)), 20)
totInjuriesSorted
##                 Event Injuries
## 1             TORNADO    91346
## 2           TSTM WIND     6957
## 3               FLOOD     6789
## 4      EXCESSIVE HEAT     6525
## 5           LIGHTNING     5230
## 6                HEAT     2100
## 7           ICE STORM     1975
## 8         FLASH FLOOD     1777
## 9   THUNDERSTORM WIND     1488
## 10               HAIL     1361
## 11       WINTER STORM     1321
## 12  HURRICANE/TYPHOON     1275
## 13          HIGH WIND     1137
## 14         HEAVY SNOW     1021
## 15           WILDFIRE      911
## 16 THUNDERSTORM WINDS      908
## 17           BLIZZARD      805
## 18                FOG      734
## 19   WILD/FOREST FIRE      545
## 20         DUST STORM      440

Visualization

# Two panels side by side: fatalities on the left, injuries on the right.
# mfrow is set before cex because changing the layout resets cex. The wide
# bottom margin and las = 3 (perpendicular labels) leave room for the long
# event-type names under each bar.
par(
  mfrow = c(1, 2),
  mar = c(10, 4, 2, 2),
  las = 3,
  cex = 0.7,
  cex.main = 1.4,
  cex.lab = 1.2
)
barplot(
  height = totFatalitiesSorted$Fatalities,
  names.arg = totFatalitiesSorted$Event,
  col = "blue",
  main = "Top 20 Weather Events for Fatalities",
  ylab = "Number of Fatalities"
)
barplot(
  height = totInjuriesSorted$Injuries,
  names.arg = totInjuriesSorted$Event,
  col = "red",
  main = "Top 20 Weather Events for Injuries",
  ylab = "Number of Injuries"
)

CONCLUSION:

Thus we see that tornadoes cause by far the most deaths and injuries recorded in the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. Excessive heat causes the second most deaths, whereas for injuries the second- to fourth-ranked causes (thunderstorm wind, flood, and excessive heat) have very similar values.

2. Address the question of which types of events have the greatest economic consequences.

Calculate the property damage

# Total of the PROPDMG column per event type, then the 20 largest totals in
# descending order.
# NOTE(review): only the raw PROPDMG figures are summed; the PROPDMGEXP column
# is not used here. It presumably carries a unit multiplier for PROPDMG -
# confirm against the NOAA documentation before reading these totals as
# dollar amounts.
totProperty <- setNames(
  aggregate(SD$PROPDMG, by = list(SD$EVTYPE), FUN = sum),
  c("Event", "Property")
)
totPropertySorted <- head(arrange(totProperty, desc(Property)), 20)
totPropertySorted
##                   Event   Property
## 1               TORNADO 3212258.16
## 2           FLASH FLOOD 1420124.59
## 3             TSTM WIND 1335965.61
## 4                 FLOOD  899938.48
## 5     THUNDERSTORM WIND  876844.17
## 6                  HAIL  688693.38
## 7             LIGHTNING  603351.78
## 8    THUNDERSTORM WINDS  446293.18
## 9             HIGH WIND  324731.56
## 10         WINTER STORM  132720.59
## 11           HEAVY SNOW  122251.99
## 12             WILDFIRE   84459.34
## 13            ICE STORM   66000.67
## 14          STRONG WIND   62993.81
## 15           HIGH WINDS   55625.00
## 16           HEAVY RAIN   50842.14
## 17       TROPICAL STORM   48423.68
## 18     WILD/FOREST FIRE   39344.95
## 19       FLASH FLOODING   28497.15
## 20 URBAN/SML STREAM FLD   26051.94

Calculate the crop damage

# Total of the CROPDMG column per event type, then the 20 largest totals in
# descending order.
# NOTE(review): only the raw CROPDMG figures are summed; the CROPDMGEXP column
# is not used here. It presumably carries a unit multiplier for CROPDMG -
# confirm against the NOAA documentation before reading these totals as
# dollar amounts.
totCrop <- setNames(
  aggregate(SD$CROPDMG, by = list(SD$EVTYPE), FUN = sum),
  c("Event", "Crop")
)
totCropSorted <- head(arrange(totCrop, desc(Crop)), 20)
totCropSorted
##                 Event      Crop
## 1                HAIL 579596.28
## 2         FLASH FLOOD 179200.46
## 3               FLOOD 168037.88
## 4           TSTM WIND 109202.60
## 5             TORNADO 100018.52
## 6   THUNDERSTORM WIND  66791.45
## 7             DROUGHT  33898.62
## 8  THUNDERSTORM WINDS  18684.93
## 9           HIGH WIND  17283.21
## 10         HEAVY RAIN  11122.80
## 11       FROST/FREEZE   7034.14
## 12       EXTREME COLD   6121.14
## 13     TROPICAL STORM   5899.12
## 14          HURRICANE   5339.31
## 15     FLASH FLOODING   5126.05
## 16  HURRICANE/TYPHOON   4798.48
## 17           WILDFIRE   4364.20
## 18     TSTM WIND/HAIL   4356.65
## 19   WILD/FOREST FIRE   4189.54
## 20          LIGHTNING   3580.61

Visualization

# Two panels side by side: property damage on the left, crop damage on the
# right. Both panels use the same ylim so bar heights are directly comparable
# between them. mfrow is set before cex because changing the layout resets cex.
par(
  mfrow = c(1, 2),
  mar = c(10, 4, 2, 2),
  las = 3,
  cex = 0.7,
  cex.main = 1.4,
  cex.lab = 1.2
)
barplot(
  height = totPropertySorted$Property,
  names.arg = totPropertySorted$Event,
  col = "Brown",
  main = "Top 20 Weather Events for Property Damage ",
  ylab = "Amount of Property Damage",
  ylim = c(0, 3500000)
)
barplot(
  height = totCropSorted$Crop,
  names.arg = totCropSorted$Event,
  col = "Green",
  main = "Top 20 Weather Events for Crop Damage",
  ylab = "Amount of  Crop Damage",
  ylim = c(0, 3500000)
)

The total damage, obtained by adding both costs (property and crop damage)

# Row-wise sum of crop and property damage, totalled per event type; keep the
# 20 largest totals in descending order.
# NOTE(review): the raw CROPDMG and PROPDMG figures are added as-is; the
# CROPDMGEXP / PROPDMGEXP columns are not used here. They presumably carry
# unit multipliers - confirm against the NOAA documentation before reading
# these totals as dollar amounts.
totTotalCost <- setNames(
  aggregate(SD$CROPDMG + SD$PROPDMG, by = list(SD$EVTYPE), FUN = sum),
  c("Event", "TotalCost")
)
totTotalCostSorted <- head(arrange(totTotalCost, desc(TotalCost)), 20)
totTotalCostSorted
##                 Event  TotalCost
## 1             TORNADO 3312276.68
## 2         FLASH FLOOD 1599325.05
## 3           TSTM WIND 1445168.21
## 4                HAIL 1268289.66
## 5               FLOOD 1067976.36
## 6   THUNDERSTORM WIND  943635.62
## 7           LIGHTNING  606932.39
## 8  THUNDERSTORM WINDS  464978.11
## 9           HIGH WIND  342014.77
## 10       WINTER STORM  134699.58
## 11         HEAVY SNOW  124417.71
## 12           WILDFIRE   88823.54
## 13          ICE STORM   67689.62
## 14        STRONG WIND   64610.71
## 15         HEAVY RAIN   61964.94
## 16         HIGH WINDS   57384.60
## 17     TROPICAL STORM   54322.80
## 18   WILD/FOREST FIRE   43534.49
## 19            DROUGHT   37997.67
## 20     FLASH FLOODING   33623.20

Visualization

# Single full-width panel for the combined (property + crop) damage totals.
# mfrow is set before cex because changing the layout resets cex; the wide
# bottom margin and las = 3 leave room for the perpendicular event-type labels.
par(
  mfrow = c(1, 1),
  mar = c(10, 4, 2, 2),
  las = 3,
  cex = 0.7,
  cex.main = 1.4,
  cex.lab = 1.2
)
barplot(
  height = totTotalCostSorted$TotalCost,
  names.arg = totTotalCostSorted$Event,
  col = "Black",
  main = "Top 20 Weather Events for total Damage ",
  ylab = "Amount of total Damage",
  ylim = c(0, 3500000)
)

CONCLUSION:

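Thus we notice that tornadoes cause the most total damage, as measured by the summed PROPDMG and CROPDMG values (the PROPDMGEXP and CROPDMGEXP columns are not taken into account in this analysis).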