Synopsis:

This document is an exploratory analysis of the U.S. National Oceanic and Atmospheric Administration's (NOAA) storm database. The database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.

The objective is to assess the impact of these weather events, focusing mainly on injuries, fatalities, and crop and property damage.

DATA PROCESSING:

Load data

# Names of the compressed NOAA storm data archive and of the CSV file it
# decompresses to. Both are looked up in the current working directory.
stormDataZipFile <- "repdata-data-StormData.csv.bz2"
unZippedFile <- "StormData.csv"
projDir <- getwd()

# Absolute paths, used both for the existence checks and for decompression
# so the check and the action always refer to the same files.
zipFilePath <- file.path(projDir, stormDataZipFile)
unzippedDataPath <- file.path(projDir, unZippedFile)

# Decompress only when the CSV is not already present.
# remove = FALSE keeps the archive on disk; skip = TRUE leaves an existing
# destination file untouched.
if (!file.exists(unzippedDataPath)) {
  if (!file.exists(zipFilePath)) {
    stop("Storm data archive not found: ", zipFilePath, call. = FALSE)
  }
  bunzip2(zipFilePath, unzippedDataPath, remove = FALSE, skip = TRUE)
}
Read the file into memory
  # Parse the decompressed CSV into a data frame.
  stormRawData <- read.csv(file = unZippedFile)

Subset the storm data to the columns describing health and economic impact:

  # Columns needed for the health and economic impact analysis.
  events <- c(
    "EVTYPE", "FATALITIES", "INJURIES",
    "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
  )

  # Keep only those columns of the raw data.
  filteredEventData <- stormRawData[, events]

  # NOTE(review): this statement only creates a variable named `cache`; it
  # does not enable any caching by itself. If chunk caching was intended it
  # presumably belongs in the chunk options instead -- confirm.
  cache <- TRUE

A glimpse at the raw data

  # First six rows of the selected columns.
  head(filteredEventData, n = 6L)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO          0       15    25.0          K       0           
## 2 TORNADO          0        0     2.5          K       0           
## 3 TORNADO          0        2    25.0          K       0           
## 4 TORNADO          0        2     2.5          K       0           
## 5 TORNADO          0        2     2.5          K       0           
## 6 TORNADO          0        6     2.5          K       0

Health Consequences:

INJURIES : Summarize Injuries data

  # Total injuries per event type, sorted from most to fewest.
  injuries <- ddply(
    filteredEventData,
    .(EVTYPE),
    summarize,
    totalInjuries = sum(INJURIES, na.rm = TRUE)
  )
  injuries <- injuries[order(injuries[["totalInjuries"]], decreasing = TRUE), ]

  # Ten event types with the most injuries.
  head(injuries, n = 10)
##                EVTYPE totalInjuries
## 834           TORNADO         91346
## 856         TSTM WIND          6957
## 170             FLOOD          6789
## 130    EXCESSIVE HEAT          6525
## 464         LIGHTNING          5230
## 275              HEAT          2100
## 427         ICE STORM          1975
## 153       FLASH FLOOD          1777
## 760 THUNDERSTORM WIND          1488
## 244              HAIL          1361
# Rows (one per event type) and columns of the injuries summary.
dim(injuries)
## [1] 985   2
Create the Injuries Plot object
# Horizontal bar chart of the ten event types with the most injuries.
# The bar colour and transparency are fixed values, so they are set on the
# layer instead of being mapped inside aes(), where a constant is treated as
# data and passed through a scale rather than applied literally.
injuriesPlot <-
  ggplot(
    data = head(injuries, 10),
    aes(x = reorder(EVTYPE, totalInjuries), y = totalInjuries)
  ) +
  geom_bar(stat = "identity", fill = "darkblue", alpha = 0.5) +
  xlab("Events") +
  ylab("Total Injuries") +
  coord_flip() +
  theme(legend.position = "none")

FATALITIES : Summarize fatalities data

# Total fatalities per event type, sorted from most to fewest.
fatalitiesCase <- ddply(
  filteredEventData,
  .(EVTYPE),
  summarize,
  totalFatalities = sum(FATALITIES, na.rm = TRUE)
)
fatalitiesCase <-
  fatalitiesCase[order(fatalitiesCase[["totalFatalities"]], decreasing = TRUE), ]

# Rows (one per event type) and columns of the fatalities summary.
dim(fatalitiesCase)
## [1] 985   2
# Ten event types with the most fatalities.
head(fatalitiesCase, 10)
##             EVTYPE totalFatalities
## 834        TORNADO            5633
## 130 EXCESSIVE HEAT            1903
## 153    FLASH FLOOD             978
## 275           HEAT             937
## 464      LIGHTNING             816
## 856      TSTM WIND             504
## 170          FLOOD             470
## 585    RIP CURRENT             368
## 359      HIGH WIND             248
## 19       AVALANCHE             224
Create a Plot object for the Fatalities
# Horizontal bar chart of the ten event types with the most fatalities.
# The bar colour and transparency are fixed values, so they are set on the
# layer instead of being mapped inside aes(), where a constant is treated as
# data and passed through a scale rather than applied literally.
fatalitiesPlot <-
  ggplot(
    data = head(fatalitiesCase, 10),
    aes(x = reorder(EVTYPE, totalFatalities), y = totalFatalities)
  ) +
  geom_bar(stat = "identity", fill = "red", alpha = 0.3) +
  xlab("Events") +
  ylab("Total Fatalities") +
  ggtitle("Top 10 Weather Events Health Injuries and/or Fatalities impacts in US") +
  coord_flip() +
  theme(legend.position = "none")

Economic Consequences:

Uniqueness of Property Exponential data
  # Distinct exponent codes present in the property damage column.
  unique(filteredEventData[["PROPDMGEXP"]])
##  [1] K M   B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels:  - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
Uniqueness of Crop Exponential data
  # Distinct exponent codes present in the crop damage column.
  unique(filteredEventData[["CROPDMGEXP"]])
## [1]   M K m B ? 0 k 2
## Levels:  ? 0 2 B k K m M
Since the exponent codes appear in both lowercase and uppercase, we convert them all to uppercase so that each code is counted only once.
  # Upper-case both exponent columns so that e.g. "k" and "K" become one code.
  filteredEventData[["PROPDMGEXP"]] <- toupper(filteredEventData[["PROPDMGEXP"]])
  filteredEventData[["CROPDMGEXP"]] <- toupper(filteredEventData[["CROPDMGEXP"]])
Assigning ‘0’ to invalid exponent data like “”, ‘+’, ‘-’ and ‘?’
  # Replace empty and symbolic exponent codes with "0" in both columns.
  filteredEventData$PROPDMGEXP[
    filteredEventData$PROPDMGEXP %in% c("?", "-", "+", "")
  ] <- "0"
  filteredEventData$CROPDMGEXP[
    filteredEventData$CROPDMGEXP %in% c("?", "-", "+", "")
  ] <- "0"
New Property Exponential Uniqueness Data
  # Distinct property exponent codes after clean-up.
  unique(filteredEventData[["PROPDMGEXP"]])
##  [1] "K" "M" "0" "B" "5" "6" "4" "2" "3" "H" "7" "1" "8"
New Crop Exponential Data Uniqueness
  # Distinct crop exponent codes after clean-up.
  unique(filteredEventData[["CROPDMGEXP"]])
## [1] "0" "M" "K" "B" "2"
The exponent values are stored in a separate column that describes their magnitude with letters (h = hundred, k = thousand, m = million, b = billion).
Property conversion
  # Translate letter codes into the matching power of ten, kept as strings:
  # B -> "9", M -> "6", K -> "3", H -> "2". Other values are left unchanged.
  filteredEventData$PROPDMGEXP <- local({
    exponent <- filteredEventData$PROPDMGEXP
    letterToPower <- c(B = "9", M = "6", K = "3", H = "2")
    isLetter <- exponent %in% names(letterToPower)
    exponent[isLetter] <- unname(letterToPower[exponent[isLetter]])
    exponent
  })
Crop conversion
# Translate letter codes into the matching power of ten, kept as strings:
# B -> "9", M -> "6", K -> "3", H -> "2". Other values are left unchanged.
filteredEventData$CROPDMGEXP <- local({
  exponent <- filteredEventData$CROPDMGEXP
  letterToPower <- c(B = "9", M = "6", K = "3", H = "2")
  isLetter <- exponent %in% names(letterToPower)
  exponent[isLetter] <- unname(letterToPower[exponent[isLetter]])
  exponent
})
Multiply Property / Crop Exponential values with Actual damage data
PROPDMGEXP and CROPDMGEXP each hold a power of 10, so the actual damage is the recorded figure multiplied by 10 raised to that power.
Property calculations
  # Property damage figure scaled by ten to the power of its exponent code.
  filteredEventData$propertyDamage <- with(
    filteredEventData,
    PROPDMG * 10^as.numeric(PROPDMGEXP)
  )
Crop calculations
# Crop damage figure scaled by ten to the power of its exponent code.
filteredEventData$cropDamage <-
  filteredEventData$CROPDMG * 10^as.numeric(filteredEventData$CROPDMGEXP)

# First ten rows, including the two derived damage columns.
head(filteredEventData, 10)
##     EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1  TORNADO          0       15    25.0          3       0          0
## 2  TORNADO          0        0     2.5          3       0          0
## 3  TORNADO          0        2    25.0          3       0          0
## 4  TORNADO          0        2     2.5          3       0          0
## 5  TORNADO          0        2     2.5          3       0          0
## 6  TORNADO          0        6     2.5          3       0          0
## 7  TORNADO          0        1     2.5          3       0          0
## 8  TORNADO          0        0     2.5          3       0          0
## 9  TORNADO          1       14    25.0          3       0          0
## 10 TORNADO          0        0    25.0          3       0          0
##    propertyDamage cropDamage
## 1           25000          0
## 2            2500          0
## 3           25000          0
## 4            2500          0
## 5            2500          0
## 6            2500          0
## 7            2500          0
## 8            2500          0
## 9           25000          0
## 10          25000          0

Economic Impact on Property and Crop

Summarize Economic Damage Data

# Total property and crop damage per event type.
financialDamage <- ddply(
  filteredEventData,
  .(EVTYPE),
  summarize,
  totalPropertyDamage = sum(propertyDamage),
  totalCropDamage = sum(cropDamage)
)
Omit events that are non disastrous (with Zero values)
# Keep event types with a positive crop or property damage total.
financialDamage <- financialDamage[
  with(financialDamage, totalCropDamage > 0 | totalPropertyDamage > 0),
]
head(financialDamage, n = 6L)
##                   EVTYPE totalPropertyDamage totalCropDamage
## 1     HIGH SURF ADVISORY              200000               0
## 3            FLASH FLOOD               50000               0
## 5              TSTM WIND             8100000               0
## 6        TSTM WIND (G45)                8000               0
## 9                      ?                5000               0
## 14   AGRICULTURAL FREEZE                   0        28820000
Sort the data in descending order
# Sort by property damage, largest first. Crop damage (also largest first)
# only breaks ties between event types with equal property totals.
financialDamage <- financialDamage[
  order(
    financialDamage$totalPropertyDamage,
    financialDamage$totalCropDamage,
    decreasing = TRUE
  ),
]
Create a Plot object for Property Damages
# Horizontal bar chart of property damage for the first ten rows of
# financialDamage. Totals are divided by 1e9 so the bars are in the unit the
# axis label states (billions); plotting log10() of the raw totals did not
# match that label.
# The bar colour and transparency are fixed values, so they are set on the
# layer instead of being mapped inside aes().
propertyPlot <-
  ggplot(
    data = head(financialDamage, 10),
    aes(
      x = reorder(EVTYPE, totalPropertyDamage),
      y = totalPropertyDamage / 1e9
    )
  ) +
  geom_bar(stat = "identity", fill = "darkblue", alpha = 0.3) +
  xlab("Events") +
  ylab("Property Damages [Billions $]") +
  ggtitle("Economical impact of Top 10 Weather Events in the USA") +
  coord_flip() +
  theme(legend.position = "none")
Create a Plot object for Crop Damages
# Horizontal bar chart of the ten event types with the largest crop damage.
# The rows are ranked by totalCropDamage here before taking the top ten,
# rather than relying on whatever order financialDamage currently has.
# Totals are divided by 1e9 so the bars are in the unit the axis label
# states (billions); plotting log10() of the raw totals did not match it.
# The bar colour and transparency are fixed values, so they are set on the
# layer instead of being mapped inside aes().
cropPlot <-
  ggplot(
    data = head(
      financialDamage[
        order(financialDamage$totalCropDamage, decreasing = TRUE),
      ],
      10
    ),
    aes(x = reorder(EVTYPE, totalCropDamage), y = totalCropDamage / 1e9)
  ) +
  geom_bar(stat = "identity", fill = "darkgreen", alpha = 0.3) +
  xlab("Events") +
  ylab("Crop Damages [Billions $]") +
  coord_flip() +
  theme(legend.position = "none")

RESULTS:

Fatalities and Injuries:

# Stack the fatalities chart above the injuries chart.
grid.arrange(fatalitiesPlot, injuriesPlot, ncol = 1, nrow = 2)

Economic Impact on Property and Crop:

# Stack the property damage chart above the crop damage chart.
grid.arrange(propertyPlot, cropPlot, ncol = 1, nrow = 2)

Tornadoes have the greatest impact on health, while floods have the most damaging impact on crops and property among the top 10 weather events, as shown in the plots above.




Reproducible Research Project 2
A Johns Hopkins University Coursera Data Science Specialization course