SYNOPSIS

#Data from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database are used in this analysis. The database consolidates records from 1950 to 2011, although the earliest years are less thoroughly documented than the most recent ones, which reflects how data gathering has improved over time. Storms and other severe weather conditions not only disrupt people’s schedules but, in the bigger picture, hurt a country’s economy by causing serious damage. Property damage is damage dealt to man-made things, while crop damage is an example of damage to nature. Both are assessed here to analyze the impact of weather on the economy.

DATA PROCESSING

#Check whether the data file is already available; if not, the code downloads it automatically.

# Download the compressed NOAA storm archive only when no entry with that
# name is listed in the current working directory.
if (!is.element("stormdata_noaa.csv.bz2", list.files("./"))) {
  download.file(
    "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = "stormdata_noaa.csv.bz2"
  )
}

#Read the data into a single variable named “data”. After reading, check the dimensions and column names to get a rough idea of what is inside.

# Load the bzip2-compressed CSV into `data`, keeping text columns as plain
# character vectors. read.csv() already defaults to a comma separator and a
# header row, so those arguments are left implicit.
data <- read.csv(
  bzfile("stormdata_noaa.csv.bz2"),
  stringsAsFactors = FALSE
)

# Show the number of rows and columns that were read.
print(dim(data))
## [1] 902297     37

# Show the column names available for the analysis.
print(names(data))
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

#Not all of the data will be used later, so it is better to take a subset and save it in a variable named “storm_subdata”.

# Keep only the event type plus the casualty and damage columns.
# Columns are selected by name rather than by position so the subset stays
# correct even if the column order of the raw file ever changes, and a
# missing column fails loudly instead of silently picking the wrong one.
storm_subdata <- data[, c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)]

# Confirm which columns were retained.
names(storm_subdata)
## [1] "EVTYPE"     "FATALITIES" "INJURIES"   "PROPDMG"    "PROPDMGEXP"
## [6] "CROPDMG"    "CROPDMGEXP"

RESULTS

Get data from the subset for fatalities

# Total the fatalities recorded for every event type.
fatalities_sum <- aggregate(
  FATALITIES ~ EVTYPE,
  data = storm_subdata,
  FUN = sum
)

# Rank the event types from most to fewest fatalities.
fatalities_sum <- fatalities_sum[
  with(fatalities_sum, order(FATALITIES, decreasing = TRUE)),
]

# Keep the six deadliest event types and display them.
fatalities_max <- fatalities_sum[seq_len(6), ]
print(fatalities_max)
##             EVTYPE FATALITIES
## 834        TORNADO       5633
## 130 EXCESSIVE HEAT       1903
## 153    FLASH FLOOD        978
## 275           HEAT        937
## 464      LIGHTNING        816
## 856      TSTM WIND        504

Get data from the subset for injuries

# Total the injuries recorded for every event type.
injuries_sum <- aggregate(
  INJURIES ~ EVTYPE,
  data = storm_subdata,
  FUN = sum
)

# Rank the event types from most to fewest injuries.
injuries_sum <- injuries_sum[
  with(injuries_sum, order(INJURIES, decreasing = TRUE)),
]

# Keep the six event types with the most injuries and display them.
injuries_max <- injuries_sum[seq_len(6), ]
print(injuries_max)
##             EVTYPE INJURIES
## 834        TORNADO    91346
## 856      TSTM WIND     6957
## 170          FLOOD     6789
## 130 EXCESSIVE HEAT     6525
## 464      LIGHTNING     5230
## 275           HEAT     2100

#Plotting data to a bar chart

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Rank events by total casualties (fatalities + injuries) computed from the
# full subset. Intersecting the separate top-six fatality and injury tables
# instead would keep only the events that appear in both lists and silently
# drop any event that ranks high on just one of them (e.g. FLOOD, which is
# third for injuries but absent from the fatalities top six).
combined <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = storm_subdata,
  FUN = sum
)
combined <- combined[
  order(combined$FATALITIES + combined$INJURIES, decreasing = TRUE),
]

# Keep the five events with the most casualties, matching the plot title.
combined <- head(combined, 5)
event_name <- combined$EVTYPE

# Grouped bars: one fatalities bar (red) and one injuries bar (green) per
# event. t() turns the two count columns into the 2 x n matrix that
# barplot(beside = TRUE) expects.
barplot(
  t(combined[, -1]),
  names.arg = event_name,
  ylim = c(0, 100000),
  beside = TRUE,
  cex.names = 0.75,
  las = 1,
  col = c("red", "green"),
  main = "Top 5 Events with the Most Casualties"
)
legend(
  "topright",
  c("Fatalities", "Injuries"),
  fill = c("red", "green"),
  bty = "n"
)

#Create the variables property_damage and crop_damage, which will be used to evaluate the economic damage of each disaster.

# Convert damage figures and their magnitude codes into plain US dollars.
#
#   amount   - numeric vector of damage figures (PROPDMG or CROPDMG).
#   exponent - vector of magnitude codes (PROPDMGEXP or CROPDMGEXP):
#              H = hundreds, K = thousands, M = millions, B = billions.
#
# Returns a numeric vector the same length as `amount`. Rows whose code is
# not one of H/K/M/B (blank or any other value) are valued at 0.
# Codes are matched case-insensitively so that a lower-case "k" or "m" is
# not silently counted as zero damage.
# NOTE(review): assumes lower-case codes carry the same meaning as their
# upper-case forms - confirm against the NOAA documentation.
damage_in_usd <- function(amount, exponent) {
  multiplier <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)

  # Look up every row's multiplier in one vectorised step; unknown codes
  # (including the empty string) come back as NA.
  scale <- unname(multiplier[toupper(exponent)])
  known <- !is.na(scale)

  usd <- numeric(length(amount))
  usd[known] <- amount[known] * scale[known]
  usd
}

# Dollar value of the property and crop damage for every record.
data$property_damage <- damage_in_usd(data$PROPDMG, data$PROPDMGEXP)
data$crop_damage <- damage_in_usd(data$CROPDMG, data$CROPDMGEXP)

#Consolidating property and crop damage to find out the economic damages.

# Total economic cost (property plus crop damage) for each event type,
# with the result columns relabelled for reporting.
economic_damage <- setNames(
  aggregate(property_damage + crop_damage ~ EVTYPE, data = data, FUN = sum),
  c("EVENT_TYPE", "TOTAL_DAMAGE")
)

# Rank from costliest to least costly, then keep the ten costliest events.
economic_damage <- economic_damage[
  with(economic_damage, order(TOTAL_DAMAGE, decreasing = TRUE)),
]
economic_damage <- economic_damage[seq_len(10), ]

#Making a plot to show graphically some of the top disasters.

# Bar chart of the retained events' total damage, scaled to billions of
# dollars, with the event names written vertically under each bar.
barplot(
  economic_damage$TOTAL_DAMAGE / 1e9,
  names.arg = economic_damage$EVENT_TYPE,
  main = "Top 10 Events with the Most Economic Damage",
  ylab = "Total Damage in (Billion USD)",
  col = "purple",
  las = 3
)