# This analysis uses the U.S. National Oceanic and Atmospheric Administration's
# (NOAA) storm database, which consolidates data from 1950 to 2011. The earliest
# records are less thoroughly assessed than the latest ones, which reflects how
# data gathering has improved over time. Storms and other severe weather
# conditions not only disrupt people's schedules; in the bigger picture they
# also hurt a country's economy by causing serious damage. Property damage is
# damage dealt to man-made things, while crop damage is an example of damage to
# nature. Both are assessed here to analyze the economic impact of weather.
# Download the compressed storm data only when it is not already present in
# the working directory, so repeated runs reuse the local copy.
if (!file.exists("stormdata_noaa.csv.bz2")) {
  # mode = "wb" requests a binary transfer explicitly instead of relying on
  # download.file() guessing it from the URL's extension.
  download.file(
    "http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    destfile = "stormdata_noaa.csv.bz2",
    mode = "wb"
  )
}
# Load the bzip2-compressed CSV into a single variable named `data`, keeping
# text columns as character vectors. Then print the dimensions and the column
# names to get a rough idea of what the data contains.
data <- read.csv(bzfile("stormdata_noaa.csv.bz2"), stringsAsFactors = FALSE)
dim(data)
## [1] 902297 37
names(data)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Not every column is needed later, so keep only the event type plus the
# casualty and damage columns in a variable named `storm_subdata`.
storm_subdata <- data[, c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)]
names(storm_subdata)
## [1] "EVTYPE" "FATALITIES" "INJURIES" "PROPDMG" "PROPDMGEXP"
## [6] "CROPDMG" "CROPDMGEXP"
# Total fatalities per event type, ranked from the highest total downwards.
fatalities_sum <- aggregate(FATALITIES ~ EVTYPE, storm_subdata, sum)
fatalities_sum <- fatalities_sum[
  with(fatalities_sum, order(FATALITIES, decreasing = TRUE)),
]
# Keep the six event types with the largest fatality totals.
fatalities_max <- fatalities_sum[seq_len(6), ]
fatalities_max
## EVTYPE FATALITIES
## 834 TORNADO 5633
## 130 EXCESSIVE HEAT 1903
## 153 FLASH FLOOD 978
## 275 HEAT 937
## 464 LIGHTNING 816
## 856 TSTM WIND 504
# Total injuries per event type, ranked from the highest total downwards.
injuries_sum <- aggregate(INJURIES ~ EVTYPE, storm_subdata, sum)
injuries_sum <- injuries_sum[
  with(injuries_sum, order(INJURIES, decreasing = TRUE)),
]
# Keep the six event types with the largest injury totals.
injuries_max <- injuries_sum[seq_len(6), ]
injuries_max
## EVTYPE INJURIES
## 834 TORNADO 91346
## 856 TSTM WIND 6957
## 170 FLOOD 6789
## 130 EXCESSIVE HEAT 6525
## 464 LIGHTNING 5230
## 275 HEAT 2100
# Plot fatalities and injuries side by side in a grouped bar chart.
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# merge() performs an inner join here, so only event types that appear in both
# `fatalities_max` and `injuries_max` are kept.
combined <- merge(fatalities_max, injuries_max, by = "EVTYPE")
# Order the events by total casualties (fatalities plus injuries), largest
# first.
combined <- arrange(combined, desc(FATALITIES + INJURIES))
event_name <- combined$EVTYPE
barplot(
  # Drop the EVTYPE column and transpose so each event becomes a group of two
  # bars (fatalities, injuries).
  t(combined[, -1]),
  names.arg = event_name,
  ylim = c(0, 100000),
  beside = TRUE,
  cex.names = 0.75,
  las = 1,
  col = c("red", "green"),
  # The number of events surviving the merge depends on the data, so the title
  # reports the actual count instead of a hard-coded one.
  main = sprintf("Top %d Events with the Most Casualties", nrow(combined))
)
legend(
  "topright",
  c("Fatalities", "Injuries"),
  fill = c("red", "green"),
  bty = "n"
)
# Create the `property_damage` and `crop_damage` columns, which are used to
# evaluate the economic damage of each event. PROPDMG / CROPDMG hold the
# recorded figure and PROPDMGEXP / CROPDMGEXP hold its magnitude code.

# Scale `amount` by the multiplier that matches each `exponent` code:
# H = 10^2, K = 10^3, M = 10^6, B = 10^9.
# Codes are matched case-insensitively, so "k" is treated like "K". Rows whose
# code is empty, NA, or anything else get a damage of 0.
# Returns a numeric vector with the same length as `amount`.
damage_in_usd <- function(amount, exponent) {
  multipliers <- c(H = 10^2, K = 10^3, M = 10^6, B = 10^9)
  # Looking up by name yields NA for every code that is not H, K, M or B.
  multiplier <- unname(multipliers[toupper(exponent)])
  usd <- numeric(length(amount))
  known <- !is.na(multiplier)
  usd[known] <- amount[known] * multiplier[known]
  usd
}

data$property_damage <- damage_in_usd(data$PROPDMG, data$PROPDMGEXP)
data$crop_damage <- damage_in_usd(data$CROPDMG, data$CROPDMGEXP)
# Add property and crop damage together per event type to get the total
# economic damage, then keep the ten event types with the largest totals.
economic_damage <- aggregate(
  property_damage + crop_damage ~ EVTYPE,
  data = data,
  FUN = sum
)
economic_damage <- setNames(economic_damage, c("EVENT_TYPE", "TOTAL_DAMAGE"))
economic_damage <- economic_damage[
  with(economic_damage, order(TOTAL_DAMAGE, decreasing = TRUE)),
]
economic_damage <- economic_damage[seq_len(10), ]
# Bar chart of the event types kept in `economic_damage`, with the total damage
# expressed in billions of USD.
barplot(
  economic_damage$TOTAL_DAMAGE / 1e9,
  names.arg = economic_damage$EVENT_TYPE,
  las = 3,
  col = "purple",
  main = "Top 10 Events with the Most Economic Damage",
  ylab = "Total Damage in (Billion USD)"
)