library(data.table)
library(ggplot2)
library(plyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(R.utils)
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.8.2 (2022-06-13 22:00:14 UTC) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.26.0 (2024-01-24 05:12:50 UTC) successfully loaded. See ?R.oo for help.
##
## Attaching package: 'R.oo'
## The following object is masked from 'package:R.methodsS3':
##
## throw
## The following objects are masked from 'package:methods':
##
## getClasses, getMethods
## The following objects are masked from 'package:base':
##
## attach, detach, load, save
## R.utils v2.12.3 (2023-11-18 01:00:02 UTC) successfully loaded. See ?R.utils for help.
##
## Attaching package: 'R.utils'
## The following object is masked from 'package:utils':
##
## timestamp
## The following objects are masked from 'package:base':
##
## cat, commandArgs, getOption, isOpen, nullfile, parse, warnings
library(stringr)
# Unzip the bz2-compressed data file with the bunzip2() function, using its path on my computer and its file name. Then read the unzipped CSV file with the read.csv() function and assign the result to the variable "stormdata".
#file name = "repdata_data_StormData.csv"
# Decompress the raw archive into the working directory only when the CSV is
# not already present. bunzip2() stops with an error when its destination file
# exists, so calling it unconditionally made every re-run of the script fail.
bz2_path <- "C:/Users/micha/OneDrive/Desktop/repdata_data_StormData.csv.bz2"
csv_path <- "repdata_data_StormData.csv"
if (!file.exists(csv_path)) {
  bunzip2(bz2_path, csv_path, remove = FALSE)
}
stormdata <- read.csv(csv_path, header = TRUE, sep = ",")

# Keep only the columns used below: event type, casualties, and the damage
# amounts together with their exponent codes.
SubsetColumns <- c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
stormdata <- stormdata[, SubsetColumns]

# Drop rows whose event type is "?" and rows that report no fatalities, no
# injuries, no property damage and no crop damage.
SSstormdata <- subset(
  stormdata,
  EVTYPE != "?" &
    (FATALITIES > 0 | INJURIES > 0 | PROPDMG > 0 | CROPDMG > 0),
  select = SubsetColumns
)

# Confirm there are no NA values in the subset.
sum(is.na(SSstormdata))
## [1] 0

# Inspect the distinct exponent codes that Economic_Conversion() must handle.
unique(SSstormdata$PROPDMGEXP)
## [1] "K" "M" "" "B" "m" "+" "0" "5" "6" "4" "h" "2" "7" "3" "H" "-"
unique(SSstormdata$CROPDMGEXP)
## [1] "" "M" "K" "m" "B" "?" "0" "k"
# Translate damage exponent codes into numeric multipliers.
#
# Arguments:
#   exp  Vector of exponent codes (case-insensitive). A single value works
#        exactly as before; longer vectors are converted element-wise.
#
# Returns:
#   A numeric vector the same length as `exp`:
#     "", "-", "?", "+"  -> 1
#     "0" .. "9"         -> 10^0 .. 10^9
#     "H", "K", "M", "B" -> 10^2, 10^3, 10^6, 10^9
#   Any other code, including NA, yields NA.
Economic_Conversion <- function(exp) {
  codes <- c("", "-", "?", "+", as.character(0:9), "H", "K", "M", "B")
  multipliers <- c(rep(1, 4), 10^(0:9), 10^2, 10^3, 10^6, 10^9)
  # match() is used rather than indexing a named vector because "" never
  # matches a name in `[`, whereas match() compares it like any other string.
  # Codes that are not in the table give an NA index and therefore an NA result.
  multipliers[match(toupper(exp), codes)]
}
# Turn each damage amount into an absolute figure by multiplying it with the
# multiplier for its exponent code, then divide by 10^8 so the totals plotted
# below are expressed in units of 10^8.
# vapply() replaces sapply() so the multipliers are guaranteed to be a numeric
# vector with exactly one value per row.
SSstormdata$Property_COST <- with(
  SSstormdata,
  as.numeric(PROPDMG) *
    vapply(PROPDMGEXP, Economic_Conversion, numeric(1), USE.NAMES = FALSE)
) / 10^8
SSstormdata$Crop_COST <- with(
  SSstormdata,
  as.numeric(CROPDMG) *
    vapply(CROPDMGEXP, Economic_Conversion, numeric(1), USE.NAMES = FALSE)
) / 10^8

# Total fatalities plus injuries per event type, largest first.
Pop_Health <- aggregate(
  x = list(Event_Health = SSstormdata$FATALITIES + SSstormdata$INJURIES),
  by = list(EVENT_TYPE = SSstormdata$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
Pop_Health <- Pop_Health[order(Pop_Health$Event_Health, decreasing = TRUE), ]

# Total property plus crop cost per event type, largest first.
Econ_Damages <- aggregate(
  x = list(Event_Costs = SSstormdata$Property_COST + SSstormdata$Crop_COST),
  by = list(EVENT_TYPE = SSstormdata$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)
Econ_Damages <- Econ_Damages[order(Econ_Damages$Event_Costs, decreasing = TRUE), ]

# Bar chart of the ten event types with the most fatalities and injuries.
# EVENT_TYPE is a character column, so without reorder() the bars would be
# drawn alphabetically; the negated total puts the largest bar first.
Q1 <- ggplot(
  head(Pop_Health, 10),
  aes(
    x = reorder(EVENT_TYPE, -Event_Health),
    y = Event_Health,
    fill = EVENT_TYPE
  )
) +
  geom_bar(stat = "identity", col = "black", width = 0.8) +
  theme(axis.text.x = element_text(angle = 75, vjust = 1, hjust = 1)) +
  xlab("Type of Event") +
  ylab("Total Fatalities and Injuries") +
  ggtitle("Top 10 Most Harmful Storm Types to Population Health")

# Horizontal bar chart of the ten costliest event types. With coord_flip() the
# ascending reorder() places the largest bar at the top.
# The value label states the 10^8 scaling applied above.
# NOTE(review): the currency is presumably US dollars - confirm against the
# dataset documentation before adding it to the label.
Q2 <- ggplot(
  head(Econ_Damages, 10),
  aes(
    x = reorder(EVENT_TYPE, Event_Costs),
    y = Event_Costs,
    fill = EVENT_TYPE
  )
) +
  coord_flip() +
  geom_bar(stat = "identity", col = "black") +
  theme(plot.title = element_text(size = 14, hjust = 0.5)) +
  xlab("Type of Event") +
  ylab("Total Economic Damages (x 10^8)") +
  ggtitle("Top 10 Damaging Storm Types on the Economy")