---
title: "Weather Event Impact Analysis"
author: "Asmaa Hassan"
date: "2024-12-25"
output: html_document
---
This analysis aims to assess the impact of different weather events in the United States from 1996 to 2011. We investigate two main questions: 1. Which weather events are most harmful to population health? 2. Which weather events have the greatest economic consequences?
# Package setup ----
# Install a package only when it is not already available, so that re-running
# (or knitting) the analysis does not reinstall packages on every execution
# and does not require network access or a configured CRAN mirror when the
# packages are already present.
install_if_missing <- function(pkg) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}
for (pkg in c("gdtools", "plyr", "flextable")) {
  install_if_missing(pkg)
}

library(ggplot2)
library(dplyr)
# NOTE(review): plyr is attached after dplyr, so plyr's versions of shared
# names such as arrange(), mutate() and desc() take precedence in this script.
# The attach order is kept as-is so existing results do not change.
library(plyr)
library(flextable)
# Load and trim the storm data ----
# Location of the compressed storm data CSV. Set the STORM_DATA_FILE
# environment variable to use a different copy; otherwise the original
# hard-coded location is used.
stormfile <- Sys.getenv(
  "STORM_DATA_FILE",
  unset = "/home/asmae/Downloads/repdata_data_StormData.csv.bz2"
)
if (!file.exists(stormfile)) {
  stop("Storm data file not found: ", stormfile, call. = FALSE)
}
rawdata <- read.csv(file = stormfile, header = TRUE, sep = ",")

# Keep only the calendar date of BGN_DATE. as.Date() ignores the trailing
# time-of-day text, is independent of the local time zone, and avoids storing
# a POSIXlt column inside a data frame.
rawdata$BGN_DATE <- as.Date(rawdata$BGN_DATE, format = "%m/%d/%Y")

# Keep events that began on or after 1996-01-01. Rows whose date could not be
# parsed (NA) are dropped by subset().
maindata <- subset(rawdata, BGN_DATE >= as.Date("1996-01-01"))
rm(rawdata)

# Keep only the columns used by the health and economic analyses.
maindata <- subset(
  maindata,
  select = c(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG,
             CROPDMGEXP)
)

# Upper-case event types so differently-cased spellings aggregate together.
maindata$EVTYPE <- toupper(maindata$EVTYPE)

# Drop events with no recorded fatalities, injuries or damage. subset() also
# discards rows where the condition is NA instead of producing all-NA rows.
maindata <- subset(
  maindata,
  FATALITIES != 0 | INJURIES != 0 | PROPDMG != 0 | CROPDMG != 0
)
Results

Q1: Which Events Are Most Harmful to Population Health?

Summing Fatalities and Injuries
# Total fatalities and injuries per event type.
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = maindata, FUN = sum)
injuries <- aggregate(INJURIES ~ EVTYPE, data = maindata, FUN = sum)

# Rank from highest to lowest total (ties broken by event name) and keep the
# top ten. head() returns however many rows exist when there are fewer than
# ten event types, whereas indexing with [1:10, ] would pad with NA rows.
fatalities <- head(arrange(fatalities, desc(FATALITIES), EVTYPE), 10)
injuries <- head(arrange(injuries, desc(INJURIES), EVTYPE), 10)
Top 10 Events by Fatalities & Top 10 Events by Injuries
# Display the ten event types with the highest fatality totals.
# NOTE(review): the `##`-prefixed lines below look like the recorded printout
# of this statement -- confirm they still match if the data or filters change.
fatalities
## EVTYPE FATALITIES
## 1 EXCESSIVE HEAT 1797
## 2 TORNADO 1511
## 3 FLASH FLOOD 887
## 4 LIGHTNING 651
## 5 FLOOD 414
## 6 RIP CURRENT 340
## 7 TSTM WIND 241
## 8 HEAT 237
## 9 HIGH WIND 235
## 10 AVALANCHE 223
# Display the ten event types with the highest injury totals.
# NOTE(review): as above, the `##` lines appear to be recorded output.
injuries
## EVTYPE INJURIES
## 1 TORNADO 20667
## 2 FLOOD 6758
## 3 EXCESSIVE HEAT 6391
## 4 LIGHTNING 4141
## 5 TSTM WIND 3629
## 6 FLASH FLOOD 1674
## 7 THUNDERSTORM WIND 1400
## 8 WINTER STORM 1292
## 9 HURRICANE/TYPHOON 1275
## 10 HEAT 1222
Visualization: Fatalities and Injuries

Fatalities
# Bar chart of the top fatality-causing event types.
# reorder() sorts the bars from most to fewest fatalities; without it ggplot
# orders the event names alphabetically, which hides the ranking.
ggplot(fatalities, aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)) +
  geom_col(fill = "red") +
  # Rotate the event names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("Event Type") +
  ylab("Fatalities") +
  ggtitle("Top 10 Weather Events by Fatalities in the US (1996 - 2011)")
Injuries
# Bar chart of the top injury-causing event types.
# reorder() sorts the bars from most to fewest injuries; without it ggplot
# orders the event names alphabetically, which hides the ranking.
ggplot(injuries, aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)) +
  geom_col(fill = "red") +
  # Rotate the event names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("Event Type") +
  ylab("Injuries") +
  ggtitle("Top 10 Weather Events by Injuries in the US (1996 - 2011)")
Q2: Which Events Have the Greatest Economic Consequences?

Transforming Damage Data
# Translate damage-exponent codes into numeric powers of ten.
#
# The substitutions are applied in order to every element of `codes`:
# H/h -> 2, K/k -> 3, M/m -> 6, B/b -> 9, "+" -> 1, and "?", "-" or a space
# -> 0. Digits already present are left untouched. Anything that still cannot
# be read as a number afterwards (including the empty string) becomes 0,
# i.e. a multiplier of 10^0 = 1.
#
# @param codes Vector of exponent codes (e.g. the PROPDMGEXP column).
# @return Numeric vector of exponents, the same length as `codes`, no NAs.
exponent_to_power <- function(codes) {
  substitutions <- c(
    "[Hh]" = "2",
    "[Kk]" = "3",
    "[Mm]" = "6",
    "[Bb]" = "9",
    "\\+" = "1",
    "\\?|\\-|\\ " = "0"
  )
  for (pattern in names(substitutions)) {
    codes <- gsub(pattern, substitutions[[pattern]], codes)
  }
  powers <- as.numeric(codes)
  powers[is.na(powers)] <- 0
  powers
}

# The same decoding applies to property and crop damage, so both columns go
# through the one helper instead of two hand-copied gsub() sequences.
maindata$PROPDMGEXP <- exponent_to_power(maindata$PROPDMGEXP)
maindata$CROPDMGEXP <- exponent_to_power(maindata$CROPDMGEXP)

# Damage totals: the recorded amount scaled by its power of ten. Plain column
# assignment is used so the result does not depend on whether plyr's or
# dplyr's mutate() is currently first on the search path.
maindata$PROPDMGTOTAL <- maindata$PROPDMG * 10^maindata$PROPDMGEXP
maindata$CROPDMGTOTAL <- maindata$CROPDMG * 10^maindata$CROPDMGEXP
Summing Economic Losses
# Total property and crop damage per event type.
Economic_data <- aggregate(
  cbind(PROPDMGTOTAL, CROPDMGTOTAL) ~ EVTYPE,
  data = maindata,
  FUN = sum
)

# Combined economic loss = property damage + crop damage.
Economic_data$ECONOMIC_LOSS <-
  Economic_data$PROPDMGTOTAL + Economic_data$CROPDMGTOTAL

# Rank event types from the largest to the smallest combined loss.
Economic_data <- Economic_data[
  order(Economic_data$ECONOMIC_LOSS, decreasing = TRUE),
]

# Keep the ten costliest event types. Columns are selected by name rather
# than position so the result stays correct if columns are added or
# reordered, and head() avoids NA padding when fewer than ten rows exist.
worsteconomicevents <- head(
  Economic_data[, c("EVTYPE", "ECONOMIC_LOSS")],
  10
)
# Display the ten event types with the largest combined property + crop loss.
# NOTE(review): the `##`-prefixed lines below look like the recorded printout
# of this statement (the leading numbers would be the row names kept from the
# aggregated table) -- confirm they still match if the data or filters change.
worsteconomicevents
## EVTYPE ECONOMIC_LOSS
## 48 FLOOD 148919611950
## 88 HURRICANE/TYPHOON 71913712800
## 141 STORM SURGE 43193541000
## 149 TORNADO 24900370720
## 66 HAIL 17071172870
## 46 FLASH FLOOD 16557105610
## 86 HURRICANE 14554229010
## 32 DROUGHT 14413667000
## 152 TROPICAL STORM 8320186550
## 83 HIGH WIND 5881421660
Visualization: Economic Loss
# Bar chart of the costliest event types by combined property + crop damage.
# reorder() sorts the bars from the largest to the smallest loss; without it
# ggplot orders the event names alphabetically, which hides the ranking.
ggplot(
  worsteconomicevents,
  aes(x = reorder(EVTYPE, -ECONOMIC_LOSS), y = ECONOMIC_LOSS)
) +
  geom_col(fill = "blue") +
  # Rotate the event names so long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  xlab("Event Type") +
  ylab("Total Property & Crop Damages (USD)") +
  ggtitle("Total Economic Loss in the US (1996 - 2011)")