---
title: "Weather Event Impact Analysis"
author: "Asmaa Hassan"
date: "2024-12-25"
output: html_document
---

Introduction

This analysis aims to assess the impact of different weather events in the United States from 1996 to 2011. We investigate two main questions: 1. Which weather events are most harmful to population health? 2. Which weather events have the greatest economic consequences?

Data Processing

1. Loading Necessary Packages

# Install the helper packages only when they are missing, so that re-running
# or knitting this document does not reinstall them on every execution.
helper_pkgs <- c("gdtools", "plyr", "flextable")
helper_installed <- vapply(helper_pkgs, requireNamespace, logical(1),
                           quietly = TRUE)
if (!all(helper_installed)) {
  install.packages(helper_pkgs[!helper_installed])
}

# Attach plyr BEFORE dplyr. Both export arrange(), mutate(), summarise() and
# desc(); attaching plyr last would mask the dplyr versions, which dplyr
# itself warns is likely to cause problems.
library(ggplot2)
library(plyr)
library(dplyr)
library(flextable)
2. Reading and Transforming the Data

We load the NOAA storm event dataset and keep only events from January 1996 onwards, because that is when all event types started being recorded.
# Path to the compressed NOAA storm-events CSV (read directly by read.csv()).
stormfile <- "/home/asmae/Downloads/repdata_data_StormData.csv.bz2"
if (!file.exists(stormfile)) {
  stop("Storm data file not found: ", stormfile, call. = FALSE)
}
rawdata <- read.csv(file = stormfile, header = TRUE, sep = ",")

# Parse the begin date as a Date rather than POSIXlt: POSIXlt is a list-based
# class that does not belong in a data-frame column, and only the calendar day
# matters for the cut-off below. The time-of-day part of the string after the
# "%m/%d/%Y" portion is ignored by the parser.
rawdata$BGN_DATE <- as.Date(rawdata$BGN_DATE, format = "%m/%d/%Y")

# Keep events that began on or after 1996-01-01; compare Date with Date so the
# result does not depend on the session time zone.
maindata <- subset(rawdata, BGN_DATE > as.Date("1995-12-31"))
rm(rawdata)

# Retain only the columns used in the health and economic analyses.
analysis_cols <- c("EVTYPE", "FATALITIES", "INJURIES",
                   "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
maindata <- maindata[, analysis_cols]

# Normalise event-type labels to upper case so case variants aggregate together.
maindata$EVTYPE <- toupper(maindata$EVTYPE)

# Drop events that caused no fatalities, injuries, property or crop damage.
has_impact <- maindata$FATALITIES != 0 |
  maindata$INJURIES != 0 |
  maindata$PROPDMG != 0 |
  maindata$CROPDMG != 0
maindata <- maindata[has_impact, ]

Results

Q1: Which Events Are Most Harmful to Population Health?

Summing Fatalities and Injuries

# Total fatalities and injuries per event type.
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = maindata, FUN = sum)
injuries <- aggregate(INJURIES ~ EVTYPE, data = maindata, FUN = sum)

# Rank from highest to lowest total (ties broken alphabetically by event type)
# and keep at most the ten worst. head() is used instead of [1:10, ] so that
# fewer than ten event types does not pad the table with NA rows, and base
# order() avoids depending on whether plyr or dplyr currently owns arrange().
fatalities <- fatalities[order(-fatalities$FATALITIES, fatalities$EVTYPE), ]
fatalities <- head(fatalities, 10)
rownames(fatalities) <- NULL

injuries <- injuries[order(-injuries$INJURIES, injuries$EVTYPE), ]
injuries <- head(injuries, 10)
rownames(injuries) <- NULL

Top 10 Events by Fatalities and Top 10 Events by Injuries

# Show the ten event types with the most fatalities.
print(fatalities)
##            EVTYPE FATALITIES
## 1  EXCESSIVE HEAT       1797
## 2         TORNADO       1511
## 3     FLASH FLOOD        887
## 4       LIGHTNING        651
## 5           FLOOD        414
## 6     RIP CURRENT        340
## 7       TSTM WIND        241
## 8            HEAT        237
## 9       HIGH WIND        235
## 10      AVALANCHE        223
# Show the ten event types with the most injuries.
print(injuries)
##               EVTYPE INJURIES
## 1            TORNADO    20667
## 2              FLOOD     6758
## 3     EXCESSIVE HEAT     6391
## 4          LIGHTNING     4141
## 5          TSTM WIND     3629
## 6        FLASH FLOOD     1674
## 7  THUNDERSTORM WIND     1400
## 8       WINTER STORM     1292
## 9  HURRICANE/TYPHOON     1275
## 10              HEAT     1222

Visualization: Fatalities and Injuries

Fatalities

# Bar chart of the top event types by fatalities. The x axis is ordered by
# descending fatality count (not alphabetically) so the ranking is readable
# straight from the plot.
ggplot(fatalities, aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)) +
  geom_col(fill = "red") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  xlab("Event Type") + ylab("Fatalities") +
  ggtitle("Top 10 Weather Events by Fatalities in the US (1996 - 2011)")

Injuries

# Bar chart of the top event types by injuries. The x axis is ordered by
# descending injury count (not alphabetically) so the ranking is readable
# straight from the plot.
ggplot(injuries, aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)) +
  geom_col(fill = "red") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  xlab("Event Type") + ylab("Injuries") +
  ggtitle("Top 10 Weather Events by Injuries in the US (1996 - 2011)")

Q2: Which Events Have the Greatest Economic Consequences?

Transforming Damage Data

# Convert a damage-exponent code into the power of ten it stands for.
#
# Mapping (case-insensitive): H -> 2, K -> 3, M -> 6, B -> 9, "+" -> 1,
# "-" and "?" -> 0, a numeric code -> that number. Blank, missing or
# unrecognised codes -> 0, i.e. the damage figure is taken at face value.
#
# @param code Character (or factor) vector of exponent codes.
# @return Numeric vector of powers of ten, the same length as `code`.
exponent_to_power <- function(code) {
  code <- toupper(trimws(as.character(code)))
  letter_powers <- c(H = 2, K = 3, M = 6, B = 9, "+" = 1, "-" = 0, "?" = 0)
  power <- unname(letter_powers[code])

  # Codes that are already digits are used as the exponent directly.
  is_digit <- grepl("^[0-9]+$", code)
  power[is_digit] <- as.numeric(code[is_digit])

  # Anything not recognised above (including "" and NA) means "no multiplier".
  power[is.na(power)] <- 0
  power
}

# Replace the exponent codes with numeric powers of ten.
maindata$PROPDMGEXP <- exponent_to_power(maindata$PROPDMGEXP)
maindata$CROPDMGEXP <- exponent_to_power(maindata$CROPDMGEXP)

# Damage totals in USD: reported figure scaled by its power of ten. Plain
# column assignment is used so the result does not depend on whether plyr or
# dplyr currently provides mutate().
maindata$PROPDMGTOTAL <- maindata$PROPDMG * 10^maindata$PROPDMGEXP
maindata$CROPDMGTOTAL <- maindata$CROPDMG * 10^maindata$CROPDMGEXP

Summing Economic Losses

# Total property and crop damage per event type.
Economic_data <- aggregate(cbind(PROPDMGTOTAL, CROPDMGTOTAL) ~ EVTYPE,
                           data = maindata, FUN = sum)

# Combined economic loss = property damage + crop damage.
Economic_data$ECONOMIC_LOSS <- with(Economic_data, PROPDMGTOTAL + CROPDMGTOTAL)

# Rank event types from the largest to the smallest combined loss.
loss_rank <- order(Economic_data$ECONOMIC_LOSS, decreasing = TRUE)
Economic_data <- Economic_data[loss_rank, ]

# Keep at most the ten costliest event types. Columns are selected by name
# rather than position, and head() avoids NA padding when fewer than ten
# event types are present.
worsteconomicevents <- head(Economic_data[, c("EVTYPE", "ECONOMIC_LOSS")], 10)
worsteconomicevents
##                EVTYPE ECONOMIC_LOSS
## 48              FLOOD  148919611950
## 88  HURRICANE/TYPHOON   71913712800
## 141       STORM SURGE   43193541000
## 149           TORNADO   24900370720
## 66               HAIL   17071172870
## 46        FLASH FLOOD   16557105610
## 86          HURRICANE   14554229010
## 32            DROUGHT   14413667000
## 152    TROPICAL STORM    8320186550
## 83          HIGH WIND    5881421660

Visualization: Economic Loss

# Bar chart of the costliest event types. The x axis is ordered by descending
# economic loss (not alphabetically) so the ranking is readable straight from
# the plot.
ggplot(worsteconomicevents,
       aes(x = reorder(EVTYPE, -ECONOMIC_LOSS), y = ECONOMIC_LOSS)) +
  geom_col(fill = "blue") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  xlab("Event Type") + ylab("Total Property & Crop Damages (USD)") +
  ggtitle("Total Economic Loss in the US (1996 - 2011)")