Abstract

This project analyses the socio-economic consequences of severe weather events in the US. Using the U.S. National Oceanic and Atmospheric Administration (NOAA) storm database, I found that tornadoes, extreme heat and floods are responsible for most deaths and injuries in the US. In addition, floods and hurricanes account for most of the country's economic losses. However, for the agriculture sector, drought and flood events are responsible for most of the economic damage.

Data Processing

Downloading and reading the data

# Download the NOAA storm data archive (once) and read it into `data`.
#
# All paths are relative to the current working directory. The script does not
# call setwd() on a user-specific absolute path, because that directory only
# exists on the original author's machine and makes the analysis fail
# everywhere else.
datapath <- file.path(".", "Projectdata")
if (!dir.exists(datapath)) {
  dir.create(datapath)
}

fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
datafile <- file.path(datapath, "ProjectDataset.csv.bz2")

# Skip the download when the archive is already present. mode = "wb" keeps the
# bzip2 archive byte-exact on platforms that distinguish text/binary transfers.
if (!file.exists(datafile)) {
  download.file(fileUrl, destfile = datafile, mode = "wb")
}

# read.csv() reads the bzip2-compressed file directly; no manual unpacking.
data <- read.csv(datafile, sep = ",")

Changing date format and managing the fatalities and injuries data

# Parse the event start timestamp. strptime() returns a POSIXlt object, which
# is a list-like structure that behaves poorly as a data-frame column, so the
# result is converted to POSIXct. The raw strings carry no time-zone
# information; parsing them as UTC avoids NA values for wall-clock times that
# do not exist in the local zone (daylight-saving gaps).
data$BGN_DATE <- as.POSIXct(
  strptime(data$BGN_DATE, format = "%m/%d/%Y %H:%M:%S", tz = "UTC")
)

# Event types are recorded with inconsistent capitalisation ('TORNADO',
# 'Tornado', 'tornado'); lower-casing makes them aggregate as one category.
data$EVTYPE <- tolower(data$EVTYPE)

# Keep only the columns needed for the health and economic analyses.
NOAA <- data[, c("EVTYPE", "FATALITIES", "INJURIES",
                 "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

# Total fatalities per event type, largest first. Only event types with more
# than 30 deaths are kept so the plot stays readable.
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = NOAA, FUN = sum)
fatalities <- fatalities[order(-fatalities$FATALITIES), ]
fatalities <- fatalities[fatalities$FATALITIES > 30, , drop = FALSE]

# Total injuries per event type, largest first. Only event types with more
# than 100 injuries are kept so the plot stays readable.
injuries <- aggregate(INJURIES ~ EVTYPE, data = NOAA, FUN = sum)
injuries <- injuries[order(-injuries$INJURIES), ]
injuries <- injuries[injuries$INJURIES > 100, , drop = FALSE]

Managing the economic damages data

## property damage in millions
# Economic damage, expressed in MILLIONS of USD.
#
# PROPDMG / CROPDMG hold a number and PROPDMGEXP / CROPDMGEXP hold a magnitude
# code for it: H = hundreds, K = thousands, M = millions, B = billions.
# NOTE: the result columns keep their historical "_in_dollars" suffix because
# other code refers to them by name, but the values are in millions.
damage_multiplier <- c(H = 1e-4, K = 1e-3, M = 1, B = 1e3)

# Convert `amount` to millions of USD using the magnitude code in `exponent`.
# Codes are matched case-insensitively, so "k" and "K" are treated alike
# (a case-sensitive comparison silently counts lowercase rows as zero damage).
# Rows whose code is not one of H/K/M/B contribute 0.
to_millions <- function(amount, exponent) {
  code <- match(toupper(as.character(exponent)), names(damage_multiplier))
  known <- !is.na(code)
  millions <- numeric(length(amount))
  millions[known] <- amount[known] * unname(damage_multiplier[code[known]])
  millions
}

NOAA$PROPDMG_in_dollars <- to_millions(NOAA$PROPDMG, NOAA$PROPDMGEXP)
NOAA$CROPDMG_in_dollars <- to_millions(NOAA$CROPDMG, NOAA$CROPDMGEXP)

# Totals per event type (every event type, before any threshold is applied).
crop_totals <- aggregate(CROPDMG_in_dollars ~ EVTYPE, data = NOAA, FUN = sum)
prop_totals <- aggregate(PROPDMG_in_dollars ~ EVTYPE, data = NOAA, FUN = sum)

# Crop damage: largest first, only event types above 30 million USD.
cropdamage <- crop_totals[order(-crop_totals$CROPDMG_in_dollars), ]
cropdamage <- cropdamage[cropdamage$CROPDMG_in_dollars > 30, , drop = FALSE]

# Property damage: largest first, only event types above 300 million USD.
propdamage <- prop_totals[order(-prop_totals$PROPDMG_in_dollars), ]
propdamage <- propdamage[propdamage$PROPDMG_in_dollars > 300, , drop = FALSE]

# Combined damage. The table is built from the UNFILTERED totals and then
# restricted to event types that pass either threshold above. Joining the two
# already-filtered tables would drop any event type that is large in one
# category but small in the other, and would understate the rest.
major_events <- union(cropdamage$EVTYPE, propdamage$EVTYPE)
alldamage <- merge(crop_totals, prop_totals, by = "EVTYPE", all = TRUE)
alldamage$CROPDMG_in_dollars[is.na(alldamage$CROPDMG_in_dollars)] <- 0
alldamage$PROPDMG_in_dollars[is.na(alldamage$PROPDMG_in_dollars)] <- 0
alldamage <- alldamage[alldamage$EVTYPE %in% major_events, , drop = FALSE]
alldamage$alldamages <- alldamage$CROPDMG_in_dollars +
  alldamage$PROPDMG_in_dollars

# Long format for the stacked bar chart: one row per (event type, damage type).
# The first column keeps the name `alldamage...1.` because the plotting code
# refers to it by that name.
alldamage2 <- rbind(
  data.frame(
    alldamage...1. = alldamage$EVTYPE,
    count = alldamage$CROPDMG_in_dollars,
    type = "CROP DAMAGE"
  ),
  data.frame(
    alldamage...1. = alldamage$EVTYPE,
    count = alldamage$PROPDMG_in_dollars,
    type = "PROP DAMAGE"
  )
)

alldamage <- alldamage[order(-alldamage$alldamages), ]

# Optional: keep only event types whose combined damage exceeds 500 million USD
# alldamage <- alldamage[alldamage$alldamages > 500, , drop = FALSE]

Results

Across the United States, which types of events are most harmful with respect to population health?

library(ggplot2)
# Horizontal bar chart of total fatalities per event type, ordered by count.
# coord_flip() turns the bars sideways, so the label set with ylab() names the
# horizontal (value) axis in the rendered plot.
fatalities_by_disaster <- ggplot(
  fatalities,
  aes(x = reorder(EVTYPE, FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity", fill = "navy") +
  xlab("Disaster Type") +
  ylab("Fatalities") +
  ggtitle("Number of Fatalities by Type of Events") +
  ylim(0, 6000) +
  coord_flip()

plot(fatalities_by_disaster)

# Horizontal bar chart of total injuries per event type, ordered by count.
# The value axis is labelled "Injuries" to match the plotted variable.
injuries_by_disaster <- ggplot(
  injuries,
  aes(x = reorder(EVTYPE, INJURIES), y = INJURIES)
) +
  geom_bar(stat = "identity", fill = "goldenrod4") +
  xlab("Disaster Type") +
  ylab("Injuries") +
  ggtitle("Number of Injuries by Type of Events") +
  coord_flip()

plot(injuries_by_disaster)

# injuries_by_disaster <- ggplot(allinjuries2, aes(fill=type, x=reorder(allinjuries...1.,count), y=count)) +geom_bar(position="stack",stat = "identity", fill = "goldenrod4", width = NULL) +xlab("Disaster Type") + ylab("Fatalities") + ggtitle("Number of Injuries by Type of Events")+coord_flip() 
# 
# plot(injuries_by_disaster)

Across the United States, which types of events have the greatest economic consequences?

# cropdamage_by_disaster <- ggplot(cropdamage, aes(x=reorder(EVTYPE,CROPDMG_in_dollars), y=CROPDMG_in_dollars)) +geom_bar(stat = "identity", fill = "green4", width = NULL) +xlab("Disaster Type") + ylab("Fatalities") + ggtitle("Crop Damage by Type of Events(in millions of USD)")+coord_flip() 
# 
# plot(cropdamage_by_disaster)
# 
# propdamage_by_disaster <- ggplot(propdamage, aes(x=reorder(EVTYPE,PROPDMG_in_dollars), y=PROPDMG_in_dollars)) +geom_bar(stat = "identity", fill = "thistle4", width = NULL) +xlab("Disaster Type") + ylab("Fatalities") + ggtitle("Property Damage by Type of Events(in millions of USD)")+coord_flip() 
# 
# plot(propdamage_by_disaster)

# Stacked horizontal bar chart of economic damage per event type, split into
# crop and property damage (the `type` column of alldamage2). Event types are
# ordered by their damage values via reorder(). The value axis is labelled as
# a monetary amount to match the plotted variable and the plot title.
alldamage_by_disaster <- ggplot(
  alldamage2,
  aes(fill = type, x = reorder(alldamage...1., count), y = count)
) +
  geom_bar(position = "stack", stat = "identity") +
  xlab("Disaster Type") +
  ylab("Damage (millions of USD)") +
  ggtitle("Damage by Type of Events (in millions of USD)") +
  coord_flip()

plot(alldamage_by_disaster)

Conclusions

Tornadoes, extreme heat and floods are responsible for most deaths and injuries in the US, while floods and hurricanes account for most of the country's economic losses. For the agriculture sector, however, drought and flood events are responsible for most of the economic damage. This result could be because croplands are less exposed to damage from tornadoes and hurricanes, as they are generally not located in coastal areas.