This project analyses the socio-economic consequences of severe weather events in the US. Using the U.S. National Oceanic and Atmospheric Administration (NOAA) storm database, I found that tornadoes, extreme heat and floods are responsible for most deaths and injuries in the US. In addition, floods and hurricanes account for most of the country's economic losses. However, in the agriculture sector, drought and flood events are responsible for most of the economic damage.
Downloading and reading the data
# Download the NOAA storm database (a bz2-compressed CSV) into ./Projectdata
# and load it into `data`.
# All paths are relative to the current working directory: the script no
# longer calls setwd() on a machine-specific folder, so it can be run from
# any project directory.
datapath <- file.path("./Projectdata")
if (!file.exists(datapath)) {
  dir.create(datapath)
}
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
datafile <- file.path(datapath, "ProjectDataset.csv.bz2")
# Only fetch the archive when it is not already on disk, so re-running the
# analysis does not download it again.
if (!file.exists(datafile)) {
  download.file(fileUrl, destfile = datafile, method = "curl")
}
# read.csv() reads the compressed file directly; no manual unpacking needed.
data <- read.csv(datafile, sep = ",")
Changing date format and managing the fatalities and injuries data
# Parse the event start timestamp.
# strptime() returns POSIXlt (a list-based class), which is awkward to keep as
# a data frame column; convert it to an atomic POSIXct vector instead.
# A fixed time zone makes parsing independent of the machine's locale.
# NOTE(review): the timestamps are treated as UTC here - confirm if the actual
# local time zone of each event ever matters downstream.
data$BGN_DATE <- as.POSIXct(
  strptime(data$BGN_DATE, format = "%m/%d/%Y %H:%M:%S", tz = "UTC")
)

# Event types are recorded with inconsistent capitalisation ("TORNADO",
# "Tornado", "tornado"); lower-case them so they are aggregated together.
data$EVTYPE <- tolower(data$EVTYPE)

# Keep only the columns used by the health and economic analyses.
NOAA <- data[, c("EVTYPE", "FATALITIES", "INJURIES",
                 "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]

# Total fatalities per event type, largest first; keep only event types with
# more than 30 deaths because there are too many event types to plot them all.
fatalities <- aggregate(FATALITIES ~ EVTYPE, data = NOAA, FUN = sum)
fatalities <- fatalities[order(-fatalities$FATALITIES), ]
fatalities <- fatalities[fatalities$FATALITIES > 30, , drop = FALSE]

# Total injuries per event type, largest first; keep only event types with
# more than 100 injuries for the same reason.
injuries <- aggregate(INJURIES ~ EVTYPE, data = NOAA, FUN = sum)
injuries <- injuries[order(-injuries$INJURIES), ]
injuries <- injuries[injuries$INJURIES > 100, , drop = FALSE]
Managing the economic damages data
## Economic damage, expressed in MILLIONS of USD.
## The *DMGEXP columns give the magnitude of each recorded figure:
## H = hundreds, K = thousands, M = millions, B = billions.
## NOTE: the "_in_dollars" column names are kept because other code refers to
## them, but the values they hold are millions of dollars.

## Convert damage amounts to millions of USD using their magnitude codes.
##   amount   - numeric vector of recorded damage figures
##   exponent - vector of magnitude codes, same length as `amount`
## Codes are matched case-insensitively, so lower-case entries such as "k" or
## "m" are converted as well instead of being silently counted as 0.
## Any other code (blank, digits, symbols) contributes 0.
damage_in_millions <- function(amount, exponent) {
  multiplier <- c(H = 10^-4, K = 10^-3, M = 1, B = 10^3)
  scale <- unname(multiplier[toupper(as.character(exponent))])
  known <- !is.na(scale)
  converted <- numeric(length(amount))
  converted[known] <- amount[known] * scale[known]
  converted
}

NOAA$PROPDMG_in_dollars <- damage_in_millions(NOAA$PROPDMG, NOAA$PROPDMGEXP)
NOAA$CROPDMG_in_dollars <- damage_in_millions(NOAA$CROPDMG, NOAA$CROPDMGEXP)

## Total damage per event type, before any filtering
crop_totals <- aggregate(CROPDMG_in_dollars ~ EVTYPE, data = NOAA, FUN = sum)
prop_totals <- aggregate(PROPDMG_in_dollars ~ EVTYPE, data = NOAA, FUN = sum)

## Crop damage: largest first, event types above 30 million USD only
cropdamage <- crop_totals[order(-crop_totals$CROPDMG_in_dollars), ]
cropdamage <- cropdamage[cropdamage$CROPDMG_in_dollars > 30, , drop = FALSE]

## Property damage: largest first, event types above 300 million USD only
propdamage <- prop_totals[order(-prop_totals$PROPDMG_in_dollars), ]
propdamage <- propdamage[propdamage$PROPDMG_in_dollars > 300, , drop = FALSE]

## Combined damage. The unfiltered totals are merged so that an event type
## that is large in only one category (for example heavy property damage but
## almost no crop damage) is not dropped from the combined figures. An event
## type is kept when it passes at least one of the two thresholds above.
alldamage <- merge(crop_totals, prop_totals, by = "EVTYPE")
major_events <- union(cropdamage$EVTYPE, propdamage$EVTYPE)
alldamage <- alldamage[alldamage$EVTYPE %in% major_events, , drop = FALSE]
alldamage[, "alldamages"] <- alldamage[, 2] + alldamage[, 3]

## Long format for the stacked bar chart: one row per event type and damage
## category.
alldamage2 <- data.frame(
  EVTYPE = rep(alldamage$EVTYPE, times = 2),
  count = c(alldamage$CROPDMG_in_dollars, alldamage$PROPDMG_in_dollars),
  type = rep(c("CROP DAMAGE", "PROP DAMAGE"), each = nrow(alldamage))
)
## The plotting code refers to the event-type column by this name.
names(alldamage2)[1] <- "alldamage...1."

alldamage <- alldamage[order(-alldamage$alldamages), ]
# alldamage <- alldamage[alldamage$alldamages > 500, , drop = FALSE] ## optional: keep only total damages above 500,000,000
library(ggplot2)

# Horizontal bar chart of total fatalities per event type, ordered by count.
# NOTE: ylim(0, 6000) fixes the value axis; a bar taller than 6000 would be
# left out of the chart.
fatalities_by_disaster <- ggplot(
  fatalities,
  aes(x = reorder(EVTYPE, FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity", fill = "navy") +
  xlab("Disaster Type") +
  ylab("Fatalities") +
  ggtitle("Number of Fatalities by Type of Events") +
  ylim(0, 6000) +
  coord_flip()
plot(fatalities_by_disaster)

# Horizontal bar chart of total injuries per event type, ordered by count.
# The value axis is labelled "Injuries" (it previously read "Fatalities").
injuries_by_disaster <- ggplot(
  injuries,
  aes(x = reorder(EVTYPE, INJURIES), y = INJURIES)
) +
  geom_bar(stat = "identity", fill = "goldenrod4") +
  xlab("Disaster Type") +
  ylab("Injuries") +
  ggtitle("Number of Injuries by Type of Events") +
  coord_flip()
plot(injuries_by_disaster)

# injuries_by_disaster <- ggplot(allinjuries2, aes(fill=type, x=reorder(allinjuries...1.,count), y=count)) +geom_bar(position="stack",stat = "identity", fill = "goldenrod4", width = NULL) +xlab("Disaster Type") + ylab("Fatalities") + ggtitle("Number of Injuries by Type of Events")+coord_flip()
#
# plot(injuries_by_disaster)
# cropdamage_by_disaster <- ggplot(cropdamage, aes(x=reorder(EVTYPE,CROPDMG_in_dollars), y=CROPDMG_in_dollars)) +geom_bar(stat = "identity", fill = "green4", width = NULL) +xlab("Disaster Type") + ylab("Fatalities") + ggtitle("Crop Damage by Type of Events(in millions of USD)")+coord_flip()
#
# plot(cropdamage_by_disaster)
#
# propdamage_by_disaster <- ggplot(propdamage, aes(x=reorder(EVTYPE,PROPDMG_in_dollars), y=PROPDMG_in_dollars)) +geom_bar(stat = "identity", fill = "thistle4", width = NULL) +xlab("Disaster Type") + ylab("Fatalities") + ggtitle("Property Damage by Type of Events(in millions of USD)")+coord_flip()
#
# plot(propdamage_by_disaster)

# Stacked horizontal bar chart of crop and property damage per event type.
# `alldamage...1.` is the event-type column of alldamage2; bars are ordered by
# the damage values in `count`.
# The value axis is labelled as damage (it previously read "Fatalities").
alldamage_by_disaster <- ggplot(
  alldamage2,
  aes(fill = type, x = reorder(alldamage...1., count), y = count)
) +
  geom_bar(position = "stack", stat = "identity") +
  xlab("Disaster Type") +
  ylab("Damage (millions of USD)") +
  ggtitle("Damage by Type of Events (in millions of USD)") +
  coord_flip()
plot(alldamage_by_disaster)
Tornadoes, extreme heat and floods are responsible for most deaths and injuries in the US. Floods and hurricanes, however, account for most of the country's economic losses. In the agriculture sector, drought and flood events are responsible for most of the economic damage. This result could be because croplands are less exposed to economic damage from tornadoes and hurricanes, as they are generally not located in coastal areas.