Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. In this study, I explore the NOAA Storm Database and answer the two questions below: 1. Across the United States, which types of events are most harmful with respect to population health? 2. Across the United States, which types of events have the greatest economic consequences?
In this section, I describe (in words and code) how the data were loaded into R and processed for analysis.
First, let’s set up the environment for the R Markdown file.
# Global knitr chunk options for this report: figure size and output folder,
# plus whether code, warnings and messages appear in the rendered document.
knitr::opts_chunk$set(
  fig.width = 10,
  fig.height = 10,
  fig.path = "Figs/",
  echo = TRUE,
  warning = FALSE,
  message = FALSE
)
library(knitr)
library(ggplot2)
library(gridExtra)
Note: the working directory must be set to the folder that contains the data file.
# Load the raw storm data from the bzip2-compressed CSV in the working
# directory into `rawdata`.
stormFile <- "repdata_data_StormData.csv.bz2"
if (!file.exists(stormFile)) {
  # Fail immediately with a clear message instead of silently skipping the
  # load and failing later with "object 'rawdata' not found".
  stop("Data file not found in the working directory: ", stormFile,
       call. = FALSE)
}
# read.csv() applies CSV-appropriate defaults (only double quotes delimit
# fields, no comment character, short rows are filled). read.table() with
# sep = "," keeps its table defaults, which treat apostrophes and '#' inside
# free-text fields as quoting/comment syntax.
rawdata <- read.csv(stormFile, header = TRUE)
Now that the data are loaded, let’s find out which weather events are most harmful with respect to the total number of fatalities and injuries. In this study, I list the top 10 events.
# Summary table: total number of fatalities and injuries for every event type
summaryData1 <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = rawdata,
  FUN = sum,
  na.rm = TRUE
)
# Combined health impact: fatalities plus injuries
summaryData1$HEALTH <- summaryData1$FATALITIES + summaryData1$INJURIES
# Sort (descending) by each of the three measures and keep the first 10 rows.
# head() is used instead of [1:10, ] so that a table with fewer than 10 event
# types is returned as-is rather than padded with all-NA rows.
byFat <- head(summaryData1[order(-summaryData1$FATALITIES), ], 10)
byInj <- head(summaryData1[order(-summaryData1$INJURIES), ], 10)
byHea <- head(summaryData1[order(-summaryData1$HEALTH), ], 10)
# Create plots: one horizontal bar chart per health measure, stacked in a
# single figure.

# Build one top-10 bar chart.
#   data     - one of the top-10 tables (must contain EVTYPE and `valueCol`)
#   valueCol - name of the column to plot
#   barFill  - bar colour
#   yLabel   - label of the value axis
#   title    - plot title
# Bars are ordered by value, and every panel uses the same fixed 0-100000
# value axis so bar lengths are comparable between panels.
plotHealthTop10 <- function(data, valueCol, barFill, yLabel, title) {
  # Copy the plotted column to a fixed name so aes() can refer to it directly.
  data$value <- data[[valueCol]]
  ggplot(data, aes(x = reorder(EVTYPE, value), y = value)) +
    geom_col(fill = barFill) +
    labs(x = "Event Type", y = yLabel, title = title) +
    ylim(c(0, 100000)) +
    coord_flip()
}

fat <- plotHealthTop10(
  byFat, "FATALITIES", "steelblue4",
  "Total Fatalities Number",
  "Top 10 Fatalities by Event Types"
)
inj <- plotHealthTop10(
  byInj, "INJURIES", "tomato1",
  "Total Injuries Number",
  "Top 10 Injuries by Event Types"
)
hea <- plotHealthTop10(
  byHea, "HEALTH", "grey50",
  "Total Fatalities and Injuries Number",
  "Top 10 Fatalities and Injuries by Event Types"
)
grid.arrange(fat, inj, hea, ncol = 1, nrow = 3)
Next, let’s find out which weather events cause the greatest property damage and crop damage. In this study, I list the top 10 events.
According to the instructions, the alphabetical characters used to signify magnitude are “K” for thousands, “M” for millions, and “B” for billions.
# Convert the estimated numbers in columns "PROPDMG" and "CROPDMG" into real
# amounts by applying the magnitude code stored in "PROPDMGEXP"/"CROPDMGEXP".

# Magnitude letter -> multiplier (thousands, millions, billions).
damageMultipliers <- c(K = 1e3, M = 1e6, B = 1e9)

# Return the multiplier for each magnitude code in `expCode`.
# Codes are upper-cased before the lookup, so any lower-case entries
# (e.g. "k", "m") are scaled like "K"/"M" instead of silently falling through
# to 1. Every other code (blank, missing, unrecognised symbol) gets 1.
damageMultiplier <- function(expCode) {
  multiplier <- unname(damageMultipliers[toupper(as.character(expCode))])
  multiplier[is.na(multiplier)] <- 1
  multiplier
}

propDamgeMag <- damageMultiplier(rawdata$PROPDMGEXP)
rawdata$REAL_PROPDMG <- rawdata$PROPDMG * propDamgeMag
cropDamgeMag <- damageMultiplier(rawdata$CROPDMGEXP)
rawdata$REAL_CROPDMG <- rawdata$CROPDMG * cropDamgeMag
# Summary table: total property and crop damage for every event type
summaryData2 <- aggregate(
  cbind(REAL_PROPDMG, REAL_CROPDMG) ~ EVTYPE,
  data = rawdata,
  FUN = sum,
  na.rm = TRUE
)
# Combined economic impact: property damage plus crop damage
summaryData2$ECOM <- summaryData2$REAL_PROPDMG + summaryData2$REAL_CROPDMG
# Sort (descending) by each of the three measures and keep the first 10 rows.
# head() is used instead of [1:10, ] so that a table with fewer than 10 event
# types is returned as-is rather than padded with all-NA rows.
byProd <- head(summaryData2[order(-summaryData2$REAL_PROPDMG), ], 10)
byCrop <- head(summaryData2[order(-summaryData2$REAL_CROPDMG), ], 10)
byEcom <- head(summaryData2[order(-summaryData2$ECOM), ], 10)
# Create plots: one horizontal bar chart per damage measure, stacked in a
# single figure.

# Build one top-10 bar chart from `ranking`, plotting column `valueCol`
# against EVTYPE with bars ordered by value. All panels share the same fixed
# value-axis limits.
plotDamageRanking <- function(ranking, valueCol, barFill, valueLabel, heading) {
  # Copy the plotted column to a fixed name so aes() can refer to it directly.
  ranking$value <- ranking[[valueCol]]
  ggplot(ranking, aes(x = reorder(EVTYPE, value), y = value)) +
    geom_col(fill = barFill) +
    labs(x = "Event Type", y = valueLabel, title = heading) +
    ylim(c(0, 151000000000)) +
    coord_flip()
}

prod <- plotDamageRanking(
  byProd, "REAL_PROPDMG", "steelblue4",
  "Total Property Damage",
  "Top 10 Property Damage by Event Types"
)
crop <- plotDamageRanking(
  byCrop, "REAL_CROPDMG", "tomato1",
  "Total Crop Damage",
  "Top 10 Crop Damage by Event Types"
)
ecom <- plotDamageRanking(
  byEcom, "ECOM", "grey50",
  "Total Property and Crop Damage",
  "Top 10 Property and Crop Damage by Event Types"
)
grid.arrange(prod, crop, ecom, ncol = 1, nrow = 3)