This analysis was conducted as part of the Coursera Reproducible Research course (Assignment 2), which belongs to the Data Science Specialization. The project explores the U.S. National Oceanic and Atmospheric Administration’s (NOAA) Storm Database and the consequences of severe weather for both population health and the economy. The data track the characteristics of significant storms and weather events in the United States between 1950 and November 2011. In the earlier years of the database, there are generally fewer events recorded, most likely due to a lack of proper records. More recent years should be considered complete.
This analysis investigates the top severe weather events that were most harmful to the population health in terms of fatalities and injuries. In addition, the economic consequence was analyzed by exploring financial damages on properties and crops.
Here are results of the top severe weather events that cause the most damages:
Download the data from the link provided above, and unzip it if it has not already been extracted on the local computer.
library(R.utils)
# Fetch and decompress the NOAA storm data.
# The work is skipped when the extracted CSV is already on disk, so repeated
# runs do not download the archive again (previously download.file() ran
# unconditionally on every execution).
if (!file.exists("./data")) {
  dir.create("./data")
}
url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
filepath <- "./data/StormData.csv.bz2"
if (!file.exists("./data/StormData.csv")) {
  if (!file.exists(filepath)) {
    # mode = "wb": the .bz2 archive is binary and must not be written in
    # text mode.
    download.file(url, filepath, mode = "wb")
  }
  bunzip2(filepath, "./data/StormData.csv")
}
All the required libraries are loaded.
library(ggplot2)
library(dplyr)
library(gridExtra)
library(formattable)
Read the data and assign it to the data frame.
# Load the decompressed storm events CSV into a data frame.
data <- read.csv(file = "./data/StormData.csv")
Not all the variables are required for analysis so we have to select only the required variables.
# Keep only the event type, casualty and damage columns used by the analysis.
storm_data <- data %>%
  select(
    EVTYPE, FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP,
    CROPDMG, CROPDMGEXP
  )
Arrange the fatalities and take a sum by the event type. This provides us the sum of fatalities caused by different events.
# Sum fatalities per event type, sort descending, and keep the top 10 rows
# via top_n().
Fatalities <- aggregate(FATALITIES ~ EVTYPE, data = storm_data, FUN = sum)
top10_fatalities <- top_n(
  arrange(Fatalities, desc(FATALITIES)),
  10
)
Arrange the injuries and take a sum by the event type. This provides us the sum of injuries caused by different events.
# Sum injuries per event type, sort descending, and keep the top 10 rows
# via top_n().
Injuries <- aggregate(INJURIES ~ EVTYPE, data = storm_data, FUN = sum)
top10_injuries <- top_n(
  arrange(Injuries, desc(INJURIES)),
  10
)
The property damage exponent values (PROPDMGEXP) in the dataset are recorded as symbols denoting “SI Units” magnitudes, which need to be identified.
# List the distinct property-damage exponent symbols present in the data.
unique(storm_data[["PROPDMGEXP"]])
## [1] K M B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels: - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
A numerical value is assigned to each unique symbol based on the “SI Units” magnitude it denotes (see Wikipedia: Power of 10).
# Translate each PROPDMGEXP symbol into the numeric multiplier for PROPDMG.
# Letters are magnitude suffixes (H/h = hundred, K = thousand, M/m = million,
# B = billion); digits are treated as powers of ten.
#
# Fixes relative to the previous version:
#   * "K" was never mapped, so every row recorded in thousands received an NA
#     multiplier and was left out of the property-damage totals.
#   * "" was first set to 1 and then overwritten with 0; only the effective
#     value (0, grouped with the other invalid symbols) is kept.
#
# Symbols that are not listed keep an NA multiplier.
# NOTE(review): property-damage totals rendered before the "K" fix are stale
# and must be regenerated.
prop_exp_symbols <- c(
  "H", "h", "K", "M", "m", "B",
  "0", "1", "2", "3", "4", "5", "6", "7", "8",
  "+", "-", "?", ""
)
prop_exp_values <- c(
  100, 100, 1000, 1e+06, 1e+06, 1e+09,
  1, 10, 100, 1000, 10000, 1e+05, 1e+06, 1e+07, 1e+08,
  0, 0, 0, 0
)
# match() is used instead of a named-vector lookup because the empty string
# cannot be used as an element name. as.character() makes the lookup work
# whether the column was read as a factor or as character.
storm_data$PROPEXP <- prop_exp_values[
  match(as.character(storm_data$PROPDMGEXP), prop_exp_symbols)
]
Property damage value is a product of variables PROPDMG and PROPEXP
# Property damage value = reported amount times its exponent multiplier.
storm_data$PROPDMGVAL <- with(storm_data, PROPDMG * PROPEXP)
The crop damage exponent values (CROPDMGEXP) in the dataset are recorded as symbols denoting “SI Units” magnitudes, which need to be identified.
# List the distinct crop-damage exponent symbols present in the data.
unique(storm_data[["CROPDMGEXP"]])
## [1] M K m B ? 0 k 2
## Levels: ? 0 2 B k K m M
A numerical value is assigned to each unique symbol based on the “SI Units” magnitude it denotes (see Wikipedia: Power of 10).
# Translate each CROPDMGEXP symbol into the numeric multiplier for CROPDMG.
# Letters are magnitude suffixes (K/k = thousand, M/m = million, B = billion);
# digits are treated as powers of ten.
#
# Fixes relative to the previous version:
#   * The invalid-symbol list contained " ?" (with a leading space), which
#     never matched "?", so those rows kept an NA multiplier instead of 0.
#   * "" was first set to 1 and then overwritten with 0; only the effective
#     value (0) is kept.
#
# Symbols that are not listed keep an NA multiplier.
crop_exp_symbols <- c("K", "k", "M", "m", "B", "0", "2", "?", "")
crop_exp_values <- c(1000, 1000, 1e+06, 1e+06, 1e+09, 1, 100, 0, 0)
# match() is used instead of a named-vector lookup because the empty string
# cannot be used as an element name. as.character() makes the lookup work
# whether the column was read as a factor or as character.
storm_data$CROPEXP <- crop_exp_values[
  match(as.character(storm_data$CROPDMGEXP), crop_exp_symbols)
]
Crop damage value is a product of variables CROPDMG and CROPEXP
# Crop damage value = reported amount times its exponent multiplier.
storm_data$CROPDMGVAL <- with(storm_data, CROPDMG * CROPEXP)
Arrange the property damages and take a sum by the event type. This provides us the sum of property damages in USD caused by different events.
# Sum property damage per event type, sort descending, and keep the top 10
# rows via top_n().
prop <- aggregate(
  PROPDMGVAL ~ EVTYPE,
  data = storm_data,
  FUN = sum,
  na.rm = TRUE
)
top10_prop <- top_n(
  arrange(prop, desc(PROPDMGVAL)),
  10
)
Arrange the crop damages and take a sum by the event type. This provides us the sum of crop damages in USD caused by different events.
# Sum crop damage per event type, sort descending, and keep the top 10 rows
# via top_n().
crop <- aggregate(
  CROPDMGVAL ~ EVTYPE,
  data = storm_data,
  FUN = sum,
  na.rm = TRUE
)
top10_crop <- top_n(
  arrange(crop, desc(CROPDMGVAL)),
  10
)
# Render the fatalities ranking as a formatted table.
top10_fatalities %>%
  formattable()
| EVTYPE | FATALITIES |
|---|---|
| TORNADO | 5633 |
| EXCESSIVE HEAT | 1903 |
| FLASH FLOOD | 978 |
| HEAT | 937 |
| LIGHTNING | 816 |
| TSTM WIND | 504 |
| FLOOD | 470 |
| RIP CURRENT | 368 |
| HIGH WIND | 248 |
| AVALANCHE | 224 |
# Render the injuries ranking as a formatted table.
top10_injuries %>%
  formattable()
| EVTYPE | INJURIES |
|---|---|
| TORNADO | 91346 |
| TSTM WIND | 6957 |
| FLOOD | 6789 |
| EXCESSIVE HEAT | 6525 |
| LIGHTNING | 5230 |
| HEAT | 2100 |
| ICE STORM | 1975 |
| FLASH FLOOD | 1777 |
| THUNDERSTORM WIND | 1488 |
| HAIL | 1361 |
# Horizontal bar chart of total fatalities for the top 10 event types, with
# bars ordered by fatality count.
# theme_classic() is now added as a plot layer. Previously
# theme_set(theme_classic()) was passed as a stray positional argument to
# aes(); it is not an aesthetic mapping, and theme_set() replaces the
# session-wide default theme rather than styling this plot only.
b1 <- ggplot(
  top10_fatalities,
  aes(x = reorder(EVTYPE, FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity", fill = "cyan4") +
  # The complete theme goes first so the theme() tweaks below are not reset.
  theme_classic() +
  theme(axis.text.x = element_text(angle = 0, hjust = 1, size = 10)) +
  xlab("Event Type") +
  ylab("Fatalities") +
  ggtitle("Total Fatalities by Top 10 Weather Events") +
  theme(plot.title = element_text(size = 10)) +
  coord_flip()
# Horizontal bar chart of total injuries for the top 10 event types, with
# bars ordered by injury count.
# theme_classic() is now added as a plot layer. Previously
# theme_set(theme_classic()) was passed as a stray positional argument to
# aes(); it is not an aesthetic mapping, and theme_set() replaces the
# session-wide default theme rather than styling this plot only.
b2 <- ggplot(
  top10_injuries,
  aes(x = reorder(EVTYPE, INJURIES), y = INJURIES)
) +
  geom_bar(stat = "identity", fill = "darkcyan") +
  # The complete theme goes first so the theme() tweaks below are not reset.
  theme_classic() +
  theme(axis.text.x = element_text(angle = 0, hjust = 1, size = 10)) +
  xlab("Event Type") +
  ylab("Injuries") +
  ggtitle("Total Injuries by top 10 Weather Events") +
  theme(plot.title = element_text(size = 10)) +
  coord_flip()
# Stack the fatalities and injuries charts under one shared title.
# The title previously read "Polulation"; the typo is corrected here.
grid.arrange(
  b1, b2,
  nrow = 2,
  top = "Population health as a result of the most harmful events"
)
#### 3.2.1 Top 10 Property Damages
# Render the property-damage ranking as a formatted table.
top10_prop %>%
  formattable()
| EVTYPE | PROPDMGVAL |
|---|---|
| FLOOD | 143779180000 |
| HURRICANE/TYPHOON | 69303870000 |
| TORNADO | 53783900134 |
| STORM SURGE | 43304930000 |
| FLASH FLOOD | 15416842262 |
| HAIL | 15060160736 |
| HURRICANE | 11858970000 |
| TROPICAL STORM | 7657980000 |
| WINTER STORM | 6557340001 |
| RIVER FLOOD | 5105200000 |
# Render the crop-damage ranking as a formatted table.
top10_crop %>%
  formattable()
| EVTYPE | CROPDMGVAL |
|---|---|
| DROUGHT | 13972566000 |
| FLOOD | 5661968450 |
| RIVER FLOOD | 5029459000 |
| ICE STORM | 5022113500 |
| HAIL | 3025954470 |
| HURRICANE | 2741910000 |
| HURRICANE/TYPHOON | 2607872800 |
| FLASH FLOOD | 1421317100 |
| EXTREME COLD | 1292973000 |
| FROST/FREEZE | 1094086000 |
# Horizontal bar chart of total property damage for the top 10 event types,
# with bars ordered by damage value.
# theme_classic() is now added as a plot layer. Previously
# theme_set(theme_classic()) was passed as a stray positional argument to
# aes(); it is not an aesthetic mapping, and theme_set() replaces the
# session-wide default theme rather than styling this plot only.
h3 <- ggplot(
  top10_prop,
  aes(x = reorder(EVTYPE, PROPDMGVAL), y = PROPDMGVAL)
) +
  geom_bar(stat = "identity", fill = "seagreen1") +
  # The complete theme goes first so the theme() tweaks below are not reset.
  theme_classic() +
  theme(axis.text.x = element_text(angle = 0, hjust = 1, size = 10)) +
  xlab("Event Type") +
  ylab("Total Damage (USD)") +
  ggtitle("Total Property Damage by top 10 Weather Events") +
  theme(plot.title = element_text(size = 10)) +
  coord_flip()
# Horizontal bar chart of total crop damage for the top 10 event types, with
# bars ordered by damage value.
# theme_classic() is now added as a plot layer. Previously
# theme_set(theme_classic()) was passed as a stray positional argument to
# aes(); it is not an aesthetic mapping, and theme_set() replaces the
# session-wide default theme rather than styling this plot only.
h4 <- ggplot(
  top10_crop,
  aes(x = reorder(EVTYPE, CROPDMGVAL), y = CROPDMGVAL)
) +
  geom_bar(stat = "identity", fill = "seagreen3") +
  # The complete theme goes first so the theme() tweaks below are not reset.
  theme_classic() +
  theme(axis.text.x = element_text(angle = 0, hjust = 1, size = 10)) +
  xlab("Event Type") +
  ylab("Total Damage (USD)") +
  ggtitle("Total Crop Damage by top 10 Weather Events") +
  theme(plot.title = element_text(size = 10)) +
  coord_flip()
# Stack the property-damage and crop-damage charts under one shared title.
grid.arrange(
  h3,
  h4,
  nrow = 2,
  as.table = TRUE,
  top = "Economic damage as a result of the most harmful events",
  padding = unit(0.5, "line")
)