In this analysis we examine the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. The goal is to identify, based on the historical data, the weather event types with the greatest impact in terms of deaths, injuries and total damage in dollars.
The explored dataset describes weather events in different aspects for the period 1950 to 2011. In the study we extract the relevant data fields and aggregate the data by weather event type, summing up the deaths, the injured people and the total damage in dollars for property and crops. Three bar charts visualise the total deaths, injuries and total damage over the period by weather event type.
As can be seen, tornadoes have the highest impact on deaths and injuries. The highest economic impact is caused by floods.
library(ggplot2)
library(reshape)
In this section we download the data and apply the necessary data processing steps. The data is downloaded from the Storm Data file. The documentation of the data is available in the National Weather Service Storm Data Documentation.
Downloading the data
# Download the compressed storm data set unless a local copy already exists.
if (!file.exists("repdata_data_StormData.csv.bz2")) {
  url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  download.file(
    url = url,
    destfile = "repdata_data_StormData.csv.bz2",
    method = "curl"
  )
}
Reading the downloaded data and caching it in RDS format if the RDS file does not exist yet
# Build the RDS cache from the downloaded CSV when the cache file is missing.
if (!file.exists("repdata_data_StormData.RDS")) {
  # Parse the downloaded file as a comma-separated table with a header row
  data <- read.csv("repdata_data_StormData.csv.bz2", header = TRUE, sep = ",")
  # Persist the parsed table so later runs can skip the CSV parsing
  saveRDS(data, file = "repdata_data_StormData.RDS")
}
Loading the data from the RDS file, which is quicker to read than the compressed CSV
# Load the cached data set and print its dimensions.
data <- readRDS("repdata_data_StormData.RDS")
dim(data)
## [1] 902297 37
Subsetting the data to the columns relevant for the analysis
# Keep only the event type, casualty and damage columns.
data <- data[c(
  "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)]
Creating factor variables for event type
# Treat the event type as a categorical grouping variable.
data$EVTYPE <- as.factor(data$EVTYPE)

# Total fatalities and total injuries per event type.
fatalities_agr <- setNames(
  aggregate(x = data$FATALITIES, by = list(data$EVTYPE), FUN = sum),
  c("Event", "Count")
)
injuries_agr <- setNames(
  aggregate(x = data$INJURIES, by = list(data$EVTYPE), FUN = sum),
  c("Event", "Count")
)

# Both aggregates are grouped by the same factor, so their rows line up and
# the injuries column can be attached directly to the fatalities table.
eventsdisasters <- setNames(
  cbind(fatalities_agr, injuries_agr[, 2]),
  c("Event_type", "Fatailities_count", "Injuries_count")
)
Showing the 6 most impactful event types by deaths and injuries
# Sort by fatalities (descending), breaking ties by injuries (descending),
# then print the first six rows.
eventsdisasters <- eventsdisasters[
  with(eventsdisasters, order(-Fatailities_count, -Injuries_count)),
]
head(eventsdisasters, n = 6L)
## Event_type Fatailities_count Injuries_count
## 834 TORNADO 5633 91346
## 130 EXCESSIVE HEAT 1903 6525
## 153 FLASH FLOOD 978 1777
## 275 HEAT 937 2100
## 464 LIGHTNING 816 5230
## 856 TSTM WIND 504 6957
In this section we review the economic damage, converting the recorded values to dollars using the appropriate magnitude units.
# Convert a damage coefficient and its magnitude code into a plain number.
#
# Args:
#   coef:  Damage coefficient(s); anything as.numeric() can convert
#          (numbers or numeric strings).
#   scale: Magnitude code(s), matched case-insensitively:
#          "K" = thousands, "M" = millions, "B" = billions.
#          NA, "?", "" and any other code leave the coefficient unscaled.
#
# Returns:
#   An unnamed numeric vector: as.numeric(coef) times the multiplier selected
#   by scale. Works element-wise, so whole columns can be passed at once;
#   scalar calls behave as before.
unitscale <- function(coef, scale) {
  multipliers <- c(K = 10^3, M = 10^6, B = 10^9)
  # Look up each code by name; codes without an entry (including NA) give NA.
  factors <- unname(multipliers[toupper(as.character(scale))])
  # Unknown or missing codes mean "no scaling".
  factors[is.na(factors)] <- 1
  as.numeric(coef) * factors
}
# Convert each row's coefficient/magnitude pair into a single damage value.
# The columns are handed to mapply() as separate vectors so the coefficients
# stay numeric; apply() on the data frame would first coerce every row to
# character (numeric columns go through format(), which can round).
data$property_damage <- mapply(
  unitscale,
  data$PROPDMG,
  as.character(data$PROPDMGEXP),
  USE.NAMES = FALSE
)
data$crop_damage <- mapply(
  unitscale,
  data$CROPDMG,
  as.character(data$CROPDMGEXP),
  USE.NAMES = FALSE
)
Aggregating damage values in dollars
# Total property damage and total crop damage per event type.
propoerty_damage_agr <- setNames(
  aggregate(x = data$property_damage, by = list(data$EVTYPE), FUN = sum),
  c("Event", "Property_damage_value")
)
crop_damage_agr <- setNames(
  aggregate(x = data$crop_damage, by = list(data$EVTYPE), FUN = sum),
  c("Event", "Crop_damage_value")
)
Creating the summary dataset and showing the 6 highest-impact event types
# Put property and crop damage side by side; both aggregates are grouped by
# the same factor, so their rows line up.
eventsdamage_value <- setNames(
  cbind(propoerty_damage_agr, crop_damage_agr[, 2]),
  c("Event_type", "Property_damage_value", "Crop_damage_value")
)

# Total damage is the sum of the property and crop components.
eventsdamage_value$Total_damage_value <- with(
  eventsdamage_value,
  Property_damage_value + Crop_damage_value
)

# Largest total damage first, then print the first six rows.
eventsdamage_value <- eventsdamage_value[
  with(eventsdamage_value, order(-Total_damage_value)),
]
head(eventsdamage_value, n = 6L)
## Event_type Property_damage_value Crop_damage_value
## 170 FLOOD 144657709807 5661968450
## 411 HURRICANE/TYPHOON 69305840000 2607872800
## 834 TORNADO 56937160779 414953270
## 670 STORM SURGE 43323536000 5000
## 244 HAIL 15732267048 3025954473
## 153 FLASH FLOOD 16140812067 1421317100
## Total_damage_value
## 170 150319678257
## 411 71913712800
## 834 57352114049
## 670 43323541000
## 244 18758221521
## 153 17562129167
In this section we present 3 bar charts showing the highest-impact weather event types in the corresponding categories.
# Order the event-type factor by injuries so the bars are drawn sorted.
eventsdisasters$events_order_by_injuries_count <- reorder(
  eventsdisasters$Event_type,
  eventsdisasters$Injuries_count
)

# Chart the ten event types with the MOST INJURIES. eventsdisasters is sorted
# by fatalities earlier in the analysis, so taking its first ten rows would
# chart the ten deadliest event types instead; re-rank by injuries here.
p1 <- ggplot(
  data = head(
    eventsdisasters[order(-eventsdisasters$Injuries_count), ],
    n = 10L
  ),
  aes(x = events_order_by_injuries_count, y = Injuries_count)
)
p1 +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    title = "10 Highest impact types of weather\n measured by injuries 1950 - 2011",
    y = "Number of People",
    x = "Weather event type"
  )
As can be clearly seen, tornadoes cause by far the most injuries.
# Order the event-type factor by fatalities so the bars are drawn sorted.
eventsdisasters$events_order_by_deaths_count <- reorder(
  eventsdisasters$Event_type,
  eventsdisasters$Fatailities_count
)

# Horizontal bar chart of the first ten rows of eventsdisasters.
p2 <- ggplot(
  data = eventsdisasters[seq_len(10), ],
  mapping = aes(x = events_order_by_deaths_count, y = Fatailities_count)
)
p2 +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    title = "10 Highest impact types of weather\n measured by deaths 1950 - 2011",
    y = "Number of People",
    x = "Weather event type"
  )
As can be clearly seen, tornadoes cause the most deaths.
# Order the event-type factor by total damage so the bars are drawn sorted.
eventsdamage_value$events_order_by_damage <- reorder(
  eventsdamage_value$Event_type,
  eventsdamage_value$Total_damage_value
)

# Horizontal bar chart of the first ten rows of eventsdamage_value, with the
# total damage divided by 10^9 for the axis.
p3 <- ggplot(
  data = eventsdamage_value[seq_len(10), ],
  mapping = aes(x = events_order_by_damage, y = Total_damage_value / 10^9)
)
p3 +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    title = "10 Highest damage events types of weather\n measured by total property and crop damage 1950 - 2011",
    y = "Damage in Billion dollars",
    x = "Weather event type"
  )