In this report we describe the weather events that have resulted in the greatest health consequences (Part 1) and the greatest economic consequences (Part 2). The events in the NOAA Storm Database start in the year 1950 and end in November 2011. Using this database, we took “INJURIES” and “FATALITIES” as the measure of health consequences, and “CROPDMG x CROPDMGEXP” and “PROPDMG x PROPDMGEXP” as the measure of economic consequences.
# Data Processing
# Download the bzip2-compressed storm data into a temporary file and load it.
temp <- tempfile()
# mode = "wb" keeps the compressed archive byte-exact on every platform.
download.file("http://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
  temp, mode = "wb")
# bzfile() takes the open mode as its second argument, not a file name. Passing
# only the path hands read.csv() an unopened connection, which it opens and
# closes itself, so no connection is left dangling afterwards.
# Later steps of this script were written against factor columns (the
# read.csv() default before R 4.0.0), so factors are requested explicitly.
raw_data <- read.csv(bzfile(temp), stringsAsFactors = TRUE)
unlink(temp)
# Accumulators for the per-row totals (casualties, and crop + property damage).
INJURIES_and_FATALITIES <- c()
total_damages <- c()
# Lookup tables from a damage-exponent code to the multiplier applied to the
# matching CROPDMG / PROPDMG figure. The codes are spelled out explicitly
# instead of being taken positionally from the factor levels of the loaded
# data, so the tables do not depend on how the columns were read or sorted.
# Codes mapped to 0 (symbols and digits) contribute nothing to the totals; the
# empty code is deliberately absent and is treated as 0 by the lookup code.
crop_value_map <- c(
  "?" = 0, "0" = 0, "2" = 0,
  "B" = 1e+09,
  "k" = 1000, "K" = 1000,
  "m" = 1e+06, "M" = 1e+06
)
prop_value_map <- c(
  "-" = 0, "?" = 0, "+" = 0,
  "0" = 0, "1" = 0, "2" = 0, "3" = 0, "4" = 0,
  "5" = 0, "6" = 0, "7" = 0, "8" = 0,
  "B" = 1e+09,
  "h" = 100, "H" = 100,
  "K" = 1000,
  "m" = 1e+06, "M" = 1e+06
)
# Keep only the columns needed for the health analysis (original column order
# of raw_data is preserved).
relevant_columns <- c("EVTYPE", "FATALITIES", "INJURIES")
data <- raw_data[, names(raw_data) %in% relevant_columns]
# Keep only the events that caused at least one fatality or injury.
data <- data[data$FATALITIES != 0 | data$INJURIES != 0, ]
# Total casualties per row, computed in one vectorized step. Growing a vector
# element by element is quadratic in the number of rows, and a 1:nrow() loop
# misbehaves when no rows are left.
INJURIES_and_FATALITIES <- data$FATALITIES + data$INJURIES
# Adding a new column to the data frame containing the total number of
# INJURIES and FATALITIES
data <- cbind(data, INJURIES_and_FATALITIES)
# split the data based on EVTYPE
data_split <- split(data, data$EVTYPE)
# Total casualties for each event type, computed in one pass rather than by
# rbind()-ing a one-row data frame per event inside a loop.
casualties_per_event <- vapply(
  data_split,
  function(event_rows) sum(event_rows$INJURIES_and_FATALITIES),
  numeric(1)
)
# Event types that have no rows left after filtering would otherwise show up
# as rows with a missing name and a zero total, so they are left out.
event_has_rows <- vapply(data_split, nrow, integer(1)) > 0L
# The full set of group names is kept as the factor levels of EVTYPE.
event_data_frame <- data.frame(
  EVTYPE = factor(names(data_split), levels = names(data_split))[event_has_rows],
  sum_of_all_injuries_and_fatalities_per_event =
    unname(casualties_per_event)[event_has_rows]
)
# Order the event types from the largest to the smallest casualty total.
sorted_event_data_frame <- with(
  event_data_frame,
  event_data_frame[order(sum_of_all_injuries_and_fatalities_per_event,
    decreasing = TRUE), ]
)
# Print the five most harmful event types, one result per rank.
invisible(lapply(1:5, function(rank) {
  print(sorted_event_data_frame$EVTYPE[rank])
}))
# Output recorded for ranks 1 to 5:
## [1] TORNADO
## 985 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
## [1] EXCESSIVE HEAT
## 985 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
## [1] TSTM WIND
## 985 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
## [1] FLOOD
## 985 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
## [1] LIGHTNING
## 985 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
# Bar chart of the five event types with the most casualties; the totals are
# divided by 1000 to match the y-axis label.
local({
  top_five <- sorted_event_data_frame[1:5, ]
  barplot(
    height = top_five$sum_of_all_injuries_and_fatalities_per_event / 1000,
    names.arg = top_five$EVTYPE,
    col = rainbow(5),
    space = c(12, 1),
    main = "Total number of injuries and fatalities for top five events",
    xlab = "Event Types",
    ylab = "Total number injuries and fatalities/1000"
  )
})
# Keep only the columns needed for the economic analysis (original column
# order of raw_data is preserved). Note that raw_data is overwritten here.
relevant_columns <- c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")
raw_data <- raw_data[, names(raw_data) %in% relevant_columns]
# Keep only the events that caused some property or crop damage.
raw_data <- raw_data[raw_data$PROPDMG != 0 | raw_data$CROPDMG != 0, ]
# Translate every exponent code into its multiplier in one vectorized lookup
# instead of testing and appending row by row, which is quadratic in the
# number of rows. Codes that are not in the lookup table (including the empty
# code) come back as NA and count as a multiplier of 0.
crop_multiplier <- unname(crop_value_map[as.character(raw_data$CROPDMGEXP)])
crop_multiplier[is.na(crop_multiplier)] <- 0
prop_multiplier <- unname(prop_value_map[as.character(raw_data$PROPDMGEXP)])
prop_multiplier[is.na(prop_multiplier)] <- 0
total_damages <- raw_data$CROPDMG * crop_multiplier +
  raw_data$PROPDMG * prop_multiplier
# Output recorded at this point of the original run (a warning about an unused
# file connection being closed; it does not concern the damage figures):
## Warning: closing unused connection 5 (C:\Users\ppandey2\AppData\Local\Temp
## \Rtmp2TZkvt\filef0832845a83)
# Adding a new column to the data frame containing the total damages from crop
# and property
raw_data <- cbind(raw_data, total_damages)
# split the data based on EVTYPE
data_split <- split(raw_data, raw_data$EVTYPE)
# Total crop + property damage for each event type, computed in one pass
# rather than by rbind()-ing a one-row data frame per event inside a loop.
damage_per_event <- vapply(
  data_split,
  function(event_rows) sum(event_rows$total_damages),
  numeric(1)
)
# Event types that have no rows left after filtering would otherwise show up
# as rows with a missing name and a zero total, so they are left out.
event_has_rows <- vapply(data_split, nrow, integer(1)) > 0L
# The full set of group names is kept as the factor levels of EVTYPE.
event_data_frame <- data.frame(
  EVTYPE = factor(names(data_split), levels = names(data_split))[event_has_rows],
  sum_of_all_property_and_crop_damage_per_event =
    unname(damage_per_event)[event_has_rows]
)
# Order the event types from the largest to the smallest damage total.
sorted_event_data_frame_damages <- with(
  event_data_frame,
  event_data_frame[order(sum_of_all_property_and_crop_damage_per_event,
    decreasing = TRUE), ]
)
# Print the five costliest event types, one result per rank.
invisible(lapply(1:5, function(rank) {
  print(sorted_event_data_frame_damages$EVTYPE[rank])
}))
# Output recorded for ranks 1 to 5:
## [1] FLOOD
## 985 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
## [1] HURRICANE/TYPHOON
## 985 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
## [1] TORNADO
## 985 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
## [1] STORM SURGE
## 985 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
## [1] HAIL
## 985 Levels: HIGH SURF ADVISORY COASTAL FLOOD ... WND
# Bar chart of the five event types with the largest combined crop and
# property damage; the totals are divided by 1e+06 to match the y-axis label.
local({
  top_five <- sorted_event_data_frame_damages[1:5, ]
  barplot(
    height = top_five$sum_of_all_property_and_crop_damage_per_event / 1e+06,
    names.arg = top_five$EVTYPE,
    col = rainbow(5),
    space = c(8, 1),
    main = "Total economic damages for top five events",
    xlab = "Event Types",
    ylab = "Total Economic damages in terms of crop and property/1000000"
  )
})
We took “INJURIES” and “FATALITIES” as the measure of health consequences, and “CROPDMG x CROPDMGEXP” and “PROPDMG x PROPDMGEXP” as the measure of economic consequences. We found that the events causing the greatest health consequences were TORNADO, EXCESSIVE HEAT, TSTM WIND, FLOOD and LIGHTNING, and that the events resulting in the greatest economic damages were FLOOD, HURRICANE/TYPHOON, TORNADO, STORM SURGE and HAIL.