library(plyr)
library(poweRlaw)
library(maptools)
## Loading required package: sp
## Checking rgeos availability: TRUE
library(ggplot2)
library(taRifx)
## Loading required package: reshape2
## Switch the time, collation, character-type and monetary locale categories
## of this session to English. The `## [1] ...` lines are the values recorded
## when the script was originally run.
## NOTE(review): "English" is a Windows-style locale name (the recorded output
## is a Windows locale string); on other platforms Sys.setlocale() may warn and
## return "" without changing anything -- confirm the target platform.
Sys.setlocale("LC_TIME", "English")
## [1] "English_United States.1252"
Sys.setlocale("LC_COLLATE", "English")
## [1] "English_United States.1252"
Sys.setlocale("LC_CTYPE", "English")
## [1] "English_United States.1252"
Sys.setlocale("LC_MONETARY", "English")
## [1] "English_United States.1252"
## Read the bzip2-compressed storm CSV into `data`, keeping text columns as
## character vectors. The connection is closed in `finally`, so it is released
## even when read.csv() fails part-way through.
file <- bzfile("repdata_data_StormData.csv.bz2", open = "r")
data <- tryCatch(
  read.csv(file, stringsAsFactors = FALSE),
  finally = close(file)
)
## Collapse related EVTYPE labels into broad groups: every label containing a
## keyword is replaced by that keyword. Keywords are applied in the order
## listed, so a label matching several keywords lands in the first group that
## claims it (e.g. a label containing both "WINTER" and "WIND" becomes "WIND").
## NOTE(review): matching is case-sensitive; lower/mixed-case labels, if the
## data has any, stay ungrouped -- confirm this is intended.
event_groups <- c("WIND", "TORNADO", "HEAT", "SNOW", "FLOOD", "WINTER", "RIP")
for (group in event_groups) {
  data$EVTYPE[grepl(group, data$EVTYPE)] <- group
}
## Total fatalities and injuries per (grouped) event type.
total_by_event <- ddply(
  data, .(EVTYPE), summarise,
  fatalities = sum(FATALITIES, na.rm = TRUE),
  injuries = sum(INJURIES, na.rm = TRUE)
)
## Rank event types by fatalities, ties broken by injuries (both descending).
sorted <- arrange(total_by_event, desc(fatalities), desc(injuries))
## Drop only the event types that harmed nobody: a type is kept when it caused
## fatalities OR injuries. Requiring both (`&`) silently discarded types with
## deaths but no recorded injuries (and vice versa), understating every total
## derived from this table. Recorded outputs further down that depend on
## `remove_no_damage` predate this change and should be regenerated.
remove_no_damage <- sorted[sorted$fatalities > 0 | sorted$injuries > 0, ]
## Turn each (amount, exponent code) pair into a dollar figure.
## Multiplier per recognised exponent code: thousands, millions, billions.
Mults <- c(M = 10^6, m = 10^6, K = 10^3, k = 10^3, B = 10^9, b = 10^9)
## Scale `amount` by the multiplier looked up from `exponent_code`. Codes with
## no entry in `Mults` (and missing amounts) give NA, which is counted as 0.
## NOTE(review): this also zeroes rows whose code is blank or otherwise
## unlisted -- confirm that is the intended reading of those codes.
dollar_amount <- function(amount, exponent_code) {
  dollars <- amount * Mults[exponent_code]
  replace(dollars, is.na(dollars), 0)
}
data$sumP <- dollar_amount(data$PROPDMG, data$PROPDMGEXP)
data$sumC <- dollar_amount(data$CROPDMG, data$CROPDMGEXP)
## Total property and crop damage (in dollars) per (grouped) event type.
total_by_event2 <- ddply(
  data, .(EVTYPE), summarise,
  property = sum(sumP, na.rm = TRUE),
  crops = sum(sumC, na.rm = TRUE)
)
## Rank event types by property damage, ties broken by crop damage.
sorted2 <- arrange(total_by_event2, desc(property), desc(crops))
## Drop only the event types with no economic damage at all: a type is kept
## when it damaged property OR crops. Requiring both (`&`) discarded types
## that hit only one of the two, understating the totals derived from this
## table. Recorded outputs further down that depend on `remove_no_damage2`
## predate this change and should be regenerated.
remove_no_damage2 <- sorted2[sorted2$property > 0 | sorted2$crops > 0, ]
## Ten highest-ranked event types (rows are ordered by fatalities, then
## injuries)
head(sorted, n = 10)
## EVTYPE fatalities injuries
## 1 TORNADO 5636 91407
## 2 HEAT 3138 9154
## 3 FLOOD 1523 8603
## 4 WIND 1446 11495
## 5 LIGHTNING 816 5230
## 6 RIP 577 529
## 7 WINTER 277 1876
## 8 AVALANCHE 224 170
## 9 SNOW 162 1118
## 10 EXTREME COLD 160 231
## Fatalities summed over every retained event type
total_deaths <- sum(remove_no_damage[["fatalities"]])
total_deaths
## [1] 15052
## Fraction of all fatalities due to the first-ranked event type (tornado in
## the recorded output)
top_death_percentage <- remove_no_damage[["fatalities"]][1] / total_deaths
top_death_percentage
## [1] 0.3744
## Fraction of all fatalities due to the five highest-ranked event types
top_five_deaths <- sum(remove_no_damage[["fatalities"]][seq_len(5)])
top_five_deaths_percentage <- top_five_deaths / total_deaths
top_five_deaths_percentage
## [1] 0.8344
## Injuries summed over every retained event type
total_injuries <- sum(remove_no_damage$injuries)
total_injuries
## [1] 140356
## `remove_no_damage` is ordered by fatalities, so its first rows are not
## guaranteed to be the most injurious event types. Rank the injury counts on
## their own before picking the "top" ones (the crop-damage section re-sorts
## for the same reason).
injuries_ranked <- sort(remove_no_damage$injuries, decreasing = TRUE)
## Fraction of all injuries due to the most injurious event type (tornado in
## the recorded output)
top_injury_percentage <- injuries_ranked[1] / total_injuries
top_injury_percentage
## [1] 0.6513
## Fraction of all injuries due to the five most injurious event types;
## head() copes with tables shorter than five rows, where `[1:5]` yields NA.
top_five_injuries <- sum(head(injuries_ranked, 5))
top_five_injury_percentage <- top_five_injuries / total_injuries
top_five_injury_percentage
## [1] 0.8969
library(maptools)
## Two side-by-side rank plots: fatalities (left) and injuries (right).
par(mfrow = c(1, 2))
## Left panel: `remove_no_damage` is already in descending order of
## fatalities. Only event types with more than 1000 fatalities get a name;
## the rest get an NA label.
## NOTE(review): labels are anchored at x = 0 rather than at each point's own
## index -- kept as in the original; confirm this placement is intended.
plot(remove_no_damage$fatalities, col = "blue", pch = 16, ylab = "Fatalities")
pointLabel(
  x = 0, y = remove_no_damage$fatalities,
  labels = ifelse(
    remove_no_damage$fatalities > 1000,
    as.character(remove_no_damage$EVTYPE),
    NA
  ),
  allowSmallOverlap = FALSE, cex = 0.7, pos = 4
)
## Right panel: re-rank by injuries first. The table is ordered by fatalities,
## so plotting its injuries column directly would not give a rank plot (the
## crop-damage panel is re-sorted the same way before plotting).
by_injuries <- arrange(remove_no_damage, desc(injuries))
plot(by_injuries$injuries, col = "red", pch = 16, ylab = "Injuries")
pointLabel(
  x = 0, y = by_injuries$injuries,
  labels = ifelse(
    by_injuries$injuries > 7000,
    as.character(by_injuries$EVTYPE),
    NA
  ),
  allowSmallOverlap = FALSE, cex = 0.7, pos = 4
)
## Ten highest-ranked event types (rows are ordered by property damage, then
## crop damage)
head(sorted2, n = 10)
## EVTYPE property crops
## 1 FLOOD 1.674e+11 1.224e+10
## 2 HURRICANE/TYPHOON 6.931e+10 2.608e+09
## 3 TORNADO 5.699e+10 4.150e+08
## 4 STORM SURGE 4.332e+10 5.000e+03
## 5 WIND 1.774e+10 2.159e+09
## 6 HAIL 1.573e+10 3.026e+09
## 7 HURRICANE 1.187e+10 2.742e+09
## 8 TROPICAL STORM 7.704e+09 6.783e+08
## 9 WINTER 6.717e+09 4.244e+07
## 10 WILDFIRE 4.765e+09 2.955e+08
## Property damage summed over every retained event type
total_property_damage <- sum(remove_no_damage2[["property"]])
total_property_damage
## [1] 4.236e+11
## Fraction of all property damage due to the costliest event type (flood in
## the recorded output)
top_event_property <- max(remove_no_damage2[["property"]]) /
  total_property_damage
top_event_property
## [1] 0.3951
## Fraction of all property damage due to the five highest-ranked event types
top_five_property <- sum(remove_no_damage2[["property"]][seq_len(5)])
top_five__property_percentage <- top_five_property / total_property_damage
top_five__property_percentage
## [1] 0.8374
## Crop damage summed over every retained event type
total_crop_damage <- sum(remove_no_damage2[["crops"]])
total_crop_damage
## [1] 4.87e+10
## Fraction of all crop damage due to the event type with the largest crop
## losses (drought, according to the original analysis)
top_event_crop <- max(remove_no_damage2[["crops"]]) / total_crop_damage
top_event_crop
## [1] 0.2869
## Fraction of all crop damage due to the five event types with the largest
## crop losses; the table is re-ranked by crops first because it is ordered
## by property damage
sort_crop <- arrange(remove_no_damage2, desc(crops))
top_five_crops <- sum(sort_crop[["crops"]][seq_len(5)])
top_five_crops_percentage <- top_five_crops / total_crop_damage
top_five_crops_percentage
## [1] 0.7598
library(maptools)
## Two side-by-side rank plots: property damage (left) and crop damage
## (right).
par(mfrow = c(1, 2))
## Left panel: `remove_no_damage2` is already in descending order of property
## damage. Only event types above 1.7e10 dollars get a name; the rest get an
## NA label. The axis label previously read "Propery Damage" (typo).
## NOTE(review): labels are anchored at x = 0 rather than at each point's own
## index -- kept as in the original; confirm this placement is intended.
plot(
  remove_no_damage2$property,
  col = "blue", pch = 16, ylab = "Property Damage"
)
pointLabel(
  x = 0, y = remove_no_damage2$property,
  labels = ifelse(
    remove_no_damage2$property > 1.7e+10,
    as.character(remove_no_damage2$EVTYPE),
    NA
  ),
  allowSmallOverlap = FALSE, cex = 0.7, pos = 4
)
## Right panel: crop damage in descending order (`sort_crop`), naming only the
## event types above 2.9e9 dollars.
plot(sort_crop$crops, col = "red", pch = 16, ylab = "Crop Damage")
pointLabel(
  x = 0, y = sort_crop$crops,
  labels = ifelse(
    sort_crop$crops > 2.9e+09,
    as.character(sort_crop$EVTYPE),
    NA
  ),
  allowSmallOverlap = FALSE, cex = 0.7, pos = 4
)