NOAA 1950 to 2011 summaries based on population and cost impact by event type

Synopsis

This report summarises the impact of weather events on the USA population, property and crops, and describes the method used to obtain it, based on NOAA data covering the years 1950 to 2011. The data set grew in coverage and completeness as time elapsed, but still contains missing or incomplete data. The data set covers 38 pieces of information (including a reference number) and has over 900K records. It records start time, location (state, county, latitude and longitude), type of event, end time of event, area impacted, G forces and impact. The impacts cover fatalities, injuries, and property and crop damage costs, which are recorded using the year of record. The graphs summarise the answers to two questions: first, which event types are most harmful to the population, and second, which event types have the greatest economic cost. Data subsetting and data conversion were used in support of the analysis.

The reference for the event grouping is found at:

http://www.nws.noaa.gov/om/hazstats.shtml

library(lubridate)
library(data.table)
library(dtplyr)
library(ggplot2)
library(grid)
library(knitr) 
library(markdown)
library(R.utils)
library(quantmod) # used if going to look at time value of cost impacts
library(gridExtra)
### Download the NOAA storm archive once and cache it locally as storm.csv.
### Nothing is fetched or rebuilt when storm.csv already exists.

if (!file.exists("storm.csv")) {
        ### Fetch and unpack the archive only when the decompressed CSV is
        ### missing, so an existing copy is not downloaded a second time.
        if (!file.exists("repdata-data-StormData.csv")) {
                ### mode = "wb": the .bz2 archive is binary and must not go
                ### through text-mode translation (matters on Windows).
                download.file("https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2", "repdata-data-StormData.csv.bz2", mode = "wb")
                bunzip2("repdata-data-StormData.csv.bz2", overwrite = FALSE)
        }
        storm <- read.csv("repdata-data-StormData.csv", sep = ",")
        ### write.csv keeps its default row names, so the cached file gains a
        ### leading index column in addition to the original fields.
        write.csv(storm, file = "storm.csv")
}

### Record when the cached CSV was written (its modification time).
datedownload <- file.mtime("storm.csv")

### na.strings = "" turns empty fields into NA, fill = TRUE pads rows that
### have blank or missing fields, and row.names = NULL forces plain row
### numbering.
data <- read.table(file = "storm.csv", header = TRUE, sep = ",", na.strings = "", fill = TRUE, row.names = NULL)

Data Processing - Subsetting data

Results

The plots show the top 10 weather event types for deaths, injuries, property damage and crop damage.

### Death plot: bar chart of FATALITIES per EVTYPE from healthdatafat.
### The x-axis factor levels are set from the data sorted by descending
### FATALITIES, so the bars run from the largest count to the smallest.
deaths <- ggplot() +
        geom_bar(
                data = healthdatafat,
                aes(
                        x = factor(EVTYPE, levels = EVTYPE[order(FATALITIES, decreasing = TRUE)]),
                        y = FATALITIES,
                        ### fill varies with each (count, event) combination
                        fill = interaction(FATALITIES, EVTYPE)
                ),
                stat = "identity",
                ### FALSE rather than F: F is an ordinary, reassignable variable
                show.legend = FALSE
        ) +
        theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
        xlab("Harmful Events") +
        ylab("No. of Fatalities") +
        ggtitle("Top 10 weather events causing Fatalities")

### Injury plot: bar chart of INJURIES per EVTYPE from healthdatainj.
### The x-axis factor levels are set from the data sorted by descending
### INJURIES, so the bars run from the largest count to the smallest.
injuries <- ggplot() +
        geom_bar(
                data = healthdatainj,
                aes(
                        x = factor(EVTYPE, levels = EVTYPE[order(INJURIES, decreasing = TRUE)]),
                        y = INJURIES,
                        ### fill varies with each (count, event) combination
                        fill = interaction(INJURIES, EVTYPE)
                ),
                stat = "identity",
                ### FALSE rather than F: F is an ordinary, reassignable variable
                show.legend = FALSE
        ) +
        theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
        xlab("Harmful Events") +
        ylab("No. of Injuries") +
        ggtitle("Top 10 weather events causing Injuries")

### Draw the two health plots side by side: deaths left, injuries right.
grid.arrange(grobs = list(deaths, injuries), ncol = 2)

### Fig #1 Deaths and Injuries Plots

## 
##  Total deaths are: 15145 , and the total injuries are: 140528 .
### Print the fatalities table that the deaths plot is drawn from.
print(healthdatafat)
##            EVTYPE FATALITIES
## 68        TORNADO       5227
## 10 EXCESSIVE HEAT        402
## 48      LIGHTNING        283
## 71      TSTM WIND        199
## 15    FLASH FLOOD        171
## 16          FLOOD        104
## 36      HIGH WIND        102
## 80   WINTER STORM         85
## 27           HEAT         73
## 78       WILDFIRE         55
### Print the injuries table that the injuries plot is drawn from.
print(healthdatainj)
##               EVTYPE INJURIES
## 68           TORNADO    60187
## 10    EXCESSIVE HEAT     4791
## 16             FLOOD     2679
## 43         ICE STORM     1720
## 27              HEAT     1420
## 41 HURRICANE/TYPHOON     1219
## 3           BLIZZARD      718
## 48         LIGHTNING      649
## 71         TSTM WIND      646
## 15       FLASH FLOOD      641
### Property damage plot: bar chart of PROPDMG per EVTYPE from propdata.
### The x-axis factor levels are set from the data sorted by descending
### PROPDMG, so the bars run from the largest cost to the smallest.
costprop <- ggplot() +
        geom_bar(
                data = propdata,
                aes(
                        x = factor(EVTYPE, levels = EVTYPE[order(PROPDMG, decreasing = TRUE)]),
                        y = PROPDMG,
                        ### fill varies with each (cost, event) combination
                        fill = interaction(PROPDMG, EVTYPE)
                ),
                stat = "identity",
                ### FALSE rather than F: F is an ordinary, reassignable variable
                show.legend = FALSE
        ) +
        theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
        xlab("Costly Events") +
        ylab("Cost of Property Damage - Dollars($)") +
        ggtitle("Top 10 weather events causing Property Damage")

### Crop damage plot: bar chart of CROPDMG per EVTYPE from cropdata.
### The x-axis factor levels are set from the data sorted by descending
### CROPDMG, so the bars run from the largest cost to the smallest.
costcrop <- ggplot() +
        geom_bar(
                data = cropdata,
                aes(
                        x = factor(EVTYPE, levels = EVTYPE[order(CROPDMG, decreasing = TRUE)]),
                        y = CROPDMG,
                        ### fill varies with each (cost, event) combination
                        fill = interaction(CROPDMG, EVTYPE)
                ),
                stat = "identity",
                ### FALSE rather than F: F is an ordinary, reassignable variable
                show.legend = FALSE
        ) +
        theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
        xlab("Costly Events") +
        ylab("Cost of Crop Damage - Dollars($)") +
        ggtitle("Top 10 weather events causing Crop Damage")

### Draw the two cost plots side by side: property left, crop right.
grid.arrange(grobs = list(costprop, costcrop), ncol = 2)

### Fig #2 Property & Crop Plots

## 
##  Total Prorperty damage is: 8.40926e+13 , and the total crop damage is: 7.344694e+13 for a total damage cost of 1.575395e+14 .
### Print the property damage table that the property plot is drawn from.
### NOTE(review): the printed PROPDMG values are on the order of 1e13, and
### THUNDERSTORM WIND / THUNDERSTORM WINDS / TSTM WIND appear as separate
### rows - confirm the dollar conversion and event-name grouping done
### upstream (not visible in this section).
print(propdata)
##                EVTYPE      PROPDMG
## 10        FLASH FLOOD 1.721257e+13
## 24               HAIL 1.593794e+13
## 14              FLOOD 1.205047e+13
## 72            TORNADO 9.175890e+12
## 79          TSTM WIND 8.253993e+12
## 63 THUNDERSTORM WINDS 5.626033e+12
## 62  THUNDERSTORM WIND 3.965385e+12
## 37          HIGH WIND 3.685047e+12
## 12     FLASH FLOODING 1.324823e+12
## 88           WILDFIRE 7.931438e+11
### Print the crop damage table that the crop plot is drawn from.
### NOTE(review): the printed CROPDMG values are on the order of 1e13 -
### confirm the dollar conversion done upstream (not visible in this section).
print(cropdata)
##                EVTYPE      CROPDMG
## 24               HAIL 2.679696e+13
## 10        FLASH FLOOD 1.070681e+13
## 14              FLOOD 9.528190e+12
## 72            TORNADO 7.741043e+12
## 79          TSTM WIND 5.578355e+12
## 62  THUNDERSTORM WIND 4.075668e+12
## 37          HIGH WIND 2.162796e+12
## 75     TROPICAL STORM 9.344577e+11
## 63 THUNDERSTORM WINDS 8.039181e+11
## 58   STORM SURGE/TIDE 7.501000e+11