Synopsis

There are various weather events on Earth. Each weather event has a different influence on our civilization. Some weather events are more harmful than others, or cause more or less damage. This paper presents an analysis of the weather data from the National Weather Service covering the last 60 years. It finds that the tornado is the most harmful weather event. With a focus on economic damage, drought and flood have the greatest economic consequences.

Data source

The analysis is based on the weather data from the National Weather Service. The data were downloaded from the URL https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2 and extracted into the directory “Dataset”.

library(utils)
library("R.utils") # to use bunzip
## Loading required package: R.oo
## Loading required package: R.methodsS3
## R.methodsS3 v1.7.1 (2016-02-15) successfully loaded. See ?R.methodsS3 for help.
## R.oo v1.20.0 (2016-02-17) successfully loaded. See ?R.oo for help.
## 
## Attaching package: 'R.oo'
## Die folgenden Objekte sind maskiert von 'package:methods':
## 
##     getClasses, getMethods
## Die folgenden Objekte sind maskiert von 'package:base':
## 
##     attach, detach, gc, load, save
## R.utils v2.2.0 (2015-12-09) successfully loaded. See ?R.utils for help.
## 
## Attaching package: 'R.utils'
## Das folgende Objekt ist maskiert 'package:utils':
## 
##     timestamp
## Die folgenden Objekte sind maskiert von 'package:base':
## 
##     cat, commandArgs, getOption, inherits, isOpen, parse, warnings
#' Download (if necessary), extract and load the storm data set.
#'
#' The archive is stored in the directory "Dataset" below the working
#' directory. Download and extraction are skipped when the corresponding file
#' is already present, so repeated calls only re-read the CSV.
#'
#' @return A data frame with the columns EVTYPE, FATALITIES, INJURIES,
#'   PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP.
get_data <- function(){
  data_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
  data_dir <- "Dataset"
  zipfile <- file.path(data_dir, "repdata_data_StormData.csv.bz2")
  datafile <- file.path(data_dir, "repdata_data_StormData.csv")
  keep_cols <- c("EVTYPE", "FATALITIES", "INJURIES",
                 "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")

  if (!dir.exists(data_dir)) {
    dir.create(data_dir)
  }

  # Decide on the CSV itself: an archive that is present but was never
  # extracted (or whose CSV was deleted) must still be unpacked.
  if (!file.exists(datafile)) {
    if (!file.exists(zipfile)) {
      message("Downloading file.")
      # mode = "wb": the archive is binary; a text-mode download corrupts it
      # on Windows.
      download.file(url = data_url, destfile = zipfile, mode = "wb")
    }
    message("Extracting file.")
    # The default destination strips the ".bz2" suffix, i.e. it is `datafile`.
    bunzip2(zipfile, remove = FALSE)
  }

  # read.csv instead of read.table(sep = ","): read.table treats "'" as a
  # quote character and "#" as a comment marker by default, both of which can
  # occur in free-text fields and break the parsing of the file.
  storm <- read.csv(file = datafile, header = TRUE)
  storm[, keep_cols]
}
dat <- get_data()

From the entire dataset, 7 columns with relevant information were evaluated: EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG and CROPDMGEXP.

Data Processing

Find harmful weather events

Harmful weather events are characterized by two measures: - fatalities and - injuries

All recorded weather events of the last 60 years were grouped by event type, and fatalities and injuries were summed. The result is the total number of fatalities and injuries per weather event over the last 60 years. To find the most harmful events, the result is ordered by injuries in decreasing order.

# Total fatalities and injuries per event type.
harmPerEvent <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = dat,
  FUN = sum,
  na.rm = TRUE
)
names(harmPerEvent) <- c("EVTYPE", "FATALITIES", "INJURIES")

# Rank the event types by injuries, highest first, and renumber the rows so
# that the row index equals the rank.
harmPerEventSort <- harmPerEvent[
  order(harmPerEvent$INJURIES, decreasing = TRUE),
]
row.names(harmPerEventSort) <- NULL

In the Results section, the 20 most harmful weather events are plotted.

Find weather events with the greatest economic consequences

The economic consequences fall into two classes: - PROPDMG … property damage - CROPDMG … crop damage.

Before aggregating PROPDMG and CROPDMG, it is necessary to multiply each value by the factor that corresponds to its exponent code (PROPDMGEXP, CROPDMGEXP). The mapping from PROPDMGEXP/CROPDMGEXP to the factor is:

# Exponent codes found in PROPDMGEXP/CROPDMGEXP, grouped by the factor they
# stand for ("empty" denotes a blank code).
PROPDMGEXP_CROPDMGEXP <- c(
  'empty,-,?',
  '+',
  '0,1,2,3,4,5,6,7,8,',
  'h,H',
  'k,K',
  'm,M',
  'b,B'
)
# Multiplier applied to the damage value for each group of codes.
Factor <- c(0, 1, 10, 100, 1e3, 1e6, 1e9)
# Binding a character and a numeric vector yields a character matrix.
tab <- cbind(PROPDMGEXP_CROPDMGEXP = PROPDMGEXP_CROPDMGEXP, Factor = Factor)
tab
##      PROPDMGEXP_CROPDMGEXP Factor 
## [1,] "empty,-,?"           "0"    
## [2,] "+"                   "1"    
## [3,] "0,1,2,3,4,5,6,7,8,"  "10"   
## [4,] "h,H"                 "100"  
## [5,] "k,K"                 "1000" 
## [6,] "m,M"                 "1e+06"
## [7,] "b,B"                 "1e+09"

All recorded weather events of the last 60 years were grouped by event type, and PROPDMG and CROPDMG were summed. The result is the total PROPDMG and CROPDMG per weather event over the last 60 years. To find the events with the greatest economic consequences, the result is ordered by CROPDMG in decreasing order.

# Scale a vector of damage values by the factor its exponent code stands for.
# Codes "", "-" and "?" set the value to 0; values whose code is not listed
# are returned unchanged.
scale_by_exponent <- function(amount, exponent) {
  codes <- c("", "-", "?",
             "+",
             "0", "1", "2", "3", "4", "5", "6", "7", "8",
             "h", "H",
             "k", "K",
             "m", "M",
             "b", "B")
  multipliers <- c(0, 0, 0,
                   1,
                   rep(10, 9),
                   100, 100,
                   1000, 1000,
                   1000000, 1000000,
                   1000000000, 1000000000)

  # Multiplier per row; NA where the code is not in the table.
  per_row <- multipliers[match(exponent, codes)]

  to_scale <- which(!is.na(per_row) & per_row != 0)
  amount[to_scale] <- amount[to_scale] * per_row[to_scale]

  # Assign 0 directly instead of multiplying, so the result is 0 even when
  # the recorded value is missing.
  amount[which(per_row == 0)] <- 0
  amount
}

dat$PROPDMG <- scale_by_exponent(dat$PROPDMG, dat$PROPDMGEXP)
dat$CROPDMG <- scale_by_exponent(dat$CROPDMG, dat$CROPDMGEXP)

# Total property and crop damage per event type.
econPerEvent <- aggregate(
  cbind(PROPDMG, CROPDMG) ~ EVTYPE,
  data = dat,
  FUN = sum,
  na.rm = TRUE
)
names(econPerEvent) <- c("EVTYPE", "PROPDMG", "CROPDMG")

# Rank the event types by crop damage, highest first, and renumber the rows.
econPerEventSort <- econPerEvent[
  order(econPerEvent$CROPDMG, decreasing = TRUE),
]
row.names(econPerEventSort) <- NULL

In the Results section, the 20 weather events with the greatest economic consequences are plotted.

Results

Harmful weather events

The following plot shows the 20 most harmful weather events.

# Build a double-y-axis plot of fatalities and injuries for the first 20 rows
# of `harmPerEventSort`. Returns the object created by doubleYScale().
makeplot_harm <- function(){

  library(lattice)
  library(latticeExtra)

  # First 20 rows of the ranked table, and their event names as axis labels.
  top_events <- harmPerEventSort[1:20, ]
  event_labels <- as.character(top_events$EVTYPE)
  event_rank <- 1:20

  # Fatalities; this panel carries the x-axis labels.
  fatalities_panel <- xyplot(
    FATALITIES ~ event_rank,
    data = top_events,
    type = "b",
    pch = 1,
    ylab = "fatalities",
    xlim = c(1, 20),
    xlab = "type of event",
    scales = list(
      x = list(rot = c(45, 0), labels = event_labels, tick.number = 20)
    )
  )

  # Injuries.
  injuries_panel <- xyplot(
    INJURIES ~ event_rank,
    data = top_events,
    type = "b",
    pch = 2,
    ylab = "injuries",
    xlim = c(1, 20),
    xlab = "type of event"
  )

  # Overlay both panels, each with its own y axis.
  doubleYScale(
    fatalities_panel,
    injuries_panel,
    add.ylab2 = TRUE,
    under = TRUE,
    text = c("fatalities", "injuries"),
    columns = 2,
    type = c("l", "l")
  )
}
plot_harm <- makeplot_harm()
print(plot_harm)

As indicated in the plot, the tornado is the most harmful weather event, in fatalities as well as in injuries.

Weather events with the greatest economic consequences

The following plot shows the 20 weather events with the greatest economic consequences.

# Build a double-y-axis plot of property and crop damage for the first 20 rows
# of `econPerEventSort`. Returns the object created by doubleYScale().
makeplot_econ <- function(){

  library(lattice)
  library(latticeExtra)

  # First 20 rows of the ranked table, and their event names as axis labels.
  top_events <- econPerEventSort[1:20, ]
  event_labels <- as.character(top_events$EVTYPE)
  event_rank <- 1:20

  # Property damage; this panel carries the x-axis labels.
  propdmg_panel <- xyplot(
    PROPDMG ~ event_rank,
    data = top_events,
    type = "b",
    pch = 1,
    ylab = "proddmg [USD]",
    xlim = c(1, 20),
    xlab = "type of event",
    scales = list(
      x = list(rot = c(45, 0), labels = event_labels, tick.number = 20)
    )
  )

  # Crop damage.
  cropdmg_panel <- xyplot(
    CROPDMG ~ event_rank,
    data = top_events,
    type = "b",
    pch = 2,
    ylab = "corpdmg [USD]",
    xlim = c(1, 20),
    xlab = "type of event"
  )

  # Overlay both panels, each with its own y axis.
  doubleYScale(
    propdmg_panel,
    cropdmg_panel,
    add.ylab2 = TRUE,
    under = TRUE,
    text = c("proddmg", "corpdmg"),
    columns = 2,
    type = c("l", "l")
  )
}
plot_econ <- makeplot_econ()
print(plot_econ)

As indicated in the plot, drought and flood are the weather events with the greatest economic consequences. Which weather event has the greatest economic consequence depends on the type of damage considered (PROPDMG or CROPDMG):

  • in case of PROPDMG it is flood.
  • in case of CROPDMG it is drought.