PAII_template.Rmd Prepared by Marcel Merchat October 2, 2015

Title: Analysis of Severe Weather Events for the United States

Synopsis:

    The purpose of this project is to provide a reproducible data analysis
    for government officials who need to prepare for severe weather events.
    The raw data was processed to prepare a gross analysis of the risk for
    fatalities and the risk for property damage.
    

RAW DATA:

    The raw data consists of 902,297 records of weather events since 1950
    with 37 variables recorded for each event. The important fields for this
    analysis are the "EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", and
    "PROPDMGEXP" columns of data. 

    The data contains five fields that were used in this analysis:

    "EVTYPE"     The type of weather event such as a "TORNADO"
    "FATALITIES" The number of deaths attributed to each event
    "INJURIES"   The number of reported injuries for each event
    "PROPDMG"    The significant figures of the property damage estimate
    "PROPDMGEXP" A code for the multiplier for the "PROPDMG" field
                 The damage in dollars is the product of the "PROPDMG"
                 field and the multiplier that corresponds to the
                 "PROPDMGEXP" field; this analysis reports the result
                 in millions of dollars.
                 

Loading libraries:

## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:plyr':
## 
##     here
## The following object is masked from 'package:base':
## 
##     date

Preprocessing Data

##      Load the raw storm records from the CSV file in the working directory.
file_name <- "repdata-data-StormData.csv"

storms <- read.csv(file_name)

##      Keep only the columns of interest, naming them directly.
##      BGN_DATE and BGN_TIME become DATE and TIME; the other columns keep
##      their names. The three numeric measures are coerced the same way
##      (via character) so that the comparisons below never run on a
##      non-numeric column.
stormdata <- data.frame(
        DATE       = as.character(storms[, "BGN_DATE"]),
        TIME       = as.character(storms[, "BGN_TIME"]),
        COUNTY     = storms[, "COUNTY"],
        STATE      = storms[, "STATE"],
        EVTYPE     = storms[, "EVTYPE"],
        LENGTH     = storms[, "LENGTH"],
        WIDTH      = storms[, "WIDTH"],
        MAG        = storms[, "MAG"],
        FATALITIES = as.numeric(as.character(storms[, "FATALITIES"])),
        INJURIES   = as.numeric(as.character(storms[, "INJURIES"])),
        PROPDMG    = as.numeric(as.character(storms[, "PROPDMG"])),
        PROPDMGEXP = storms[, "PROPDMGEXP"])

##      Drop the columns that the analysis does not use.
deletedcolumns <- c("TIME", "LENGTH", "WIDTH")
df1 <- stormdata[, !(names(stormdata) %in% deletedcolumns)]

##      A record is "severe" when it reports at least one fatality, at least
##      one injury, or a positive property damage figure. which() discards
##      rows whose test evaluates to NA; plain logical subsetting would
##      instead return them as rows filled with NA.
has_impact <- df1[, "FATALITIES"] > 0 |
        df1[, "INJURIES"] > 0 |
        df1[, "PROPDMG"] > 0
severe <- df1[which(has_impact), ]

##      Parse the leading month/day/year part of the date text.
severe[, "DATE"] <- as.Date(severe[, "DATE"], format = "%m/%d/%Y")

RESULTS

Which types of events are most harmful to health in the United States?

The five types of events that caused the most fatalities are displayed in the bar graph below:

##      Sort the observed records so that the deadliest events come first
        severe <- arrange(severe, -FATALITIES)

 ##     Find the total fatalities for each type of event since 1950.
 ##     The columns are addressed by name so that the totals do not depend
 ##     on the position of the columns in the data frame.
        risks <- tapply(severe[, "FATALITIES"], severe[, "EVTYPE"], sum)

 ##     Largest totals first; na.last = NA removes event types whose
 ##     total is NA.
        risks <- sort(risks, decreasing = TRUE, na.last = NA)

 ##     Plot the five largest totals (fewer if fewer event types remain,
 ##     so that no NA bars are drawn).
        top_risks <- risks[seq_len(min(5, length(risks)))]

        harm_plot <- barplot(height = top_risks, beside = FALSE, width = 5,
                             axes = FALSE, axisnames = FALSE,
                             ylab = "Fatalities",
                             main = "Harmful Weather Events Since 1950")
        axis(2)
 ##     barplot() returns the bar midpoints, used here to place the labels
        axis(1, at = harm_plot, labels = names(top_risks), cex.axis = 0.6)

The individual events with the most fatalities are listed in the table below:

        ## Collect the first 50 records of the sorted data, keeping the
        ## state, event type, fatalities and injuries, under the column
        ## headings used in the printed table.
        df_harmful <- data.frame(
                STATE      = severe[1:50, "STATE"],
                EVENT      = severe[1:50, "EVTYPE"],
                FATALITIES = severe[1:50, "FATALITIES"],
                INJURIES   = severe[1:50, "INJURIES"])

        ## Draw the first 15 of those records as a table graphic.
        grid.table(head(df_harmful, 15))

Which types of events have the greatest economic consequences in the USA?

 ##     Compute the property damage of every record in millions of dollars
 ##     and store it in a new "dmg_millions" column.
 ##
 ##     The amount in the "PROPDMG" field is multiplied by the factor derived
 ##     from the "PROPDMGEXP" field. We use the multiplier described by
 ##     Tobias Rosenberger on the RPubs website (July 26, 2015)

 ##     Codes for PROPDMGEXP:
 ##     "", "-", "?", "+",
 ##     "0", "1", "2", "3", "4", "5", "6", "7", "8",
 ##     "H","h", "K", "k", "M", "m", "B", "b")

 ##     Corresponding Multiplier (in dollars):
 ##     1, 0, 0, 0,
 ##     1, 10, 100, 1000, 10000, 1e+05, 1e+06, 1e+07, 1e+08,
 ##     100, 100, 1000, 1000, 1e+06, 1e+06, 1e+09, 1e+09

 ##     The same table as two parallel vectors, with each multiplier
 ##     already divided by one million so that the product is in millions
 ##     of dollars.
        propdmgexp_codes <- c(
                "", "-", "?", "+",
                "0", "1", "2", "3", "4", "5", "6", "7", "8",
                "H", "h", "K", "k", "M", "m", "B", "b")
        millions_per_unit <- c(
                0.000001, 0, 0, 0,
                0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100,
                0.0001, 0.0001, 0.001, 0.001, 1, 1, 1000, 1000)

 ##     Look up the multiplier of every record. match() returns NA for a
 ##     code that is missing or not in the table, which leaves the damage
 ##     of that record as NA.
        exp_code <- as.character(severe[, "PROPDMGEXP"])
        code_index <- match(exp_code, propdmgexp_codes)

        severe[, "dmg_millions"] <-
                millions_per_unit[code_index] * severe[, "PROPDMG"]

 ##     The codes "-", "?" and "+" mean zero damage whatever "PROPDMG" holds
        severe[exp_code %in% c("-", "?", "+"), "dmg_millions"] <- 0

 ##     Sort the records so that the largest damages are listed first.
        severe <- arrange(severe, -dmg_millions)


 ##     Find the total damages for each type of event since 1950, converted
 ##     from millions to billions of dollars. The columns are addressed by
 ##     name so that the totals do not depend on the position of the
 ##     columns in the data frame.
        damages <- tapply(severe[, "dmg_millions"],
                          severe[, "EVTYPE"], sum) / 1000

 ##     Largest totals first; na.last = NA removes event types whose total
 ##     is NA, which includes any type with a record whose damage is NA.
        damages <- sort(damages, decreasing = TRUE, na.last = NA)

 ##     Plot the four largest totals (fewer if fewer event types remain,
 ##     so that no NA bars are drawn).
        top_damages <- damages[seq_len(min(4, length(damages)))]

        damage_plot <- barplot(height = top_damages, beside = FALSE, width = 5,
                               axes = FALSE, axisnames = FALSE,
                               ylab = "Damages in Billions of Dollars",
                               main = "Severe Economic Weather Events")
        axis(2)
 ##     barplot() returns the bar midpoints, used here to place the labels
        axis(1, at = damage_plot, labels = names(top_damages), cex.axis = 0.55)

Most Damaging Weather Events

   ##   Build and draw the table of the most damaging events

        ## NOTE(review): this vector is assigned but not read by any code
        ## visible after this point - confirm whether it is still needed.
        deletedcolumns <- c("DATE","PROPDMG","PROPDMGEXP")

        ## One row per record of the sorted data, keeping the state, event
        ## type, fatalities and damage in millions, under the column
        ## headings used in the printed table.
        severe_table1 <- data.frame(
                STATE                   = severe[, "STATE"],
                EVENT                   = severe[, "EVTYPE"],
                FATALITIES              = severe[, "FATALITIES"],
                Dollar_Loss_in_Millions = severe[, "dmg_millions"])

        ## Keep the first 15 rows and draw them as a table graphic.
        dftable <- severe_table1[seq_len(15), ]

        grid.table(dftable)