Analysis of Population Health and Economic Impacts of Weather Events 1950 - 2011

synopsis that describes and summarizes the data analysis in less than 10 sentences

Data Processing

describes how the data were loaded into R and processed for analysis

Data for this analysis were acquired from the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. It is available as a comma-separated-value (CSV) file compressed with the bzip2 algorithm. The data were downloaded from the Coursera web site at the following URL:

• Storm Data [47Mb]
https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2

The bzfile function was used to connect to the file, and the read.table function was then used to read the rows of data into a data frame. An initial extract of 10,000 rows was reviewed to gain a better sense of the data. Based on that initial review, the colClasses argument of read.table was used to extract only those columns that appeared to be relevant to the desired analysis. This extract included all rows from the source file. Appropriate column names were then assigned to the data frame.

# setwd("~/Desktop/Coursera/Reproducible 
# Research/Week4/Repository/ReproducibleResearchProject2")
# Path to the bzip2-compressed storm data file, relative to the working
# directory.
storm_file <- "repdata_data_StormData.csv.bz2"

# Preview: read the header plus the first 10,000 rows to inspect the columns.
# An unopened connection is passed so that read.table() opens it, reads from
# the top of the file, and closes it again when it returns.
StormData10000 <- read.table(bzfile(storm_file), header = TRUE, sep = ",",
                             quote = "\"", nrows = 10000)
classes <- vapply(StormData10000, function(column) class(column)[1],
                  character(1))

# Column classes for the full extract: "NULL" drops a column. Of the 37
# columns in the file, only the nine named in the colnames() call below are
# kept.
column_classes <- rep("NULL", 37)
column_classes[c(2, 7, 8, 26, 28)] <- "factor"
column_classes[c(23, 24, 25, 27)] <- "numeric"

# Full extract. A fresh connection is used on purpose: an already-open
# connection keeps its read position, so reading from the preview's connection
# again would resume after the preview rows, silently dropping them and
# treating the next data row as the header.
StormDataExtract <- read.table(bzfile(storm_file), header = TRUE, sep = ",",
                               quote = "\"", colClasses = column_classes,
                               blank.lines.skip = FALSE)

colnames(StormDataExtract) <- c("BGN_DATE", "STATE", "EVTYPE", "FATALITIES",
                                "INJURIES", "PROPDMG", "PROPDMGEXP",
                                "CROPDMG", "CROPDMGEXP")

After extracting the data, the property and crop damage values were multiplied by the appropriate order of magnitude based upon the value specified in the corresponding exponent field. For example, if the exponent value was “K” or “k” the damage amount was multiplied by 1,000. This was done to facilitate analysis of the economic impact of weather-event-related damage.

nbrrows <- nrow(StormDataExtract)

# Expand damage amounts to full dollar values using their exponent codes.
#
# amount:   numeric vector of damage amounts; NA is treated as 0.
# exponent: factor or character vector of exponent codes, same length as
#           amount. "K"/"k" multiplies by 1e3, "M"/"m" by 1e6 and "B"/"b" by
#           1e9; any other code (including blank or NA) leaves the amount
#           unscaled.
# Returns a numeric vector the same length as amount.
expand_damage <- function(amount, exponent) {
  multiplier_by_code <- c(K = 1e3, M = 1e6, B = 1e9)
  # Looking up by name yields NA for codes that are not K, M or B.
  multiplier <- unname(multiplier_by_code[toupper(as.character(exponent))])
  multiplier[is.na(multiplier)] <- 1
  amount[is.na(amount)] <- 0
  amount * multiplier
}

# Vectorised over the whole column, so this also works on an empty extract.
PropDamage <- expand_damage(StormDataExtract$PROPDMG,
                            StormDataExtract$PROPDMGEXP)
CropDamage <- expand_damage(StormDataExtract$CROPDMG,
                            StormDataExtract$CROPDMGEXP)

Because the precise date and time of casualties and property damage were not readily available, the event beginning date was used to assign a consistent event year to each event. The begin date was first checked for missing values; none were found. The year was then extracted from each parsed begin date and converted to a four-digit calendar year.

The cbind function was used to combine the original data, the expanded property and crop damage values, and the event year into a new data frame for analysis.

sum(is.na(StormDataExtract$BGN_DATE))

## [1] 0

# Parse the begin date. A fixed time zone is used so that no wall-clock time
# can fall into a local daylight-saving gap and come back as NA.
StormDatelt <- strptime(as.character(StormDataExtract$BGN_DATE),
                        "%m/%d/%Y %H:%M:%S", tz = "UTC")

# The year field of a POSIXlt value counts years since 1900 (it is not a
# two-digit year), so adding 1900 gives the calendar year for every date.
StormYear <- StormDatelt$year + 1900

# The check above only covers missing input strings; also fail clearly if a
# date was present but could not be parsed.
if (anyNA(StormYear)) {
  stop("BGN_DATE contains values that could not be parsed as ",
       "month/day/year hour:minute:second", call. = FALSE)
}

nbrrows <- length(StormYear)

BeginYear <- as.data.frame(StormYear)
StormData <- cbind.data.frame(StormDataExtract, PropDamage, CropDamage,
                              StormYear)

Fatalities, injuries, property damage, and crop damage were summarized by year in order to facilitate analysis of the annual population health and economic impacts of weather events.

# Yearly totals, one entry per event year, for each health and damage measure.
FatalitiesByYear <- with(StormData, tapply(FATALITIES, StormYear, sum))
InjuriesByYear <- with(StormData, tapply(INJURIES, StormYear, sum))
PropertyDamageByYear <- with(StormData, tapply(PropDamage, StormYear, sum))
CropDamageByYear <- with(StormData, tapply(CropDamage, StormYear, sum))

a section titled “Results” where the main results are presented which types of events are most harmful to population health which types of events have the greatest economic consequences

Results

figure(s) have descriptive captions

# One row per damage type, one column per event year.
PlotData <- rbind(CropDamageByYear,
                  PropertyDamageByYear)
# PlotData94<-PlotData[,-(1:43)]
# PlotData2<-PlotData94/1000000000
# Convert dollars to billions so the values match the y-axis label.
PlotData2 <- PlotData / 1e9

# Widen the right margin so the secondary axis title at line 3 is not clipped.
old_par <- par(mar = c(5, 4, 4, 5) + 0.1)

# Stacked bars of yearly damage; barplot() returns the bar midpoints.
bar_mid <- barplot(as.matrix(PlotData2), args.legend = list(x = "topleft"),
                   legend.text = c("Crop", "Property"),
                   col = c("green", "blue"),
                   main = "Total Damage and Fatalities by Year 1950-2011",
                   xlab = "Year Event Began",
                   ylab = "Total Damage in USD Billions")
bar_usr <- par("usr")

# Overlay yearly fatalities on a secondary axis. The line is drawn at the bar
# midpoints with the bar plot's exact x range, so each point sits over the bar
# for its year.
par(new = TRUE)
plot(bar_mid, FatalitiesByYear, type = "l", axes = FALSE,
     xlab = "", ylab = "", col = "red", lwd = 2,
     xlim = bar_usr[1:2], xaxs = "i")
axis(4, col = "red", col.axis = "red")
mtext("Total Fatalities", side = 4, line = 3, col = "red")

# Restore the margins that were in effect before this figure.
par(old_par)

library(lattice)

# Total fatalities per event type, largest first; chart the top twelve.
FatalitiesByEvent <- sort(
  with(StormData, tapply(FATALITIES, EVTYPE, sum)),
  decreasing = TRUE
)
barchart(
  FatalitiesByEvent[1:12],
  col = palette(),
  main = "Twelve Most Destructive Weather Events Based on Fatalities",
  xlab = "Total Fatalities"
)

# Total property damage per event type in billions of dollars, largest first;
# chart the top twelve.
PropertyDamageByEvent <- sort(
  with(StormData, tapply(PropDamage, EVTYPE, sum)) / 1000000000,
  decreasing = TRUE
)
barchart(
  PropertyDamageByEvent[1:12],
  col = palette(),
  main = "Twelve Most Destructive Weather Events Based on Property Damage",
  xlab = "Total Damage in Billions"
)

NOAA Storm Database

Sai Aditya

February 13, 2025

Reproducible Research Project 2

Introduction

Synopsis

Analysis of Population Health and Economic Impacts of Weather Events 1950 - 2011

Data Processing

Results