1.Synopsis

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

The events in the database start in the year 1950 and end in November 2011. The basic goal of this assignment is to explore the NOAA Storm Database and answer some basic questions about severe weather events. The data analysis must address the following questions:

Across the United States, which types of events (as indicated in the EVTYPE variable) are most harmful with respect to population health?

Across the United States, which types of events have the greatest economic consequences?

2.Data Processing

Source: website:“https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2” We use a load function (loadData) to load the data into the session.

Description of Data Transformation:

-Raw data is transformed for our analysis by summarizing the data by Event Type -The top 10 events causing the maximum damage are considered for the analysis, following the Pareto principle -Further refinement can be done to deep dive into each event type, its trend and impact over the years

# Load the NOAA storm data set as a data frame.
#
# Look-up order:
#   1. "raw_data.rds" in the working directory (cache written by a previous
#      call) is read and returned directly.
#   2. Otherwise "StormData.csv.bz2" is downloaded if it is not already
#      present, parsed with read.csv(), cached to "raw_data.rds" and returned.
#
# Takes no arguments. Returns the data frame produced by read.csv().
# Side effects: may create "StormData.csv.bz2" and "raw_data.rds" in the
# working directory.
loadData <- function() {
  cache_file <- "raw_data.rds"
  archive_file <- "StormData.csv.bz2"
  source_url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"

  # Fast path: reuse the parsed copy saved by an earlier run.
  if (file.exists(cache_file)) {
    return(readRDS(cache_file))
  }

  # Only hit the network when the compressed CSV is not on disk yet.
  if (!file.exists(archive_file)) {
    download.file(source_url, archive_file, method = "curl")
  }

  # read.csv() decompresses the .bz2 archive transparently.
  df <- read.csv(archive_file)
  saveRDS(df, cache_file)

  df
}

# Load the storm data once; the analysis sections below all read this data frame.
raw_data <- loadData()

2A-Data Analysis for FATALITIES by Event Type

library(dplyr)

# Total fatalities per event type; NA counts are ignored by sum(na.rm = TRUE).
Data_fatalities_aggregate <- aggregate(
  list(FATALITIES = raw_data$FATALITIES),
  by = list(EVTYPE = raw_data$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)

# Event types ordered from the highest fatality total to the lowest.
Data_fatalities_descending <- arrange(Data_fatalities_aggregate, desc(FATALITIES))

# sqldf is also used by the injuries analysis further down.
library(sqldf)

# No attach() here: sqldf finds the data frame by the name used in the query,
# and attaching would leave copies of EVTYPE/FATALITIES on the search path.
# Keep only event types that caused at least one fatality.
Data_fatalities_aggregate_nonzero <- sqldf("select * from Data_fatalities_descending where FATALITIES>0")

# Ten deadliest event types.
Data_fatalities_aggregate_nonzero_top10 <- head(Data_fatalities_aggregate_nonzero, n = 10)

2B-Data Analysis for INJURIES by Event Type

# Sum the reported injuries for every event type, ignoring NA counts.
Data_injuries_aggregate <- aggregate(
  raw_data["INJURIES"],
  by = raw_data["EVTYPE"],
  FUN = sum,
  na.rm = TRUE
)

# Sort so that the event types with the most injuries come first.
Data_injuries_descending <- Data_injuries_aggregate %>%
  arrange(desc(INJURIES))

# Keep only the event types that caused at least one injury.
Data_injuries_aggregate_nonzero <- sqldf(
  "select * from Data_injuries_descending where INJURIES>0"
)

# The ten event types with the highest injury totals.
Data_injuries_aggregate_nonzero_top10 <- head(Data_injuries_aggregate_nonzero, 10)

2C-Data Analysis for PROPERTY DAMAGE by Event Type

library(plyr)
library(dplyr)
library(ggplot2)
library(gridExtra)

# plyr is attached after dplyr here and masks arrange()/desc(), so the dplyr
# verbs used in this section are namespace-qualified.
Data_economic_impact <- dplyr::select(
  raw_data,
  COUNTYNAME, STATE, EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
)

# Keep only events with a positive property-damage amount.
# which() drops rows whose amount is NA instead of turning them into NA rows.
propdmg <- Data_economic_impact[which(Data_economic_impact$PROPDMG > 0), ]

# Multiplier for each recognised exponent code (matched case-insensitively):
# hundreds, thousands, millions, billions.
propdmg_multiplier <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
propdmg_exp <- toupper(as.character(propdmg$PROPDMGEXP))

# Vectorised conversion to absolute amounts. Rows with any other exponent code
# get NA and are excluded from the totals below by na.rm = TRUE. Assigning row
# by row would instead recycle the first computed value over the whole column
# and leave those rows with a bogus amount.
propdmg$actualpropdmg <- propdmg$PROPDMG *
  unname(propdmg_multiplier[match(propdmg_exp, names(propdmg_multiplier))])

# Total property damage per event type.
propdmgSum <- aggregate(
  list(actualpropdmg = propdmg$actualpropdmg),
  by = list(EVTYPE = propdmg$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)

# Arrange data in descending order of total property damage
Propdmgsum_descending <- dplyr::arrange(propdmgSum, dplyr::desc(actualpropdmg))

# Select top10 events with highest property damage
top_propdmg <- head(Propdmgsum_descending, 10)

2D-Data Analysis for CROP DAMAGE by Event Type

   # Keep only events with a positive crop-damage amount.
   # which() drops rows whose amount is NA instead of turning them into NA rows.
   cropdmg <- Data_economic_impact[which(Data_economic_impact$CROPDMG > 0), ]

   # Multiplier for each recognised exponent code (matched case-insensitively):
   # hundreds, thousands, millions, billions.
   cropdmg_multiplier <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)
   cropdmg_exp <- toupper(as.character(cropdmg$CROPDMGEXP))

   # Vectorised conversion to absolute amounts. Rows with any other exponent
   # code get NA and are excluded from the totals below by na.rm = TRUE.
   # Assigning row by row would instead recycle the first computed value over
   # the whole column and leave those rows with a bogus amount.
   cropdmg$actualcropdmg <- cropdmg$CROPDMG *
     unname(cropdmg_multiplier[match(cropdmg_exp, names(cropdmg_multiplier))])

   # plyr masks arrange()/desc() once attached, so the dplyr versions are
   # called explicitly below.
   library(plyr)

   # Total crop damage per event type.
   cropdmgSum <- aggregate(
     list(actualcropdmg = cropdmg$actualcropdmg),
     by = list(EVTYPE = cropdmg$EVTYPE),
     FUN = sum,
     na.rm = TRUE
   )

   # Arrange data in descending order of total crop damage
   cropdmgsum_descending <- dplyr::arrange(cropdmgSum, dplyr::desc(actualcropdmg))

   # Select top10 events with highest crop damage
   top_cropdmg <- head(cropdmgsum_descending, 10)

3.RESULTS by Event Type for Population Health & Economic Consequences

3A.Plotting Fatalities & Injuries

library(ggplot2)
library(gridExtra)

# Bar chart of the top-10 fatality totals, bars ordered from largest to smallest.
Fatalities_plot <- ggplot(
  data = Data_fatalities_aggregate_nonzero_top10,
  mapping = aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(title = "Fatalities by Event Type", x = "Event Type")

# Same layout for the top-10 injury totals.
Injuries_plot <- ggplot(
  data = Data_injuries_aggregate_nonzero_top10,
  mapping = aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)
) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(title = "Injuries by Event Type", x = "Event Type")

# Show both charts side by side.
grid.arrange(Fatalities_plot, Injuries_plot, ncol = 2)

3C.Plotting Property Damage:

    # Bar chart of the top-10 event types by total property damage.
    # actualpropdmg holds un-scaled amounts (PROPDMG times its exponent
    # multiplier), so divide by 1e9 to plot the billions the axis label promises.
    ggplot(top_propdmg, aes(x = reorder(EVTYPE, -actualpropdmg), y = actualpropdmg / 1e9)) +
      geom_bar(stat = "identity") +
      theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
      labs(title = "Property Damage by Event Type") +
      labs(x = "Event Type") +
      labs(y = "Property Damage (in Billions)")

3D.Plotting Crop Damage

     # Bar chart of the top-10 event types by total crop damage.
     # actualcropdmg holds un-scaled amounts (CROPDMG times its exponent
     # multiplier), so divide by 1e9 to plot the billions the axis label promises.
     ggplot(top_cropdmg, aes(x = reorder(EVTYPE, -actualcropdmg), y = actualcropdmg / 1e9)) +
       geom_bar(stat = "identity") +
       theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
       labs(title = "Crop Damage by Event Type") +
       labs(x = "Event Type") +
       labs(y = "Crop Damage (in Billions)")

4.Conclusion

The raw weather event (and non-event) labels contain patterns which can be used to reduce the number of event types to the official forty-eight categories enumerated in the National Weather Service document provided with the course project instructions. We have found that FLOOD, HURRICANE and TORNADO had the maximum economic impact in terms of property damage, and DROUGHT had the major impact on crop damage. Further analysis can be done by individually studying the trend and its impact for each event type.