1.Synopsis

This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.

The events in the database start in the year 1950 and end in November 2011. The basic goal of this assignment is to explore the NOAA Storm Database and answer some basic questions about severe weather events. The data analysis addresses two key aspects: first, population health, measured in terms of fatalities and injuries; and second, economic consequences, measured in terms of property damage and crop damage.

Our analysis is done using the R statistical package and shows that, across the United States, TORNADO and EXCESSIVE HEAT had the major impact on fatalities, and TORNADO was the major contributor to injuries. Events such as FLOOD, HURRICANE and TORNADO had the maximum economic impact on property damage, and DROUGHT had the major impact on crop damage. Our recommendation is to implement early warning systems for monitoring the key events that are consequential for health and economic impact.

2.Data Processing

A.Data Source: link:https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2

B.Data Dictionary-National Weather Service Storm Data:
link:https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2Fpd01016005curr.pdf

C.National Climatic Data Center Storm Events FAQ link:https://d396qusza40orc.cloudfront.net/repdata%2Fpeer2_doc%2FNCDC%20Storm%20Events-FAQ%20Page.pdf

Description of Data Transformation:

-Raw data from the above source is transformed for our analysis by summarizing the data by Event Type
-The top 10 events causing the maximum damage are considered for the analysis
-Further refinement can be done to deep dive into each event type, its trend and its impact over the years

Loading the CSV file to R

#' Load the NOAA storm data set, downloading and caching it as needed.
#'
#' If the RDS cache exists it is read and returned immediately. Otherwise the
#' compressed CSV is downloaded (only when it is not already on disk), parsed
#' with read.csv(), written to the cache and returned.
#'
#' @param url URL of the bzip2-compressed CSV file.
#' @param archive Local path where the downloaded archive is stored.
#' @param cache Local path of the RDS cache of the parsed data.
#' @return A data.frame with the contents of the storm data CSV.
loadData <- function(
    url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    archive = "StormData.csv.bz2",
    cache = "raw_data.rds") {
  # Check the same file name that saveRDS() writes below, and return early so
  # the expensive download/parse path is skipped on a cache hit.
  if (file.exists(cache)) {
    return(readRDS(cache))
  }

  # Only fetch the archive when it is not already present locally.
  if (!file.exists(archive)) {
    download.file(url, archive, method = "curl")
  }

  # read.csv() reads the bzip2-compressed file directly.
  df <- read.csv(archive)
  saveRDS(df, cache)

  df
}

# Load the storm data via loadData() and keep it in raw_data for the analysis code.
raw_data <- loadData()

2A-Data Analysis for FATALITIES by Event Type

library(dplyr)

# Total fatalities per event type; missing counts are ignored in the sum.
Data_fatalities_aggregate <- aggregate(
  list(FATALITIES = raw_data$FATALITIES),
  by = list(EVTYPE = raw_data$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)

# Order event types from the most to the fewest fatalities.
Data_fatalities_descending <- arrange(Data_fatalities_aggregate, desc(FATALITIES))

library(sqldf)

# sqldf() looks the data frame up by name, so nothing has to be attach()ed to
# the search path (which would leave EVTYPE/FATALITIES visible as globals).
# Keep only event types with at least one fatality.
Data_fatalities_aggregate_nonzero <- sqldf("select * from Data_fatalities_descending where FATALITIES>0")

# The ten event types with the most fatalities.
Data_fatalities_aggregate_nonzero_top10 <- head(Data_fatalities_aggregate_nonzero, n = 10)

2B-Data Analysis for INJURIES by Event Type

# Sum the injuries recorded for each event type, ignoring missing counts.
Data_injuries_aggregate <- with(
  raw_data,
  aggregate(
    list(INJURIES = INJURIES),
    by = list(EVTYPE = EVTYPE),
    FUN = sum,
    na.rm = TRUE
  )
)

# Sort the totals from the largest to the smallest.
Data_injuries_descending <- Data_injuries_aggregate %>%
  arrange(desc(INJURIES))

# Keep only the event types that caused at least one injury.
Data_injuries_aggregate_nonzero <- sqldf(
  "select * from Data_injuries_descending where INJURIES>0"
)

# The ten event types with the most injuries.
Data_injuries_aggregate_nonzero_top10 <- head(Data_injuries_aggregate_nonzero, 10)

2C-Data Analysis for PROPERTY DAMAGE by Event Type

library(plyr)
library(dplyr)
library(ggplot2)
library(gridExtra)
# Keep only the columns needed for the economic-impact analysis.
Data_economic_impact <- select(
  raw_data,
  COUNTYNAME, STATE, EVTYPE, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP
)

# Rows that report some property damage. which() discards NA comparisons, so
# a missing PROPDMG cannot introduce all-NA rows into the subset.
propdmg <- Data_economic_impact[which(Data_economic_impact$PROPDMG > 0), ]

# Multiplier for each recognised exponent code (matched case-insensitively).
exp_multiplier <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)

# Vectorised lookup: every row gets PROPDMG times its multiplier. Codes outside
# H/K/M/B have no entry in the lookup, so their amount is NA and is excluded
# by na.rm = TRUE in the aggregation below.
prop_exp <- toupper(as.character(propdmg$PROPDMGEXP))
propdmg$actualpropdmg <- propdmg$PROPDMG * unname(exp_multiplier[prop_exp])

# Total property damage per event type.
propdmgSum <- aggregate(
  list(actualpropdmg = propdmg$actualpropdmg),
  by = list(EVTYPE = propdmg$EVTYPE),
  FUN = sum,
  na.rm = TRUE
)

# Arrange data in descending order
Propdmgsum_descending <- arrange(propdmgSum, desc(actualpropdmg))

# Select top10 events with highest property damage
top_propdmg <- head(Propdmgsum_descending, 10)

2D-Data Analysis for CROP DAMAGE by Event Type

   # Rows that report some crop damage. which() discards NA comparisons, so a
   # missing CROPDMG cannot introduce all-NA rows into the subset.
   cropdmg <- Data_economic_impact[which(Data_economic_impact$CROPDMG > 0), ]

   # Multiplier for each recognised exponent code (matched case-insensitively).
   exp_multiplier <- c(H = 1e2, K = 1e3, M = 1e6, B = 1e9)

   # Vectorised lookup: every row gets CROPDMG times its multiplier. Codes
   # outside H/K/M/B have no entry in the lookup, so their amount is NA and is
   # excluded by na.rm = TRUE in the aggregation below.
   crop_exp <- toupper(as.character(cropdmg$CROPDMGEXP))
   cropdmg$actualcropdmg <- cropdmg$CROPDMG * unname(exp_multiplier[crop_exp])

   library(plyr)

   # Total crop damage per event type.
   cropdmgSum <- aggregate(
     list(actualcropdmg = cropdmg$actualcropdmg),
     by = list(EVTYPE = cropdmg$EVTYPE),
     FUN = sum,
     na.rm = TRUE
   )

   # Arrange data in descending order
   cropdmgsum_descending <- arrange(cropdmgSum, desc(actualcropdmg))

   # Select top10 events with highest crop damage
   top_cropdmg <- head(cropdmgsum_descending, 10)

3.RESULTS by Event Type for Population Health & Economic Consequences

3A.Plotting Fatalities & Injuries

# Bar chart of the ten event types with the most fatalities, tallest bar first.
Fatalities_plot <- ggplot(
  data = Data_fatalities_aggregate_nonzero_top10,
  mapping = aes(x = reorder(EVTYPE, -FATALITIES), y = FATALITIES)
) +
  geom_bar(stat = "identity") +
  labs(title = "Fatalities by Event Type", x = "Event Type") +
  # Rotate the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Bar chart of the ten event types with the most injuries, tallest bar first.
Injuries_plot <- ggplot(
  data = Data_injuries_aggregate_nonzero_top10,
  mapping = aes(x = reorder(EVTYPE, -INJURIES), y = INJURIES)
) +
  geom_bar(stat = "identity") +
  labs(title = "Injuries by Event Type", x = "Event Type") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Show both charts side by side.
grid.arrange(Fatalities_plot, Injuries_plot, ncol = 2)

Key Take-aways from the Analysis

  1. Tornadoes, excessive heat and flash floods are major causes of fatalities
  2. Major injuries are caused by Tornado and Flooding
  3. It is important to invest in an ‘Early warning system’ for safety of the people from Tornado and Flooding

3B.Plotting Property Damage & Crop Damage:

# Bar chart of the ten event types with the highest property damage.
# The totals are in dollars, so they are divided by 1e9 to match the
# "(in Billions)" axis label.
Property_damage_plot <- ggplot(
  top_propdmg,
  aes(x = reorder(EVTYPE, -actualpropdmg), y = actualpropdmg / 1e9)
) +
  geom_bar(stat = "identity") +
  # Rotate the event names so the long labels do not overlap.
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(
    title = "Property Damage by Event Type",
    x = "Event Type",
    y = "Property Damage (in Billions)"
  )

# Bar chart of the ten event types with the highest crop damage, scaled to
# billions in the same way.
Crop_damage_plot <- ggplot(
  top_cropdmg,
  aes(x = reorder(EVTYPE, -actualcropdmg), y = actualcropdmg / 1e9)
) +
  geom_bar(stat = "identity") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) +
  labs(
    title = "Crop Damage by Event Type",
    x = "Event Type",
    y = "Crop Damage (in Billions)"
  )

# Show both charts side by side.
grid.arrange(Property_damage_plot, Crop_damage_plot, ncol = 2)

Key Take-aways from the Analysis

  1. Flood, Hurricane and Tornadoes are major causes of Property Damage
  2. Major crop damages are caused by Drought
  3. Further analysis is to be done to reveal the pattern of the above major causes and its annual impact

4.Conclusion

The raw weather events or non-events contain patterns which can be used to reduce the number of events to the official forty-eight categories enumerated in the National Weather Service document provided with the course project instructions. We have found that FLOOD, HURRICANE and TORNADO had the maximum economic impact on property damage and DROUGHT had the major impact on crop damage. Further analysis can be done by individually studying the trend and its impact for each event type.