The basic goal of this assignment is to explore the NOAA Storm Database and answer some basic questions about severe weather events.

Synopsis

This report is divided into two main sections: data processing and results. In the data processing section, the data set is downloaded from its URL and read into R. The data set is then processed and a new variable, total damages, is added. In the results section, plots are produced to answer two basic questions. The first set of plots shows that the most harmful event type with respect to population health (fatalities and injuries) across the US is the tornado. The second set of plots shows that the event types with the greatest economic consequences across the US for crops and property are hail and tornadoes, respectively.

1. Data Processing

1.1 Load packages

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 4.2.2
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(reshape2)
## Warning: package 'reshape2' was built under R version 4.2.2

1.2 Load data file and process data file

url<- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
path<- setwd("C:/Users/user/Desktop/Cousera/5. Reproducible Research/Week 4/Project2")
download.file(url, "stormdata.csv", method = "curl" )
storm_data<- read.csv("stormdata.csv", header = TRUE, sep = ",")
dim(storm_data)
## [1] 902297     37
colnames(storm_data)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

1.3 Select specific columns for data analysis

storm_data <- storm_data[ , c("EVTYPE", "FATALITIES", "INJURIES", "PROPDMG", "CROPDMG", "STATE", "BGN_DATE", "END_DATE")]

1.4 Process data variables

storm_data$BGN_DATE <- as.POSIXct(storm_data$BGN_DATE, format="%m/%d/%Y")
storm_data$END_DATE <- as.POSIXct(storm_data$END_DATE, format="%m/%d/%Y")
storm_data$EVTYPE <- as.factor(storm_data$EVTYPE)
storm_data$STATE <- as.factor(storm_data$STATE)
summary(storm_data)
##                EVTYPE         FATALITIES          INJURIES        
##  HAIL             :288661   Min.   :  0.0000   Min.   :   0.0000  
##  TSTM WIND        :219940   1st Qu.:  0.0000   1st Qu.:   0.0000  
##  THUNDERSTORM WIND: 82563   Median :  0.0000   Median :   0.0000  
##  TORNADO          : 60652   Mean   :  0.0168   Mean   :   0.1557  
##  FLASH FLOOD      : 54277   3rd Qu.:  0.0000   3rd Qu.:   0.0000  
##  FLOOD            : 25326   Max.   :583.0000   Max.   :1700.0000  
##  (Other)          :170878                                         
##     PROPDMG           CROPDMG            STATE       
##  Min.   :   0.00   Min.   :  0.000   TX     : 83728  
##  1st Qu.:   0.00   1st Qu.:  0.000   KS     : 53440  
##  Median :   0.00   Median :  0.000   OK     : 46802  
##  Mean   :  12.06   Mean   :  1.527   MO     : 35648  
##  3rd Qu.:   0.50   3rd Qu.:  0.000   IA     : 31069  
##  Max.   :5000.00   Max.   :990.000   NE     : 30271  
##                                      (Other):621339  
##     BGN_DATE                          END_DATE                     
##  Min.   :1950-01-03 00:00:00.000   Min.   :1986-04-10 00:00:00.00  
##  1st Qu.:1995-04-20 00:00:00.000   1st Qu.:2000-09-01 00:00:00.00  
##  Median :2002-03-18 00:00:00.000   Median :2005-04-30 00:00:00.00  
##  Mean   :1998-12-27 23:37:48.996   Mean   :2004-09-26 04:11:27.33  
##  3rd Qu.:2007-07-28 00:00:00.000   3rd Qu.:2008-08-10 00:00:00.00  
##  Max.   :2011-11-30 00:00:00.000   Max.   :2011-11-30 00:00:00.00  
##                                    NA's   :243411

1.5 Add new variable to dataset

storm_data$TOTALDMG <- storm_data$PROPDMG+storm_data$CROPDMG
str(storm_data)
## 'data.frame':    902297 obs. of  9 variables:
##  $ EVTYPE    : Factor w/ 985 levels "   HIGH SURF ADVISORY",..: 834 834 834 834 834 834 834 834 834 834 ...
##  $ FATALITIES: num  0 0 0 0 0 0 0 0 1 0 ...
##  $ INJURIES  : num  15 0 2 2 2 6 1 0 14 0 ...
##  $ PROPDMG   : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...
##  $ CROPDMG   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ STATE     : Factor w/ 72 levels "AK","AL","AM",..: 2 2 2 2 2 2 2 2 2 2 ...
##  $ BGN_DATE  : POSIXct, format: "1950-04-18" "1950-04-18" ...
##  $ END_DATE  : POSIXct, format: NA NA ...
##  $ TOTALDMG  : num  25 2.5 25 2.5 2.5 2.5 2.5 2.5 25 25 ...

2. Results

2.1 Fatalities

fatalities <- storm_data %>% group_by(EVTYPE) %>% summarise(totalFatalities = sum(FATALITIES)) %>% arrange(desc(totalFatalities)) %>% head(n=10)
View(fatalities)
plot1<-ggplot(fatalities, aes(x = reorder(EVTYPE, -totalFatalities), y = totalFatalities)) + 
  geom_bar(stat="identity", fill="red") +
  ggtitle("Graph: Fatalities by type of catastrophe") + 
  xlab("") +
  ylab("Fatalities") +
  ylim(0,100000)+
  theme(text=element_text(size=10), 
        axis.text.x = element_text(angle=90, hjust=1))

2.2 Injuries

injuries <- storm_data %>% group_by(EVTYPE) %>% summarise(totalInjuries = sum(INJURIES)) %>% arrange(desc(totalInjuries)) %>% head(n=10)
View(injuries)
plot2<-ggplot(injuries, aes(x = reorder(EVTYPE, -totalInjuries), y = totalInjuries) ) + 
  geom_bar(stat="identity", fill="green") + 
  ggtitle("Graph: Injuries by type of catastrophe") + 
  xlab("") +
  ylab("Injuries") +
  ylim(0,100000)+
  theme(text=element_text(size=10),
        axis.text.x = element_text(angle=90, hjust=1))

2.3 Set up multiple plots on a page

grid.arrange(plot1, plot2, ncol=2)

2.4 Economic impact of events

economic <- storm_data %>% select(EVTYPE, PROPDMG, CROPDMG, TOTALDMG) %>% 
  group_by(EVTYPE) %>% 
  summarise(property_damage = sum(PROPDMG)/1000000, crop_damage = sum(CROPDMG)/1000000, total_damage = sum(TOTALDMG)/1000000 ) %>% 
  arrange(desc(total_damage)) %>% head(n=10)

2.5 Property damage

plot3<-ggplot(economic, aes(x = reorder(EVTYPE, -property_damage), y = property_damage) ) + 
  geom_bar(stat="identity", fill="green") + 
  ggtitle("Cost of Property Damage") + 
  xlab("Event type") + 
  ylab("Cost in millions") +
  ylim(0,4)+
  theme(text=element_text(size=10),
        axis.text.x = element_text(angle=90, hjust=1))

2.6 Crop damage

plot4<-ggplot(economic, aes(x = reorder(EVTYPE, -crop_damage), y = crop_damage) ) + 
  geom_bar(stat="identity", fill="red") + 
  ggtitle("Cost of Crop Damage") + 
  xlab("Event type") + 
  ylab("Cost in millions") +
  ylim(0,4)+
  theme(text=element_text(size=10),
        axis.text.x = element_text(angle=90, hjust=1))

2.7 Total Economic damage

plot5<-ggplot(economic, aes(x = reorder(EVTYPE, -total_damage), y = total_damage) ) + 
  geom_bar(stat="identity", fill="blue") + 
  ggtitle("Economic Impact of Events") + 
  xlab("Event type") + 
  ylab("Cost in millions") +
  ylim(0,4)+
  theme(text=element_text(size=10),
        axis.text.x = element_text(angle=90, hjust=1))

2.8 Set up multiple plots on a page

grid.arrange(plot3, plot4, plot5, ncol=3)