1. Synopsis

The basic goal of this assignment is to explore the NOAA Storm Database and answer some basic questions about severe weather events.

The following analysis uses the recorded fatalities, injuries, and property and crop damage to answer two questions:

2. Across the United States, which types of events are most harmful with respect to population health?

2.1. Data Processing

First, download the raw data and read it into R.

#importing library for plot
library(ggplot2)
# Source archive: the NOAA storm database, published as a bzip2-compressed CSV.
Url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# Downloading the data.
# The archive is large, so fetch it only when no local copy exists yet.
# mode = "wb" requests a binary transfer so the compressed bytes are written
# unchanged. Note that the local file keeps a .csv name although its content
# is still bzip2-compressed; read.csv() decompresses it transparently.
if (!file.exists("storm_data.csv")) {
  download.file(Url, destfile = "storm_data.csv", mode = "wb")
}
storm_data <- read.csv("storm_data.csv")
head(storm_data)

2.2. Examining Column Names

# List every column available in the raw storm data frame.
names(storm_data)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

2.3. Data Subsetting

Extracting the columns that are relevant to the health and economic impact of each event.

# Keep only the date, event type, casualty, and damage columns.
storm_event <- storm_data[c(
  "BGN_DATE", "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]
summary(storm_event)
##    BGN_DATE            EVTYPE            FATALITIES          INJURIES        
##  Length:902297      Length:902297      Min.   :  0.0000   Min.   :   0.0000  
##  Class :character   Class :character   1st Qu.:  0.0000   1st Qu.:   0.0000  
##  Mode  :character   Mode  :character   Median :  0.0000   Median :   0.0000  
##                                        Mean   :  0.0168   Mean   :   0.1557  
##                                        3rd Qu.:  0.0000   3rd Qu.:   0.0000  
##                                        Max.   :583.0000   Max.   :1700.0000  
##     PROPDMG         PROPDMGEXP           CROPDMG         CROPDMGEXP       
##  Min.   :   0.00   Length:902297      Min.   :  0.000   Length:902297     
##  1st Qu.:   0.00   Class :character   1st Qu.:  0.000   Class :character  
##  Median :   0.00   Mode  :character   Median :  0.000   Mode  :character  
##  Mean   :  12.06                      Mean   :  1.527                     
##  3rd Qu.:   0.50                      3rd Qu.:  0.000                     
##  Max.   :5000.00                      Max.   :990.000

2.4. Events that contribute to the most injuries and fatalities

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Per event type: total fatalities, total injuries, and their combined sum.
# Inside summarise() the later expression sees the freshly summed columns,
# so adding them directly gives the combined total.
Total_injuries <- storm_event %>%
  group_by(EVTYPE) %>%
  summarise(
    FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES),
    totals = FATALITIES + INJURIES
  )

# Rank event types from most to fewest fatalities and show the top five.
Total_injuries <- arrange(Total_injuries, desc(FATALITIES))
head(Total_injuries, 5)

2.5. Melting/reshaping data so that it is easier to put in bar graph format

Getting the top 5 rows in descending order.

library(reshape)
## Warning: package 'reshape' was built under R version 4.3.2
## 
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
## 
##     rename
# Take the five highest-ranked event types as a plain data frame.
New_data <- Total_injuries %>%
  head(5) %>%
  as.data.frame()
# Reshape to long format for plotting: one row per (event type, measure) pair.
df <- melt(New_data, id.vars = "EVTYPE")
names(df)
## [1] "EVTYPE"   "variable" "value"

2.6. Results

# Grouped bar chart of the reshaped casualty figures for the top five events,
# with event types ordered by decreasing value.
ggplot(df, aes(x = reorder(EVTYPE, -value), y = value, fill = variable)) +
  geom_col(position = "dodge") +
  labs(
    title = "Top 5 US Calamities",
    x = "Event Type",
    y = "Frequency Count"
  ) +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

3. Which types of events have the greatest economic consequences in US?

# Inspect the distinct exponent codes attached to the property damage figures.
unique(storm_event[["PROPDMGEXP"]])
##  [1] "K" "M" ""  "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
# Inspect the distinct exponent codes attached to the crop damage figures.
unique(storm_event[["CROPDMGEXP"]])
## [1] ""  "M" "K" "m" "B" "?" "0" "k" "2"

3.1. Subset the dataset on the parameters of interest.

Converting Exponent Columns into Actual Values.

# Translate damage-exponent codes into numeric multipliers.
#
# exp_code: vector of exponent codes (e.g. "K", "m", "5", "", "+").
# Returns a numeric vector of the same length holding the multiplier for
# each code.
#
# Matching is case-insensitive because the raw columns contain lower-case
# variants ("m", "h", "k") that must scale exactly like "M", "H", "K".
# Digit codes are read as powers of ten. Anything else (blank, "+", "-",
# "?") is not in the lookup and gets a neutral multiplier of 1.
exponent_multiplier <- function(exp_code) {
  lookup <- c(
    "0" = 10^0, "1" = 10^1, "2" = 10^2, "3" = 10^3, "4" = 10^4,
    "5" = 10^5, "6" = 10^6, "7" = 10^7, "8" = 10^8, "9" = 10^9,
    "H" = 10^2, "K" = 10^3, "M" = 10^6, "B" = 10^9
  )
  code <- toupper(trimws(as.character(exp_code)))
  # Indexing a named vector by an unknown (or empty) name yields NA.
  multiplier <- unname(lookup[code])
  multiplier[is.na(multiplier)] <- 10^0
  multiplier
}

# Map property damage alphanumeric exponents to numeric values.
storm_event$PROPDMGEXP_number <- exponent_multiplier(storm_event$PROPDMGEXP)

# Map crop damage alphanumeric exponents to numeric values, using the same
# table so both damage columns are scaled consistently.
storm_event$CROPDMGEXP_number <- exponent_multiplier(storm_event$CROPDMGEXP)

# Cost of damaged property and crops: reported figure times its multiplier.
storm_event$PropCost <- storm_event$PROPDMG * storm_event$PROPDMGEXP_number
storm_event$CropCost <- storm_event$CROPDMG * storm_event$CROPDMGEXP_number

3.2. Events that contribute to economic damage

Calculating the total cost of property and crop damage.

# Per event type: total property cost, total crop cost, and their combined sum.
# Inside summarise() the later expression sees the freshly summed columns,
# so adding them directly gives the combined total.
TotalCost <- storm_event %>%
  group_by(EVTYPE) %>%
  summarise(
    PropCost = sum(PropCost),
    CropCost = sum(CropCost),
    total_cost = PropCost + CropCost
  )

# Rank event types from the highest to the lowest combined cost.
TotalCost <- arrange(TotalCost, desc(total_cost))

3.3. Melting data so that it is easier to put in bar graph format

# Take the five costliest event types as a plain data frame.
New_cost <- TotalCost %>%
  head(5) %>%
  as.data.frame()
# Reshape to long format for plotting: one row per (event type, measure) pair.
df_cost <- melt(New_cost, id.vars = "EVTYPE")
names(df_cost)
## [1] "EVTYPE"   "variable" "value"

3.4. Results

# Grouped bar chart of the reshaped cost figures for the top five events,
# with event types ordered by decreasing value.
# The y-axis shows damage cost (PROPDMG/CROPDMG scaled by their exponent
# multipliers), not a count of events, so it is labelled as a cost.
ggplot(df_cost, aes(x = reorder(EVTYPE, -value), y = value, fill = variable)) +
  geom_bar(stat = "identity", position = "dodge") +
  ylab("Damage Cost (USD)") +
  xlab("Event Type") +
  ggtitle("Top 5 US Storm Events causing Economic Consequences") +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )