The basic goal of this assignment is to explore the NOAA Storm Database and answer some basic questions about severe weather events.
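The following analysis investigates which types of events cause the most fatalities and injuries, and which have the greatest economic consequences (property and crop damage).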
First, download and read the raw data.
#importing library for plot
library(ggplot2)
# Location of the NOAA storm database (a bzip2-compressed CSV).
Url <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# Local copy of the archive. The name is kept as "storm_data.csv" even though
# the content is bzip2-compressed; read.csv() reads it through a connection
# that decompresses it transparently.
storm_file <- "storm_data.csv"
# Download only when no local copy exists, so re-running the analysis does not
# fetch the large archive again. mode = "wb" requests a binary transfer so the
# compressed file is written unmodified.
if (!file.exists(storm_file)) {
  download.file(Url, destfile = storm_file, mode = "wb")
}
storm_data <- read.csv(storm_file)
# Quick look at the first rows and at the available columns.
head(storm_data)
colnames(storm_data)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
Extracting the relevant columns: event date and type, fatalities, injuries, and property and crop damage.
# Columns kept for the analysis: event date and type, casualty counts, and the
# property/crop damage amounts together with their exponent codes.
relevant_cols <- c(
  "BGN_DATE", "EVTYPE", "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP"
)
storm_event <- storm_data[, relevant_cols]
summary(storm_event)
## BGN_DATE EVTYPE FATALITIES INJURIES
## Length:902297 Length:902297 Min. : 0.0000 Min. : 0.0000
## Class :character Class :character 1st Qu.: 0.0000 1st Qu.: 0.0000
## Mode :character Mode :character Median : 0.0000 Median : 0.0000
## Mean : 0.0168 Mean : 0.1557
## 3rd Qu.: 0.0000 3rd Qu.: 0.0000
## Max. :583.0000 Max. :1700.0000
## PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## Min. : 0.00 Length:902297 Min. : 0.000 Length:902297
## 1st Qu.: 0.00 Class :character 1st Qu.: 0.000 Class :character
## Median : 0.00 Mode :character Median : 0.000 Mode :character
## Mean : 12.06 Mean : 1.527
## 3rd Qu.: 0.50 3rd Qu.: 0.000
## Max. :5000.00 Max. :990.000
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Total fatalities and injuries per event type, ranked by fatalities.
Total_injuries <- storm_event %>%
  group_by(EVTYPE) %>%
  summarise(
    FATALITIES = sum(FATALITIES),
    INJURIES = sum(INJURIES),
    # FATALITIES and INJURIES below are the per-group sums created just above.
    totals = FATALITIES + INJURIES
  )
# Sort in descending order of fatalities.
Total_injuries <- Total_injuries[order(-Total_injuries[["FATALITIES"]]), ]
head(Total_injuries, 5)
Getting the top 5 event types, sorted by fatalities in descending order.
library(reshape)
## Warning: package 'reshape' was built under R version 4.3.2
##
## Attaching package: 'reshape'
## The following object is masked from 'package:dplyr':
##
## rename
# First five rows of Total_injuries, converted to a plain data frame.
New_data <- Total_injuries %>%
  head(5) %>%
  as.data.frame()
# Reshape to long format (one row per event type and measure) for plotting.
df <- melt(New_data, id.vars = "EVTYPE")
colnames(df)
## [1] "EVTYPE" "variable" "value"
# Grouped bar chart of the measures in df, one group of bars per event type,
# with event types ordered by decreasing value.
ggplot(df, aes(x = reorder(EVTYPE, -value), y = value, fill = variable)) +
  geom_bar(stat = "identity", position = "dodge") +
  xlab("Event Type") +
  ylab("Frequency Count") +
  ggtitle("Top 5 US Calamities") +
  theme(
    # Centre the title and tilt the x labels so long event names stay readable.
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
# Distinct exponent codes used for property damage.
unique(storm_event[["PROPDMGEXP"]])
## [1] "K" "M" "" "B" "m" "+" "0" "5" "6" "?" "4" "2" "3" "h" "7" "H" "-" "1" "8"
# Distinct exponent codes used for crop damage.
unique(storm_event[["CROPDMGEXP"]])
## [1] "" "M" "K" "m" "B" "?" "0" "k" "2"
Converting Exponent Columns into Actual Values.
# Map property damage alphanumeric exponents to numeric multipliers.
# The codes are upper-cased first: the data contains lower-case "h" and "m"
# next to "H" and "M", and without case folding those rows would fall through
# to .default and be multiplied by 1 instead of 10^2 / 10^6.
# Any code not listed below (including the empty string and "?") is handled by
# .default and gets a multiplier of 1.
storm_event$PROPDMGEXP_number <- recode(toupper(storm_event$PROPDMGEXP),
  "-" = 10^0,
  "+" = 10^0,
  "0" = 10^0,
  "1" = 10^1,
  "2" = 10^2,
  "3" = 10^3,
  "4" = 10^4,
  "5" = 10^5,
  "6" = 10^6,
  "7" = 10^7,
  "8" = 10^8,
  "9" = 10^9,
  "H" = 10^2,
  "K" = 10^3,
  "M" = 10^6,
  "B" = 10^9,
  .default = 10^0
)
# Map crop damage alphanumeric exponents to numeric multipliers.
# The codes are upper-cased first: the data contains lower-case "k" and "m",
# which would otherwise fall through to .default and be multiplied by 1
# instead of 10^3 / 10^6.
# Digit codes are treated as powers of ten, the same way the property damage
# mapping treats them, so that both cost columns use one convention.
# Any code not listed below (including the empty string) is handled by
# .default and gets a multiplier of 1.
storm_event$CROPDMGEXP_number <- recode(toupper(storm_event$CROPDMGEXP),
  "?" = 10^0,
  "0" = 10^0,
  "1" = 10^1,
  "2" = 10^2,
  "3" = 10^3,
  "4" = 10^4,
  "5" = 10^5,
  "6" = 10^6,
  "7" = 10^7,
  "8" = 10^8,
  "9" = 10^9,
  "K" = 10^3,
  "M" = 10^6,
  "B" = 10^9,
  .default = 10^0
)
# Cost of damaged property and crops: each reported amount multiplied by the
# numeric multiplier derived from its exponent code.
storm_event[["PropCost"]] <-
  storm_event[["PROPDMG"]] * storm_event[["PROPDMGEXP_number"]]
storm_event[["CropCost"]] <-
  storm_event[["CROPDMG"]] * storm_event[["CROPDMGEXP_number"]]
Calculating the total economic cost (property and crop damage) per event type.
# Total property and crop cost per event type, ranked by combined cost.
TotalCost <- storm_event %>%
  group_by(EVTYPE) %>%
  summarise(
    PropCost = sum(PropCost),
    CropCost = sum(CropCost),
    # PropCost and CropCost below are the per-group sums created just above.
    total_cost = PropCost + CropCost
  )
# Sort in descending order of combined cost.
TotalCost <- TotalCost[order(-TotalCost[["total_cost"]]), ]
# First five rows as a plain data frame, reshaped to long format (one row per
# event type and cost measure) for plotting.
New_cost <- TotalCost %>%
  head(5) %>%
  as.data.frame()
df_cost <- melt(New_cost, id.vars = "EVTYPE")
colnames(df_cost)
## [1] "EVTYPE" "variable" "value"
# Grouped bar chart of the cost measures in df_cost, one group of bars per
# event type, with event types ordered by decreasing value.
# The y axis shows damage amounts rather than counts, so it is labelled as a
# cost instead of "Frequency Count".
ggplot(df_cost, aes(x = reorder(EVTYPE, -value), y = value, fill = variable)) +
  geom_bar(stat = "identity", position = "dodge") +
  xlab("Event Type") +
  ylab("Damage Cost (USD)") +
  ggtitle("Top 5 US Storm Events causing Economic Consequences") +
  theme(
    # Centre the title and tilt the x labels so long event names stay readable.
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )