The goal of the assignment is to explore the NOAA Storm Database and answer some basic questions about the effects of severe weather events. The analysis must determine which types of severe weather events are most harmful with respect to:
1.Population Health - injuries and fatalities.
2.Economic Consequences - property and crops.
The events in the database cover the period from 1950 to November 2011.
1.Download the dataset and point to a destination to avoid desktop short cut.
2.Extract the dataset into a dataframe.
3.Convert to data.table.
4.Changed working directory to “C:/Users/kumi/Desktop/repdata%2Fdata%2FStormData.csv.bz2”
library(data.table)
library(ggplot2)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Fetch the NOAA storm archive (only when it is not already on disk) and load
# it into a data.table for the rest of the analysis.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
# Build the archive path once so the download and the read always agree
stormFile <- file.path("/Users/kumi/Desktop", "repdata%2Fdata%2FStormData.csv.bz2")
# Skip the large download when a copy already exists; mode = "wb" requests a
# binary transfer so the compressed archive is written byte-for-byte
if (!file.exists(stormFile)) {
  download.file(fileUrl, destfile = stormFile, mode = "wb")
}
#Read csv data
# `path` is not used by the code visible here; kept so the set of objects
# created by this step is unchanged
path <- getwd()
# read.csv is handed the .bz2 archive directly, as before
stormDF <- read.csv(stormFile)
#Convert data.frame to data.table
stormDT <- as.data.table(stormDF)
# Quick look at the raw table: its dimensions (rows, then columns)
dim(stormDT)
## [1] 902297 37
# Total number of missing (NA) cells across the whole table
sum(is.na(stormDT))
## [1] 1745947
# Share of all cells that are missing
mean(is.na(stormDT))
## [1] 0.05229737
# Column names available in the raw data
names(stormDT)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
Focus on data where fatalities and injuries occurred.
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.1
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Reduce the table to the columns the analysis needs: the event type, the
# casualty counts, and the damage amounts with their exponent codes
stormDTS <- stormDT %>%
  select(EVTYPE, FATALITIES, INJURIES, PROPDMG, PROPDMGEXP, CROPDMG, CROPDMGEXP)
# Preview the first six rows of the reduced table
head(stormDTS, n = 6L)
## EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO 0 15 25.0 K 0
## 2 TORNADO 0 0 2.5 K 0
## 3 TORNADO 0 2 25.0 K 0
## 4 TORNADO 0 2 2.5 K 0
## 5 TORNADO 0 2 2.5 K 0
## 6 TORNADO 0 6 2.5 K 0
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
# Show the factor levels (distinct exponent codes) of the property-damage
# exponent column
levels(stormDTS$PROPDMGEXP)
## [1] "" "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K"
## [18] "m" "M"
# Show the factor levels (distinct exponent codes) of the crop-damage
# exponent column
levels(stormDTS$CROPDMGEXP)
## [1] "" "?" "0" "2" "B" "k" "K" "m" "M"
#Change PROPDMGEXP alphanumeric exponents to numeric values
# Lookup keyed by the text of the code. Codes that are not listed here
# (blank, "?") receive a multiplier of 1 in exponentMultiplier().
changePROPDMGEXP <- c("-" = 10^0, "+" = 10^0, "0" = 10^0, "1" = 10^1, "2" = 10^2,
                      "3" = 10^3, "4" = 10^4, "5" = 10^5, "6" = 10^6, "7" = 10^7,
                      "8" = 10^8, "9" = 10^9, "H" = 10^2, "K" = 10^3, "M" = 10^6,
                      "B" = 10^9)
#Change CROPDMGEXP alphanumeric exponents to numeric values
changeCROPDMGEXP <- c("?" = 10^0, "0" = 10^0, "K" = 10^3, "M" = 10^6, "B" = 10^9)

# Convert a column of exponent codes into numeric multipliers.
#   codes  - factor or character vector of exponent codes
#   lookup - named numeric vector mapping code text to its multiplier
# Returns an unnamed numeric vector as long as `codes`; codes without an
# entry in `lookup` (including blank and NA) yield 1.
exponentMultiplier <- function(codes, lookup) {
  # Look up by the code's TEXT. as.numeric() on a factor returns its integer
  # level codes, which would index `lookup` by position and pick unrelated
  # entries (e.g. "K", level 17 of PROPDMGEXP, would hit the 17th entry).
  # Upper-casing lets "h", "k" and "m" share the entries for "H", "K" and "M".
  multiplier <- unname(lookup[toupper(as.character(codes))])
  multiplier[is.na(multiplier)] <- 10^0
  multiplier
}

# Replace each code column with its numeric multiplier
stormDTS[, PROPDMGEXP := exponentMultiplier(PROPDMGEXP, changePROPDMGEXP)]
stormDTS[, CROPDMGEXP := exponentMultiplier(CROPDMGEXP, changeCROPDMGEXP)]
library(data.table)
# Cost per record: reported damage amount times its numeric multiplier
PC <- stormDTS[["PROPDMG"]] * stormDTS[["PROPDMGEXP"]]
CC <- stormDTS[["CROPDMG"]] * stormDTS[["CROPDMGEXP"]]
# Rebuild the working table with the two cost vectors placed next to the
# columns they were derived from
stormDTS <- stormDTS[, .(
  EVTYPE, FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP, PC,
  CROPDMG, CROPDMGEXP, CC
)]
# Property, crop and combined cost summed for each event type
totalCostDTS <- stormDTS[, .(
  PC = sum(PC),
  CC = sum(CC),
  TotalCost = sum(PC) + sum(CC)
), by = EVTYPE]
#Arrange in descending order
totalCostDTS <- totalCostDTS %>%
  arrange(desc(TotalCost))
# Coerce the property-cost column to numeric and print its class to confirm
totalCostDTS$PC <-as.numeric(totalCostDTS$PC)
class(totalCostDTS$PC)
## [1] "numeric"
# Same coercion and class check for the crop-cost column
totalCostDTS$CC <-as.numeric(totalCostDTS$CC)
class(totalCostDTS$CC)
## [1] "numeric"
# Show the first ten rows of the cost table
head(totalCostDTS,10)
## EVTYPE PC CC TotalCost
## 1 TORNADO 3.163481e+15 1.000185e+05 3.163481e+15
## 2 FLASH FLOOD 1.405838e+15 1.792005e+05 1.405838e+15
## 3 TSTM WIND 1.332758e+15 1.092026e+05 1.332758e+15
## 4 FLOOD 8.785298e+14 1.680379e+05 8.785298e+14
## 5 THUNDERSTORM WIND 8.740911e+14 6.679145e+04 8.740911e+14
## 6 HAIL 6.751068e+14 4.170006e+11 6.755238e+14
## 7 LIGHTNING 6.028593e+14 3.580610e+03 6.028593e+14
## 8 THUNDERSTORM WINDS 4.388250e+14 4.000019e+09 4.388290e+14
## 9 HIGH WIND 3.210463e+14 1.728321e+04 3.210463e+14
## 10 WINTER STORM 1.311573e+14 1.978990e+03 1.311573e+14
library(data.table)
# Fatalities, injuries and their combined count summed for each event type
totalInjuriesDTS <- stormDTS[, .(
  FATALITIES = sum(FATALITIES),
  INJURIES = sum(INJURIES),
  Totals = sum(FATALITIES) + sum(INJURIES)
), by = EVTYPE]
#Arrange in descending order
totalInjuriesDTS <- totalInjuriesDTS %>%
  arrange(desc(Totals))
# Show the ten event types with the largest combined count
head(totalInjuriesDTS, n = 10)
## EVTYPE FATALITIES INJURIES Totals
## 1 TORNADO 5633 91346 96979
## 2 EXCESSIVE HEAT 1903 6525 8428
## 3 TSTM WIND 504 6957 7461
## 4 FLOOD 470 6789 7259
## 5 LIGHTNING 816 5230 6046
## 6 HEAT 937 2100 3037
## 7 FLASH FLOOD 978 1777 2755
## 8 ICE STORM 89 1975 2064
## 9 THUNDERSTORM WIND 133 1488 1621
## 10 WINTER STORM 206 1321 1527
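As can be seen from the totalInjuriesDTS table above, tornadoes cause by far the most fatalities and injuries. The plot below shows the ten most harmful event types.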
#Plot the top ten most harmful events
# Bar labels are read from the ranked table itself, so each bar is always
# named after the event type it shows. A hand-typed label vector can drift
# from the data (e.g. "THUNDERSTORM" for "THUNDERSTORM WIND") and, when called
# `names`, shadows base::names.
topHarmful <- head(totalInjuriesDTS, 10)
barplot(topHarmful$Totals, xlab = "EVTYPE", ylab = "Totals",
        main = "Total Injuries and Fatalities by EVTYPE", col = "red",
        names.arg = as.character(topHarmful$EVTYPE), cex.names = 0.60)
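The plot below shows the ten event types with the greatest total economic cost (property plus crop damage), as ranked in the economic cost analysis above.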
#Plot the events with the greatest economic consequences
# Bar labels are read from the ranked cost table itself, so they stay correct
# whenever the ranking changes; a hand-typed label vector would silently
# mislabel the bars and, when called `names`, shadows base::names.
topCostly <- head(totalCostDTS, 10)
barplot(topCostly$TotalCost, xlab = "Event Type", ylab = "Total Cost",
        main = "Events causing Economic Consequencies", col = "red",
        names.arg = as.character(topCostly$EVTYPE), cex.names = 0.60)