Synopsis

The goal of the assignment is to explore the NOAA Storm Database and answer some basic questions about the effects of severe weather events. The analysis must determine which types of severe weather events are most harmful with respect to:

1. Population Health - injuries and fatalities.

2. Economic Consequences - property and crops.

The events in the database cover the period from 1950 to November 2011.

Data Processing and Data loading

1. Download the dataset to an explicit destination path rather than relying on a desktop shortcut.

2. Read the dataset into a data frame.

3. Convert the data frame to a data.table.

4. The downloaded file is saved as “C:/Users/kumi/Desktop/repdata%2Fdata%2FStormData.csv.bz2”.

Packages to be used

library(data.table)
library(ggplot2)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Source URL and local destination of the compressed NOAA storm data set.
# The destination is defined once so the download and the read cannot drift
# apart.
fileUrl <- "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2"
stormFile <- file.path("/Users/kumi/Desktop", "repdata%2Fdata%2FStormData.csv.bz2")

# Download only when no local copy exists, so re-running the analysis does not
# fetch the archive again. mode = "wb" makes the binary transfer explicit,
# which matters on Windows.
if (!file.exists(stormFile)) {
  download.file(fileUrl, destfile = stormFile, mode = "wb")
}

#Read csv data
path <- getwd()
# read.csv() decompresses the .bz2 file transparently.
stormDF <- read.csv(stormFile)


#Convert data.frame to data.table
stormDT <- as.data.table(stormDF)


# Number of rows and columns in the raw storm table.
dim(stormDT)
## [1] 902297     37
# Total count of missing (NA) cells across the whole table.
sum(is.na(stormDT))
## [1] 1745947
# Fraction of all cells that are missing.
mean(is.na(stormDT))
## [1] 0.05229737

Check the column names

# List every column name of the raw table.
names(stormDT)
##  [1] "STATE__"    "BGN_DATE"   "BGN_TIME"   "TIME_ZONE"  "COUNTY"    
##  [6] "COUNTYNAME" "STATE"      "EVTYPE"     "BGN_RANGE"  "BGN_AZI"   
## [11] "BGN_LOCATI" "END_DATE"   "END_TIME"   "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE"  "END_AZI"    "END_LOCATI" "LENGTH"     "WIDTH"     
## [21] "F"          "MAG"        "FATALITIES" "INJURIES"   "PROPDMG"   
## [26] "PROPDMGEXP" "CROPDMG"    "CROPDMGEXP" "WFO"        "STATEOFFIC"
## [31] "ZONENAMES"  "LATITUDE"   "LONGITUDE"  "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS"    "REFNUM"

Select columns relevant for the analysis

Focus on data where fatalities and injuries occurred.

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.5.1
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Reduce the raw table to the event type plus the casualty and damage columns
# used in the rest of the analysis.
stormDTS <- stormDT %>%
  select(
    EVTYPE,
    FATALITIES, INJURIES,
    PROPDMG, PROPDMGEXP,
    CROPDMG, CROPDMGEXP
  )

# Preview the first rows of the reduced table.
head(stormDTS)
##    EVTYPE FATALITIES INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP
## 1 TORNADO          0       15    25.0          K       0           
## 2 TORNADO          0        0     2.5          K       0           
## 3 TORNADO          0        2    25.0          K       0           
## 4 TORNADO          0        2     2.5          K       0           
## 5 TORNADO          0        2     2.5          K       0           
## 6 TORNADO          0        6     2.5          K       0

Correcting columns with exponents (PROPDMGEXP and CROPDMGEXP)

library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
# Factor levels of the property-damage exponent column (note the mixed-case
# letters and the non-alphanumeric symbols).
levels(stormDTS$PROPDMGEXP)
##  [1] ""  "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K"
## [18] "m" "M"
# Factor levels of the crop-damage exponent column.
levels(stormDTS$CROPDMGEXP)
## [1] ""  "?" "0" "2" "B" "k" "K" "m" "M"
#Change PROPDMGEXP alphanumeric exponents to numeric values
# Multipliers are keyed by the upper-cased exponent symbol. The lookup has to
# be done by symbol *name*: as.numeric() on the column yields factor level
# codes (or NA for character data), never the symbol itself, so indexing with
# it selects multipliers by position (e.g. "K" ended up as 10^9) and inflates
# the damage totals.
changePROPDMGEXP <- c("-" = 10^0, "?" = 10^0, "+" = 10^0,
                      "0" = 10^0, "1" = 10^1, "2" = 10^2, "3" = 10^3,
                      "4" = 10^4, "5" = 10^5, "6" = 10^6, "7" = 10^7,
                      "8" = 10^8, "9" = 10^9,
                      "H" = 10^2, "K" = 10^3, "M" = 10^6, "B" = 10^9)

#Change CROPDMGEXP alphanumeric exponents to numeric values
# Digits are powers of ten, consistent with the property-damage table.
changeCROPDMGEXP <- c("?" = 10^0, "0" = 10^0, "2" = 10^2,
                      "K" = 10^3, "M" = 10^6, "B" = 10^9)

# toupper() folds "h"/"k"/"m" onto their upper-case keys; unname() keeps the
# new column a plain numeric vector.
stormDTS[, PROPDMGEXP := unname(changePROPDMGEXP[toupper(as.character(PROPDMGEXP))])]
# Empty or unrecognised symbols have no key and come back as NA: treat them
# as a multiplier of 1.
stormDTS[is.na(PROPDMGEXP), PROPDMGEXP := 10^0]

stormDTS[, CROPDMGEXP := unname(changeCROPDMGEXP[toupper(as.character(CROPDMGEXP))])]
stormDTS[is.na(CROPDMGEXP), CROPDMGEXP := 10^0]

Economic cost columns

library(data.table)

# Property cost (PC) and crop cost (CC): each damage figure multiplied by its
# numeric exponent multiplier.
PC <- stormDTS[, PROPDMG * PROPDMGEXP]
CC <- stormDTS[, CROPDMG * CROPDMGEXP]

# Rebuild the table with each cost column placed right after its inputs.
stormDTS <- stormDTS[, .(
  EVTYPE, FATALITIES, INJURIES,
  PROPDMG, PROPDMGEXP, PC = PC,
  CROPDMG, CROPDMGEXP, CC = CC
)]

Total Cost (Property and Crops)

# Per event type: summed property cost, summed crop cost and their combined
# total, ranked from the costliest event type downwards.
totalCostDTS <- stormDTS[, .(
  PC = sum(PC),
  CC = sum(CC),
  TotalCost = sum(PC) + sum(CC)
), by = .(EVTYPE)] %>%
  arrange(desc(TotalCost))

# Coerce both cost columns to numeric and confirm the resulting class.
totalCostDTS$PC <- as.numeric(totalCostDTS$PC)
class(totalCostDTS$PC)
## [1] "numeric"
totalCostDTS$CC <- as.numeric(totalCostDTS$CC)
class(totalCostDTS$CC)
## [1] "numeric"
# Ten costliest event types.
head(totalCostDTS, n = 10)
##                EVTYPE           PC           CC    TotalCost
## 1             TORNADO 3.163481e+15 1.000185e+05 3.163481e+15
## 2         FLASH FLOOD 1.405838e+15 1.792005e+05 1.405838e+15
## 3           TSTM WIND 1.332758e+15 1.092026e+05 1.332758e+15
## 4               FLOOD 8.785298e+14 1.680379e+05 8.785298e+14
## 5   THUNDERSTORM WIND 8.740911e+14 6.679145e+04 8.740911e+14
## 6                HAIL 6.751068e+14 4.170006e+11 6.755238e+14
## 7           LIGHTNING 6.028593e+14 3.580610e+03 6.028593e+14
## 8  THUNDERSTORM WINDS 4.388250e+14 4.000019e+09 4.388290e+14
## 9           HIGH WIND 3.210463e+14 1.728321e+04 3.210463e+14
## 10       WINTER STORM 1.311573e+14 1.978990e+03 1.311573e+14

Total Fatalities and Injuries

library(data.table)

# Per event type: summed fatalities, summed injuries and their combined total,
# ranked from the most harmful event type downwards.
totalInjuriesDTS <- stormDTS[, .(
  FATALITIES = sum(FATALITIES),
  INJURIES = sum(INJURIES),
  Totals = sum(FATALITIES) + sum(INJURIES)
), by = .(EVTYPE)] %>%
  arrange(desc(Totals))

# Ten most harmful event types.
head(totalInjuriesDTS, n = 10)
##               EVTYPE FATALITIES INJURIES Totals
## 1            TORNADO       5633    91346  96979
## 2     EXCESSIVE HEAT       1903     6525   8428
## 3          TSTM WIND        504     6957   7461
## 4              FLOOD        470     6789   7259
## 5          LIGHTNING        816     5230   6046
## 6               HEAT        937     2100   3037
## 7        FLASH FLOOD        978     1777   2755
## 8          ICE STORM         89     1975   2064
## 9  THUNDERSTORM WIND        133     1488   1621
## 10      WINTER STORM        206     1321   1527

Results

Most Harmful events to Population Health

The top ten rows of the totalInjuriesDTS table above list the event types with the most combined fatalities and injuries; tornadoes lead by a wide margin.

#Plot the top ten most harmful events
# Bar labels are taken from the ranked table itself rather than typed by hand,
# so every bar is labelled with the event type it actually represents.
topHealth <- head(totalInjuriesDTS, 10)

barplot(topHealth$Totals, xlab = "EVTYPE", ylab = "Totals",
        main = "Total Injuries and Fatalities by EVTYPE", col = "red",
        names.arg = as.character(topHealth$EVTYPE), cex.names = 0.60)

Types of events with the greatest Economic Consequences

The top ten rows of the totalCostDTS table above list the event types with the highest combined property and crop damage.

#Plot the events with the greatest economic consequencies
# Bar labels are derived from the ranked table, so they always match the bars
# even when the ranking changes. Spaces are stripped to keep the labels short.
topCost <- head(totalCostDTS, 10)
costLabels <- gsub(" ", "", as.character(topCost$EVTYPE), fixed = TRUE)

barplot(topCost$TotalCost, xlab = "Event Type", ylab = "Total Cost",
        main = "Events causing Economic Consequencies", col = "red",
        names.arg = costLabels, cex.names = 0.60)