This project evaluates the population health and economic impact of disaster events. Introduction
Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
Data Processing First, we read the data and pick the relevant columns.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the raw NOAA storm data straight from the bzip2-compressed CSV
# (read.csv() reads the compressed file directly).
# The location defaults to the original author's download folder; set the
# STORM_DATA_PATH environment variable to point at a copy stored elsewhere.
storm_path <- Sys.getenv(
  "STORM_DATA_PATH",
  unset = "C:/Users/hplaptop-25/Downloads/repdata-data-StormData.csv.bz2"
)
# Fail early with a clear message instead of a low-level connection error.
if (!file.exists(storm_path)) {
  stop("Storm data file not found: ", storm_path, call. = FALSE)
}
storm <- read.csv(storm_path)
# List the available columns so the ones needed below can be identified.
names(storm)
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Keep only the columns each analysis needs. Columns are selected by name
# rather than by position, so a change in column order raises an error
# instead of silently picking up the wrong fields.
# `s`: event type plus the population-health measures.
s <- storm[, c("EVTYPE", "FATALITIES", "INJURIES")]
# `t`: event type plus the damage amounts and their exponent codes.
t <- storm[, c("EVTYPE", "PROPDMG", "PROPDMGEXP", "CROPDMG", "CROPDMGEXP")]
# The full table is no longer needed once the two subsets exist.
rm(storm)
head(s)
## EVTYPE FATALITIES INJURIES
## 1 TORNADO 0 15
## 2 TORNADO 0 0
## 3 TORNADO 0 2
## 4 TORNADO 0 2
## 5 TORNADO 0 2
## 6 TORNADO 0 6
Next, we sum the fatality and injury totals for each event type to assess the harm that different events caused to population health. We keep the 15 most harmful event types.
Results
# Total fatalities and injuries for every event type.
s1 <- aggregate(
  cbind(FATALITIES, INJURIES) ~ EVTYPE,
  data = s,
  FUN = sum,
  na.rm = TRUE
)
rm(s)
# Rank event types by combined casualties, most harmful first.
s1 <- arrange(s1, desc(FATALITIES + INJURIES))
# head() keeps at most 15 rows; unlike s1[1:15, ] it does not pad the result
# with all-NA rows when fewer than 15 event types are present.
s1 <- head(s1, 15)
s1
## EVTYPE FATALITIES INJURIES
## 1 TORNADO 5633 91346
## 2 EXCESSIVE HEAT 1903 6525
## 3 TSTM WIND 504 6957
## 4 FLOOD 470 6789
## 5 LIGHTNING 816 5230
## 6 HEAT 937 2100
## 7 FLASH FLOOD 978 1777
## 8 ICE STORM 89 1975
## 9 THUNDERSTORM WIND 133 1488
## 10 WINTER STORM 206 1321
## 11 HIGH WIND 248 1137
## 12 HAIL 15 1361
## 13 HURRICANE/TYPHOON 64 1275
## 14 HEAVY SNOW 127 1021
## 15 WILDFIRE 75 911
# Reshape the ranked totals into a matrix with one row per measure
# (fatalities, injuries) and one column per event type; barplot() stacks
# the rows of a matrix within each bar.
n <- s1$EVTYPE
s2 <- t(as.matrix(s1[, -1]))
colnames(s2) <- n
# las = 2 draws axis labels perpendicular to the axis; the bottom margin is
# enlarged to leave room for the event names.
par(las = 2, mar = c(6, 4, 1, 1))
barplot(
  s2,
  main = "Disaster Casualties",
  col = c("dark blue", "red"),
  cex.names = 0.6,
  cex.axis = 0.6
)
legend(
  "topright",
  legend = c("Fatalities", "Injuries"),
  fill = c("dark blue", "red"),
  bty = "n"
)
The bar plot ranks the 15 disaster event types that cause the most harm to population health (fatalities plus injuries).
Data Processing We check the values of PROPDMGEXP and CROPDMGEXP.
# Tabulate the property-damage exponent codes to see which values occur.
table(t[["PROPDMGEXP"]])
##
## - ? + 0 1 2 3 4 5
## 465934 1 8 5 216 25 13 4 4 28
## 6 7 8 B h H K m M
## 4 5 1 40 1 6 424665 7 11330
# Tabulate the crop-damage exponent codes to see which values occur.
table(t[["CROPDMGEXP"]])
##
## ? 0 2 B k K m M
## 618413 7 19 1 9 21 281832 1 1994
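Exponent codes that are not among the recognised letters (H, K, M, B, in the upper- or lower-case forms listed in the code) become NA when converted to a factor; we assign these NA values the level “O”, which gets a multiplier of 1. We then assign numeric values to the multipliers: H = 100, K = 1000, M = 1e6, B = 1e9.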
# Recode the property-damage exponent: the letters that receive a multiplier
# are kept as they are, and every other code (blank, digits, symbols, NA) is
# folded into the catch-all level "O" (capital letter O, not zero).
t$propFactor <- factor(
  replace(
    as.character(t$PROPDMGEXP),
    !(t$PROPDMGEXP %in% c("H", "K", "M", "B", "h", "m")),
    "O"
  ),
  levels = c("H", "K", "M", "B", "h", "m", "O")
)
table(t$propFactor)
##
## H K M B h m O
## 6 424665 11330 40 1 7 466248
# Recode the crop-damage exponent: the letters that receive a multiplier are
# kept as they are, and every other code (blank, digits, symbols, NA) is
# folded into the catch-all level "O" (capital letter O, not zero).
t$cropFactor <- factor(
  replace(
    as.character(t$CROPDMGEXP),
    !(t$CROPDMGEXP %in% c("K", "M", "B", "k", "m")),
    "O"
  ),
  levels = c("K", "M", "B", "k", "m", "O")
)
table(t$cropFactor)
##
## K M B k m O
## 281832 1994 9 21 1 618440
# Translate each exponent level into its numeric multiplier through a named
# lookup vector: hundreds, thousands, millions, billions, and 1 for the
# catch-all level "O". Upper- and lower-case letters share a multiplier.
# Every row has a level here because missing codes were already set to "O".
t <- mutate(
  t,
  PROP = unname(
    c(H = 100, h = 100, K = 1000, M = 1e6, m = 1e6, B = 1e9, O = 1)[
      as.character(propFactor)
    ]
  ),
  CROP = unname(
    c(K = 1000, k = 1000, M = 1e6, m = 1e6, B = 1e9, O = 1)[
      as.character(cropFactor)
    ]
  )
)
Multiply PROPDMG and CROPDMG by their multipliers, expressing the results in millions of dollars. Then sum the total damage by event type.
Results
# Convert the raw damage figures to millions of dollars: amount times its
# multiplier, divided by 1e6.
t <- mutate(
  t,
  PROPdmgVal = PROPDMG * PROP / 1e6,
  CROPdmgVal = CROPDMG * CROP / 1e6
)
# Total property and crop damage per event type, ranked by combined damage
# with the costliest event type first.
t2 <- t %>%
  group_by(EVTYPE) %>%
  summarize(
    PROPdmgVal = sum(PROPdmgVal, na.rm = TRUE),
    CROPdmgVal = sum(CROPdmgVal, na.rm = TRUE)
  ) %>%
  arrange(desc(PROPdmgVal + CROPdmgVal))
# head() keeps at most 15 rows; unlike t2[1:15, ] it does not pad the result
# with all-NA rows when fewer than 15 event types are present.
t2 <- head(t2, 15)
t2
## # A tibble: 15 × 3
## EVTYPE PROPdmgVal CROPdmgVal
## <fctr> <dbl> <dbl>
## 1 FLOOD 144657.710 5661.9685
## 2 HURRICANE/TYPHOON 69305.840 2607.8728
## 3 TORNADO 56937.161 414.9533
## 4 STORM SURGE 43323.536 0.0050
## 5 HAIL 15732.268 3025.9545
## 6 FLASH FLOOD 16140.812 1421.3171
## 7 DROUGHT 1046.106 13972.5660
## 8 HURRICANE 11868.319 2741.9100
## 9 RIVER FLOOD 5118.945 5029.4590
## 10 ICE STORM 3944.928 5022.1135
## 11 TROPICAL STORM 7703.891 678.3460
## 12 WINTER STORM 6688.497 26.9440
## 13 HIGH WIND 5270.046 638.5713
## 14 WILDFIRE 4765.114 295.4728
## 15 TSTM WIND 4484.928 554.0073
# The row-level damage table is no longer needed once the totals exist.
rm(t)
# Reshape the ranked totals into a matrix with one row per damage type
# (property, crop) and one column per event type; barplot() stacks the rows
# of a matrix within each bar.
n <- t2$EVTYPE
t3 <- t(as.matrix(t2[, -1]))
colnames(t3) <- n
# las = 2 draws axis labels perpendicular to the axis; the bottom margin is
# enlarged to leave room for the event names.
par(las = 2, mar = c(6, 4, 1, 1))
barplot(
  t3,
  main = "Disaster Economic Impact",
  col = c("blue", "dark red"),
  cex.names = 0.6,
  cex.axis = 0.6
)
legend(
  "topright",
  legend = c("Property Damage Million $", "Crop Damage Million $"),
  fill = c("blue", "dark red"),
  bty = "n"
)
The bar plot shows the 15 disaster event types that cause the greatest economic impact.