Coursera - Reproducible Research - Project 2
This analysis is for the Coursera Reproducible Research course Week 4 Project 2, part of the Data Science Specialization. The project explores the impact of severe weather events on human health (injuries and fatalities) and on financial damage (property and crop damage).
## [1] "STATE__" "BGN_DATE" "BGN_TIME" "TIME_ZONE" "COUNTY"
## [6] "COUNTYNAME" "STATE" "EVTYPE" "BGN_RANGE" "BGN_AZI"
## [11] "BGN_LOCATI" "END_DATE" "END_TIME" "COUNTY_END" "COUNTYENDN"
## [16] "END_RANGE" "END_AZI" "END_LOCATI" "LENGTH" "WIDTH"
## [21] "F" "MAG" "FATALITIES" "INJURIES" "PROPDMG"
## [26] "PROPDMGEXP" "CROPDMG" "CROPDMGEXP" "WFO" "STATEOFFIC"
## [31] "ZONENAMES" "LATITUDE" "LONGITUDE" "LATITUDE_E" "LONGITUDE_"
## [36] "REMARKS" "REFNUM"
# Keep only the columns this analysis uses: the event type, the two
# casualty counts, and the damage magnitudes with their exponent codes.
stormData <- NOAA[, c(
  "EVTYPE",
  "FATALITIES", "INJURIES",
  "PROPDMG", "PROPDMGEXP",
  "CROPDMG", "CROPDMGEXP"
)]
## [1] "character"
# Turn both damage-exponent columns into factors so their distinct codes
# can be listed with levels().
stormData[c("PROPDMGEXP", "CROPDMGEXP")] <- lapply(
  stormData[c("PROPDMGEXP", "CROPDMGEXP")],
  as.factor
)

# Economic-damage subset: drop columns 2 and 3 (FATALITIES and INJURIES),
# keeping EVTYPE plus the four damage columns.
ED <- stormData[, -(2:3)]

# Inspect which exponent codes occur for property damage.
levels(ED$PROPDMGEXP)
## [1] "-" "?" "+" "0" "1" "2" "3" "4" "5" "6" "7" "8" "B" "h" "H" "K" "m"
## [18] "M"
##
## - ? + 0 1 2 3 4 5 6
## 1 8 5 216 25 13 4 4 28 4
## 7 8 B h H K m M
## 5 1 40 1 6 424665 7 11330
## [1] 465934
## Warning: Unknown or uninitialised column: 'Prop.DMG'.
# Translate each PROPDMGEXP code into the numeric multiplier that PROPDMG is
# scaled by, stored in ED$Prop.DMG.
#
# The column is created explicitly first so that the indexed assignments below
# do not trigger "Unknown or uninitialised column: 'Prop.DMG'".
ED$Prop.DMG <- NA_real_

# Blank exponent -> multiplier 0. The levels printed for PROPDMGEXP contain no
# "" level, so blanks appear to arrive as NA rather than as empty strings
# (NOTE(review): confirm against how NOAA is read). Matching only == "" left
# every such row with an NA multiplier, which then propagated into the damage
# totals; both representations are handled here.
ED$Prop.DMG[is.na(ED$PROPDMGEXP) | ED$PROPDMGEXP == ""] <- 0

# Letter codes: hundreds, thousands, millions, billions.
ED$Prop.DMG[ED$PROPDMGEXP %in% c("h", "H")] <- 100
ED$Prop.DMG[ED$PROPDMGEXP %in% "K"] <- 1000
ED$Prop.DMG[ED$PROPDMGEXP %in% c("m", "M")] <- 1e6
ED$Prop.DMG[ED$PROPDMGEXP %in% "B"] <- 1e9

# Digit codes: "0" maps to 1 and "1".."8" map to 10^1..10^8.
ED$Prop.DMG[ED$PROPDMGEXP %in% "0"] <- 1
ED$Prop.DMG[ED$PROPDMGEXP %in% "1"] <- 10
ED$Prop.DMG[ED$PROPDMGEXP %in% "2"] <- 100
ED$Prop.DMG[ED$PROPDMGEXP %in% "3"] <- 1000
ED$Prop.DMG[ED$PROPDMGEXP %in% "4"] <- 1e4
ED$Prop.DMG[ED$PROPDMGEXP %in% "5"] <- 1e5
ED$Prop.DMG[ED$PROPDMGEXP %in% "6"] <- 1e6
ED$Prop.DMG[ED$PROPDMGEXP %in% "7"] <- 1e7
ED$Prop.DMG[ED$PROPDMGEXP %in% "8"] <- 1e8

# Rows whose code has no rule above (e.g. "-", "?", "+") keep an NA multiplier;
# this counts them.
sum(is.na(ED$Prop.DMG))
## [1] 465934
## (output recorded before blank exponents were mapped to 0; re-knit to refresh)
## [1] "?" "0" "2" "B" "k" "K" "m" "M"
##
## ? 0 2 B k K m M
## 7 19 1 9 21 281832 1 1994
## Warning: Unknown or uninitialised column: 'Crop.DMG'.
Top 6 weather events causing injuries.
| EVTYPE | Injury |
|---|---|
| TORNADO | 91346 |
| TSTM WIND | 6957 |
| FLOOD | 6789 |
| EXCESSIVE HEAT | 6525 |
| LIGHTNING | 5230 |
| HEAT | 2100 |
Top 6 weather events causing fatalities.
| EVTYPE | Fatality |
|---|---|
| TORNADO | 5633 |
| EXCESSIVE HEAT | 1903 |
| FLASH FLOOD | 978 |
| HEAT | 937 |
| LIGHTNING | 816 |
| TSTM WIND | 504 |
# Value of each record: the reported magnitude times its exponent multiplier.
Property$DMG.Value <- Property$PROPDMG * Property$Prop.DMG
Crop$DMG.Value <- Crop$CROPDMG * Crop$Crop.DMG

# Total property damage per event type, largest first.
# na.rm = TRUE is essential: one record with a missing multiplier would
# otherwise turn the whole event type's total into NA, and arrange() sorts NA
# last, silently pushing the costliest event types out of the ranking.
PropertyDMD <- Property %>%
  group_by(EVTYPE) %>%
  summarise(Damage = sum(DMG.Value, na.rm = TRUE)) %>%
  arrange(desc(Damage))

# Five largest totals, divided by 10^9 so the column reads in billions.
# head() returns fewer rows when fewer than five event types exist, whereas
# indexing with 1:5 would pad the result with all-NA rows.
PropertyDMDTop5 <- head(PropertyDMD, 5)
PropertyDMDTop5$Damage <- PropertyDMDTop5$Damage / 10^9
names(PropertyDMDTop5) <- c("Events", "Billions")

# Same aggregation for crop damage.
CropDMD <- Crop %>%
  group_by(EVTYPE) %>%
  summarise(Damage = sum(DMG.Value, na.rm = TRUE)) %>%
  arrange(desc(Damage))

CropDMDTop5 <- head(CropDMD, 5)
CropDMDTop5$Damage <- CropDMDTop5$Damage / 10^9
names(CropDMDTop5) <- c("Events", "Billions")
pander(PropertyDMDTop5)

| Events | Billions |
|---|---|
| TORNADOES, TSTM WIND, HAIL | 1.6 |
| WILD FIRES | 0.6241 |
| HAILSTORM | 0.241 |
| HIGH WINDS/COLD | 0.1105 |
| River Flooding | 0.1062 |
| Events | Billions |
|---|---|
| EXCESSIVE WETNESS | 0.142 |
| COLD AND WET CONDITIONS | 0.066 |
| Early Frost | 0.042 |
| Damaging Freeze | 0.03413 |
| Freeze | 0.0105 |
Top 5 weather events by economic impact
# Bar chart of the top five event types by property damage (in billions).
g1 <- ggplot(
  data = PropertyDMDTop5,
  mapping = aes(x = Events, y = Billions)
) +
  geom_col() +
  labs(title = "Property Damage (Top 5)")

# Matching bar chart for crop damage.
g2 <- ggplot(
  data = CropDMDTop5,
  mapping = aes(x = Events, y = Billions)
) +
  geom_col() +
  labs(title = " Crop Damage (Top 5)")

# Stack the two charts vertically in a single figure.
grid.arrange(g1, g2, nrow = 2)