library(knitr)
library(markdown)
# Default chunk options for the whole report: echo = TRUE prints each chunk's
# source alongside its output; autodep = TRUE asks knitr to work out
# dependencies between cached chunks automatically.
knitr::opts_chunk$set(echo = TRUE, autodep = TRUE)
This project analyzes storm data from the National Oceanic and Atmospheric Administration (NOAA). It aims to determine the impact of weather events on population health and on the economy. The report shows the top 6 event types with the highest average fatalities and the top 6 with the highest average injuries. It also reveals the top 6 event types that cause the largest total economic loss.
# Fetch the NOAA storm data archive and load it into `weatherdata`.
#
# download.file() does not create missing directories, so the target folder is
# created first. The archive is only downloaded when no local copy exists, so
# re-running the report does not fetch it again.
# NOTE: delete RepProject2/StormHealth.csv.bz2 to force a fresh download (for
# example after an interrupted transfer left a partial file behind).
if (!dir.exists("RepProject2")) {
  dir.create("RepProject2", recursive = TRUE)
}
if (!file.exists("RepProject2/StormHealth.csv.bz2")) {
  # mode = "wb": the archive is binary; text mode can corrupt it on Windows.
  download.file(
    "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
    "RepProject2/StormHealth.csv.bz2",
    mode = "wb"
  )
}
# read.csv() reads the bz2-compressed file directly.
weatherdata <- read.csv("RepProject2/StormHealth.csv.bz2", header = TRUE, sep = ",")
# Show the first rows to check the column layout.
head(weatherdata)
## STATE__ BGN_DATE BGN_TIME TIME_ZONE COUNTY COUNTYNAME STATE
## 1 1 4/18/1950 0:00:00 0130 CST 97 MOBILE AL
## 2 1 4/18/1950 0:00:00 0145 CST 3 BALDWIN AL
## 3 1 2/20/1951 0:00:00 1600 CST 57 FAYETTE AL
## 4 1 6/8/1951 0:00:00 0900 CST 89 MADISON AL
## 5 1 11/15/1951 0:00:00 1500 CST 43 CULLMAN AL
## 6 1 11/15/1951 0:00:00 2000 CST 77 LAUDERDALE AL
## EVTYPE BGN_RANGE BGN_AZI BGN_LOCATI END_DATE END_TIME COUNTY_END
## 1 TORNADO 0 0
## 2 TORNADO 0 0
## 3 TORNADO 0 0
## 4 TORNADO 0 0
## 5 TORNADO 0 0
## 6 TORNADO 0 0
## COUNTYENDN END_RANGE END_AZI END_LOCATI LENGTH WIDTH F MAG FATALITIES
## 1 NA 0 14.0 100 3 0 0
## 2 NA 0 2.0 150 2 0 0
## 3 NA 0 0.1 123 2 0 0
## 4 NA 0 0.0 100 2 0 0
## 5 NA 0 0.0 150 2 0 0
## 6 NA 0 1.5 177 2 0 0
## INJURIES PROPDMG PROPDMGEXP CROPDMG CROPDMGEXP WFO STATEOFFIC ZONENAMES
## 1 15 25.0 K 0
## 2 0 2.5 K 0
## 3 2 25.0 K 0
## 4 2 2.5 K 0
## 5 2 2.5 K 0
## 6 6 2.5 K 0
## LATITUDE LONGITUDE LATITUDE_E LONGITUDE_ REMARKS REFNUM
## 1 3040 8812 3051 8806 1
## 2 3042 8755 0 0 2
## 3 3340 8742 0 0 3
## 4 3458 8626 0 0 4
## 5 3412 8642 0 0 5
## 6 3450 8748 0 0 6
# Summarise the event-type column to see which EVTYPE labels occur and how often.
summary(weatherdata$EVTYPE)
## HAIL TSTM WIND THUNDERSTORM WIND
## 288661 219940 82563
## TORNADO FLASH FLOOD FLOOD
## 60652 54277 25326
## THUNDERSTORM WINDS HIGH WIND LIGHTNING
## 20843 20212 15754
## HEAVY SNOW HEAVY RAIN WINTER STORM
## 15708 11723 11433
## WINTER WEATHER FUNNEL CLOUD MARINE TSTM WIND
## 7026 6839 6175
## MARINE THUNDERSTORM WIND WATERSPOUT STRONG WIND
## 5812 3796 3566
## URBAN/SML STREAM FLD WILDFIRE BLIZZARD
## 3392 2761 2719
## DROUGHT ICE STORM EXCESSIVE HEAT
## 2488 2006 1678
## HIGH WINDS WILD/FOREST FIRE FROST/FREEZE
## 1533 1457 1342
## DENSE FOG WINTER WEATHER/MIX TSTM WIND/HAIL
## 1293 1104 1028
## EXTREME COLD/WIND CHILL HEAT HIGH SURF
## 1002 767 725
## TROPICAL STORM FLASH FLOODING EXTREME COLD
## 690 682 655
## COASTAL FLOOD LAKE-EFFECT SNOW FLOOD/FLASH FLOOD
## 650 636 624
## LANDSLIDE SNOW COLD/WIND CHILL
## 600 587 539
## FOG RIP CURRENT MARINE HAIL
## 538 470 442
## DUST STORM AVALANCHE WIND
## 427 386 340
## RIP CURRENTS STORM SURGE FREEZING RAIN
## 304 261 250
## URBAN FLOOD HEAVY SURF/HIGH SURF EXTREME WINDCHILL
## 249 228 204
## STRONG WINDS DRY MICROBURST ASTRONOMICAL LOW TIDE
## 196 186 174
## HURRICANE RIVER FLOOD LIGHT SNOW
## 174 173 154
## STORM SURGE/TIDE RECORD WARMTH COASTAL FLOODING
## 148 146 143
## DUST DEVIL MARINE HIGH WIND UNSEASONABLY WARM
## 141 135 126
## FLOODING ASTRONOMICAL HIGH TIDE MODERATE SNOWFALL
## 120 103 101
## URBAN FLOODING WINTRY MIX HURRICANE/TYPHOON
## 98 90 88
## FUNNEL CLOUDS HEAVY SURF RECORD HEAT
## 87 84 81
## FREEZE HEAT WAVE COLD
## 74 74 72
## RECORD COLD ICE THUNDERSTORM WINDS HAIL
## 64 61 61
## TROPICAL DEPRESSION SLEET UNSEASONABLY DRY
## 60 59 56
## FROST GUSTY WINDS THUNDERSTORM WINDSS
## 53 53 51
## MARINE STRONG WIND OTHER SMALL HAIL
## 48 48 47
## FUNNEL FREEZING FOG THUNDERSTORM
## 46 45 45
## Temperature record TSTM WIND (G45) Coastal Flooding
## 43 39 38
## WATERSPOUTS MONTHLY PRECIPITATION WINDS
## 37 36 36
## (Other)
## 2940
Method: The average number of fatalities and the average number of injuries per event are used to measure the harm each weather event type causes to population health. The event types with the highest average fatalities and the highest average injuries are selected separately, and plots are generated to show them.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Mean fatalities per event type, keeping the six event types with the largest
# means. top_n() is called without a weight column on purpose: it then ranks by
# the last column of the summary (avg_fatalities) and reports that choice.
top_fatalities <- top_n(
  summarise(
    group_by(weatherdata, EVTYPE),
    avg_fatalities = mean(FATALITIES)
  ),
  6
)
## Selecting by avg_fatalities
# Mean injuries per event type, keeping the six event types with the largest
# means. As above, top_n() ranks by the last summary column (avg_injuries).
top_injuries <- top_n(
  summarise(
    group_by(weatherdata, EVTYPE),
    avg_injuries = mean(INJURIES)
  ),
  6
)
## Selecting by avg_injuries
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
# Horizontal bar chart of the six event types with the highest mean fatalities.
p1 <- ggplot(top_fatalities, aes(x = EVTYPE, y = avg_fatalities)) +
  geom_bar(stat = "identity", fill = "olivedrab") +
  coord_flip() +
  labs(
    x = "Event Types",
    y = "Average Fatalities",
    title = "Top 6 Events with Highest Average Fatalities"
  )

# Same layout for the six event types with the highest mean injuries.
p2 <- ggplot(top_injuries, aes(x = EVTYPE, y = avg_injuries)) +
  geom_bar(stat = "identity", fill = "olivedrab") +
  coord_flip() +
  labs(
    x = "Event Types",
    y = "Average Injuries",
    title = "Top 6 Events with Highest Average Injuries"
  )

# Stack the two charts vertically in a single figure.
grid.arrange(p1, p2, nrow = 2)
Method: Economic loss is measured as the sum of property damage and crop damage, after each damage figure is scaled by the multiplier implied by its exponent code (PROPDMGEXP or CROPDMGEXP).
# List the distinct exponent codes present in PROPDMGEXP so that each one can
# be mapped to a numeric multiplier below.
unique(weatherdata$PROPDMGEXP)
## [1] K M B m + 0 5 6 ? 4 2 3 h 7 H - 1 8
## Levels: - ? + 0 1 2 3 4 5 6 7 8 B h H K m M
# Translate each PROPDMGEXP code into the numeric multiplier applied to PROPDMG.
# Letter codes: h/H = hundred, K = thousand, m/M = million, B = billion.
# Digit codes 0-8 are read as powers of ten; "-", "?", "+" and the empty code
# are given a multiplier of 0. Codes not listed here yield NA.
weatherdata$EXP <- local({
  codes <- c(
    "K", "M", "m", "h", "H", "B",
    "8", "7", "6", "5", "4", "3", "2", "1", "0",
    "-", "?", "+", ""
  )
  multipliers <- c(
    1000, 1e+06, 1e+06, 100, 100, 1e+09,
    1e+08, 1e+07, 1e+06, 1e+05, 10000, 1000, 100, 10, 1,
    0, 0, 0, 0
  )
  # as.character() so the lookup works whether the column is factor or character.
  multipliers[match(as.character(weatherdata$PROPDMGEXP), codes)]
})
# List the distinct exponent codes present in CROPDMGEXP so that each one can
# be mapped to a numeric multiplier below.
unique(weatherdata$CROPDMGEXP)
## [1] M K m B ? 0 k 2
## Levels: ? 0 2 B k K m M
# Translate each CROPDMGEXP code into the numeric multiplier applied to CROPDMG.
# k/K = thousand, m/M = million, B = billion; "2" and "0" are read as powers of
# ten; "?" and the empty code are given a multiplier of 0. Codes not listed
# here yield NA. Assigning the whole column replaces any earlier CROPEXP.
weatherdata$CROPEXP <- local({
  codes <- c("m", "M", "k", "K", "B", "2", "0", "?", "")
  multipliers <- c(1e+06, 1e+06, 1000, 1000, 1e+09, 100, 1, 0, 0)
  # as.character() so the lookup works whether the column is factor or character.
  multipliers[match(as.character(weatherdata$CROPDMGEXP), codes)]
})
# Loss per record: property damage plus crop damage, each scaled by the
# multiplier derived from its exponent code.
weatherdata$EconomicLoss <- with(
  weatherdata,
  PROPDMG * EXP + CROPDMG * CROPEXP
)

# Total loss per event type.
Weather_EcoDMG <- summarise(
  group_by(weatherdata, EVTYPE),
  total_DMG = sum(EconomicLoss)
)

# Keep the six event types with the largest totals, largest first.
top_EcoLoss <- top_n(Weather_EcoDMG, 6, total_DMG)
loss_rank <- order(top_EcoLoss$total_DMG, decreasing = TRUE)
top_EcoLoss <- top_EcoLoss[loss_rank, ]
# Use a single-panel layout for the base-graphics plot.
par(mfrow = c(1, 1))
# Bar chart of total economic damage for the six costliest event types;
# las = 2 turns the axis labels perpendicular to the axes so that long event
# names fit.
barplot(
  height = top_EcoLoss$total_DMG,
  names.arg = top_EcoLoss$EVTYPE,
  main = "Top 6 Events with Highest Economic Damage",
  col = "lightblue",
  las = 2
)
The first two figures show that tornadoes, TSTM wind, and hail have the highest average fatalities, and that heat waves have the highest average injuries. The third figure illustrates that floods cause the largest economic damage, at over 140 billion.