Severe weather is responsible for property and crop damage and for many injuries and fatalities every year. Since 1950, the National Oceanic and Atmospheric Administration has collected data documenting severe weather, storms, and other significant weather phenomena that caused loss of life, injuries, significant property damage, and/or disruption to commerce. The ability to determine which severe weather event types cause the greatest damage could influence decisions made by municipalities on relocating property or preparing for severe weather events, which could save lives. For population health, the most harmful severe weather events are tornadoes. The greatest economic consequences due to property and crop damage are from flooding, hurricanes, tornadoes and drought.
# Show each chunk's source in the report and cache chunk results.
knitr::opts_chunk$set(echo = TRUE, cache = TRUE)

library(ggplot2)
library(knitr)
library(dplyr)
library(tidyr)
library(scales)

# Load the storm data set from the bzip2-compressed CSV file.
storm_data <- read.csv("repdata-data-StormData.csv.bz2")

# Quick inspection: uncomment str() to see the column layout; nrow() reports
# the number of records that were read.
# str(storm_data)
nrow(storm_data)
## [1] 902297
#names(storm_data)
The event types contain many duplicates and similar types. These need to be simplified to the main categories of the data set.
# Collapse the free-text EVTYPE values into a small set of consistent
# event categories.
#
# Each rule maps a category label (the name) to a regular expression (the
# value) that is matched against the lower-cased event type. Rules are applied
# in the order listed, and each rule sees the result of the previous ones.
# Labels are capitalised while the patterns are lower case, so a record that
# has already been assigned a label is not claimed again by a later rule.
# Order therefore matters: e.g. "thunderstorm wind" is claimed by the "Wind"
# rule before the "Thunderstorm" rule runs.
#
# Volcanic events ("ash|volc") get their own "VolcanicAsh" category; they were
# previously labelled "HeavyRain" by mistake.
# NOTE(review): the "dr" pattern is very broad (it also matches e.g.
# "freezing drizzle") -- confirm this is intended before tightening it.
# unique(storm_data$EVTYPE)
evtype_rules <- c(
  Avalanche     = "avalan",
  WinterStorm   = "snow|winter|blizzard",
  Wildfire      = "wild|brush",
  Tornado       = "torn|funnel|spout",
  Hurricane     = "hurricane|typhoon",
  Flood         = "floo?d",
  Hail          = "hail",
  Wind          = "wi?nd",
  Thunderstorm  = "thunderstorm",
  Cold          = "cold",
  StormSurge    = "surge|surf",
  Ice           = "ic[ey]|sleet|wintry",
  Seas          = "seas|swell",
  TropicalStorm = "tropical",
  HeavyRain     = "rain|wet|shower|precip",
  Heat          = "heat|warm|hot",
  Drought       = "dr",
  Frost         = "frost|freeze",
  Lightning     = "lightning",
  Fog           = "f?og",
  Tsunami       = "tsunami",
  Landslides    = "slide",
  VolcanicAsh   = "ash|volc",
  CoastalStorm  = "coastal"
)

storm_data$evtype_simplified <- tolower(storm_data$EVTYPE)
for (i in seq_along(evtype_rules)) {
  # Match against the current (partially relabelled) vector, exactly as a
  # sequence of individual grep-and-assign statements would.
  matched <- grepl(evtype_rules[[i]], storm_data$evtype_simplified)
  storm_data$evtype_simplified[matched] <- names(evtype_rules)[i]
}
# unique(storm_data$evtype_simplified)
The relevant data for analysis is the subset which includes the property and crop damage values.
# Convert a damage amount and its magnitude code into US dollars.
#
# amount:   numeric vector of damage figures.
# exponent: vector of magnitude codes. "K", "M" and "B" scale the amount by
#           1e3, 1e6 and 1e9 respectively; the comparison ignores case so
#           that lower-case codes are scaled the same way. Any other code
#           (including an empty or missing one) leaves the amount unscaled.
# Returns a numeric vector of damage values in US dollars.
damage_to_usd <- function(amount, exponent) {
  multipliers <- c(K = 1e3, M = 1e6, B = 1e9)
  scale <- unname(multipliers[toupper(as.character(exponent))])
  # Codes that are not in the lookup table come back as NA: treat as x1.
  scale[is.na(scale)] <- 1
  amount * scale
}

# Combine the storm crop damage amount and exp into US dollars
Cropdamage_in_usd <- damage_to_usd(storm_data$CROPDMG, storm_data$CROPDMGEXP)
storm_data$Cropdamage_in_usd <- Cropdamage_in_usd

# Combine the storm property damage amount and exp into US dollars
Propdamage_in_usd <- damage_to_usd(storm_data$PROPDMG, storm_data$PROPDMGEXP)
storm_data$Propdamage_in_usd <- Propdamage_in_usd
Group the data by simplified event type and sum the property and crop damage within each group.
# Total property and crop damage (US dollars) per simplified event type.
storm_data_DMG <- storm_data %>%
  select(evtype_simplified, Propdamage_in_usd, Cropdamage_in_usd) %>%
  group_by(evtype_simplified) %>%
  summarise(
    TotalPropdamage_in_usd = sum(Propdamage_in_usd),
    TotalCropdamage_in_usd = sum(Cropdamage_in_usd)
  )
Arrange the data in decreasing order by the sum of the total economic damage and keep the top 10.
# Add the combined property + crop damage, rank event types from most to
# least costly, and keep the first ten rows.
stormdata_EconDam_10 <- storm_data_DMG %>%
  mutate(
    Totaldamage_in_usd = TotalPropdamage_in_usd + TotalCropdamage_in_usd
  ) %>%
  arrange(desc(Totaldamage_in_usd)) %>%
  head(10)
The crop and property damage are now grouped by event type, with the total damage recorded for each:
# Render the ten event types with the highest total damage as a table.
kable(stormdata_EconDam_10)
| evtype_simplified | TotalPropdamage_in_usd | TotalCropdamage_in_usd | Totaldamage_in_usd |
|---|---|---|---|
| Flood | 167523190932 | 12380079110 | 179903270042 |
| Hurricane | 85336410030 | 5506117810 | 90842527840 |
| Tornado | 58591369840 | 417461520 | 59008831360 |
| StormSurge | 48080199000 | 2355000 | 48082554000 |
| Hail | 16015091077 | 3111296290 | 19126387367 |
| Wind | 15925508030 | 1964120147 | 17889628177 |
| Drought | 1052983600 | 13972581000 | 15025564600 |
| Ice | 3958908060 | 5022114300 | 8981022360 |
| Wildfire | 8491618500 | 402781630 | 8894400130 |
| WinterStorm | 8464008953 | 294187100 | 8758196053 |
# Keep only the records with at least one fatality or injury, then total the
# fatalities, the injuries, and their combined count per simplified event type.
storm_data_FatInj <- storm_data %>%
  select(evtype_simplified, FATALITIES, INJURIES) %>%
  filter(FATALITIES > 0 | INJURIES > 0) %>%
  group_by(evtype_simplified) %>%
  summarise(
    total_fatalities = sum(FATALITIES),
    total_injuries = sum(INJURIES)
  ) %>%
  mutate(total_fatalities_injuries = total_fatalities + total_injuries)
Arrange the data in decreasing order by the sum of the total fatalities and injuries, keeping only the top 10.
# Rank event types by combined fatalities and injuries; keep the first ten.
storm_data_FatInj_10 <- storm_data_FatInj %>%
  arrange(desc(total_fatalities_injuries)) %>%
  head(10)
The simplified total of Fatalities and Injuries by event type is:
# Render the ten event types with the most fatalities and injuries as a table.
kable(storm_data_FatInj_10)
| evtype_simplified | total_fatalities | total_injuries | total_fatalities_injuries |
|---|---|---|---|
| Tornado | 5664 | 91439 | 97103 |
| Wind | 1413 | 11344 | 12757 |
| Heat | 3138 | 9226 | 12364 |
| Flood | 1525 | 8604 | 10129 |
| Lightning | 817 | 5231 | 6048 |
| WinterStorm | 548 | 3861 | 4409 |
| Ice | 105 | 2260 | 2365 |
| Wildfire | 90 | 1608 | 1698 |
| Hail | 20 | 1467 | 1487 |
| Hurricane | 135 | 1333 | 1468 |
# Horizontal bar chart of total (property + crop) damage for the top ten
# event types, with the bars ordered by total damage.
ggplot(
  stormdata_EconDam_10,
  aes(
    x = reorder(evtype_simplified, Totaldamage_in_usd),
    y = Totaldamage_in_usd
  )
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    title = "Economic Damage Due to Severe Weather Events",
    x = "",
    y = "Total Damage in US Dollars"
  ) +
  scale_y_continuous(labels = dollar) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Facet strip labels for the two damage measures, keyed by column name.
damage_names <- c(
  TotalPropdamage_in_usd = "Property Damage",
  TotalCropdamage_in_usd = "Crop Damage"
)

# Build the labeller from the lookup table with as_labeller(). The older
# function(variable, value) labeller signature is deprecated in ggplot2 and
# triggers a warning when the plot is drawn.
damage_labeller <- as_labeller(damage_names)

# Reshape the two damage columns into long form (one row per event type and
# damage type) and draw one facet per damage type. Bars keep the ordering by
# total damage in both facets.
stormdata_EconDam_10 %>%
  gather(
    key = damage_type, value = measurement,
    TotalPropdamage_in_usd, TotalCropdamage_in_usd
  ) %>%
  ggplot(
    aes(
      x = reorder(evtype_simplified, Totaldamage_in_usd),
      y = measurement
    )
  ) +
  facet_grid(damage_type ~ ., labeller = damage_labeller) +
  geom_bar(stat = "identity") +
  coord_flip() +
  ggtitle("Economic Damage by Type") +
  xlab("") +
  ylab("Total Damage in US Dollars") +
  scale_y_continuous(labels = dollar) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Horizontal bar chart of combined fatalities and injuries for the top ten
# event types, with the bars ordered by that combined count.
ggplot(
  storm_data_FatInj_10,
  aes(
    x = reorder(evtype_simplified, total_fatalities_injuries),
    y = total_fatalities_injuries
  )
) +
  geom_bar(stat = "identity") +
  coord_flip() +
  labs(
    title = "Damage to Population Health Due to Severe Weather Events",
    x = "",
    y = "Total Injuries and Fatalities"
  ) +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
# Facet strip labels for the two harm measures, keyed by column name.
harm_names <- c(
  total_fatalities = "Fatalities",
  total_injuries = "Injuries"
)

# Build the labeller from the lookup table with as_labeller(). The older
# function(variable, value) labeller signature is deprecated in ggplot2 and
# triggers a warning when the plot is drawn.
harm_labeller <- as_labeller(harm_names)

# Reshape the fatality and injury totals into long form (one row per event
# type and harm type) and draw one facet per harm type. Bars keep the
# ordering by combined fatalities and injuries in both facets.
storm_data_FatInj_10 %>%
  gather(
    key = harm_type, value = measurement,
    total_fatalities, total_injuries
  ) %>%
  ggplot(
    aes(
      x = reorder(evtype_simplified, total_fatalities_injuries),
      y = measurement
    )
  ) +
  facet_grid(harm_type ~ ., labeller = harm_labeller) +
  geom_bar(stat = "identity") +
  coord_flip() +
  ggtitle("Fatalities and Injuries Due to Severe Weather Events") +
  xlab("") +
  ylab("Number of People Harmed") +
  scale_y_continuous() +
  theme_classic() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))