Storms and other severe weather events can cause both public health and economic problems for communities and municipalities. Many severe events can result in fatalities, injuries, and property damage, and preventing such outcomes to the extent possible is a key concern.
This project involves exploring the U.S. National Oceanic and Atmospheric Administration’s (NOAA) storm database. This database tracks characteristics of major storms and weather events in the United States, including when and where they occur, as well as estimates of any fatalities, injuries, and property damage.
Load the data and store in the cache
library(stringr)
library(stringdist)
library(R.cache)
#load conditionally
if(!exists("storm_data")){
storm_data<-loadCache(list("repdata_data_StormData.csv.bz2"))
if(is.null(storm_data)){ #read if the data is not in cache
storm_data <- read.csv("./data/repdata_data_StormData.csv.bz2")
key<-list("repdata_data_StormData.csv.bz2")
saveCache(storm_data, key=key, comment="raw csv data")
}
}
According to National Weather Service Storm Data Documentation only the following event-types are permitted
# Create the allowed_events from the NOAA recognized events and call it, allowed_events_uncleaned.
library(readr)
event_name_file<-read_file("./data/event_names.txt")
#Question: https://stackoverflow.com/questions/10738729/r-strsplit-with-multiple-unordered-split-arguments
# Based on the answer by : https://stackoverflow.com/users/583830/jthetzel
strsplits <- function(x, splits, ...)
{
for (split in splits)
{
x <- unlist(strsplit(x, split, ...))
}
x<-trimws(x) #trim leading and trailing white-spaces
return(x[!x == ""]) # Remove empty values
}
allowed_events_unclean<-strsplits(event_name_file,c("Z ","C ","M "))
The ambiguous events, constitute 1.1% They are not considered in this analysis.
amb<-which(storm_newv3$mapped_events == "")
storm_newv4 <- storm_newv3[-c(amb),]
damage_<-storm_newv4 %>% group_by(NOAA_recognised) %>%
summarize(count=n(), total_prop_dmg = sum(PROPDMG), total_crop_dmg = sum(CROPDMG), total_injuries = sum(INJURIES), total_fatalities=sum(FATALITIES)) %>% arrange(desc(count), .by_group = TRUE)
kable(damage_, "html", caption="Damage Summary by Event") %>%
kable_styling( bootstrap_options = "striped", full_width = F, position = "right") %>%
scroll_box(width = "100%", height = "200px")
| NOAA_recognised | count | total_prop_dmg | total_crop_dmg | total_injuries | total_fatalities |
|---|---|---|---|---|---|
| Thunderstorm Wind | 336723 | 2675800.61 | 199317.18 | 9504 | 728 |
| Hail | 288841 | 689832.78 | 581518.51 | 1371 | 15 |
| Flood | 84253 | 2438468.86 | 364749.53 | 8873 | 1753 |
| Tornado | 60686 | 3214533.51 | 100029.27 | 91364 | 5658 |
| High Wind | 21800 | 381949.76 | 21587.81 | 1487 | 292 |
| Winter Weather | 19928 | 155058.96 | 2498.99 | 1914 | 286 |
| Heavy Snow | 15775 | 124409.99 | 2165.72 | 1034 | 127 |
| Lightning | 15762 | 603396.78 | 3580.61 | 5231 | 817 |
| Heavy Rain | 11813 | 54517.69 | 12038.80 | 276 | 101 |
| Waterspout | 11082 | 12712.90 | 15.00 | 104 | 9 |
| Strong Wind | 3827 | 64667.63 | 1621.90 | 323 | 125 |
| Heat | 2960 | 27045.01 | 12275.97 | 10554 | 3271 |
| Wildfire | 2770 | 85059.34 | 4864.20 | 911 | 76 |
| Blizzard | 2749 | 25483.48 | 172.00 | 819 | 103 |
| Drought | 2505 | 4099.55 | 33898.62 | 4 | 0 |
| Ice Storm | 2025 | 66100.67 | 1688.95 | 1975 | 89 |
| Frost/Freeze | 1527 | 2003.52 | 8107.31 | 39 | 5 |
| Dense Fog | 1296 | 8225.45 | 0.00 | 342 | 18 |
| High Surf | 1083 | 7044.12 | 1.50 | 252 | 167 |
| Rip Current | 777 | 163.00 | 0.00 | 529 | 577 |
| Tropical Storm | 697 | 49932.68 | 6465.12 | 383 | 66 |
| Extreme Cold/Wind Chill | 665 | 7657.54 | 6141.14 | 231 | 164 |
| Lake-Effect Snow | 659 | 14208.00 | 0.00 | 0 | 0 |
| Dust Storm | 480 | 5049.50 | 1601.50 | 440 | 22 |
| Marine Hail | 442 | 4.00 | 0.00 | 0 | 0 |
| Avalanche | 387 | 1623.90 | 0.00 | 170 | 225 |
| Astronomical Low Tide | 174 | 320.00 | 0.00 | 0 | 0 |
| Dust Devil | 149 | 718.63 | 0.00 | 43 | 2 |
| Storm Surge/Tide | 148 | 6777.05 | 850.00 | 5 | 11 |
| Marine High Wind | 135 | 298.01 | 0.00 | 1 | 1 |
| Sleet | 89 | 551.30 | 0.00 | 0 | 2 |
| Tropical Depression | 60 | 738.00 | 0.00 | 0 | 0 |
| Debris Flow | 39 | 1131.10 | 0.00 | 2 | 5 |
| Volcanic Ash | 27 | 500.00 | 0.00 | 0 | 0 |
| Lakeshore Flood | 23 | 47.50 | 0.00 | 0 | 0 |
| Seiche | 21 | 980.00 | 0.00 | 0 | 0 |
| Tsunami | 20 | 905.30 | 20.00 | 129 | 33 |
| Dense Smoke | 10 | 100.00 | 0.00 | 0 | 0 |
library(ggplot2)
library(ggpubr)
g1 <-ggplot(damage_[1:10,], aes(x = NOAA_recognised, y = total_prop_dmg)) + geom_bar(stat = "identity",alpha=0.75) +scale_y_continuous(name ="Prop. Damage", ) + scale_alpha(guide = 'none')+
theme( axis.line = element_line(colour = "darkblue",
size = 1, linetype = "solid"), axis.text.y = element_text(face="bold", color="#993333", size=14, angle=45),
axis.title.y = element_text(size=rel(0.8)),
axis.text.x = element_text(angle=45,color="#993333"), axis.title.x=element_blank(), plot.margin = margin(t = 0, r = 0, b = 0, l = 0, unit = "pt"))
g2 <- ggplot(damage_[1:10,], aes(x = NOAA_recognised, y =total_crop_dmg)) + geom_bar(stat = "identity",alpha=0.75) +scale_y_continuous(name ="Crop Damage" ) + scale_alpha(guide = 'none') +
labs(ylab="")+theme( axis.line = element_line(colour = "darkblue",
size = 1, linetype = "solid"), axis.text.y = element_text(face="bold", color="#993333", size=14, angle=45), axis.text.x = element_text(angle=45,color="#993333"),axis.title.x=element_blank(),plot.margin = margin(t = -30, r = 0, b = 0, l = 0, unit = "pt"))
figure<-ggarrange(g1,g2, heights = c(2, 2),
ncol = 1, nrow = 2, align = "v")
annotate_figure(figure,
left = text_grob("Damage Top 10 most Frequent Events", color = "darkgreen", rot = 90),
fig.lab = "Figure 1", fig.lab.pos = "bottom", fig.lab.face = "bold"
)
library(ggplot2)
library("tidyverse")
df <- damage_[1:10,] %>%
dplyr::select(total_injuries, total_fatalities,NOAA_recognised) %>%
gather(key = "variable", value = "value", -NOAA_recognised)
g3 <- ggplot(df, aes(x = NOAA_recognised, y = value, group=variable)) +
geom_line(aes(color = variable), size = 1) + ylab("Fatalities and Injuries by top 10 most frequent Events") +
scale_colour_discrete(name = "Health Impact",
guide = guide_legend(override.aes = list(size = 1)))+theme_pubr(x.text.angle = 90 )
cap<-"Health Impact"
g3
Health Impact
library(knitr)
library(kableExtra)
library(formattable)
inj<- damage_ %>% select(total_injuries,count,NOAA_recognised) %>%
arrange(desc(total_injuries), .by_group = TRUE)
fat<-damage_ %>% select( total_fatalities,NOAA_recognised) %>%
arrange(desc(total_fatalities), .by_group = TRUE)
inj_fat <- merge(inj,fat, by ="NOAA_recognised" )
max_inj<-max(inj_fat$total_injuries)
max_fatality<-max(inj_fat$total_fatalities)
quarter_<-quantile(inj_fat$count)[[2]]
fift_ <-quantile(inj_fat$count)[[3]]
sevent_<-quantile(inj_fat$count)[[4]]
colfunc <- colorRampPalette(c("red", "orange"))
my_colors<-colfunc(3)
#
inj_fat %>%
mutate(
'NOAA Event' = NOAA_recognised,
'Event Frequency' = cell_spec(count, "html", align = "c",
color = ifelse(count >= sevent_, my_colors[1],
ifelse(count >= fift_, my_colors[2],
ifelse(count >= quarter_, my_colors[3], "#666666")))),
Injuries = cell_spec(total_injuries, "html", color = ifelse(total_injuries == max_inj, "darkred", "blue")),
Fatalaties = cell_spec(total_fatalities, "html", color = ifelse(total_fatalities == max_fatality, "darkred", "blue"))
) %>%
select('NOAA Event','Event Frequency',Injuries,Fatalaties) %>%
kable(escape = FALSE, format = "html", caption="Injuries and Fatalities")%>%
kable_styling(fixed_thead = T, bootstrap_options = "striped", full_width = F, position = "center") %>%
scroll_box(width = "100%", height = "800px")
| NOAA Event | Event Frequency | Injuries | Fatalaties |
|---|---|---|---|
| Astronomical Low Tide | 174 | 0 | 0 |
| Avalanche | 387 | 170 | 225 |
| Blizzard | 2749 | 819 | 103 |
| Debris Flow | 39 | 2 | 5 |
| Dense Fog | 1296 | 342 | 18 |
| Dense Smoke | 10 | 0 | 0 |
| Drought | 2505 | 4 | 0 |
| Dust Devil | 149 | 43 | 2 |
| Dust Storm | 480 | 440 | 22 |
| Extreme Cold/Wind Chill | 665 | 231 | 164 |
| Flood | 84253 | 8873 | 1753 |
| Frost/Freeze | 1527 | 39 | 5 |
| Hail | 288841 | 1371 | 15 |
| Heat | 2960 | 10554 | 3271 |
| Heavy Rain | 11813 | 276 | 101 |
| Heavy Snow | 15775 | 1034 | 127 |
| High Surf | 1083 | 252 | 167 |
| High Wind | 21800 | 1487 | 292 |
| Ice Storm | 2025 | 1975 | 89 |
| Lake-Effect Snow | 659 | 0 | 0 |
| Lakeshore Flood | 23 | 0 | 0 |
| Lightning | 15762 | 5231 | 817 |
| Marine Hail | 442 | 0 | 0 |
| Marine High Wind | 135 | 1 | 1 |
| Rip Current | 777 | 529 | 577 |
| Seiche | 21 | 0 | 0 |
| Sleet | 89 | 0 | 2 |
| Storm Surge/Tide | 148 | 5 | 11 |
| Strong Wind | 3827 | 323 | 125 |
| Thunderstorm Wind | 336723 | 9504 | 728 |
| Tornado | 60686 | 91364 | 5658 |
| Tropical Depression | 60 | 0 | 0 |
| Tropical Storm | 697 | 383 | 66 |
| Tsunami | 20 | 129 | 33 |
| Volcanic Ash | 27 | 0 | 0 |
| Waterspout | 11082 | 104 | 9 |
| Wildfire | 2770 | 911 | 76 |
| Winter Weather | 19928 | 1914 | 286 |
damagev2_<-damage_ %>% group_by(NOAA_recognised) %>% mutate( prop_dmg_norm = total_prop_dmg/count, crop_dmg_norm = total_crop_dmg/count) %>% arrange(desc(count), .by_group = TRUE)
damagev2_prop_norm<- damagev2_ %>% ungroup() %>% arrange(desc(prop_dmg_norm),.by_group = TRUE)
damagev2_crop_norm<- damagev2_ %>% ungroup() %>% arrange(desc(crop_dmg_norm),.by_group = TRUE)
t3 <- kable(damagev2_[,c("NOAA_recognised","count","prop_dmg_norm")], "html",align = 'clc', caption = 'Property Damage Normalized by Count') %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "center") %>%
column_spec(1:3, bold = T) %>%
scroll_box(width = "100%", height = "200px")
t4 <- kable(damagev2_[,c("NOAA_recognised","count","crop_dmg_norm")], "html",align = 'clc', caption ='Crop Damage Normalized by Count') %>%
kable_styling(bootstrap_options = "striped", full_width = F, position = "center") %>%
column_spec(1:2, bold = T) %>%
scroll_box(width = "100%", height = "200px")
cat(c('<table><tr valign="top"><td>', t3, '</td>', '<td>', t4, '</td></tr></table>'),
sep = '')
|
|
library(ggplot2)
library(ggpubr)
g4 <-ggplot(damagev2_prop_norm[1:10,], aes(x = NOAA_recognised, y = prop_dmg_norm, group=1)) + geom_point() + geom_line()+scale_y_continuous(name ="Prop. Damage Normalized" ) + theme( axis.line = element_line(colour = "darkblue",
size = 1, linetype = "solid"), axis.text.y = element_text(face="bold", color="darkred", size=rel(0.5), angle=30), axis.title.y = element_text( size=rel(0.8)), axis.text.x = element_text(angle = 90), axis.title.x=element_blank())
g5 <- ggplot(damagev2_crop_norm[1:10,], aes(x = NOAA_recognised, y =total_crop_dmg, group=1)) + geom_point() + geom_line()+scale_y_continuous(name ="Crop Dmg. Normalized" ) + theme( axis.line = element_line(colour = "darkblue",
size = 1, linetype = "solid"), axis.text.y = element_text(face="bold", color="darkred", size=rel(0.5), angle=30), axis.title.x=element_blank(), axis.text.x = element_text(angle = 90))
figure<-ggarrange(g4,g5, heights = c(2, 2),
ncol = 1, nrow = 2, align = "v")
annotate_figure(figure,
left = text_grob("Damage in $1000", color = "darkgreen", rot = 90),
fig.lab = "Figure 3", fig.lab.pos = "bottom", fig.lab.face = "bold"
)
crops are, Hail and Flood at $13.53K and $9.28K respectively.property are, Storm and Tornado at $71.64K and $52.97K respectively