Course Project 2:
Explore NOAA storm database Data = zip file Documentation of database provided by weblink NOAA Storm Events FAQ provided by weblink
Years in “storm” database = 1950-2011 15 items in critique list for reviewers of project
Answer 1 of 2 questions 1. Across US, which types of events(EVTYPE) are most harmful w/respect to population health? 1ANSWER: Torandoes are by far the most harmful to population health. Two types of illustrations will be presented with and without the Tornado event type.
suppressWarnings({
#var for type is "EVTYPE"
#vars for determining most harmful to population health = c("FATALITIES", "INJURIES")
#var naming not given for this csv file in FAQ or documentation pdf files or NOAA web
#What is WFO?!! Weather Forecast Offices
#you find out here -> "https://www.weather.gov/aviation/wfo"
#might be useful to have a bunch of graphs of EVTYPE vs. c(fatalities AND INJURIES), refer to book on how to do this
#narrow it down from there
#load packages
library(lattice) #xyplot()
library(dplyr) # %>% dataframe manipulation
library(ggplot2) #qplot()
library(viridis) #color pallette for qplot
#try1, WAY TOO MANY PLOTS!!!
#library(ggplot2)
#p <- ggplot(data = storm) +
# geom_histogram(aes(x = FATALITIES), bins = 5, colour = "black", fill = "white") +
# facet_wrap(~ EVTYPE)
#print(p)
#get storm types and sum of fatalities for each storm type, filter evtype with over 50 fatalities = 30 evtypes, too much
#change to over 200 fatalities gives 12 evtypes, while over 50 fatalities gives 30 evtypes(too many evtypes)
storm2 <- na.omit(storm %>% group_by(EVTYPE) %>% summarise(totalfatal = sum(FATALITIES))) %>% filter(totalfatal >= 200)
#USE THIS ONE
#make new df using fatalities, injuries, with evtype as the condition for multiplot panels for top 30 event types
#do a lattice plot
#new dataframe...
storm3 <- na.omit(storm %>% group_by(EVTYPE) %>% summarise(totalfatal = sum(FATALITIES), totalinj = sum(INJURIES)) %>% filter(totalfatal >= 200)) #filter evtype down to 9
storm4 <- na.omit(storm %>% group_by(EVTYPE) %>% summarise(totalfatal = sum(FATALITIES), totalinj = sum(INJURIES)) %>% filter(totalfatal >= 200 & totalfatal <= 2000)) #get rid of tornadoes from the 9 types
#initial plots
#make histogram...not really, more than 1 variable dot plot
#qplot(EVTYPE, totalfatal, data = storm2, color = "red") + theme(axis.text = element_text(angle = 90, vjust = 0.5, #hjust = 1)) + ylim(0, 6000)
#boxplots, not real great
#boxplot top 30 evtypes
#ggplot(storm2, aes(x=EVTYPE, y=totalfatal)) +
# geom_boxplot() + coord_flip()
#qplot same as above boxplot
#qplot(data = storm2, EVTYPE, totalfatal, geom = "boxplot") + coord_flip()
#mosaic plot not useful
#mosaicplot(storm3, main='events', col='steelblue')
#xyplot strips, play around with colors and fonts
#two types, from Josh O'Brien here in 2015
#https://stackoverflow.com/questions/8536239/change-background-and-text-of-strips-associated-to-multiple-panels-in-r-lattic
bgColors <- c("black", "green4", "blue", "red", "purple", "yellow")
txtColors <- c("white", "yellow", "white", "white", "green", "red")
myStripStyle <- function(which.panel, factor.levels, ...) {
panel.rect(0, 0, 1, 1,
col = bgColors[which.panel],
border = 1)
panel.text(x = 0.5, y = 0.5,
font=2,
lab = factor.levels[which.panel],
col = txtColors[which.panel])
}
myStripStyle2 <- function(which.panel, factor.levels, ...) { #works
panel.rect(0, 0, 1, 1,
col = "black",
border = 1)
panel.text(x = 0.5, y = 0.5,
font=3,
lab = factor.levels[which.panel],
col = "white")
}
#########################
#MAKE PLOTS IN VARIABLE
#########################
#revised plots with above strip stylized
p1 <- xyplot(totalinj ~ totalfatal | EVTYPE, data = storm3, pch=17, col = "red", strip = myStripStyle) #works
p2 <- xyplot(totalinj ~ totalfatal | EVTYPE, data = storm4, pch=17, col = "red", strip = myStripStyle2)
#detailed multiplots
#WITH TORNADO EVENT IN TOP 12 EVTYPES(storm3)
#xyplot(injuries ~ fatalities | evtype, data = storm3) #leave layout blank, add later 'layout = c(1,4)' to xyplot() function
#p1 <- xyplot(totalinj ~ totalfatal | EVTYPE, data = storm3, col = "red")
#explicit scale
#xyplot(totalinj ~ totalfatal | EVTYPE, data = storm3, col = "red", ylim = c(-10000,100000), xlim = c(-10000, 8000))
#WITHOUT TORNADO EVENT IN TOP 12 EVTYPES(storm4)
#NOTE: TORNADO BLOWS OUT SCALE SO THIS ALLOWS TO LOOK AT FINER DETAIL FOR 29 OTHERS
#p2 <- xyplot(totalinj ~ totalfatal | EVTYPE, data = storm4, col = "red")
#explicit scale
#xyplot(totalinj ~ totalfatal | EVTYPE, data = storm4, col = "red", ylim = c(-10000,8000), xlim = c(-10000, 8000))
#USE THIS ONE, PERHAPS NARROW DOWN TO 10 EVTYPES
#condense above multiplots to single plot
#qplot(totalinj, totalfatal, data = storm4, color = EVTYPE) #w/o Tornadoes evtype
#qplot(totalinj, totalfatal, data = storm3, color = EVTYPE) #w/ Tornadoes evtype
#control color w/viridis, w/o Tornadoes evtype
#qplot(totalinj, totalfatal, data = storm4, color = EVTYPE) + scale_color_viridis(discrete = TRUE, option = "D") #inferno col.
#Final plots?
#control color with Color Brewer 'Paired' palette w/o Tornadoes evtype, colors better to differentiate
p3 <- qplot(totalinj, totalfatal, data = storm4, color = EVTYPE, size=I(4), xlab = "TOTAL INJURED", ylab = "TOTAL FATALITIES") + scale_color_brewer(palette = "Paired") + labs(color='EVENT') + ggtitle("Without TORNADO") #paired has 12 colors
#control color with Color Brewer 'Paired' palette w/ Tornadoes evtype, colors better to differentiate
p4 <- qplot(totalinj, totalfatal, data = storm3, color = EVTYPE, size=I(4), xlab = "TOTAL INJURED", ylab = "TOTAL FATALITIES") + scale_color_brewer(palette = "Paired") + labs(color='EVENT') + ggtitle("With TORNADO") #paired has 12 colors
#first 2 plots, simplest
#library(gridExtra)
#grid.arrange(p1, p2, ncol = 2)
#all plots together or modify 'plist' to whatever you want
#plist <- list(p1, p2, p3, p4)
#plist2 <- list(p1, p2)
#n <- length(plist)
#nCol <- floor(sqrt(n))
#do.call("grid.arrange", c(plist, ncol=nCol)) #all four plots
#do.call("grid.arrange", c(plist2, ncol=nCol)) #first 2 plots
#library(egg)
#ggarrange(p3, p4, ncol = 2, nrow = 1)
#PLOT TO FILE
#p1
#p2
#best, to show p3 + p4 with same color representation(minus TSTM Wind) except addition of toranado is different
#does not work with p1 + p2 in lattice system
library(patchwork)
#PDF
#section of code below to print to pdf only works in Console, 'Latex' error problem
#pdf("NOAAplot1.pdf")
p3 + p4 + plot_layout(ncol = 2, nrow = 1, guides = "auto")
#dev.off()
#end pdf file generation
#########
})
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: viridisLite
Answer 2 of 2 questions 2. Across US, which events have the greatest economic consequences?
-Report should be written as if to be read by government manager(needs to know priority) who wants brief detail to make decisions quickly -Document layout requirements -Must be written in Rstudio and published on Rpubs website w/HTML document from knit markdown file -Submission is a weblink OR PDF(if necessary)
#var for type is "EVTYPE"
#vars for determining greatest economic consequences crop & property damages = c("PROPDMG", "CROPDMG")
#not sure if we need to re-create 'storm' data frame'
#try a new 'storm5', get event type, property damage and crop damage only into this dataframe
storm5 <- na.omit(storm[,c("EVTYPE","PROPDMG","CROPDMG")])
#summarize event by property damage
storm6 <- na.omit(storm5 %>% group_by(EVTYPE) %>% summarise(totalprop = sum(PROPDMG))) %>% filter(totalprop >= 19000) #get rid of lower damages with filter
#summarize event by crop damage
storm7 <- na.omit(storm5 %>% group_by(EVTYPE) %>% summarise(totalcrop = sum(CROPDMG))) %>% filter(totalcrop >= 2500) #get rid of lower damages with filter
#try a plot based on the previous ones, simple plot, too many event types for crop damage
p5 <- qplot(EVTYPE, totalcrop, data = storm7, color = EVTYPE) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), legend.position = "none")
p6 <- qplot(EVTYPE, totalprop, data = storm6, color = EVTYPE) + theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1), legend.position = "none")
library(patchwork)
#p5 + p6 + plot_layout(ncol = 1, nrow = 2, guides = "auto")
#try this alternative plot for total crop damage by event type
library(ggpubr)
library(Polychrome)
#create color palette
#set.seed(723451) # for reproducibility
#mypal <- createPalette(30, c("#010101", "#ff0000"), M=1000)
#mypal
#keep getting erorr
#Error in `ggplot2::scale_color_manual()`:
#! Continuous values supplied to discrete scale.
#ℹ Example values: 2793.8, 3361, 3490, 3580.61, and 4189.54
#Run `rlang::last_trace()` to see where the error occurred.
#make totalcrop values discrete
storm7$totalcrop <- round(storm7$totalcrop)
#still this does not help get rid of error
#still not understanding error
#getting colors using paletteer_d() function, custom size
#mypal4 <- as.list(paletteer_d("ggsci::default_igv", 23)) #as list creates problems
#mypal5 <- unname(unlist(mypal4)) #unlists to just character
#mypal5[1] gives just the 1st character
#put commas between and put into the function below
#error solution to get colors I desire on plot
#get rid of 'palette' and 'group' parameters and adding color list to 'color
#parameter gets rid of error to get the colors desired on plot
#adjust size of bubble with 'dot.size' parameter to see whole number
#add custom labels, for some reason they are flipped
#PDF
#section of code below to print to pdf only works in Console, 'Latex' error problem
#pdf("NOAAplot2.pdf")
pdc1 <- ggdotchart(storm7, x = "EVTYPE", y = "totalcrop",
color = c("#5050FFFF", "#CE3D32FF", "#749B58FF", "#F0E685FF", "#466983FF", "#BA6338FF", "#5DB1DDFF", "#802268FF", "#6BD76BFF", "#D595A7FF", "#924822FF", "#837B8DFF", "#C75127FF", "#D58F5CFF", "#7A65A5FF", "#E4AF69FF", "#3B1B53FF", "#CDDEB7FF", "#612A79FF", "#AE1F63FF", "#E7C76FFF" ,"#5A655EFF", "#CC9900FF"), # Custom color palette, # Color by groups
#palette = c("#5050FFFF", "#CE3D32FF", "#749B58FF", "#F0E685FF", "#466983FF", "#BA6338FF", "#5DB1DDFF", "#802268FF", "#6BD76BFF", "#D595A7FF", "#924822FF", "#837B8DFF", "#C75127FF", "#D58F5CFF", "#7A65A5FF", "#E4AF69FF", "#3B1B53FF", "#CDDEB7FF", "#612A79FF", "#AE1F63FF", "#E7C76FFF" ,"#5A655EFF", "#CC9900FF"), # Custom color palette
sorting = "descending", # Sort value in descending order
add = "segments", # Add segments from y = 0 to dots
rotate = TRUE, # Rotate vertically
#group = "totalcrop", # Order by groups
dot.size = 14, # Large dot size
label = round(storm7$totalcrop), # Add mpg values as dot labels
font.label = list(color = "white", size = 9,
vjust = 0.5), # Adjust label parameters
ggtheme = theme_pubr(), # ggplot2 theme
xlab = "EVENT TYPE",
ylab = "TOTAL CROP DAMAGES"
) + scale_y_continuous(labels = scales::comma)
#get palette, custom size
#mypal6 <- as.list(paletteer_d("ggsci::default_igv", 30))
#mypal7 <- unname(unlist(mypal6))
#copy and paste mypal7 and add commas
#mypal8 <- c("#5050FFFF" ,"#CE3D32FF", "#749B58FF", "#F0E685FF", "#466983FF", "#BA6338FF", "#5DB1DDFF", "#802268FF", "#6BD76BFF" ,"#D595A7FF", "#924822FF", "#837B8DFF", "#C75127FF", "#D58F5CFF", "#7A65A5FF", "#E4AF69FF" ,"#3B1B53FF" ,"#CDDEB7FF" ,"#612A79FF" ,"#AE1F63FF" ,"#E7C76FFF" ,"#5A655EFF" ,"#CC9900FF", "#99CC00FF" ,"#A9A9A9FF" ,"#CC9900FF" ,"#99CC00FF" ,"#33CC00FF" ,"#00CC33FF" ,"#00CC99FF")
pdc2 <- ggdotchart(storm6, x = "EVTYPE", y = "totalprop",
color = c("#5050FFFF" ,"#CE3D32FF", "#749B58FF", "#F0E685FF", "#466983FF", "#BA6338FF", "#5DB1DDFF", "#802268FF", "#6BD76BFF" ,"#D595A7FF", "#924822FF", "#837B8DFF", "#C75127FF", "#D58F5CFF", "#7A65A5FF", "#E4AF69FF" ,"#3B1B53FF" ,"#CDDEB7FF" ,"#612A79FF" ,"#AE1F63FF" ,"#E7C76FFF" ,"#5A655EFF" ,"#CC9900FF"),#, "#99CC00FF" ,"#A9A9A9FF" ,"#CC9900FF" ,"#99CC00FF" ,"#33CC00FF" ,"#00CC33FF" ,"#00CC99FF"), # Color by groups
#palette = mypal, # Custom color palette
sorting = "descending", # Sort value in descending order
add = "segments", # Add segments from y = 0 to dots
rotate = TRUE, # Rotate vertically
#group = "cyl", # Order by groups
dot.size = 12, # Large dot size
label = round(storm6$totalprop), # Add mpg values as dot labels
font.label = list(color = "white", size = 9,
vjust = 0.5), # Adjust label parameters
ggtheme = theme_pubr(), # ggplot2 theme
xlab = "EVENT TYPE",
ylab = "TOTAL PROPERTY DAMAGES"
) + scale_y_continuous(labels = scales::comma)
pdc1 + pdc2 + plot_layout(ncol = 1, nrow = 2, guides = "auto")
#dev.off()
#end of pdf generation