# loading
library(data.table); library(dplyr); library(tidyr)
library(ggplot2); library(gridExtra); library(plotly)
# Fetch/unpack the raw file if necessary, then read it into a data.table.
loader()
storm <- fread("./data/1119_DS-RR-w4_Storm/storm.csv")

# duplicates1: drop rows that repeat an earlier row in every column
# except REFNUM and REMARKS.
duo <- storm %>%
  select(-c(REFNUM, REMARKS)) %>%
  duplicated()
storm <- storm[!duo, ]

# subsetting: convert the event start date to IDate first, so that
# year() below works on the parsed value.
storm <- storm[, BGN_DATE := as.IDate(BGN_DATE, "%m/%d/%Y %H:%M:%S")]

# Keep/rename the analysis columns and express both damage figures through
# mult(), in the final column order (the raw amount/exponent columns are
# not carried over).
storm <- storm[, .(
  REFNUM,
  YEAR = year(BGN_DATE),
  BGN_DATE,
  EVTYPE,
  STATE,
  C_ZONE = COUNTY,
  DEATHS = FATALITIES,
  INJURIES,
  `PROP ($, mln)` = mult(PROPDMG, PROPDMGEXP),
  `CROP ($, mln)` = mult(CROPDMG, CROPDMGEXP),
  REMARKS
)]

# duplicates2: repeat the duplicate check on the reduced column set.
duo <- storm %>%
  select(-c(REFNUM, REMARKS)) %>%
  duplicated()
storm <- storm[!duo, ]
To Chapter 4.1.1: Interactive scatter plots “DEATHS and INJURIES”
# Events with at least one death: date and death count.
DEATHS <- storm[DEATHS > 0, .(BGN_DATE, DEATHS)]

# Build the static scatter plot and convert it to a plotly widget.
gDEATHS <- ggplotly(
  ggplot(DEATHS, aes(x = BGN_DATE, y = round(DEATHS, 0))) +
    geom_point(shape = 16, size = 2.3, colour = "firebrick3") +
    labs(
      x = "",
      y = "deaths, count",
      title = "DEATHS by DATE \n(interactive)"
    ) +
    theme(
      plot.title = element_text(size = rel(0.9)),
      axis.line = element_line(size = 3, colour = "grey80")
    )
)

# Events with at least one injury: date and injury count.
INJURIES <- storm[INJURIES > 0, .(BGN_DATE, INJURIES)]

gINJUR <- ggplotly(
  ggplot(INJURIES, aes(x = BGN_DATE, y = round(INJURIES, 0))) +
    geom_point(shape = 16, size = 2.3, colour = "springgreen3") +
    labs(
      x = "",
      y = "injuries, count",
      title = "INJURIES by DATE \n(interactive)"
    ) +
    theme(
      plot.title = element_text(size = rel(0.9)),
      axis.line = element_line(size = 3, colour = "grey80")
    )
)

# Only the deaths widget is displayed here; gINJUR is left in the workspace.
gDEATHS
To Chapter 4.1.3: Interactive scatter plot “DAMAGE to PROPERTY, corrected”
# entries correction: remove the two records identified by REFNUM
storm <- storm[!(REFNUM %in% c(605951, 605943)), ]

# Events with non-zero property / crop damage: date and damage (in $ mln).
PROP <- storm[`PROP ($, mln)` > 0, .(BGN_DATE, `PROP ($, mln)`)]
CROP <- storm[`CROP ($, mln)` > 0, .(BGN_DATE, `CROP ($, mln)`)]

# Static scatter plot of property damage over time.
gPROP <- ggplot(PROP, aes(x = BGN_DATE, y = `PROP ($, mln)`)) +
  geom_point(shape = 16, size = 2.3, colour = "mediumorchid3") +
  labs(
    x = "",
    y = "damage ($, mln)",
    title = "DAMAGE to PROPERTY by DATE, corrected \n(interactive)"
  ) +
  theme(
    plot.title = element_text(size = rel(0.9)),
    axis.line = element_line(size = 3, colour = "grey80")
  )

# Display the interactive version; gPROP itself stays a ggplot object.
ggplotly(gPROP)
Appendix
loader1
# Ensure the storm data CSV exists under "./data/1119_DS-RR-w4_Storm/".
#
# Downloads the compressed archive if it is not on disk yet and decompresses
# it (keeping the archive) if the CSV is not on disk yet. Attaches R.utils
# and data.table as a side effect.
#
# Returns (invisibly) the path of the CSV file.
loader <- function() {
  library("R.utils"); library("data.table")

  # url:  location of the compressed CSV
  # year: optional suffix inserted into the file name ("storm<year>.csv")
  # zip:  archive extension; "gz" uses gunzip(), anything else bunzip2()
  myload <- function(url, year = "", zip = "gz") {
    storm <- paste0("./data/1119_DS-RR-w4_Storm/storm", year, ".csv")
    dest <- paste0(storm, ".", zip)

    if (!file.exists(dest)) {
      # download.file() does not create missing directories.
      dir.create(dirname(dest), recursive = TRUE, showWarnings = FALSE)
      status <- download.file(url, destfile = dest, method = "curl")
      if (status != 0) {
        # Remove a partial archive so the next run retries the download
        # instead of trying to decompress a truncated file.
        unlink(dest)
        stop("Download failed (status ", status, "): ", url, call. = FALSE)
      }
    }

    if (!file.exists(storm)) {
      # Plain if/else: exactly one decompressor must run, for its side effect.
      if (zip == "gz") {
        gunzip(dest, remove = FALSE)
      } else {
        bunzip2(dest, remove = FALSE)
      }
    }

    invisible(storm)
  }

  myload(url = "https://d396qusza40orc.cloudfront.net/repdata%2Fdata%2FStormData.csv.bz2",
         zip = "bz2")
}
mult
# Convert a damage amount `d` with its exponent code `e` into millions,
# rounded to two decimals.
#
# Codes are matched case-insensitively:
#   "" or "-"  -> amount used as is
#   "?" or "+" -> amount * 10
#   "H"/"K"/"M"/"B" -> amount * 10^2 / 10^3 / 10^6 / 10^9
#   NA         -> NA
#   any other code -> the code is pasted onto the amount and the resulting
#                     text is parsed as a number (NA if it is not numeric)
mult <- function(d, e) {
  unit <- toupper(e)
  amount <- rep(d, length.out = length(unit))

  # Power of ten for each recognised code; NA where the code is not listed.
  known <- c("?", "+", "H", "K", "M", "B")
  power <- c(1, 1, 2, 3, 6, 9)[match(unit, known)]
  value <- as.numeric(amount) * 10^power

  # Empty / "-" codes: take the amount unchanged.
  plain <- unit %in% c("", "-")
  value[plain] <- as.numeric(amount[plain])

  # Remaining non-missing codes: glue code to amount and parse the result.
  glued <- !plain & !is.na(unit) & is.na(power)
  value[glued] <- as.numeric(paste0(amount, unit)[glued])

  round(value / 10^6, 2)
}
sessionInfo (for reproducibility)
R version 4.0.3 (2020-10-10)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Big Sur 10.16
Matrix products: default
BLAS: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRblas.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] R.utils_2.10.1 R.oo_1.24.0 R.methodsS3_1.8.1 plotly_4.9.2.1
[5] gridExtra_2.3 ggplot2_3.3.2 tidyr_1.1.2 dplyr_1.0.2
[9] data.table_1.13.4
loaded via a namespace (and not attached):
[1] pillar_1.4.7 compiler_4.0.3 tools_4.0.3 digest_0.6.27
[5] jsonlite_1.7.2 evaluate_0.14 lifecycle_0.2.0 tibble_3.0.4
[9] gtable_0.3.0 viridisLite_0.3.0 pkgconfig_2.0.3 rlang_0.4.9
[13] crosstalk_1.1.0.1 yaml_2.2.1 xfun_0.19 withr_2.3.0
[17] stringr_1.4.0 httr_1.4.2 knitr_1.30 generics_0.1.0
[21] vctrs_0.3.5 htmlwidgets_1.5.2 grid_4.0.3 tidyselect_1.1.0
[25] glue_1.4.2 R6_2.5.0 rmarkdown_2.5 purrr_0.3.4
[29] magrittr_2.0.1 scales_1.1.1 ellipsis_0.3.1 htmltools_0.5.0
[33] colorspace_2.0-0 labeling_0.4.2 stringi_1.5.3 lazyeval_0.2.2
[37] munsell_0.5.0 crayon_1.3.4