fig <- function(width, heigth){
options(repr.plot.width = width, repr.plot.height = heigth)
}
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(lubridate)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(skimr)
library(cowplot)
##
## Attaching package: 'cowplot'
##
## The following object is masked from 'package:lubridate':
##
## stamp
options(warn=-1, message=FALSE)
volcano_events <- read_csv("/cloud/project/volcano-events-1300-2021.csv", show_col_type=FALSE)
v_events <- volcano_events %>% clean_names()
# glimpse(v_events)
# skim_without_charts(v_events)
v_events <- v_events %>% mutate(vei = if_else(is.na(vei), 2, vei),
total_deaths = if_else(is.na(total_deaths), 0, total_deaths))
# glimpse(v_events)
fig(12, 7)
n_eruptbyisland <- v_events %>%
group_by(island) %>%
summarize(cnt_mount=n())
ggplot(data=n_eruptbyisland, aes(x=reorder(island, -cnt_mount), y=cnt_mount, fill=island)) +
geom_bar(stat="identity", width=0.9, color="white") +
geom_text(aes(label=cnt_mount), position=position_stack(vjust=0.5), size=5) +
labs(title="Number of Eruptions by Island") +
xlab("Island") +
ylab("Number of Mountains") +
theme(legend.position = "none", text = element_text(size=18))

fig(12, 7)
n_eruptbymountain <- v_events %>%
group_by(name) %>%
summarise(cnt_bymount=n()) %>%
arrange(desc(cnt_bymount))
top10_eruptbymountain <- n_eruptbymountain %>% top_n(10) %>% arrange(desc(cnt_bymount))
## Selecting by cnt_bymount
ggplot(data=top10_eruptbymountain, aes(x=reorder(name, -cnt_bymount), y=cnt_bymount, fill=name)) +
geom_bar(stat="identity", width=0.9, color="white") +
scale_x_discrete(guide = guide_axis(angle=45)) +
geom_text(aes(label=cnt_bymount), position=position_stack(vjust=0.5), size=5) +
labs(title="Number of Eruption by Mountain") +
xlab("Mountains Name") +
ylab("Eruptions") +
theme(legend.position = "none", text = element_text(size=18))

vei_eruptions <- v_events %>% group_by(vei) %>% summarise(cnt_vei=n())
ggplot(data=vei_eruptions, aes(x=reorder(vei, -cnt_vei), y=cnt_vei, fill=vei)) +
geom_bar(stat="identity", width=0.8, color="white", position = position_dodge(width = 0.1)) +
geom_text(aes(label=cnt_vei), position=position_stack(vjust=0.5), size=5) +
labs(title="VEI Score") +
xlab("VEI Score") +
ylab("Eruptions") +
theme(legend.position = "none", text = element_text(size=18))

n_eruptbytype <- v_events %>%
group_by(type) %>%
summarise(cnt_bytype=n())
ggplot(data=n_eruptbytype, aes(x=reorder(type, -cnt_bytype), y=cnt_bytype, fill=type)) +
geom_bar(stat="identity", width=0.8, color="white", position = position_dodge(width = 0.1)) +
geom_text(aes(label=cnt_bytype), position=position_stack(vjust=0.5), size=5) +
scale_x_discrete(guide = guide_axis(angle=45)) +
labs(title="Mountain Type") +
xlab("Mountains Type") +
ylab("Number of mountains") +
theme(legend.position = "none", text = element_text(size=18))

fig(12,7)
e_mounts <- v_events %>% select(name, elevation_m) %>% distinct()
ggplot(data=e_mounts, aes(x=reorder(name, -elevation_m), y=elevation_m, fill=name)) +
geom_bar(stat="identity", width=0.8, color="white", position = position_dodge(width = 0.1)) +
scale_x_discrete(guide = guide_axis(angle=45)) +
# geom_text(aes(label=elevation_m), position=position_stack(vjust=0.5), size=5) +
labs(title="Mountain Elevation") +
xlab("Mountains Name") +
ylab("Elevation") +
theme(legend.position = "none", text = element_text(size=15))

n_deathspereruptions <- v_events %>% select(name, erupt_date, total_deaths) %>% arrange(desc(total_deaths)) %>% top_n(10)
## Selecting by total_deaths
ggplot(data=n_deathspereruptions, aes(x=reorder(name, -total_deaths), y=total_deaths, fill=name)) +
geom_bar(stat="identity", width=0.9, color="white") +
scale_x_discrete(guide = guide_axis(angle=45)) +
geom_text(aes(label=total_deaths), position=position_stack(vjust=0.5), size=5) +
labs(title="Total Deaths by Mountain") +
xlab("Mountains Name") +
ylab("Deaths") +
theme(legend.position = "none", text = element_text(size=15))

n_mounts <- v_events %>% select(year) %>% filter(year>1900)
ggplot(n_mounts, aes(year)) +
geom_histogram(aes(x=year, y=..density..), bins = 50, fill="#d3d3d3", color="black") +
stat_function(fun=dnorm, args = list(mean=mean(n_mounts$year), sd=sd(n_mounts$year)), color="red") +
geom_density(color="blue") +
ggtitle("Histogram of Eruptions") +
xlab("Year") +
ylab("Density") +
theme_bw()

e_mounts <- v_events %>% select(elevation_m)
ggplot(e_mounts, aes(elevation_m)) +
geom_histogram(aes(x=elevation_m, y=..density..), bins = 50, fill="#d3d3d3", color="black") +
stat_function(fun=dnorm, args = list(mean=mean(e_mounts$elevation_m), sd=sd(e_mounts$elevation_m)), color="red") +
geom_density(color="blue") +
ggtitle("Histogram of Eruptions") +
xlab("Elevation (m)") +
ylab("Density") +
theme_bw()

y <- quantile(n_mounts$year, c(0.25, 0.75)) # Find the 1st and 3rd quantiles
x <- qnorm( c(0.25, 0.75)) # Find the matching normal values on the x-axis
slope <- diff(y) / diff(x) # Compute the line slope
int <- y[1] - slope * x[1] # Compute the line intercept
ggplot(n_mounts, aes(sample = year)) +
stat_qq() +
geom_abline(intercept = int, slope = slope, color = "blue", size = 2, alpha = 0.5 )

y <- quantile(e_mounts$elevation_m, c(0.25, 0.75)) # Find the 1st and 3rd quantiles
x <- qnorm( c(0.25, 0.75)) # Find the matching normal values on the x-axis
slope <- diff(y) / diff(x) # Compute the line slope
int <- y[1] - slope * x[1] # Compute the line intercept
ggplot(e_mounts, aes(sample = elevation_m)) +
stat_qq() +
geom_abline(intercept = int, slope = slope, color = "blue", size = 2, alpha = 0.5 )

p <- ggplot(v_events, aes(island, y=elevation_m))
p + geom_boxplot(fill = "white", colour = "#3366FF")

# Load libraries to make the chart look nicer
library(colorspace) # for darken()
library(ggrepel) # for geom_text_repel()
fig(12, 7)
island_cols <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#999999", "#889EAF", "#D4B499")
e_events <- v_events
ggplot(data=e_events, aes(x=year, y=elevation_m)) +
geom_point(aes(color = island, fill = island, size = total_deaths), alpha = 0.5, shape = 21) +
geom_smooth(
aes(color = "y ~ log(x)", fill = "y ~ log(x)"),
method = 'loess', formula = y~log(x), se = FALSE, fullrange = TRUE) +
scale_size_continuous(range = c(3, 10)) +
scale_color_manual(values = darken(island_cols, 0.5)) +
scale_fill_manual(values = island_cols) +
labs(title="Total Eruptions in the Past 30 years")

fig(18, 12)
e_events_1970 <- v_events %>% filter(year>1970 & elevation_m>0)
# e_events_top10 <- e_events_1970 %>% filter(vei>2)
ggplot(data=e_events_1970, aes(x=year, y=elevation_m)) +
geom_point(aes(color = island, fill = island, size = vei), alpha = 0.5, shape = 21) +
geom_smooth(aes(color = "y ~ log(x)", fill = "y ~ log(x)", alpha=.1), size=3,
method = 'loess', formula = y~log(x), se = FALSE, fullrange = TRUE) +
geom_text_repel(
aes(label = name),
color = "black",
size = 11/.pt, # font size 9 pt
point.padding = 0.1,
box.padding = .6,
min.segment.length = 0,
max.overlaps = 1000,
seed = 7654
) +
scale_size_continuous(range = c(3, 15)) +
scale_color_manual(values = darken(island_cols, 0.5)) +
scale_fill_manual(values = island_cols) +
scale_x_continuous(
name = "Eruption Events Year 1970-2021",
) +
scale_y_continuous(
name = "Elevation (m)",
limits = c(10, 4100),
breaks = c(1, 900, 1800, 2700, 3600, 3900),
expand = c(0, 0)
) +
labs(title="Total Eruptions in the Past 30 years") +
theme_minimal_hgrid(12, rel_small = 1) +
theme(
legend.position = "bottom",
legend.justification = "right",
legend.text = element_text(size = 9),
legend.box.spacing = unit(0, "pt")
)

fig(12, 7)
island_cols <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#999999", "#889EAF", "#D4B499")
ve_data <- v_events %>% filter(elevation_m>0) %>% select(island, type, name, vei, elevation_m, total_deaths)
ggplot(ve_data, aes(vei, elevation_m)) +
geom_smooth(aes(color = "y ~ log(x)", fill = "y ~ log(x)"),
method = 'lm', formula = y~log(x), se = FALSE, fullrange = TRUE) +
geom_point(aes(color = island, fill = island, size = total_deaths), alpha = 0.5, shape = 21) +
labs(title="Elevation (m) vs VEI") +
xlab("VEI") +
ylab("Elevation (m)") +
scale_color_manual(values = darken(island_cols, 0.3)) +
scale_fill_manual(values = island_cols) +
scale_size_continuous(range = c(3, 21)) +
guides(size=guide_legend("Total Deaths")) +
theme_minimal_hgrid(12, rel_small = 1) +
theme(
legend.position = "right",
legend.justification = "right",
legend.text = element_text(size = 9),
legend.box.spacing = unit(3, "pt")
)

fig(12, 7)
island_cols <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#999999", "#889EAF", "#D4B499")
ve_data <- v_events %>% filter(elevation_m>0 & total_deaths<10000) %>% select(island, type, name, vei, elevation_m, total_deaths)
ggplot(ve_data, aes(total_deaths, elevation_m)) +
geom_smooth(aes(color = "y ~ log(x)", fill = "y ~ log(x)"),
method = 'lm', formula = y~log(x), se = FALSE, fullrange = TRUE) +
geom_point(aes(color = island, fill = island, size = total_deaths), alpha = 0.5, shape = 21) +
labs(title="Elevation (m) vs Total Deaths") +
xlab("Total Deaths") +
ylab("Elevation (m)") +
scale_color_manual(values = darken(island_cols, 0.3)) +
scale_fill_manual(values = island_cols) +
scale_size_continuous(range = c(3, 21)) +
guides(size=guide_legend("Total Deaths")) +
guides(colour=guide_legend("Island")) +
theme_minimal_hgrid(12, rel_small = 1)

require("maps")
## Loading required package: maps
##
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
##
## map
fig(20, 9)
island_cols <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#999999", "#889EAF", "#D4B499")
type_cols <- c("#6F69AC", "#95DAC1", "#FFEBA1", "#FD6F96")
mydata <- e_events %>%
dplyr::group_by(island, type, name, longitude, latitude) %>%
dplyr::summarise(sum_erpt = n(), m_vei = mean(vei), m_elev = max(elevation_m), m_deaths = max(total_deaths), .groups="drop") %>%
select(island, type, name, m_vei, m_elev, longitude, latitude, m_deaths)
global <- map_data("world") # World longitude and latitude data
ggplot() +
geom_polygon(data = global, aes(x=long, y = lat, group = group),
fill = "gray85", color = "gray80") +
coord_fixed(1.3) +
xlim(94,142) + ylim(-11,7.5) +
geom_point(data = mydata, aes(x = longitude, y = latitude, color = type, fill = type, size = m_deaths),
alpha = 0.8, show.legend = F, shape=24) +
geom_text_repel(data = mydata,
aes(x = longitude, y = latitude, label = name),
color = "grey30",
size = 11/.pt, # font size 9 pt
point.padding = 0.1,
box.padding = .6,
min.segment.length = 0,
max.overlaps = 1000,
seed = 7654
) +
ggtitle ("Volcano Eruption") +
scale_color_manual(values = darken(type_cols, 0.7)) +
scale_fill_manual(values = type_cols) +
scale_size_continuous(range = c(3, 9)) +
guides(size=guide_legend("Total Deaths")) +
guides(color=guide_legend("Type")) +
theme_minimal_hgrid(12, rel_small = 1) +
theme(
legend.position = "bottom",
legend.justification = "right",
legend.text = element_text(size = 9),
legend.box.spacing = unit(3, "pt"))
