fig <- function(width, heigth){
     options(repr.plot.width = width, repr.plot.height = heigth)
}
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(lubridate)
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(skimr)
library(cowplot)
## 
## Attaching package: 'cowplot'
## 
## The following object is masked from 'package:lubridate':
## 
##     stamp
options(warn=-1, message=FALSE)
volcano_events <- read_csv("/cloud/project/volcano-events-1300-2021.csv", show_col_type=FALSE)
v_events <- volcano_events %>% clean_names() 
# glimpse(v_events)
# skim_without_charts(v_events)
v_events <- v_events %>% mutate(vei = if_else(is.na(vei), 2, vei),
                               total_deaths = if_else(is.na(total_deaths), 0, total_deaths))
# glimpse(v_events)
fig(12, 7)
n_eruptbyisland <- v_events %>% 
  group_by(island) %>% 
  summarize(cnt_mount=n())

ggplot(data=n_eruptbyisland, aes(x=reorder(island, -cnt_mount), y=cnt_mount, fill=island)) +
  geom_bar(stat="identity", width=0.9, color="white") +
  geom_text(aes(label=cnt_mount), position=position_stack(vjust=0.5), size=5) +
  labs(title="Number of Eruptions by Island") +
  xlab("Island") +
  ylab("Number of Mountains") +
  theme(legend.position = "none", text = element_text(size=18))

fig(12, 7)
n_eruptbymountain <- v_events %>% 
  group_by(name) %>% 
  summarise(cnt_bymount=n()) %>% 
  arrange(desc(cnt_bymount))

top10_eruptbymountain <- n_eruptbymountain %>% top_n(10) %>% arrange(desc(cnt_bymount))
## Selecting by cnt_bymount
ggplot(data=top10_eruptbymountain, aes(x=reorder(name, -cnt_bymount), y=cnt_bymount, fill=name)) +
  geom_bar(stat="identity", width=0.9, color="white") +
  scale_x_discrete(guide = guide_axis(angle=45)) +
  geom_text(aes(label=cnt_bymount), position=position_stack(vjust=0.5), size=5) +
  labs(title="Number of Eruption by Mountain") +
  xlab("Mountains Name") +
  ylab("Eruptions") +
  theme(legend.position = "none", text = element_text(size=18))

vei_eruptions <- v_events %>% group_by(vei) %>% summarise(cnt_vei=n())
ggplot(data=vei_eruptions, aes(x=reorder(vei, -cnt_vei), y=cnt_vei, fill=vei)) +
  geom_bar(stat="identity", width=0.8, color="white", position = position_dodge(width = 0.1)) +
  geom_text(aes(label=cnt_vei), position=position_stack(vjust=0.5), size=5) +
  labs(title="VEI Score") +
  xlab("VEI Score") +
  ylab("Eruptions") +
  theme(legend.position = "none", text = element_text(size=18))

n_eruptbytype <- v_events %>% 
  group_by(type) %>% 
  summarise(cnt_bytype=n())
ggplot(data=n_eruptbytype, aes(x=reorder(type, -cnt_bytype), y=cnt_bytype, fill=type)) +
      geom_bar(stat="identity", width=0.8, color="white", position = position_dodge(width = 0.1)) +
      geom_text(aes(label=cnt_bytype), position=position_stack(vjust=0.5), size=5) +
      scale_x_discrete(guide = guide_axis(angle=45)) +
      labs(title="Mountain Type") +
      xlab("Mountains Type") +
      ylab("Number of mountains") +
      theme(legend.position = "none", text = element_text(size=18))

fig(12,7)
e_mounts <- v_events %>% select(name, elevation_m) %>% distinct()
ggplot(data=e_mounts, aes(x=reorder(name, -elevation_m), y=elevation_m, fill=name)) +
  geom_bar(stat="identity", width=0.8, color="white", position = position_dodge(width = 0.1)) +
  scale_x_discrete(guide = guide_axis(angle=45)) +
  # geom_text(aes(label=elevation_m), position=position_stack(vjust=0.5), size=5) +
  labs(title="Mountain Elevation") +
  xlab("Mountains Name") +
  ylab("Elevation") +
  theme(legend.position = "none", text = element_text(size=15))

n_deathspereruptions <- v_events %>% select(name, erupt_date, total_deaths) %>% arrange(desc(total_deaths)) %>% top_n(10)
## Selecting by total_deaths
ggplot(data=n_deathspereruptions, aes(x=reorder(name, -total_deaths), y=total_deaths, fill=name)) +
  geom_bar(stat="identity", width=0.9, color="white") +
  scale_x_discrete(guide = guide_axis(angle=45)) +
  geom_text(aes(label=total_deaths), position=position_stack(vjust=0.5), size=5) +
  labs(title="Total Deaths by Mountain") +
  xlab("Mountains Name") +
  ylab("Deaths") +
  theme(legend.position = "none", text = element_text(size=15))

n_mounts <- v_events %>% select(year) %>% filter(year>1900) 

ggplot(n_mounts, aes(year)) +
  geom_histogram(aes(x=year, y=..density..), bins = 50, fill="#d3d3d3", color="black") +
  stat_function(fun=dnorm, args = list(mean=mean(n_mounts$year), sd=sd(n_mounts$year)), color="red") +
  geom_density(color="blue") +
  ggtitle("Histogram of Eruptions") +
  xlab("Year") +
  ylab("Density") +
  theme_bw()

e_mounts <- v_events %>% select(elevation_m)
ggplot(e_mounts, aes(elevation_m)) +
  geom_histogram(aes(x=elevation_m, y=..density..), bins = 50, fill="#d3d3d3", color="black") +
  stat_function(fun=dnorm, args = list(mean=mean(e_mounts$elevation_m), sd=sd(e_mounts$elevation_m)), color="red") +
  geom_density(color="blue") +
  ggtitle("Histogram of Eruptions") +
  xlab("Elevation (m)") +
  ylab("Density") +
  theme_bw()

y     <- quantile(n_mounts$year, c(0.25, 0.75)) # Find the 1st and 3rd quantiles
x     <- qnorm( c(0.25, 0.75))           # Find the matching normal values on the x-axis
slope <- diff(y) / diff(x)               # Compute the line slope
int   <- y[1] - slope * x[1]             # Compute the line intercept

ggplot(n_mounts, aes(sample = year)) +
  stat_qq() + 
  geom_abline(intercept = int, slope = slope, color = "blue", size = 2, alpha = 0.5 )

y     <- quantile(e_mounts$elevation_m, c(0.25, 0.75)) # Find the 1st and 3rd quantiles
x     <- qnorm( c(0.25, 0.75))           # Find the matching normal values on the x-axis
slope <- diff(y) / diff(x)               # Compute the line slope
int   <- y[1] - slope * x[1]             # Compute the line intercept

ggplot(e_mounts, aes(sample = elevation_m)) +
  stat_qq() + 
  geom_abline(intercept = int, slope = slope, color = "blue", size = 2, alpha = 0.5 )

p <- ggplot(v_events, aes(island, y=elevation_m))
p + geom_boxplot(fill = "white", colour = "#3366FF")

# Load libraries to make the chart look nicer
library(colorspace) # for darken()
library(ggrepel)    # for geom_text_repel()
fig(12, 7)
island_cols <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#999999", "#889EAF", "#D4B499")
e_events <- v_events 
ggplot(data=e_events, aes(x=year, y=elevation_m)) +
  geom_point(aes(color = island, fill = island, size = total_deaths), alpha = 0.5, shape = 21) +
  geom_smooth(
        aes(color = "y ~ log(x)", fill = "y ~ log(x)"),
        method = 'loess', formula = y~log(x), se = FALSE, fullrange = TRUE) +
  scale_size_continuous(range = c(3, 10)) +
  scale_color_manual(values = darken(island_cols, 0.5)) +
  scale_fill_manual(values = island_cols) +
  labs(title="Total Eruptions in the Past 30 years")

fig(18, 12)
e_events_1970 <- v_events %>% filter(year>1970 & elevation_m>0)
# e_events_top10 <- e_events_1970 %>% filter(vei>2)
ggplot(data=e_events_1970, aes(x=year, y=elevation_m)) +
  geom_point(aes(color = island, fill = island, size = vei), alpha = 0.5, shape = 21) +
  geom_smooth(aes(color = "y ~ log(x)", fill = "y ~ log(x)", alpha=.1), size=3,
    method = 'loess', formula = y~log(x), se = FALSE, fullrange = TRUE) +
  geom_text_repel(
    aes(label = name),
    color = "black",
    size = 11/.pt, # font size 9 pt
    point.padding = 0.1, 
    box.padding = .6,
    min.segment.length = 0,
    max.overlaps = 1000,
    seed = 7654
  ) +
  scale_size_continuous(range = c(3, 15)) +
  scale_color_manual(values = darken(island_cols, 0.5)) +
  scale_fill_manual(values = island_cols) +
  scale_x_continuous(
    name = "Eruption Events Year 1970-2021",
  ) +
  scale_y_continuous(
    name = "Elevation (m)",
    limits = c(10, 4100),
    breaks = c(1, 900, 1800, 2700, 3600, 3900),
    expand = c(0, 0)
  ) +
  labs(title="Total Eruptions in the Past 30 years") +
  theme_minimal_hgrid(12, rel_small = 1) +
  theme(
    legend.position = "bottom",
    legend.justification = "right",
    legend.text = element_text(size = 9),
    legend.box.spacing = unit(0, "pt")
  )

fig(12, 7)
island_cols <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#999999", "#889EAF", "#D4B499")
ve_data <- v_events %>% filter(elevation_m>0) %>% select(island, type, name, vei, elevation_m, total_deaths)
ggplot(ve_data, aes(vei, elevation_m)) +
  geom_smooth(aes(color = "y ~ log(x)", fill = "y ~ log(x)"),
        method = 'lm', formula = y~log(x), se = FALSE, fullrange = TRUE) +
  geom_point(aes(color = island, fill = island, size = total_deaths), alpha = 0.5, shape = 21) +
  labs(title="Elevation (m) vs VEI") +
  xlab("VEI") +
  ylab("Elevation (m)") +
  scale_color_manual(values = darken(island_cols, 0.3)) +
  scale_fill_manual(values = island_cols) +
  scale_size_continuous(range = c(3, 21)) +
  guides(size=guide_legend("Total Deaths")) +
  theme_minimal_hgrid(12, rel_small = 1) + 
  theme(
    legend.position = "right",
    legend.justification = "right",
    legend.text = element_text(size = 9),
    legend.box.spacing = unit(3, "pt")
  )

fig(12, 7)
island_cols <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#999999", "#889EAF", "#D4B499")
ve_data <- v_events %>% filter(elevation_m>0 & total_deaths<10000) %>% select(island, type, name, vei, elevation_m, total_deaths)
ggplot(ve_data, aes(total_deaths, elevation_m)) +
  geom_smooth(aes(color = "y ~ log(x)", fill = "y ~ log(x)"),
        method = 'lm', formula = y~log(x), se = FALSE, fullrange = TRUE) +
  geom_point(aes(color = island, fill = island, size = total_deaths), alpha = 0.5, shape = 21) +
  labs(title="Elevation (m) vs Total Deaths") +
  xlab("Total Deaths") +
  ylab("Elevation (m)") +
  scale_color_manual(values = darken(island_cols, 0.3)) +
  scale_fill_manual(values = island_cols) +
  scale_size_continuous(range = c(3, 21)) +
  guides(size=guide_legend("Total Deaths")) +
  guides(colour=guide_legend("Island")) +
  theme_minimal_hgrid(12, rel_small = 1)

require("maps")
## Loading required package: maps
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
fig(20, 9)
island_cols <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#999999", "#889EAF", "#D4B499")
type_cols <- c("#6F69AC", "#95DAC1", "#FFEBA1", "#FD6F96")
mydata <- e_events %>% 
    dplyr::group_by(island, type, name, longitude, latitude) %>%
    dplyr::summarise(sum_erpt = n(), m_vei = mean(vei), m_elev = max(elevation_m), m_deaths = max(total_deaths), .groups="drop") %>%
    select(island, type, name, m_vei, m_elev, longitude, latitude, m_deaths)
global <- map_data("world") # World longitude and latitude data 
ggplot() + 
    geom_polygon(data = global, aes(x=long, y = lat, group = group), 
        fill = "gray85", color = "gray80") + 
    coord_fixed(1.3) +
    xlim(94,142) + ylim(-11,7.5) +
    geom_point(data = mydata, aes(x = longitude, y = latitude, color = type, fill = type, size = m_deaths), 
        alpha = 0.8, show.legend = F, shape=24) +
    geom_text_repel(data = mydata,
        aes(x = longitude, y = latitude, label = name),
        color = "grey30",
        size = 11/.pt, # font size 9 pt
        point.padding = 0.1, 
        box.padding = .6,
        min.segment.length = 0,
        max.overlaps = 1000,
        seed = 7654
      ) +
    ggtitle ("Volcano Eruption") + 
    scale_color_manual(values = darken(type_cols, 0.7)) +
    scale_fill_manual(values = type_cols) +
    scale_size_continuous(range = c(3, 9)) +
    guides(size=guide_legend("Total Deaths")) +
    guides(color=guide_legend("Type")) +
    theme_minimal_hgrid(12, rel_small = 1) + 
    theme(
      legend.position = "bottom",
      legend.justification = "right",
      legend.text = element_text(size = 9),
      legend.box.spacing = unit(3, "pt"))