Set up environment

library(tidyverse)
library(gridExtra)
library(pander)
library(ggpubr)

theme_set(theme_bw(base_size = 13, base_family = "Times") +
            theme(plot.title = element_text(size = 14, hjust = 0.0),
                  panel.grid.minor = element_blank(),
                  panel.grid.major = element_line(colour = "gray", linetype="dashed",size = 0.1),
                  legend.position = "none", 
                  legend.title = element_blank(),
                  legend.text.align = 0))

## FUNCTION: Get legend
get_legend = function(myggplot){
  tmp = ggplot_gtable(ggplot_build(myggplot))
  leg = which(sapply(tmp$grobs, function (x) x$name) == "guide-box")
  legend = tmp$grobs[[leg]]
  return(legend)
}

1 Data Processing


Use CDO to extract data from “netCDF” to “text” format

Assume that the data including daily observation and model simulation of temperature over period from 2000-2002. These datasets are normally in netCDF format, and can be extracted by using CDO as below:

  • cdo -L -outputtab,date,value -fldmean -selvar,tas OBS.nc 2> /dev/null > data-txt/OBS-TAS.txt
  • cdo -L -outputtab,date,value -fldmean -selvar,tas Model.nc 2> /dev/null > data-txt/Model-TAS.txt


Read data & re-shape data to work with “tidyverse” environment

df.type = c("OBS", "Model")

# Read data
df.tas = NULL
for (itype in df.type) {
  df.tas.tmp = read.table(paste0("data-txt/", itype, "-TAS.txt"), header = FALSE)
  
  colnames(df.tas.tmp) = c("date", "var")
  df.tas.tmp$type = itype
  
  df.tas = rbind(df.tas, df.tas.tmp)
}

# Transfrom date to year-mon-day
f.convdate = function(date) {
  return(data_frame(year = as.integer(substr(date,1,4)),
                    mon  = as.integer(substr(date,6,7)),
                    day  = as.integer(substr(date,9,10))))
}

df.tas = cbind(type = df.tas$type, f.convdate(df.tas$date), var = df.tas$var)


Quick look at annual cycle (More detailed later)

df.tas %>% group_by(type, mon) %>% summarise(var = mean(var, na.rm = TRUE)) %>% 
  spread(mon, var) %>% pander(split.table = 130)
type 1 2 3 4 5 6 7 8 9 10 11 12
Model 13.5 15.36 18.76 22.24 24.9 25.91 26.61 26.13 24.78 22.59 17.94 14.38
OBS 14.42 15.01 19.07 22.38 25.89 27.47 27.95 27.98 26.43 23.99 18.63 15.2

2 Daily Analysis


Create basic plot contents (template)

p0 = df.tas %>% spread(type, var) %>%
  ggplot(aes(x = OBS, y = Model)) +
  scale_x_continuous(breaks = seq(0, 35, 5), limits = c(5, 32.5)) +
  scale_y_continuous(breaks = seq(0, 35, 5), limits = c(5, 32.5)) +
  coord_fixed(ratio = 1) +
  labs(x = "Observation (C-degree)", y = "Model (C-degree)")


Scatter-plot BASIC

Using “alpha”, the density of points can estimately seen

p1 = p0 +
  geom_point(alpha = 0.08, color = "red") +
  geom_abline(intercept = 0, slope = 1) +
  labs(title = "Scatter-plot BASIC") +
  stat_cor(aes(label = paste(..r.label.., ..rr.label.., ..p.label.., sep = "~`,`~")),
           method = "pearson", label.x = 5, label.y = 31, na.rm = FALSE, 
           size = 4, color = "blue")


Heatmap of point Density

p2 = p0 +
  stat_density_2d(geom = "raster", aes(fill = ..density..), contour = FALSE) +
  geom_abline(intercept = 0, slope = 1) +
  scale_fill_distiller(palette = "YlOrRd", direction = 1, 
                       limits = c(0, 0.015), oob = scales::squish) +
  labs(title = "Heatmap of point Density") +
  theme(legend.position = "right")


Heatmap & Hex-Heatmap of point Count over ranges

The ranges are defined by “bins”

p3 = p0 +
  geom_bin2d(bins=36, aes(fill=..count..))+
  geom_abline(intercept = 0, slope = 1) +
  scale_fill_distiller(palette = "YlOrRd", direction = 1, 
                       limits = c(0, 20), oob = scales::squish) +
  labs(title = "Heatmap of point Count over ranges") +
  stat_cor(aes(label = paste(..r.label.., ..rr.label.., ..p.label.., sep = "~`,`~")),
           method = "pearson", label.x = 5, label.y = 31, na.rm = FALSE, 
           size = 4, color = "blue")

p4 = p0 +
  geom_hex(bins=36, aes(fill=..count..))+
  geom_abline(intercept = 0, slope = 1) +
  scale_fill_distiller(palette = "YlOrRd", direction = 1, 
                       limits = c(0, 20), oob = scales::squish) +
  labs(title = "Hex-Heatmap of point Count over ranges") +
  theme(legend.position = "right")


2D Kernel density plot of point Density

p5 = p0 +
  stat_density_2d(aes(fill = ..level..), geom = "polygon", alpha= 1, col = "black") +
  scale_fill_distiller(palette = "Spectral", limits = c(0, 0.015), oob = scales::squish) +
  geom_abline(intercept = 0, slope = 1) +
  labs(title = "2D Kernel density plot of point Density") +
  stat_cor(aes(label = paste(..r.label.., ..rr.label.., ..p.label.., sep = "~`,`~")),
           method = "pearson", label.x = 5, label.y = 31, na.rm = FALSE, 
           size = 4, color = "blue")

p6 = p0 +
  stat_density_2d(aes(fill = ..level..), geom = "polygon", alpha = 1, show.legend = F) +
  stat_density_2d(aes(size = ..density.., fill = ..density..), geom = "point", 
                  shape = 21, n = 12, contour = FALSE, col = "black") +
  scale_fill_distiller(palette = "Spectral", limits = c(0, 0.015), oob = scales::squish) +
  geom_abline(intercept = 0, slope = 1) +
  labs(title = "Modified 2D Kernel density plot of point Density") +
  theme(legend.position = "right")


Put all plots together using grid.arrange (gridExtra)

grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 2, heights = c(3, 3, 3),
             layout_matrix = rbind(c(1, 2), c(3, 4), c(5, 6)))

3 Monthly Analysis


Calculate some climatology indices for each month over period

df.stat = df.tas %>% group_by(type, mon) %>% 
  summarise(Mean   = mean(var, na.rm = TRUE),
            Median = quantile(var, 0.50, na.rm = TRUE),
            Q25    = quantile(var, 0.25, na.rm = TRUE),
            Q75    = quantile(var, 0.75, na.rm = TRUE))


Print out as a table

df.stat %>% ungroup() %>% 
  gather(key = indices, value = value, -c(type, mon)) %>% spread(key = mon, value = value) %>%
  arrange(indices) %>% pander(emphasize.strong.cols = c(1, 2), split.table = 130)
type indices 1 2 3 4 5 6 7 8 9 10 11 12
Model Mean 13.5 15.36 18.76 22.24 24.9 25.91 26.61 26.13 24.78 22.59 17.94 14.38
OBS Mean 14.42 15.01 19.07 22.38 25.89 27.47 27.95 27.98 26.43 23.99 18.63 15.2
Model Median 13.38 15.45 19.16 22.55 24.82 26.15 26.44 26.1 24.98 23 18.38 14.95
OBS Median 14.73 14.92 19.47 22.35 25.96 27.87 27.87 28.04 26.82 24.73 18.46 14.88
Model Q25 10.92 14.21 17.38 21.07 24.33 25.9 26.08 25.87 23.98 21.67 16.39 11.63
OBS Q25 11.52 12.24 17.34 20.5 24.88 26.19 26.81 27.13 25.51 22.14 16.37 11.7
Model Q75 16.41 16.91 20.43 23.6 25.85 26.53 27.12 26.33 25.72 24.04 20.32 17.82
OBS Q75 17.39 17.93 21.13 24.93 27.14 28.62 29 29 27.46 25.97 20.59 18.49


Plot the table as heatmap

df.stat.plot = df.stat %>% ungroup() %>% 
  gather(key = indices, value = value, -c(type, mon)) %>% 
  mutate(idx = paste0(indices, "-", type))
df.stat.plot$value = round(df.stat.plot$value, digits = 1)

c.month = c("J", "F", "M", "A","M", "J", "J", "A", "S", "O", "N", "D")

p = df.stat.plot %>% ggplot() +
  geom_tile(aes(x = factor(mon), y = idx, fill = value), color = "black") +
  geom_text(aes(x = factor(mon), y = idx, label = value), color = "black", size = 3) +
  geom_hline(yintercept = c(2.5, 4.5, 6.5), size = 1, color = "black") +
  scale_fill_distiller(palette = "Spectral", limits = c(10, 30), oob = scales::squish) +
  scale_x_discrete(labels = c.month) +
  labs(title = "Heatmap for indices comparison",
       x = "Month", y = "") +
  theme(legend.position = "bottom")
p


Create basic plot contents (template)

# Create function for cleaner code later
f.ggplot = function(p, discrete = FALSE) {
  p = p +
    scale_color_manual(values = c("red", "blue")) +
    scale_fill_manual(values = c("red", "blue")) +
    labs(x = "Months", y = "Temperature (C-degree)") +
    theme(legend.position = "bottom")
  if (discrete == TRUE) {
    p = p + scale_x_discrete(breaks = seq(12), labels = c.month)
  } else {
    p = p + scale_x_continuous(breaks = seq(12), labels = c.month)
  }
  return(p)
}


Plotting annual cycle

p1 = f.ggplot(df.stat %>% ggplot() +
  geom_col(aes(x = mon, y = Mean, fill = factor(type, levels = c("OBS", "Model"))),
           alpha = 0.6, position = position_dodge(0.8)) +
  geom_errorbar(aes(x = mon , ymin = Q25, ymax = Q75, 
                    group = factor(type, levels = c("OBS", "Model"))),
                width = 0.5, position = position_dodge(0.8)) +
  labs(title = "Climatology of Temperature over period 2000-2002",
       subtitle = "Errorbars present range from Q25 to Q75"))

p2 = f.ggplot(df.stat %>% ggplot() +
  geom_line(aes(x = mon, y = Mean, color = factor(type, levels = c("OBS", "Model")))) +
  geom_ribbon(aes(x = mon, ymin = Q25, ymax = Q75, 
                  fill = factor(type, levels = c("OBS", "Model"))), alpha = 0.2) +
  labs(title = "Climatology of Temperature over period 2000-2002", 
       subtitle = "The shaded area presents range from Q25 & Q75"))
  
p3 = f.ggplot(df.tas %>% ggplot() +
  stat_summary(aes(x = mon, y = var, color = factor(type, levels = c("OBS", "Model"))),
               geom = "pointrange", position = position_dodge(width = 0.5),
               fun.y = mean, fun.ymax = max, fun.ymin = min, size = 0.5) +
  labs(title = "Climatology of Temperature over period 2000-2002", 
       subtitle = "The points present for Mean, the lines present range from MIN to MAX"))

p4 = f.ggplot(df.tas %>% ggplot() +
  geom_boxplot(aes(x = factor(mon), y = var, fill = factor(type, levels = c("OBS", "Model"))),
               alpha = 0.6, outlier.color = "purple", outlier.shape = 4, outlier.size = 1) +
  labs(title = "Climatology of Temperature over period 2000-2002", 
       subtitle = "The boxes present Median, Min, Max, Q25, Q75, and outliers"),
  discrete = TRUE)
grid.arrange(p1, p2, p3, p4, ncol = 2, heights = c(3, 3),
             layout_matrix = rbind(c(1, 2), c(3, 4)))

4 PDF


Starting with histogram

m.breaks = seq(4, 32, 0.5)

p1 = df.tas %>% ggplot() +
  geom_histogram(aes(x = var, y = ..count.., fill = factor(type, levels = c("OBS", "Model"))), 
                 breaks = m.breaks, position = "identity", alpha = 0.3) +
  scale_fill_manual(values = c("red", "blue")) +
  scale_y_continuous(breaks = seq(0, 250, 50)) +
  labs(title = "Histogram", x = "Temperature", y = "Frequency (count)") +
  theme(legend.position = c(0.2, 0.85))


Manually get histogram value and plot as step line

f.hist = function(df, breaks, density = FALSE) {
  h = hist(df, breaks = breaks, plot = FALSE)
  if (density == FALSE) {
    df.h = data_frame(x = h$breaks, y = c(h$count, NA))
  } else {
    df.h = data_frame(x = h$breaks, y = c(h$density, NA))
  }
  
  is.na(df.h$y) = df.h$y == 0.0
  
  return(df.h)
}

df.hist = f.hist(df.tas %>% filter(type == "OBS") %>% .$var, breaks = m.breaks) %>%
  mutate(type = "OBS")
df.hist = rbind(df.hist,
                f.hist(df.tas %>% filter(type == "Model") %>% .$var, breaks = m.breaks) %>%
                  mutate(type = "Model"))

p2 = p1 + 
  geom_step(data = df.hist, aes(x = x, y = y, color = factor(type, levels = c("OBS", "Model"))),
            stat = "identity", size = 0.35) +
  scale_color_manual(values = c("red", "blue")) +
  labs(title = "Histogram with step lines")


Plot histogram with smoothing density

p3 = df.tas %>% ggplot() +
  geom_histogram(aes(x = var, y = ..density.., fill = factor(type, levels = c("OBS", "Model"))), 
                 breaks = m.breaks, position = "identity", alpha = 0.3) +
  geom_density(aes(x = var , fill = factor(type, levels = c("OBS", "Model"))), alpha = 0.3) +
  scale_fill_manual(values = c("red", "blue")) +
  scale_y_continuous(breaks = seq(0, 0.3, 0.05)) +
  labs(title = "Histogram (Density) with adding smooth", x = "Temperature", y = "Density") +
  theme(legend.position = c(0.2, 0.85))


Using combination of Violin-plot and Box-Plot

p4 = df.tas %>% ggplot() +
  geom_violin(aes(x = type, y = var, group = type, fill = factor(type, levels = c("OBS", "Model"))), 
              trim = TRUE, alpha = 0.3) +
  geom_boxplot(aes(x = type, y = var, group = type, fill = factor(type, levels = c("OBS", "Model"))), 
               width=0.1, alpha = 0.6) +
  scale_fill_manual(values = c("red", "blue")) + 
  scale_y_continuous(breaks = seq(10, 30, 10)) +
  coord_flip() +
  labs(title = "Violin-plot combined with Box-plot", x = "", y = "Temperature") +
  theme(legend.position = c(0.2, 0.5))


grid.arrange(p1, p2, p3, p4, ncol = 2, heights = c(3, 3),
             layout_matrix = rbind(c(1, 2), c(3, 4)))


Appling “ggridges” density plot for monthly density

library(ggridges)
library(viridis)

p1 = df.tas %>% ggplot() +
  geom_density_ridges(aes(x = var, y = factor(mon), fill = factor(mon), col = factor(mon)), 
                      scale = 4, show.legend = FALSE) +
  scale_y_discrete(labels = c.month) +
  scale_fill_viridis(option = "A",alpha = 0.5, discrete = T)+
  scale_color_viridis(option = "C",alpha = 1, discrete = T, begin = 1, end = 0) +
  labs(title = "Kernel density plot using 'ggridges'", 
       x = "Temperature", y = "Months")

p2 = df.tas %>% ggplot() +
  geom_density_ridges(aes(x = var, y = factor(mon), fill = factor(mon), col = factor(mon)), 
                      scale = 4, show.legend = FALSE, alpha = 0.5,
                      stat = "binline", binwidth = 0.5, rel_min_height = 0.005) +
  scale_y_discrete(labels = c.month) +
  scale_fill_viridis(option = "A",alpha = 0.5, discrete = T)+
  scale_color_viridis(option = "C",alpha = 1, discrete = T, begin = 1, end = 0) +
  labs(title = "Kernel density plot using 'ggridges'", 
       x = "Temperature", y = "Months")


grid.arrange(p1, p2, ncol = 2, heights = c(3),
             layout_matrix = rbind(c(1, 2)))

5 CDF


Starting CDF with auto function in “ggplot”

p1 = df.tas %>% ggplot() +
  stat_ecdf(aes(x = var, color = type), geom = "step", pad = F) +
  scale_color_manual(values = c("red", "blue")) +
  labs(title = "CDF using 'ggplot' auto-function", x = "Temperature (C-degree)", y = "CDF") +
  theme(legend.position = c(0.2, 0.85))


Manually calculate CDF and plot

This help to extract the correct value

f.getcdf = function (df, step = 1, qth = -999, xth = -999) {
  ## Recreate ecdf data
  df.ecdf = data.frame(x = unique(df),
                        y = ecdf(df)(unique(df))*length(df))
  ## rescale y to 0,1 range
  df.ecdf$y = 
    scale(df.ecdf$y,center = min(df.ecdf$y),scale = diff(range(df.ecdf$y)))
  
  df.ecdf = df.ecdf %>% arrange (by = x)
  return (df.ecdf[c(seq(1, length(df.ecdf$x), step), length(df.ecdf$x)), ])
  
  ## To extract correct value
  # if (xth == -999) {
  #   out = dat_ecdf %>% filter (rank(abs(y - qth)) == min(rank(abs(y - qth))))
  #   out = round (out, digits = 3)
  # } else {
  #   out = dat_ecdf %>% filter (rank(abs(x - xth)) == min(rank(abs(x - xth))))
  #   out = round (out, digits = 3)
  # }
  # 
  # return (out)
}

##
df.cdf = f.getcdf(df.tas %>% filter(type == "OBS") %>% .$var, step = 20) %>% 
  mutate(type = "OBS")
df.cdf = rbind(df.cdf,
               f.getcdf(df.tas %>% filter(type == "Model") %>% .$var, step = 20) %>% 
                 mutate(type = "Model"))

p2 = df.cdf %>% ggplot() +
  geom_step(aes(x = x, y = y, color = type)) +
  scale_color_manual(values = c("red", "blue")) +
  labs(title = "CDF by manually calculation", x = "Temperature (C-degree)", y = "CDF") +
  theme(legend.position = c(0.2, 0.85))


Plot it out

grid.arrange(p1, p2, ncol = 2, heights = c(3),
             layout_matrix = rbind(c(1, 2)))

6 Bonus - Plotly

One reason to use the manual calculation is that the auto function still not working properly with “plotly” for example:

The CDF by “ggplot” auto-function will be broken as:

library(plotly)
ggplotly(p1)


Instead of

library(plotly)
ggplotly(p2)
---
title: "Visualization: Temperature (Scatter-plot, Heatmap, Box-plot, PDF, CDF, etc.)"
author: "by NXTTNX"
output:
  html_document: 
    code_download: TRUE
    code_folding: show
    number_sections: TRUE
    theme: "default"
    toc: TRUE
    toc_float: TRUE
    dev: 'svg'  
---

<style>
/* resize the widget container */
.plotly { 
  width: 60% !important;
  margin: auto;
}

/* center the widget */
div.svg-container {
  margin: auto !important;
}
</style>


```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, fig.align = "center")
```

\
**Set up environment**

```{r message = FALSE, warning = FALSE}
library(tidyverse)
library(gridExtra)
library(pander)
library(ggpubr)

theme_set(theme_bw(base_size = 13, base_family = "Times") +
            theme(plot.title = element_text(size = 14, hjust = 0.0),
                  panel.grid.minor = element_blank(),
                  panel.grid.major = element_line(colour = "gray", linetype="dashed",size = 0.1),
                  legend.position = "none", 
                  legend.title = element_blank(),
                  legend.text.align = 0))

## FUNCTION: Get legend
get_legend = function(myggplot){
  tmp = ggplot_gtable(ggplot_build(myggplot))
  leg = which(sapply(tmp$grobs, function (x) x$name) == "guide-box")
  legend = tmp$grobs[[leg]]
  return(legend)
}

```


# Data Processing

\
**Use CDO to extract data from "netCDF" to "text" format**

Assume that the data including daily observation and model simulation of temperature over period from 
2000-2002. These datasets are normally in netCDF format, and can be extracted by using CDO as below:

* cdo -L -outputtab,date,value -fldmean -selvar,tas OBS.nc 2> /dev/null > data-txt/OBS-TAS.txt
* cdo -L -outputtab,date,value -fldmean -selvar,tas Model.nc 2> /dev/null > data-txt/Model-TAS.txt

\
**Read data & re-shape data to work with "tidyverse" environment**


```{r message = FALSE, warning = FALSE}
df.type = c("OBS", "Model")

# Read data
df.tas = NULL
for (itype in df.type) {
  df.tas.tmp = read.table(paste0("data-txt/", itype, "-TAS.txt"), header = FALSE)
  
  colnames(df.tas.tmp) = c("date", "var")
  df.tas.tmp$type = itype
  
  df.tas = rbind(df.tas, df.tas.tmp)
}

# Transfrom date to year-mon-day
f.convdate = function(date) {
  return(data_frame(year = as.integer(substr(date,1,4)),
                    mon  = as.integer(substr(date,6,7)),
                    day  = as.integer(substr(date,9,10))))
}

df.tas = cbind(type = df.tas$type, f.convdate(df.tas$date), var = df.tas$var)
```

\
**Quick look at annual cycle (More detailed later)**

```{r message = FALSE, warning = FALSE}
df.tas %>% group_by(type, mon) %>% summarise(var = mean(var, na.rm = TRUE)) %>% 
  spread(mon, var) %>% pander(split.table = 130)
```


# Daily Analysis

\
**Create basic plot contents (template)**
```{r message = FALSE, warning = FALSE}
p0 = df.tas %>% spread(type, var) %>%
  ggplot(aes(x = OBS, y = Model)) +
  scale_x_continuous(breaks = seq(0, 35, 5), limits = c(5, 32.5)) +
  scale_y_continuous(breaks = seq(0, 35, 5), limits = c(5, 32.5)) +
  coord_fixed(ratio = 1) +
  labs(x = "Observation (C-degree)", y = "Model (C-degree)")
```

\
**Scatter-plot BASIC**

Using "alpha", the density of points can estimately seen
```{r message = FALSE, warning = FALSE}
p1 = p0 +
  geom_point(alpha = 0.08, color = "red") +
  geom_abline(intercept = 0, slope = 1) +
  labs(title = "Scatter-plot BASIC") +
  stat_cor(aes(label = paste(..r.label.., ..rr.label.., ..p.label.., sep = "~`,`~")),
           method = "pearson", label.x = 5, label.y = 31, na.rm = FALSE, 
           size = 4, color = "blue")
```

\
**Heatmap of point Density**
```{r message = FALSE, warning = FALSE}
p2 = p0 +
  stat_density_2d(geom = "raster", aes(fill = ..density..), contour = FALSE) +
  geom_abline(intercept = 0, slope = 1) +
  scale_fill_distiller(palette = "YlOrRd", direction = 1, 
                       limits = c(0, 0.015), oob = scales::squish) +
  labs(title = "Heatmap of point Density") +
  theme(legend.position = "right")
```

\
**Heatmap & Hex-Heatmap of point Count over ranges**

The ranges are defined by "bins"

```{r message = FALSE, warning = FALSE}
p3 = p0 +
  geom_bin2d(bins=36, aes(fill=..count..))+
  geom_abline(intercept = 0, slope = 1) +
  scale_fill_distiller(palette = "YlOrRd", direction = 1, 
                       limits = c(0, 20), oob = scales::squish) +
  labs(title = "Heatmap of point Count over ranges") +
  stat_cor(aes(label = paste(..r.label.., ..rr.label.., ..p.label.., sep = "~`,`~")),
           method = "pearson", label.x = 5, label.y = 31, na.rm = FALSE, 
           size = 4, color = "blue")

p4 = p0 +
  geom_hex(bins=36, aes(fill=..count..))+
  geom_abline(intercept = 0, slope = 1) +
  scale_fill_distiller(palette = "YlOrRd", direction = 1, 
                       limits = c(0, 20), oob = scales::squish) +
  labs(title = "Hex-Heatmap of point Count over ranges") +
  theme(legend.position = "right")
```

\
**2D Kernel density plot of point Density**
```{r message = FALSE, warning = FALSE}
p5 = p0 +
  stat_density_2d(aes(fill = ..level..), geom = "polygon", alpha= 1, col = "black") +
  scale_fill_distiller(palette = "Spectral", limits = c(0, 0.015), oob = scales::squish) +
  geom_abline(intercept = 0, slope = 1) +
  labs(title = "2D Kernel density plot of point Density") +
  stat_cor(aes(label = paste(..r.label.., ..rr.label.., ..p.label.., sep = "~`,`~")),
           method = "pearson", label.x = 5, label.y = 31, na.rm = FALSE, 
           size = 4, color = "blue")

p6 = p0 +
  stat_density_2d(aes(fill = ..level..), geom = "polygon", alpha = 1, show.legend = F) +
  stat_density_2d(aes(size = ..density.., fill = ..density..), geom = "point", 
                  shape = 21, n = 12, contour = FALSE, col = "black") +
  scale_fill_distiller(palette = "Spectral", limits = c(0, 0.015), oob = scales::squish) +
  geom_abline(intercept = 0, slope = 1) +
  labs(title = "Modified 2D Kernel density plot of point Density") +
  theme(legend.position = "right")
```

\
**Put all plots together using grid.arrange (gridExtra)**
```{r message = FALSE, warning = FALSE, fig.width = 10, fig.height = 12}
grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 2, heights = c(3, 3, 3),
             layout_matrix = rbind(c(1, 2), c(3, 4), c(5, 6)))
```


# Monthly Analysis

\
**Calculate some climatology indices for each month over period**
```{r message = FALSE, warning = FALSE}
df.stat = df.tas %>% group_by(type, mon) %>% 
  summarise(Mean   = mean(var, na.rm = TRUE),
            Median = quantile(var, 0.50, na.rm = TRUE),
            Q25    = quantile(var, 0.25, na.rm = TRUE),
            Q75    = quantile(var, 0.75, na.rm = TRUE))
```

\
**Print out as a table**
```{r message = FALSE, warning = FALSE}
df.stat %>% ungroup() %>% 
  gather(key = indices, value = value, -c(type, mon)) %>% spread(key = mon, value = value) %>%
  arrange(indices) %>% pander(emphasize.strong.cols = c(1, 2), split.table = 130)
```

\
**Plot the table as heatmap**
```{r message = FALSE, warning = FALSE}
df.stat.plot = df.stat %>% ungroup() %>% 
  gather(key = indices, value = value, -c(type, mon)) %>% 
  mutate(idx = paste0(indices, "-", type))
df.stat.plot$value = round(df.stat.plot$value, digits = 1)

c.month = c("J", "F", "M", "A","M", "J", "J", "A", "S", "O", "N", "D")

p = df.stat.plot %>% ggplot() +
  geom_tile(aes(x = factor(mon), y = idx, fill = value), color = "black") +
  geom_text(aes(x = factor(mon), y = idx, label = value), color = "black", size = 3) +
  geom_hline(yintercept = c(2.5, 4.5, 6.5), size = 1, color = "black") +
  scale_fill_distiller(palette = "Spectral", limits = c(10, 30), oob = scales::squish) +
  scale_x_discrete(labels = c.month) +
  labs(title = "Heatmap for indices comparison",
       x = "Month", y = "") +
  theme(legend.position = "bottom")
p
```

\
**Create basic plot contents (template)**
```{r message = FALSE, warning = FALSE}

# Create function for cleaner code later
f.ggplot = function(p, discrete = FALSE) {
  p = p +
    scale_color_manual(values = c("red", "blue")) +
    scale_fill_manual(values = c("red", "blue")) +
    labs(x = "Months", y = "Temperature (C-degree)") +
    theme(legend.position = "bottom")
  if (discrete == TRUE) {
    p = p + scale_x_discrete(breaks = seq(12), labels = c.month)
  } else {
    p = p + scale_x_continuous(breaks = seq(12), labels = c.month)
  }
  return(p)
}

```

\
**Plotting annual cycle**
```{r message = FALSE, warning = FALSE}
p1 = f.ggplot(df.stat %>% ggplot() +
  geom_col(aes(x = mon, y = Mean, fill = factor(type, levels = c("OBS", "Model"))),
           alpha = 0.6, position = position_dodge(0.8)) +
  geom_errorbar(aes(x = mon , ymin = Q25, ymax = Q75, 
                    group = factor(type, levels = c("OBS", "Model"))),
                width = 0.5, position = position_dodge(0.8)) +
  labs(title = "Climatology of Temperature over period 2000-2002",
       subtitle = "Errorbars present range from Q25 to Q75"))

p2 = f.ggplot(df.stat %>% ggplot() +
  geom_line(aes(x = mon, y = Mean, color = factor(type, levels = c("OBS", "Model")))) +
  geom_ribbon(aes(x = mon, ymin = Q25, ymax = Q75, 
                  fill = factor(type, levels = c("OBS", "Model"))), alpha = 0.2) +
  labs(title = "Climatology of Temperature over period 2000-2002", 
       subtitle = "The shaded area presents range from Q25 & Q75"))
  
p3 = f.ggplot(df.tas %>% ggplot() +
  stat_summary(aes(x = mon, y = var, color = factor(type, levels = c("OBS", "Model"))),
               geom = "pointrange", position = position_dodge(width = 0.5),
               fun.y = mean, fun.ymax = max, fun.ymin = min, size = 0.5) +
  labs(title = "Climatology of Temperature over period 2000-2002", 
       subtitle = "The points present for Mean, the lines present range from MIN to MAX"))

p4 = f.ggplot(df.tas %>% ggplot() +
  geom_boxplot(aes(x = factor(mon), y = var, fill = factor(type, levels = c("OBS", "Model"))),
               alpha = 0.6, outlier.color = "purple", outlier.shape = 4, outlier.size = 1) +
  labs(title = "Climatology of Temperature over period 2000-2002", 
       subtitle = "The boxes present Median, Min, Max, Q25, Q75, and outliers"),
  discrete = TRUE)
```

```{r message = FALSE, warning = FALSE, fig.width = 11, fig.height = 8}
grid.arrange(p1, p2, p3, p4, ncol = 2, heights = c(3, 3),
             layout_matrix = rbind(c(1, 2), c(3, 4)))
```


# PDF

\
**Starting with histogram**
```{r message = FALSE, warning = FALSE}
m.breaks = seq(4, 32, 0.5)

p1 = df.tas %>% ggplot() +
  geom_histogram(aes(x = var, y = ..count.., fill = factor(type, levels = c("OBS", "Model"))), 
                 breaks = m.breaks, position = "identity", alpha = 0.3) +
  scale_fill_manual(values = c("red", "blue")) +
  scale_y_continuous(breaks = seq(0, 250, 50)) +
  labs(title = "Histogram", x = "Temperature", y = "Frequency (count)") +
  theme(legend.position = c(0.2, 0.85))
```

\
**Manually get histogram value and plot as step line**
```{r message = FALSE, warning = FALSE}
f.hist = function(df, breaks, density = FALSE) {
  h = hist(df, breaks = breaks, plot = FALSE)
  if (density == FALSE) {
    df.h = data_frame(x = h$breaks, y = c(h$count, NA))
  } else {
    df.h = data_frame(x = h$breaks, y = c(h$density, NA))
  }
  
  is.na(df.h$y) = df.h$y == 0.0
  
  return(df.h)
}

df.hist = f.hist(df.tas %>% filter(type == "OBS") %>% .$var, breaks = m.breaks) %>%
  mutate(type = "OBS")
df.hist = rbind(df.hist,
                f.hist(df.tas %>% filter(type == "Model") %>% .$var, breaks = m.breaks) %>%
                  mutate(type = "Model"))

p2 = p1 + 
  geom_step(data = df.hist, aes(x = x, y = y, color = factor(type, levels = c("OBS", "Model"))),
            stat = "identity", size = 0.35) +
  scale_color_manual(values = c("red", "blue")) +
  labs(title = "Histogram with step lines")
```

\
**Plot histogram with smoothing density**
```{r message = FALSE, warning = FALSE}
p3 = df.tas %>% ggplot() +
  geom_histogram(aes(x = var, y = ..density.., fill = factor(type, levels = c("OBS", "Model"))), 
                 breaks = m.breaks, position = "identity", alpha = 0.3) +
  geom_density(aes(x = var , fill = factor(type, levels = c("OBS", "Model"))), alpha = 0.3) +
  scale_fill_manual(values = c("red", "blue")) +
  scale_y_continuous(breaks = seq(0, 0.3, 0.05)) +
  labs(title = "Histogram (Density) with adding smooth", x = "Temperature", y = "Density") +
  theme(legend.position = c(0.2, 0.85))
```
\
**Using combination of Violin-plot and Box-Plot**
```{r message = FALSE, warning = FALSE}
p4 = df.tas %>% ggplot() +
  geom_violin(aes(x = type, y = var, group = type, fill = factor(type, levels = c("OBS", "Model"))), 
              trim = TRUE, alpha = 0.3) +
  geom_boxplot(aes(x = type, y = var, group = type, fill = factor(type, levels = c("OBS", "Model"))), 
               width=0.1, alpha = 0.6) +
  scale_fill_manual(values = c("red", "blue")) + 
  scale_y_continuous(breaks = seq(10, 30, 10)) +
  coord_flip() +
  labs(title = "Violin-plot combined with Box-plot", x = "", y = "Temperature") +
  theme(legend.position = c(0.2, 0.5))
```

\
```{r message = FALSE, warning = FALSE, fig.width = 10, fig.height = 7}
grid.arrange(p1, p2, p3, p4, ncol = 2, heights = c(3, 3),
             layout_matrix = rbind(c(1, 2), c(3, 4)))
```

\
**Appling "ggridges" density plot for monthly density**
```{r message = FALSE, warning = FALSE, fig.width = 10, fig.height = 5}
library(ggridges)
library(viridis)

p1 = df.tas %>% ggplot() +
  geom_density_ridges(aes(x = var, y = factor(mon), fill = factor(mon), col = factor(mon)), 
                      scale = 4, show.legend = FALSE) +
  scale_y_discrete(labels = c.month) +
  scale_fill_viridis(option = "A",alpha = 0.5, discrete = T)+
  scale_color_viridis(option = "C",alpha = 1, discrete = T, begin = 1, end = 0) +
  labs(title = "Kernel density plot using 'ggridges'", 
       x = "Temperature", y = "Months")

p2 = df.tas %>% ggplot() +
  geom_density_ridges(aes(x = var, y = factor(mon), fill = factor(mon), col = factor(mon)), 
                      scale = 4, show.legend = FALSE, alpha = 0.5,
                      stat = "binline", binwidth = 0.5, rel_min_height = 0.005) +
  scale_y_discrete(labels = c.month) +
  scale_fill_viridis(option = "A",alpha = 0.5, discrete = T)+
  scale_color_viridis(option = "C",alpha = 1, discrete = T, begin = 1, end = 0) +
  labs(title = "Kernel density plot using 'ggridges'", 
       x = "Temperature", y = "Months")


grid.arrange(p1, p2, ncol = 2, heights = c(3),
             layout_matrix = rbind(c(1, 2)))
  
```


# CDF

\
**Starting CDF with auto function in "ggplot"**
```{r message = FALSE, warning = FALSE}
p1 = df.tas %>% ggplot() +
  stat_ecdf(aes(x = var, color = type), geom = "step", pad = F) +
  scale_color_manual(values = c("red", "blue")) +
  labs(title = "CDF using 'ggplot' auto-function", x = "Temperature (C-degree)", y = "CDF") +
  theme(legend.position = c(0.2, 0.85))

```

\
**Manually calculate CDF and plot**

This help to extract the correct value

```{r message = FALSE, warning = FALSE}
f.getcdf = function (df, step = 1, qth = -999, xth = -999) {
  ## Recreate ecdf data
  df.ecdf = data.frame(x = unique(df),
                        y = ecdf(df)(unique(df))*length(df))
  ## rescale y to 0,1 range
  df.ecdf$y = 
    scale(df.ecdf$y,center = min(df.ecdf$y),scale = diff(range(df.ecdf$y)))
  
  df.ecdf = df.ecdf %>% arrange (by = x)
  return (df.ecdf[c(seq(1, length(df.ecdf$x), step), length(df.ecdf$x)), ])
  
  ## To extract correct value
  # if (xth == -999) {
  #   out = dat_ecdf %>% filter (rank(abs(y - qth)) == min(rank(abs(y - qth))))
  #   out = round (out, digits = 3)
  # } else {
  #   out = dat_ecdf %>% filter (rank(abs(x - xth)) == min(rank(abs(x - xth))))
  #   out = round (out, digits = 3)
  # }
  # 
  # return (out)
}

##
df.cdf = f.getcdf(df.tas %>% filter(type == "OBS") %>% .$var, step = 20) %>% 
  mutate(type = "OBS")
df.cdf = rbind(df.cdf,
               f.getcdf(df.tas %>% filter(type == "Model") %>% .$var, step = 20) %>% 
                 mutate(type = "Model"))

p2 = df.cdf %>% ggplot() +
  geom_step(aes(x = x, y = y, color = type)) +
  scale_color_manual(values = c("red", "blue")) +
  labs(title = "CDF by manually calculation", x = "Temperature (C-degree)", y = "CDF") +
  theme(legend.position = c(0.2, 0.85))
```

\
**Plot it out**
```{r message = FALSE, warning = FALSE, fig.width = 8, fig.height = 4}
grid.arrange(p1, p2, ncol = 2, heights = c(3),
             layout_matrix = rbind(c(1, 2)))
```

# Bonus - Plotly

One reason to use the manual calculation is that the auto function still not
working properly with "plotly" for example:

**The CDF by "ggplot" auto-function will be broken as:**

```{r message = FALSE, warning = FALSE, fig.width = 5, fig.height = 4}
library(plotly)
ggplotly(p1)
```

\
**Instead of**

```{r message = FALSE, warning = FALSE, fig.width = 5, fig.height = 4}
library(plotly)
ggplotly(p2)
```
