Set up environment
library(tidyverse)
library(gridExtra)
library(pander)
library(ggpubr)
theme_set(theme_bw(base_size = 13, base_family = "Times") +
theme(plot.title = element_text(size = 14, hjust = 0.0),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(colour = "gray", linetype="dashed",size = 0.1),
legend.position = "none",
legend.title = element_blank(),
legend.text.align = 0))
## FUNCTION: Get legend
get_legend = function(myggplot){
tmp = ggplot_gtable(ggplot_build(myggplot))
leg = which(sapply(tmp$grobs, function (x) x$name) == "guide-box")
legend = tmp$grobs[[leg]]
return(legend)
}
Data Processing
Use CDO to extract data from “netCDF” to “text” format
Assume that the data including daily observation and model simulation of temperature over period from 2000-2002. These datasets are normally in netCDF format, and can be extracted by using CDO as below:
- cdo -L -outputtab,date,value -fldmean -selvar,tas OBS.nc 2> /dev/null > data-txt/OBS-TAS.txt
- cdo -L -outputtab,date,value -fldmean -selvar,tas Model.nc 2> /dev/null > data-txt/Model-TAS.txt
Read data & re-shape data to work with “tidyverse” environment
df.type = c("OBS", "Model")
# Read data
df.tas = NULL
for (itype in df.type) {
df.tas.tmp = read.table(paste0("data-txt/", itype, "-TAS.txt"), header = FALSE)
colnames(df.tas.tmp) = c("date", "var")
df.tas.tmp$type = itype
df.tas = rbind(df.tas, df.tas.tmp)
}
# Transfrom date to year-mon-day
f.convdate = function(date) {
return(data_frame(year = as.integer(substr(date,1,4)),
mon = as.integer(substr(date,6,7)),
day = as.integer(substr(date,9,10))))
}
df.tas = cbind(type = df.tas$type, f.convdate(df.tas$date), var = df.tas$var)
Quick look at annual cycle (More detailed later)
df.tas %>% group_by(type, mon) %>% summarise(var = mean(var, na.rm = TRUE)) %>%
spread(mon, var) %>% pander(split.table = 130)
| Model |
13.5 |
15.36 |
18.76 |
22.24 |
24.9 |
25.91 |
26.61 |
26.13 |
24.78 |
22.59 |
17.94 |
14.38 |
| OBS |
14.42 |
15.01 |
19.07 |
22.38 |
25.89 |
27.47 |
27.95 |
27.98 |
26.43 |
23.99 |
18.63 |
15.2 |
Daily Analysis
Create basic plot contents (template)
p0 = df.tas %>% spread(type, var) %>%
ggplot(aes(x = OBS, y = Model)) +
scale_x_continuous(breaks = seq(0, 35, 5), limits = c(5, 32.5)) +
scale_y_continuous(breaks = seq(0, 35, 5), limits = c(5, 32.5)) +
coord_fixed(ratio = 1) +
labs(x = "Observation (C-degree)", y = "Model (C-degree)")
Scatter-plot BASIC
Using “alpha”, the density of points can estimately seen
p1 = p0 +
geom_point(alpha = 0.08, color = "red") +
geom_abline(intercept = 0, slope = 1) +
labs(title = "Scatter-plot BASIC") +
stat_cor(aes(label = paste(..r.label.., ..rr.label.., ..p.label.., sep = "~`,`~")),
method = "pearson", label.x = 5, label.y = 31, na.rm = FALSE,
size = 4, color = "blue")
Heatmap of point Density
p2 = p0 +
stat_density_2d(geom = "raster", aes(fill = ..density..), contour = FALSE) +
geom_abline(intercept = 0, slope = 1) +
scale_fill_distiller(palette = "YlOrRd", direction = 1,
limits = c(0, 0.015), oob = scales::squish) +
labs(title = "Heatmap of point Density") +
theme(legend.position = "right")
Heatmap & Hex-Heatmap of point Count over ranges
The ranges are defined by “bins”
p3 = p0 +
geom_bin2d(bins=36, aes(fill=..count..))+
geom_abline(intercept = 0, slope = 1) +
scale_fill_distiller(palette = "YlOrRd", direction = 1,
limits = c(0, 20), oob = scales::squish) +
labs(title = "Heatmap of point Count over ranges") +
stat_cor(aes(label = paste(..r.label.., ..rr.label.., ..p.label.., sep = "~`,`~")),
method = "pearson", label.x = 5, label.y = 31, na.rm = FALSE,
size = 4, color = "blue")
p4 = p0 +
geom_hex(bins=36, aes(fill=..count..))+
geom_abline(intercept = 0, slope = 1) +
scale_fill_distiller(palette = "YlOrRd", direction = 1,
limits = c(0, 20), oob = scales::squish) +
labs(title = "Hex-Heatmap of point Count over ranges") +
theme(legend.position = "right")
2D Kernel density plot of point Density
p5 = p0 +
stat_density_2d(aes(fill = ..level..), geom = "polygon", alpha= 1, col = "black") +
scale_fill_distiller(palette = "Spectral", limits = c(0, 0.015), oob = scales::squish) +
geom_abline(intercept = 0, slope = 1) +
labs(title = "2D Kernel density plot of point Density") +
stat_cor(aes(label = paste(..r.label.., ..rr.label.., ..p.label.., sep = "~`,`~")),
method = "pearson", label.x = 5, label.y = 31, na.rm = FALSE,
size = 4, color = "blue")
p6 = p0 +
stat_density_2d(aes(fill = ..level..), geom = "polygon", alpha = 1, show.legend = F) +
stat_density_2d(aes(size = ..density.., fill = ..density..), geom = "point",
shape = 21, n = 12, contour = FALSE, col = "black") +
scale_fill_distiller(palette = "Spectral", limits = c(0, 0.015), oob = scales::squish) +
geom_abline(intercept = 0, slope = 1) +
labs(title = "Modified 2D Kernel density plot of point Density") +
theme(legend.position = "right")
Put all plots together using grid.arrange (gridExtra)
grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 2, heights = c(3, 3, 3),
layout_matrix = rbind(c(1, 2), c(3, 4), c(5, 6)))

Monthly Analysis
Calculate some climatology indices for each month over period
df.stat = df.tas %>% group_by(type, mon) %>%
summarise(Mean = mean(var, na.rm = TRUE),
Median = quantile(var, 0.50, na.rm = TRUE),
Q25 = quantile(var, 0.25, na.rm = TRUE),
Q75 = quantile(var, 0.75, na.rm = TRUE))
Print out as a table
df.stat %>% ungroup() %>%
gather(key = indices, value = value, -c(type, mon)) %>% spread(key = mon, value = value) %>%
arrange(indices) %>% pander(emphasize.strong.cols = c(1, 2), split.table = 130)
| Model |
Mean |
13.5 |
15.36 |
18.76 |
22.24 |
24.9 |
25.91 |
26.61 |
26.13 |
24.78 |
22.59 |
17.94 |
14.38 |
| OBS |
Mean |
14.42 |
15.01 |
19.07 |
22.38 |
25.89 |
27.47 |
27.95 |
27.98 |
26.43 |
23.99 |
18.63 |
15.2 |
| Model |
Median |
13.38 |
15.45 |
19.16 |
22.55 |
24.82 |
26.15 |
26.44 |
26.1 |
24.98 |
23 |
18.38 |
14.95 |
| OBS |
Median |
14.73 |
14.92 |
19.47 |
22.35 |
25.96 |
27.87 |
27.87 |
28.04 |
26.82 |
24.73 |
18.46 |
14.88 |
| Model |
Q25 |
10.92 |
14.21 |
17.38 |
21.07 |
24.33 |
25.9 |
26.08 |
25.87 |
23.98 |
21.67 |
16.39 |
11.63 |
| OBS |
Q25 |
11.52 |
12.24 |
17.34 |
20.5 |
24.88 |
26.19 |
26.81 |
27.13 |
25.51 |
22.14 |
16.37 |
11.7 |
| Model |
Q75 |
16.41 |
16.91 |
20.43 |
23.6 |
25.85 |
26.53 |
27.12 |
26.33 |
25.72 |
24.04 |
20.32 |
17.82 |
| OBS |
Q75 |
17.39 |
17.93 |
21.13 |
24.93 |
27.14 |
28.62 |
29 |
29 |
27.46 |
25.97 |
20.59 |
18.49 |
Plot the table as heatmap
df.stat.plot = df.stat %>% ungroup() %>%
gather(key = indices, value = value, -c(type, mon)) %>%
mutate(idx = paste0(indices, "-", type))
df.stat.plot$value = round(df.stat.plot$value, digits = 1)
c.month = c("J", "F", "M", "A","M", "J", "J", "A", "S", "O", "N", "D")
p = df.stat.plot %>% ggplot() +
geom_tile(aes(x = factor(mon), y = idx, fill = value), color = "black") +
geom_text(aes(x = factor(mon), y = idx, label = value), color = "black", size = 3) +
geom_hline(yintercept = c(2.5, 4.5, 6.5), size = 1, color = "black") +
scale_fill_distiller(palette = "Spectral", limits = c(10, 30), oob = scales::squish) +
scale_x_discrete(labels = c.month) +
labs(title = "Heatmap for indices comparison",
x = "Month", y = "") +
theme(legend.position = "bottom")
p

Create basic plot contents (template)
# Create function for cleaner code later
f.ggplot = function(p, discrete = FALSE) {
p = p +
scale_color_manual(values = c("red", "blue")) +
scale_fill_manual(values = c("red", "blue")) +
labs(x = "Months", y = "Temperature (C-degree)") +
theme(legend.position = "bottom")
if (discrete == TRUE) {
p = p + scale_x_discrete(breaks = seq(12), labels = c.month)
} else {
p = p + scale_x_continuous(breaks = seq(12), labels = c.month)
}
return(p)
}
Plotting annual cycle
p1 = f.ggplot(df.stat %>% ggplot() +
geom_col(aes(x = mon, y = Mean, fill = factor(type, levels = c("OBS", "Model"))),
alpha = 0.6, position = position_dodge(0.8)) +
geom_errorbar(aes(x = mon , ymin = Q25, ymax = Q75,
group = factor(type, levels = c("OBS", "Model"))),
width = 0.5, position = position_dodge(0.8)) +
labs(title = "Climatology of Temperature over period 2000-2002",
subtitle = "Errorbars present range from Q25 to Q75"))
p2 = f.ggplot(df.stat %>% ggplot() +
geom_line(aes(x = mon, y = Mean, color = factor(type, levels = c("OBS", "Model")))) +
geom_ribbon(aes(x = mon, ymin = Q25, ymax = Q75,
fill = factor(type, levels = c("OBS", "Model"))), alpha = 0.2) +
labs(title = "Climatology of Temperature over period 2000-2002",
subtitle = "The shaded area presents range from Q25 & Q75"))
p3 = f.ggplot(df.tas %>% ggplot() +
stat_summary(aes(x = mon, y = var, color = factor(type, levels = c("OBS", "Model"))),
geom = "pointrange", position = position_dodge(width = 0.5),
fun.y = mean, fun.ymax = max, fun.ymin = min, size = 0.5) +
labs(title = "Climatology of Temperature over period 2000-2002",
subtitle = "The points present for Mean, the lines present range from MIN to MAX"))
p4 = f.ggplot(df.tas %>% ggplot() +
geom_boxplot(aes(x = factor(mon), y = var, fill = factor(type, levels = c("OBS", "Model"))),
alpha = 0.6, outlier.color = "purple", outlier.shape = 4, outlier.size = 1) +
labs(title = "Climatology of Temperature over period 2000-2002",
subtitle = "The boxes present Median, Min, Max, Q25, Q75, and outliers"),
discrete = TRUE)
grid.arrange(p1, p2, p3, p4, ncol = 2, heights = c(3, 3),
layout_matrix = rbind(c(1, 2), c(3, 4)))

PDF
Starting with histogram
m.breaks = seq(4, 32, 0.5)
p1 = df.tas %>% ggplot() +
geom_histogram(aes(x = var, y = ..count.., fill = factor(type, levels = c("OBS", "Model"))),
breaks = m.breaks, position = "identity", alpha = 0.3) +
scale_fill_manual(values = c("red", "blue")) +
scale_y_continuous(breaks = seq(0, 250, 50)) +
labs(title = "Histogram", x = "Temperature", y = "Frequency (count)") +
theme(legend.position = c(0.2, 0.85))
Manually get histogram value and plot as step line
f.hist = function(df, breaks, density = FALSE) {
h = hist(df, breaks = breaks, plot = FALSE)
if (density == FALSE) {
df.h = data_frame(x = h$breaks, y = c(h$count, NA))
} else {
df.h = data_frame(x = h$breaks, y = c(h$density, NA))
}
is.na(df.h$y) = df.h$y == 0.0
return(df.h)
}
df.hist = f.hist(df.tas %>% filter(type == "OBS") %>% .$var, breaks = m.breaks) %>%
mutate(type = "OBS")
df.hist = rbind(df.hist,
f.hist(df.tas %>% filter(type == "Model") %>% .$var, breaks = m.breaks) %>%
mutate(type = "Model"))
p2 = p1 +
geom_step(data = df.hist, aes(x = x, y = y, color = factor(type, levels = c("OBS", "Model"))),
stat = "identity", size = 0.35) +
scale_color_manual(values = c("red", "blue")) +
labs(title = "Histogram with step lines")
Plot histogram with smoothing density
p3 = df.tas %>% ggplot() +
geom_histogram(aes(x = var, y = ..density.., fill = factor(type, levels = c("OBS", "Model"))),
breaks = m.breaks, position = "identity", alpha = 0.3) +
geom_density(aes(x = var , fill = factor(type, levels = c("OBS", "Model"))), alpha = 0.3) +
scale_fill_manual(values = c("red", "blue")) +
scale_y_continuous(breaks = seq(0, 0.3, 0.05)) +
labs(title = "Histogram (Density) with adding smooth", x = "Temperature", y = "Density") +
theme(legend.position = c(0.2, 0.85))
Using combination of Violin-plot and Box-Plot
p4 = df.tas %>% ggplot() +
geom_violin(aes(x = type, y = var, group = type, fill = factor(type, levels = c("OBS", "Model"))),
trim = TRUE, alpha = 0.3) +
geom_boxplot(aes(x = type, y = var, group = type, fill = factor(type, levels = c("OBS", "Model"))),
width=0.1, alpha = 0.6) +
scale_fill_manual(values = c("red", "blue")) +
scale_y_continuous(breaks = seq(10, 30, 10)) +
coord_flip() +
labs(title = "Violin-plot combined with Box-plot", x = "", y = "Temperature") +
theme(legend.position = c(0.2, 0.5))
grid.arrange(p1, p2, p3, p4, ncol = 2, heights = c(3, 3),
layout_matrix = rbind(c(1, 2), c(3, 4)))

Appling “ggridges” density plot for monthly density
library(ggridges)
library(viridis)
p1 = df.tas %>% ggplot() +
geom_density_ridges(aes(x = var, y = factor(mon), fill = factor(mon), col = factor(mon)),
scale = 4, show.legend = FALSE) +
scale_y_discrete(labels = c.month) +
scale_fill_viridis(option = "A",alpha = 0.5, discrete = T)+
scale_color_viridis(option = "C",alpha = 1, discrete = T, begin = 1, end = 0) +
labs(title = "Kernel density plot using 'ggridges'",
x = "Temperature", y = "Months")
p2 = df.tas %>% ggplot() +
geom_density_ridges(aes(x = var, y = factor(mon), fill = factor(mon), col = factor(mon)),
scale = 4, show.legend = FALSE, alpha = 0.5,
stat = "binline", binwidth = 0.5, rel_min_height = 0.005) +
scale_y_discrete(labels = c.month) +
scale_fill_viridis(option = "A",alpha = 0.5, discrete = T)+
scale_color_viridis(option = "C",alpha = 1, discrete = T, begin = 1, end = 0) +
labs(title = "Kernel density plot using 'ggridges'",
x = "Temperature", y = "Months")
grid.arrange(p1, p2, ncol = 2, heights = c(3),
layout_matrix = rbind(c(1, 2)))

CDF
Starting CDF with auto function in “ggplot”
p1 = df.tas %>% ggplot() +
stat_ecdf(aes(x = var, color = type), geom = "step", pad = F) +
scale_color_manual(values = c("red", "blue")) +
labs(title = "CDF using 'ggplot' auto-function", x = "Temperature (C-degree)", y = "CDF") +
theme(legend.position = c(0.2, 0.85))
Manually calculate CDF and plot
This help to extract the correct value
f.getcdf = function (df, step = 1, qth = -999, xth = -999) {
## Recreate ecdf data
df.ecdf = data.frame(x = unique(df),
y = ecdf(df)(unique(df))*length(df))
## rescale y to 0,1 range
df.ecdf$y =
scale(df.ecdf$y,center = min(df.ecdf$y),scale = diff(range(df.ecdf$y)))
df.ecdf = df.ecdf %>% arrange (by = x)
return (df.ecdf[c(seq(1, length(df.ecdf$x), step), length(df.ecdf$x)), ])
## To extract correct value
# if (xth == -999) {
# out = dat_ecdf %>% filter (rank(abs(y - qth)) == min(rank(abs(y - qth))))
# out = round (out, digits = 3)
# } else {
# out = dat_ecdf %>% filter (rank(abs(x - xth)) == min(rank(abs(x - xth))))
# out = round (out, digits = 3)
# }
#
# return (out)
}
##
df.cdf = f.getcdf(df.tas %>% filter(type == "OBS") %>% .$var, step = 20) %>%
mutate(type = "OBS")
df.cdf = rbind(df.cdf,
f.getcdf(df.tas %>% filter(type == "Model") %>% .$var, step = 20) %>%
mutate(type = "Model"))
p2 = df.cdf %>% ggplot() +
geom_step(aes(x = x, y = y, color = type)) +
scale_color_manual(values = c("red", "blue")) +
labs(title = "CDF by manually calculation", x = "Temperature (C-degree)", y = "CDF") +
theme(legend.position = c(0.2, 0.85))
Plot it out
grid.arrange(p1, p2, ncol = 2, heights = c(3),
layout_matrix = rbind(c(1, 2)))

Bonus - Plotly
One reason to use the manual calculation is that the auto function still not working properly with “plotly” for example:
The CDF by “ggplot” auto-function will be broken as:
library(plotly)
ggplotly(p1)
Instead of
library(plotly)
ggplotly(p2)
---
title: "Visualization: Temperature (Scatter-plot, Heatmap, Box-plot, PDF, CDF, etc.)"
author: "by NXTTNX"
output:
  html_document: 
    code_download: TRUE
    code_folding: show
    number_sections: TRUE
    theme: "default"
    toc: TRUE
    toc_float: TRUE
    dev: 'svg'  
---

<style>
/* resize the widget container */
.plotly { 
  width: 60% !important;
  margin: auto;
}

/* center the widget */
div.svg-container {
  margin: auto !important;
}
</style>


```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE, fig.align = "center")
```

\
**Set up environment**

```{r message = FALSE, warning = FALSE}
library(tidyverse)
library(gridExtra)
library(pander)
library(ggpubr)

theme_set(theme_bw(base_size = 13, base_family = "Times") +
            theme(plot.title = element_text(size = 14, hjust = 0.0),
                  panel.grid.minor = element_blank(),
                  panel.grid.major = element_line(colour = "gray", linetype="dashed",size = 0.1),
                  legend.position = "none", 
                  legend.title = element_blank(),
                  legend.text.align = 0))

## FUNCTION: Get legend
get_legend = function(myggplot){
  tmp = ggplot_gtable(ggplot_build(myggplot))
  leg = which(sapply(tmp$grobs, function (x) x$name) == "guide-box")
  legend = tmp$grobs[[leg]]
  return(legend)
}

```


# Data Processing

\
**Use CDO to extract data from "netCDF" to "text" format**

Assume that the data including daily observation and model simulation of temperature over period from 
2000-2002. These datasets are normally in netCDF format, and can be extracted by using CDO as below:

* cdo -L -outputtab,date,value -fldmean -selvar,tas OBS.nc 2> /dev/null > data-txt/OBS-TAS.txt
* cdo -L -outputtab,date,value -fldmean -selvar,tas Model.nc 2> /dev/null > data-txt/Model-TAS.txt

\
**Read data & re-shape data to work with "tidyverse" environment**


```{r message = FALSE, warning = FALSE}
df.type = c("OBS", "Model")

# Read data
df.tas = NULL
for (itype in df.type) {
  df.tas.tmp = read.table(paste0("data-txt/", itype, "-TAS.txt"), header = FALSE)
  
  colnames(df.tas.tmp) = c("date", "var")
  df.tas.tmp$type = itype
  
  df.tas = rbind(df.tas, df.tas.tmp)
}

# Transfrom date to year-mon-day
f.convdate = function(date) {
  return(data_frame(year = as.integer(substr(date,1,4)),
                    mon  = as.integer(substr(date,6,7)),
                    day  = as.integer(substr(date,9,10))))
}

df.tas = cbind(type = df.tas$type, f.convdate(df.tas$date), var = df.tas$var)
```

\
**Quick look at annual cycle (More detailed later)**

```{r message = FALSE, warning = FALSE}
df.tas %>% group_by(type, mon) %>% summarise(var = mean(var, na.rm = TRUE)) %>% 
  spread(mon, var) %>% pander(split.table = 130)
```


# Daily Analysis

\
**Create basic plot contents (template)**
```{r message = FALSE, warning = FALSE}
p0 = df.tas %>% spread(type, var) %>%
  ggplot(aes(x = OBS, y = Model)) +
  scale_x_continuous(breaks = seq(0, 35, 5), limits = c(5, 32.5)) +
  scale_y_continuous(breaks = seq(0, 35, 5), limits = c(5, 32.5)) +
  coord_fixed(ratio = 1) +
  labs(x = "Observation (C-degree)", y = "Model (C-degree)")
```

\
**Scatter-plot BASIC**

Using "alpha", the density of points can estimately seen
```{r message = FALSE, warning = FALSE}
p1 = p0 +
  geom_point(alpha = 0.08, color = "red") +
  geom_abline(intercept = 0, slope = 1) +
  labs(title = "Scatter-plot BASIC") +
  stat_cor(aes(label = paste(..r.label.., ..rr.label.., ..p.label.., sep = "~`,`~")),
           method = "pearson", label.x = 5, label.y = 31, na.rm = FALSE, 
           size = 4, color = "blue")
```

\
**Heatmap of point Density**
```{r message = FALSE, warning = FALSE}
p2 = p0 +
  stat_density_2d(geom = "raster", aes(fill = ..density..), contour = FALSE) +
  geom_abline(intercept = 0, slope = 1) +
  scale_fill_distiller(palette = "YlOrRd", direction = 1, 
                       limits = c(0, 0.015), oob = scales::squish) +
  labs(title = "Heatmap of point Density") +
  theme(legend.position = "right")
```

\
**Heatmap & Hex-Heatmap of point Count over ranges**

The ranges are defined by "bins"

```{r message = FALSE, warning = FALSE}
p3 = p0 +
  geom_bin2d(bins=36, aes(fill=..count..))+
  geom_abline(intercept = 0, slope = 1) +
  scale_fill_distiller(palette = "YlOrRd", direction = 1, 
                       limits = c(0, 20), oob = scales::squish) +
  labs(title = "Heatmap of point Count over ranges") +
  stat_cor(aes(label = paste(..r.label.., ..rr.label.., ..p.label.., sep = "~`,`~")),
           method = "pearson", label.x = 5, label.y = 31, na.rm = FALSE, 
           size = 4, color = "blue")

p4 = p0 +
  geom_hex(bins=36, aes(fill=..count..))+
  geom_abline(intercept = 0, slope = 1) +
  scale_fill_distiller(palette = "YlOrRd", direction = 1, 
                       limits = c(0, 20), oob = scales::squish) +
  labs(title = "Hex-Heatmap of point Count over ranges") +
  theme(legend.position = "right")
```

\
**2D Kernel density plot of point Density**
```{r message = FALSE, warning = FALSE}
p5 = p0 +
  stat_density_2d(aes(fill = ..level..), geom = "polygon", alpha= 1, col = "black") +
  scale_fill_distiller(palette = "Spectral", limits = c(0, 0.015), oob = scales::squish) +
  geom_abline(intercept = 0, slope = 1) +
  labs(title = "2D Kernel density plot of point Density") +
  stat_cor(aes(label = paste(..r.label.., ..rr.label.., ..p.label.., sep = "~`,`~")),
           method = "pearson", label.x = 5, label.y = 31, na.rm = FALSE, 
           size = 4, color = "blue")

p6 = p0 +
  stat_density_2d(aes(fill = ..level..), geom = "polygon", alpha = 1, show.legend = F) +
  stat_density_2d(aes(size = ..density.., fill = ..density..), geom = "point", 
                  shape = 21, n = 12, contour = FALSE, col = "black") +
  scale_fill_distiller(palette = "Spectral", limits = c(0, 0.015), oob = scales::squish) +
  geom_abline(intercept = 0, slope = 1) +
  labs(title = "Modified 2D Kernel density plot of point Density") +
  theme(legend.position = "right")
```

\
**Put all plots together using grid.arrange (gridExtra)**
```{r message = FALSE, warning = FALSE, fig.width = 10, fig.height = 12}
grid.arrange(p1, p2, p3, p4, p5, p6, ncol = 2, heights = c(3, 3, 3),
             layout_matrix = rbind(c(1, 2), c(3, 4), c(5, 6)))
```


# Monthly Analysis

\
**Calculate some climatology indices for each month over period**
```{r message = FALSE, warning = FALSE}
df.stat = df.tas %>% group_by(type, mon) %>% 
  summarise(Mean   = mean(var, na.rm = TRUE),
            Median = quantile(var, 0.50, na.rm = TRUE),
            Q25    = quantile(var, 0.25, na.rm = TRUE),
            Q75    = quantile(var, 0.75, na.rm = TRUE))
```

\
**Print out as a table**
```{r message = FALSE, warning = FALSE}
df.stat %>% ungroup() %>% 
  gather(key = indices, value = value, -c(type, mon)) %>% spread(key = mon, value = value) %>%
  arrange(indices) %>% pander(emphasize.strong.cols = c(1, 2), split.table = 130)
```

\
**Plot the table as heatmap**
```{r message = FALSE, warning = FALSE}
df.stat.plot = df.stat %>% ungroup() %>% 
  gather(key = indices, value = value, -c(type, mon)) %>% 
  mutate(idx = paste0(indices, "-", type))
df.stat.plot$value = round(df.stat.plot$value, digits = 1)

c.month = c("J", "F", "M", "A","M", "J", "J", "A", "S", "O", "N", "D")

p = df.stat.plot %>% ggplot() +
  geom_tile(aes(x = factor(mon), y = idx, fill = value), color = "black") +
  geom_text(aes(x = factor(mon), y = idx, label = value), color = "black", size = 3) +
  geom_hline(yintercept = c(2.5, 4.5, 6.5), size = 1, color = "black") +
  scale_fill_distiller(palette = "Spectral", limits = c(10, 30), oob = scales::squish) +
  scale_x_discrete(labels = c.month) +
  labs(title = "Heatmap for indices comparison",
       x = "Month", y = "") +
  theme(legend.position = "bottom")
p
```

\
**Create basic plot contents (template)**
```{r message = FALSE, warning = FALSE}

# Create function for cleaner code later
f.ggplot = function(p, discrete = FALSE) {
  p = p +
    scale_color_manual(values = c("red", "blue")) +
    scale_fill_manual(values = c("red", "blue")) +
    labs(x = "Months", y = "Temperature (C-degree)") +
    theme(legend.position = "bottom")
  if (discrete == TRUE) {
    p = p + scale_x_discrete(breaks = seq(12), labels = c.month)
  } else {
    p = p + scale_x_continuous(breaks = seq(12), labels = c.month)
  }
  return(p)
}

```

\
**Plotting annual cycle**
```{r message = FALSE, warning = FALSE}
p1 = f.ggplot(df.stat %>% ggplot() +
  geom_col(aes(x = mon, y = Mean, fill = factor(type, levels = c("OBS", "Model"))),
           alpha = 0.6, position = position_dodge(0.8)) +
  geom_errorbar(aes(x = mon , ymin = Q25, ymax = Q75, 
                    group = factor(type, levels = c("OBS", "Model"))),
                width = 0.5, position = position_dodge(0.8)) +
  labs(title = "Climatology of Temperature over period 2000-2002",
       subtitle = "Errorbars present range from Q25 to Q75"))

p2 = f.ggplot(df.stat %>% ggplot() +
  geom_line(aes(x = mon, y = Mean, color = factor(type, levels = c("OBS", "Model")))) +
  geom_ribbon(aes(x = mon, ymin = Q25, ymax = Q75, 
                  fill = factor(type, levels = c("OBS", "Model"))), alpha = 0.2) +
  labs(title = "Climatology of Temperature over period 2000-2002", 
       subtitle = "The shaded area presents range from Q25 & Q75"))
  
p3 = f.ggplot(df.tas %>% ggplot() +
  stat_summary(aes(x = mon, y = var, color = factor(type, levels = c("OBS", "Model"))),
               geom = "pointrange", position = position_dodge(width = 0.5),
               fun.y = mean, fun.ymax = max, fun.ymin = min, size = 0.5) +
  labs(title = "Climatology of Temperature over period 2000-2002", 
       subtitle = "The points present for Mean, the lines present range from MIN to MAX"))

p4 = f.ggplot(df.tas %>% ggplot() +
  geom_boxplot(aes(x = factor(mon), y = var, fill = factor(type, levels = c("OBS", "Model"))),
               alpha = 0.6, outlier.color = "purple", outlier.shape = 4, outlier.size = 1) +
  labs(title = "Climatology of Temperature over period 2000-2002", 
       subtitle = "The boxes present Median, Min, Max, Q25, Q75, and outliers"),
  discrete = TRUE)
```

```{r message = FALSE, warning = FALSE, fig.width = 11, fig.height = 8}
grid.arrange(p1, p2, p3, p4, ncol = 2, heights = c(3, 3),
             layout_matrix = rbind(c(1, 2), c(3, 4)))
```


# PDF

\
**Starting with histogram**
```{r message = FALSE, warning = FALSE}
m.breaks = seq(4, 32, 0.5)

p1 = df.tas %>% ggplot() +
  geom_histogram(aes(x = var, y = ..count.., fill = factor(type, levels = c("OBS", "Model"))), 
                 breaks = m.breaks, position = "identity", alpha = 0.3) +
  scale_fill_manual(values = c("red", "blue")) +
  scale_y_continuous(breaks = seq(0, 250, 50)) +
  labs(title = "Histogram", x = "Temperature", y = "Frequency (count)") +
  theme(legend.position = c(0.2, 0.85))
```

\
**Manually get histogram value and plot as step line**
```{r message = FALSE, warning = FALSE}
f.hist = function(df, breaks, density = FALSE) {
  h = hist(df, breaks = breaks, plot = FALSE)
  if (density == FALSE) {
    df.h = data_frame(x = h$breaks, y = c(h$count, NA))
  } else {
    df.h = data_frame(x = h$breaks, y = c(h$density, NA))
  }
  
  is.na(df.h$y) = df.h$y == 0.0
  
  return(df.h)
}

df.hist = f.hist(df.tas %>% filter(type == "OBS") %>% .$var, breaks = m.breaks) %>%
  mutate(type = "OBS")
df.hist = rbind(df.hist,
                f.hist(df.tas %>% filter(type == "Model") %>% .$var, breaks = m.breaks) %>%
                  mutate(type = "Model"))

p2 = p1 + 
  geom_step(data = df.hist, aes(x = x, y = y, color = factor(type, levels = c("OBS", "Model"))),
            stat = "identity", size = 0.35) +
  scale_color_manual(values = c("red", "blue")) +
  labs(title = "Histogram with step lines")
```

\
**Plot histogram with smoothing density**
```{r message = FALSE, warning = FALSE}
p3 = df.tas %>% ggplot() +
  geom_histogram(aes(x = var, y = ..density.., fill = factor(type, levels = c("OBS", "Model"))), 
                 breaks = m.breaks, position = "identity", alpha = 0.3) +
  geom_density(aes(x = var , fill = factor(type, levels = c("OBS", "Model"))), alpha = 0.3) +
  scale_fill_manual(values = c("red", "blue")) +
  scale_y_continuous(breaks = seq(0, 0.3, 0.05)) +
  labs(title = "Histogram (Density) with adding smooth", x = "Temperature", y = "Density") +
  theme(legend.position = c(0.2, 0.85))
```
\
**Using combination of Violin-plot and Box-Plot**
```{r message = FALSE, warning = FALSE}
p4 = df.tas %>% ggplot() +
  geom_violin(aes(x = type, y = var, group = type, fill = factor(type, levels = c("OBS", "Model"))), 
              trim = TRUE, alpha = 0.3) +
  geom_boxplot(aes(x = type, y = var, group = type, fill = factor(type, levels = c("OBS", "Model"))), 
               width=0.1, alpha = 0.6) +
  scale_fill_manual(values = c("red", "blue")) + 
  scale_y_continuous(breaks = seq(10, 30, 10)) +
  coord_flip() +
  labs(title = "Violin-plot combined with Box-plot", x = "", y = "Temperature") +
  theme(legend.position = c(0.2, 0.5))
```

\
```{r message = FALSE, warning = FALSE, fig.width = 10, fig.height = 7}
grid.arrange(p1, p2, p3, p4, ncol = 2, heights = c(3, 3),
             layout_matrix = rbind(c(1, 2), c(3, 4)))
```

\
**Appling "ggridges" density plot for monthly density**
```{r message = FALSE, warning = FALSE, fig.width = 10, fig.height = 5}
library(ggridges)
library(viridis)

p1 = df.tas %>% ggplot() +
  geom_density_ridges(aes(x = var, y = factor(mon), fill = factor(mon), col = factor(mon)), 
                      scale = 4, show.legend = FALSE) +
  scale_y_discrete(labels = c.month) +
  scale_fill_viridis(option = "A",alpha = 0.5, discrete = T)+
  scale_color_viridis(option = "C",alpha = 1, discrete = T, begin = 1, end = 0) +
  labs(title = "Kernel density plot using 'ggridges'", 
       x = "Temperature", y = "Months")

p2 = df.tas %>% ggplot() +
  geom_density_ridges(aes(x = var, y = factor(mon), fill = factor(mon), col = factor(mon)), 
                      scale = 4, show.legend = FALSE, alpha = 0.5,
                      stat = "binline", binwidth = 0.5, rel_min_height = 0.005) +
  scale_y_discrete(labels = c.month) +
  scale_fill_viridis(option = "A",alpha = 0.5, discrete = T)+
  scale_color_viridis(option = "C",alpha = 1, discrete = T, begin = 1, end = 0) +
  labs(title = "Kernel density plot using 'ggridges'", 
       x = "Temperature", y = "Months")


grid.arrange(p1, p2, ncol = 2, heights = c(3),
             layout_matrix = rbind(c(1, 2)))
  
```


# CDF

\
**Starting CDF with auto function in "ggplot"**
```{r message = FALSE, warning = FALSE}
p1 = df.tas %>% ggplot() +
  stat_ecdf(aes(x = var, color = type), geom = "step", pad = F) +
  scale_color_manual(values = c("red", "blue")) +
  labs(title = "CDF using 'ggplot' auto-function", x = "Temperature (C-degree)", y = "CDF") +
  theme(legend.position = c(0.2, 0.85))

```

\
**Manually calculate CDF and plot**

This help to extract the correct value

```{r message = FALSE, warning = FALSE}
f.getcdf = function (df, step = 1, qth = -999, xth = -999) {
  ## Recreate ecdf data
  df.ecdf = data.frame(x = unique(df),
                        y = ecdf(df)(unique(df))*length(df))
  ## rescale y to 0,1 range
  df.ecdf$y = 
    scale(df.ecdf$y,center = min(df.ecdf$y),scale = diff(range(df.ecdf$y)))
  
  df.ecdf = df.ecdf %>% arrange (by = x)
  return (df.ecdf[c(seq(1, length(df.ecdf$x), step), length(df.ecdf$x)), ])
  
  ## To extract correct value
  # if (xth == -999) {
  #   out = dat_ecdf %>% filter (rank(abs(y - qth)) == min(rank(abs(y - qth))))
  #   out = round (out, digits = 3)
  # } else {
  #   out = dat_ecdf %>% filter (rank(abs(x - xth)) == min(rank(abs(x - xth))))
  #   out = round (out, digits = 3)
  # }
  # 
  # return (out)
}

##
df.cdf = f.getcdf(df.tas %>% filter(type == "OBS") %>% .$var, step = 20) %>% 
  mutate(type = "OBS")
df.cdf = rbind(df.cdf,
               f.getcdf(df.tas %>% filter(type == "Model") %>% .$var, step = 20) %>% 
                 mutate(type = "Model"))

p2 = df.cdf %>% ggplot() +
  geom_step(aes(x = x, y = y, color = type)) +
  scale_color_manual(values = c("red", "blue")) +
  labs(title = "CDF by manually calculation", x = "Temperature (C-degree)", y = "CDF") +
  theme(legend.position = c(0.2, 0.85))
```

\
**Plot it out**
```{r message = FALSE, warning = FALSE, fig.width = 8, fig.height = 4}
grid.arrange(p1, p2, ncol = 2, heights = c(3),
             layout_matrix = rbind(c(1, 2)))
```

# Bonus - Plotly

One reason to use the manual calculation is that the auto function still not
working properly with "plotly" for example:

**The CDF by "ggplot" auto-function will be broken as:**

```{r message = FALSE, warning = FALSE, fig.width = 5, fig.height = 4}
library(plotly)
ggplotly(p1)
```

\
**Instead of**

```{r message = FALSE, warning = FALSE, fig.width = 5, fig.height = 4}
library(plotly)
ggplotly(p2)
```
