The objective of this analysis is to look at what happens as n, the number of ensemble members, increases: how does sn, the synthetic ensemble size, change? We look at what happens when the tolerance is set at three different levels. For now we have only looked at the results for CanESM5.
library(ggplot2)
library(dplyr)
library(knitr)
BASE_DIR <- "/Users/dorh012/projects/2021/cleanup_stitch/stitches/enriching_sample_size"
Looking at the tables below:
For n vs. sn for CanESM5 with tol 0.07, 0.1, and 0.13 we saw a 1-to-1 relationship regardless of the size of sn. The only time this wasn’t the case was when we had an obscene tolerance set to 0.5; then we saw a non-1-to-1 relationship between n and sn (n * 3.2 = sn), which I found somewhat puzzling…
# Read every "synthetic" file of the large-tolerance out-of-sample run into
# one data frame called `data`.
#
# Two labels are parsed from each file name:
#   * ensemble   - the integer before the first "_" in the file name
#   * experiment - "ssp245" when the name contains that string; every other
#                  file is labelled "ssp370"
synthetic_dir <- file.path(BASE_DIR, "outofsample_largetol")
synthetic_files <- list.files(synthetic_dir, "synthetic", full.names = TRUE)

# Fail early with a clear message; otherwise an empty directory only shows up
# later as a confusing missing-column error.
if (length(synthetic_files) == 0) {
  stop("No synthetic files found in ", synthetic_dir, call. = FALSE)
}

data <- synthetic_files %>%
  lapply(function(path) {
    file_name <- basename(path)
    # The test is scalar, so use if/else rather than the vectorised ifelse().
    scenario <- if (grepl("ssp245", file_name, fixed = TRUE)) {
      "ssp245"
    } else {
      "ssp370"
    }
    train_en <- as.integer(sub("_.*", "", file_name))

    read.csv(path, stringsAsFactors = FALSE) %>%
      select(year, value, variable, stitching_id, tol) %>%
      mutate(experiment = scenario, ensemble = train_en)
  }) %>%
  bind_rows()
# Count the distinct stitching ids (sn) for each experiment / ensemble size (n)
# / tolerance combination and render the counts as a markdown table.
data %>%
  group_by(experiment, n = ensemble, tol) %>%
  # Drop the grouping explicitly: this silences the summarise() grouping
  # message and hands kable() an ungrouped table.
  summarise(sn = n_distinct(stitching_id), .groups = "drop") %>%
  knitr::kable()
| experiment | n | tol | sn |
|---|---|---|---|
| ssp245 | 5 | 0.07 | 5 |
| ssp245 | 5 | 0.10 | 5 |
| ssp245 | 5 | 0.13 | 5 |
| ssp245 | 5 | 0.50 | 15 |
| ssp245 | 10 | 0.07 | 10 |
| ssp245 | 10 | 0.10 | 10 |
| ssp245 | 10 | 0.13 | 10 |
| ssp245 | 10 | 0.50 | 32 |
| ssp245 | 15 | 0.07 | 15 |
| ssp245 | 15 | 0.10 | 15 |
| ssp245 | 15 | 0.13 | 15 |
| ssp245 | 15 | 0.50 | 47 |
| ssp245 | 25 | 0.07 | 25 |
| ssp245 | 25 | 0.10 | 25 |
| ssp245 | 25 | 0.13 | 25 |
| ssp245 | 25 | 0.50 | 79 |
| ssp370 | 5 | 0.07 | 5 |
| ssp370 | 5 | 0.10 | 5 |
| ssp370 | 5 | 0.13 | 5 |
| ssp370 | 5 | 0.50 | 16 |
| ssp370 | 10 | 0.07 | 9 |
| ssp370 | 10 | 0.10 | 10 |
| ssp370 | 10 | 0.13 | 10 |
| ssp370 | 10 | 0.50 | 31 |
| ssp370 | 15 | 0.07 | 15 |
| ssp370 | 15 | 0.10 | 15 |
| ssp370 | 15 | 0.13 | 15 |
| ssp370 | 15 | 0.50 | 46 |
| ssp370 | 25 | 0.07 | 26 |
| ssp370 | 25 | 0.10 | 25 |
| ssp370 | 25 | 0.13 | 25 |
| ssp370 | 25 | 0.50 | 80 |
# Read every "synthetic" file of the in-sample run into one data frame called
# `data` (replacing any earlier `data`).
#
# Two labels are parsed from each file name:
#   * ensemble   - the integer before the first "_" in the file name
#   * experiment - "ssp245" when the name contains that string; every other
#                  file is labelled "ssp370"
synthetic_dir <- file.path(BASE_DIR, "in_sample")
synthetic_files <- list.files(synthetic_dir, "synthetic", full.names = TRUE)

# Fail early with a clear message; otherwise an empty directory only shows up
# later as a confusing missing-column error.
if (length(synthetic_files) == 0) {
  stop("No synthetic files found in ", synthetic_dir, call. = FALSE)
}

data <- synthetic_files %>%
  lapply(function(path) {
    file_name <- basename(path)
    # The test is scalar, so use if/else rather than the vectorised ifelse().
    scenario <- if (grepl("ssp245", file_name, fixed = TRUE)) {
      "ssp245"
    } else {
      "ssp370"
    }
    train_en <- as.integer(sub("_.*", "", file_name))

    read.csv(path, stringsAsFactors = FALSE) %>%
      select(year, value, variable, stitching_id, tol) %>%
      mutate(experiment = scenario, ensemble = train_en)
  }) %>%
  bind_rows()
# Count the distinct stitching ids (sn) for each experiment / ensemble size (n)
# / tolerance combination and render the counts as a markdown table.
data %>%
  group_by(experiment, n = ensemble, tol) %>%
  # Drop the grouping explicitly: this silences the summarise() grouping
  # message and hands kable() an ungrouped table.
  summarise(sn = n_distinct(stitching_id), .groups = "drop") %>%
  knitr::kable()
| experiment | n | tol | sn |
|---|---|---|---|
| ssp245 | 5 | 0.07 | 5 |
| ssp245 | 5 | 0.10 | 5 |
| ssp245 | 5 | 0.13 | 5 |
| ssp245 | 10 | 0.07 | 10 |
| ssp245 | 10 | 0.10 | 10 |
| ssp245 | 10 | 0.13 | 10 |
| ssp245 | 15 | 0.07 | 16 |
| ssp245 | 15 | 0.10 | 15 |
| ssp245 | 15 | 0.13 | 15 |
| ssp245 | 25 | 0.07 | 25 |
| ssp245 | 25 | 0.10 | 25 |
| ssp245 | 25 | 0.13 | 25 |
| ssp370 | 5 | 0.07 | 5 |
| ssp370 | 5 | 0.10 | 5 |
| ssp370 | 5 | 0.13 | 5 |
| ssp370 | 10 | 0.07 | 10 |
| ssp370 | 10 | 0.10 | 10 |
| ssp370 | 10 | 0.13 | 10 |
| ssp370 | 15 | 0.07 | 15 |
| ssp370 | 15 | 0.10 | 15 |
| ssp370 | 15 | 0.13 | 15 |
| ssp370 | 25 | 0.07 | 25 |
| ssp370 | 25 | 0.10 | 25 |
| ssp370 | 25 | 0.13 | 25 |
# Read every "synthetic" file of the "unlimted_target_en" run into one data
# frame called `data` (replacing any earlier `data`). The directory name is
# kept exactly as spelled on disk.
#
# Two labels are parsed from each file name:
#   * ensemble   - the integer before the first "_" in the file name
#   * experiment - "ssp245" when the name contains that string; every other
#                  file is labelled "ssp370"
synthetic_dir <- file.path(BASE_DIR, "unlimted_target_en")
synthetic_files <- list.files(synthetic_dir, "synthetic", full.names = TRUE)

# Fail early with a clear message; otherwise an empty directory only shows up
# later as a confusing missing-column error.
if (length(synthetic_files) == 0) {
  stop("No synthetic files found in ", synthetic_dir, call. = FALSE)
}

data <- synthetic_files %>%
  lapply(function(path) {
    file_name <- basename(path)
    # The test is scalar, so use if/else rather than the vectorised ifelse().
    scenario <- if (grepl("ssp245", file_name, fixed = TRUE)) {
      "ssp245"
    } else {
      "ssp370"
    }
    train_en <- as.integer(sub("_.*", "", file_name))

    read.csv(path, stringsAsFactors = FALSE) %>%
      select(year, value, variable, stitching_id, tol) %>%
      mutate(experiment = scenario, ensemble = train_en)
  }) %>%
  bind_rows()
# Count the distinct stitching ids (sn) for each experiment / ensemble size (n)
# / tolerance combination and render the counts as a markdown table.
data %>%
  group_by(experiment, n = ensemble, tol) %>%
  # Drop the grouping explicitly: this silences the summarise() grouping
  # message and hands kable() an ungrouped table.
  summarise(sn = n_distinct(stitching_id), .groups = "drop") %>%
  knitr::kable()
| experiment | n | tol | sn |
|---|---|---|---|
| ssp245 | 5 | 0.07 | 5 |
| ssp245 | 5 | 0.10 | 5 |
| ssp245 | 5 | 0.13 | 5 |
| ssp245 | 10 | 0.07 | 10 |
| ssp245 | 10 | 0.10 | 10 |
| ssp245 | 10 | 0.13 | 10 |
| ssp245 | 15 | 0.07 | 15 |
| ssp245 | 15 | 0.10 | 15 |
| ssp245 | 15 | 0.13 | 15 |
| ssp245 | 25 | 0.07 | 25 |
| ssp245 | 25 | 0.10 | 25 |
| ssp245 | 25 | 0.13 | 25 |
| ssp370 | 5 | 0.07 | 5 |
| ssp370 | 5 | 0.10 | 5 |
| ssp370 | 5 | 0.13 | 5 |
| ssp370 | 10 | 0.07 | 10 |
| ssp370 | 10 | 0.10 | 10 |
| ssp370 | 10 | 0.13 | 10 |
| ssp370 | 15 | 0.07 | 15 |
| ssp370 | 15 | 0.10 | 15 |
| ssp370 | 15 | 0.13 | 15 |
| ssp370 | 25 | 0.07 | 26 |
| ssp370 | 25 | 0.10 | 25 |
| ssp370 | 25 | 0.13 | 25 |
# Read every "synthetic" file of the in-sample run into one data frame called
# `data` (replacing any earlier `data`), this time with an extra label column.
#
# Columns derived from each file name / row:
#   * ensemble   - the integer before the first "_" in the file name
#   * experiment - "ssp245" when the name contains that string; every other
#                  file is labelled "ssp370"
#   * tol_en     - text label of the form "<tol> (<ensemble>)"
synthetic_dir <- file.path(BASE_DIR, "in_sample")
synthetic_files <- list.files(synthetic_dir, "synthetic", full.names = TRUE)

# Fail early with a clear message; otherwise an empty directory only shows up
# later as a confusing missing-column error.
if (length(synthetic_files) == 0) {
  stop("No synthetic files found in ", synthetic_dir, call. = FALSE)
}

data <- synthetic_files %>%
  lapply(function(path) {
    file_name <- basename(path)
    # The test is scalar, so use if/else rather than the vectorised ifelse().
    scenario <- if (grepl("ssp245", file_name, fixed = TRUE)) {
      "ssp245"
    } else {
      "ssp370"
    }
    train_en <- as.integer(sub("_.*", "", file_name))

    read.csv(path, stringsAsFactors = FALSE) %>%
      select(year, value, variable, stitching_id, tol) %>%
      mutate(
        experiment = scenario,
        ensemble = train_en,
        tol_en = paste0(tol, " (", train_en, ")")
      )
  }) %>%
  bind_rows()
# Plot the ssp245 series for 1900-2100 as a grid of panels: one row per
# tolerance and one column per ensemble size.
data %>%
  filter(experiment == "ssp245", year %in% 1900:2100) %>%
  # Treat the tolerance as a discrete label so it gets a discrete colour scale.
  mutate(tol = as.character(tol)) %>%
  ggplot() +
  # The aesthetic is `group`; ggplot2 ignores the unknown name `groupby`, which
  # left all series sharing a colour in a panel joined into a single path.
  # Grouping by stitching_id draws one line per stitched series.
  geom_line(
    aes(year, value, color = tol, group = stitching_id),
    alpha = 0.5
  ) +
  facet_grid(tol ~ ensemble) +
  theme_bw() +
  labs(
    title = "Ensemble size in vs. Tol",
    y = "Deg C"
  )
# Zoom in on the first years (1900-1905) of the ssp245 series, with one panel
# per ensemble size and the tolerances distinguished by colour.
data %>%
  filter(experiment == "ssp245", year %in% 1900:1905) %>%
  # Treat the tolerance as a discrete label so it gets a discrete colour scale.
  mutate(tol = as.character(tol)) %>%
  ggplot() +
  # The aesthetic is `group`; ggplot2 ignores the unknown name `groupby`.
  # Each panel mixes all tolerances, so group on stitching_id and tol together:
  # a stitching_id that appears under several tolerances is never joined into
  # one line.
  geom_line(
    aes(year, value, color = tol, group = interaction(stitching_id, tol)),
    alpha = 0.5
  ) +
  facet_grid(~ensemble) +
  theme_bw() +
  labs(
    title = "Ensemble size in vs. Tol 1900 - 1905",
    y = "Deg C"
  )