# required packages
library(dplyr)
library(tidyr)
library(ggplot2)
library(knitr)
library(GGally)
library(plotly)

# Load the functions we will want to use. These are currently written in R and
# will be translated into Python.
source("nearest_neighbor_matching.R") # the function match_nearest_neighbor is defined here
source("stitching_functions.R")  # the function stitch_global_mean is defined here

1 Objective

Take a look at the different metrics we could use in the matching process: does looking at the mean, range, sd, and such…

# Raw (unsmoothed) global mean temperature anomaly time series. This is the
# data that will be stitched together; the historical period has been pasted
# onto every scenario.
tgav_data <- read.csv(
  "inputs/main_raw_pasted_tgav_anomaly_all_pangeo_list_models.csv",
  stringsAsFactors = FALSE
)
# Drop the `X` column (presumably the row index written out with the CSV).
tgav_data <- dplyr::select(tgav_data, -X)


# Archive of chunked, smoothed tgav anomaly data. It was derived from
# tgav_data by several steps carried out in Python and saved to disk so that
# the transformation does not have to be repeated every time.
archive_data <- read.csv("inputs/archive_data.csv", stringsAsFactors = FALSE)


# Chunked, smoothed tgav anomaly for a single model/experiment/ensemble member
# (SSP245, realization 1), saved for convenience. To work with a different
# target, subset it from the archive_data data frame instead.
target_data <- read.csv("inputs/target_data.csv", stringsAsFactors = FALSE)

2 Archive Space

Compare two variables with one another.

- `fx`: median value
- `dx`: slope from a linear fit to the chunk data
- `mean_x`: the mean value
- `std_x`: standard deviation of the chunk
- `range_x`: difference between the min and max value
- `mse_x`: mean squared error (MSE) of the residuals about the linear regression fit

# Column positions of the candidate matching metrics, in the order in which
# they appear in the archive.
cols <- which(is.element(
  names(archive_data),
  c("fx", "dx", "mean_x", "std_x", "range_x", "mse_x")
))

# Pairwise comparison of the metrics against one another, coloured by
# experiment.
# NOTE(review): the title is hard-coded to "CanESM5" -- confirm the archive
# only contains that model.
ggpairs(
  data = archive_data,
  mapping = aes(color = experiment, fill = experiment, alpha = 0.4),
  columns = cols
) +
  theme_bw() +
  labs(title = "CanESM5")

# Interactive 3-D scatter of the matching space spanned by fx, dx and mse_x,
# with markers coloured by experiment.
layout(
  plot_ly(
    x = archive_data[["fx"]],
    y = archive_data[["dx"]],
    z = archive_data[["mse_x"]],
    type = "scatter3d",
    mode = "markers",
    color = archive_data[["experiment"]]
  ),
  title = "3-D Matching Space",
  scene = list(
    xaxis = list(title = "fx"),
    yaxis = list(title = "dx"),
    zaxis = list(title = "mse")
  )
)