Objective
How does changing the chunk size affect the matching results? Does it matter whether the data are chunked, matched, and stitched using periods of 5 or 9 years?
Set Up
library(dplyr)
library(tidyr)
library(ggplot2)
library(knitr)
library(kableExtra)
# Root directory of the stitches development notebooks. This is a
# machine-specific absolute path and must be changed to run elsewhere.
BASE_DIR <- "/Users/dorh012/Documents/2021/stitches/notebooks/stitches_dev"

# Bring the matching and stitching helper functions into the session.
source(file.path(BASE_DIR, "nearest_neighbor_matching.R"))
source(file.path(BASE_DIR, "stitching_functions.R"))

# Experiments removed from both archives before any matching is done.
excluded_experiments <- c("ssp534-over", "ssp245")

# Archive and target data built from 5 year chunks.
archive_data_5 <- dplyr::filter(
  read.csv(
    file.path(BASE_DIR, "inputs", "archive_data_5yrs.csv"),
    stringsAsFactors = FALSE
  ),
  !experiment %in% excluded_experiments
)
target_data_5 <- read.csv(
  file.path(BASE_DIR, "inputs", "target_data_5yrs.csv"),
  stringsAsFactors = FALSE
)

# Archive and target data built from 9 year chunks.
archive_data_9 <- dplyr::filter(
  read.csv(
    file.path(BASE_DIR, "inputs", "archive_data_9yrs.csv"),
    stringsAsFactors = FALSE
  ),
  !experiment %in% excluded_experiments
)
target_data_9 <- read.csv(
  file.path(BASE_DIR, "inputs", "target_data_9yrs.csv"),
  stringsAsFactors = FALSE
)

# Data that will be stitched together; the X column is dropped on import.
# NOTE(review): this path is relative to the working directory, unlike the
# BASE_DIR-based paths above -- confirm that is intended.
tgav_data <- dplyr::select(
  read.csv(
    "inputs/main_raw_pasted_tgav_anomaly_all_pangeo_list_models.csv",
    stringsAsFactors = FALSE
  ),
  -X
)
# Alternative input, left commented out:
#read.csv(file.path(BASE_DIR, "inputs", "tgav_data.csv"), stringsAsFactors = FALSE)
Compare the inputs
How does the relationship between fx and dx differ between the two chunk sizes?
# Scatter of fx against dx for the target data, one point layer per chunk
# length, with colour separating the 5 yr and 9 yr chunking.
ggplot() +
  geom_point(
    data = target_data_5,
    mapping = aes(x = fx, y = dx, colour = "5 yr chunks")
  ) +
  geom_point(
    data = target_data_9,
    mapping = aes(x = fx, y = dx, colour = "9 yr chunks")
  ) +
  labs(
    title = "Target Data",
    subtitle = "comparing 5 and 9 yr chunks",
    x = "fx (median value per chunk)",
    y = "dx (rate of change per chunk)"
  ) +
  theme_bw()

# Scatter of fx against dx for the archive data, one point layer per chunk
# length, with colour separating the 5 yr and 9 yr chunking.
ggplot() +
  geom_point(
    data = archive_data_5,
    mapping = aes(x = fx, y = dx, colour = "5 yr chunks")
  ) +
  geom_point(
    data = archive_data_9,
    mapping = aes(x = fx, y = dx, colour = "9 yr chunks")
  ) +
  labs(
    title = "Archive Data",
    subtitle = "comparing 5 and 9 yr chunks",
    x = "fx (median value per chunk)",
    y = "dx (rate of change per chunk)"
  ) +
  theme_bw()

Match archive and target data
# Nearest-neighbour match of the 5 yr target chunks against the 5 yr archive,
# labelled with the chunk length so the results can be combined later.
match_5 <- match_nearest_neighbor(
  target_data = target_data_5,
  archive_data = archive_data_5
)
match_5[["chunk"]] <- "5"

# Same matching for the 9 yr chunks.
match_9 <- match_nearest_neighbor(
  target_data = target_data_9,
  archive_data = archive_data_9
)
match_9[["chunk"]] <- "9"
# Stack the 5 yr and 9 yr matches into a single data frame.
d <- dplyr::bind_rows(match_5, match_9)

# One panel per chunk length: every target point, the archive point it was
# matched to, and a segment joining the pair.
ggplot(data = d) +
  geom_point(
    mapping = aes(x = archive_fx, y = archive_dx,
                  colour = "matched archive data")
  ) +
  geom_point(
    mapping = aes(x = target_fx, y = target_dx, colour = "target data"),
    alpha = 0.4
  ) +
  geom_segment(
    mapping = aes(x = target_fx, y = target_dx,
                  xend = archive_fx, yend = archive_dx),
    alpha = 0.4
  ) +
  # Fixed colours for the two point layers.
  scale_colour_manual(
    values = c("matched archive data" = "red", "target data" = "blue")
  ) +
  facet_wrap(~chunk) +
  theme_bw() +
  labs(
    title = "Compare target with matched archive values",
    subtitle = "comparing 5 and 9 yr chunks",
    x = "fx (value of median time per chunk)",
    y = "dx (rate of change per chunk)"
  )

# Reshape the three distance columns into long form, then draw a dot plot of
# their values filled by chunk length, with one facet per distance column.
d %>%
  pivot_longer(
    cols = c("dist_dx", "dist_fx", "dist_l2"),
    names_to = "distance",
    values_to = "value"
  ) %>%
  ggplot(mapping = aes(x = value, fill = chunk)) +
  geom_dotplot(alpha = 0.5) +
  facet_wrap(~distance) +
  theme_bw() +
  labs(title = "Compare Match Error", x = "distance")

Match Data

LS0tCnRpdGxlOiAnNSB2IDkgQ2h1bmsgU2l6ZScKZGF0ZTogImByIGZvcm1hdChTeXMudGltZSgpLCAnJWQgJUIsICVZJylgIgpvdXRwdXQ6IAogIGh0bWxfbm90ZWJvb2s6IAogICAgdG9jOiB0cnVlCiAgICB0b2NfZmxvYXQ6CiAgICAgIHRvY19jb2xsYXBzZWQ6IHRydWUKICAgIHRvY19kZXB0aDogNAogICAgbnVtYmVyX3NlY3Rpb25zOiB0cnVlCiAgICB0aGVtZTogbHVtZW4KLS0tCgojIE9iamVjdGl2ZQoKCkhvdyBkb2VzIGNoYW5naW5nIHRoZSBjaHVuayBzaXplIGVmZmVjdCB0aGUgbWF0Y2hpbmcgcmVzdWx0cz8gRG9lcyBjaHVua2luZywgbWF0Y2hpbmcsIGFuZCBzdGl0Y2hpbmcgZGF0YSBiYXNlZCBvbiBwZXJpb2RzIG9mIDUgb3IgOSB5ZWFycyBtYXR0ZXI/IAoKCiMgU2V0IFVwIAoKYGBge3IsIG1lc3NhZ2UgPSBGQUxTRSwgd2FybmluZyA9IEZBTFNFfQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KHRpZHlyKQpsaWJyYXJ5KGdncGxvdDIpCmxpYnJhcnkoa25pdHIpCmxpYnJhcnkoa2FibGVFeHRyYSkKCiMgTm90ZSB0aGlzIHdpbGwgbmVlZCB0byBjaGFuZ2VkIHRvIHJ1biBvbiB5b3VyIGxvY2FsIG1hY2hpbmUgCkJBU0VfRElSIDwtICIvVXNlcnMvZG9yaDAxMi9Eb2N1bWVudHMvMjAyMS9zdGl0Y2hlcy9ub3RlYm9va3Mvc3RpdGNoZXNfZGV2IgoKIyBMb2FkIHRoZSBmdW5jdGlvbnMgdGhhdCB3ZSB3aWxsIHVzZSEgCnNvdXJjZShmaWxlLnBhdGgoQkFTRV9ESVIsICJuZWFyZXN0X25laWdoYm9yX21hdGNoaW5nLlIiKSkKc291cmNlKGZpbGUucGF0aChCQVNFX0RJUiwgInN0aXRjaGluZ19mdW5jdGlvbnMuUiIpKQoKIyBJbXBvcnQgdGhlIGFyY2hpdmUgYW5kIHRhcmdldCBkYXRhIGZyb20gdGhlIHR3byBkaWZmZXJlbnQgcGVyaW9kcy4gCmFyY2hpdmVfZGF0YV81IDwtIHJlYWQuY3N2KGZpbGUucGF0aChCQVNFX0RJUiwgImlucHV0cyIsICJhcmNoaXZlX2RhdGFfNXlycy5jc3YiKSwgc3RyaW5nc0FzRmFjdG9ycyA9IEZBTFNFKSAlPiUgCiAgZHBseXI6OmZpbHRlcighZXhwZXJpbWVudCAlaW4lIGMoInNzcDUzNC1vdmVyIiwgInNzcDI0NSIpKQp0YXJnZXRfZGF0YV81IDwtIHJlYWQuY3N2KGZpbGUucGF0aChCQVNFX0RJUiwgImlucHV0cyIsICJ0YXJnZXRfZGF0YV81eXJzLmNzdiIpLCBzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UpCgphcmNoaXZlX2RhdGFfOSA8LSByZWFkLmNzdihmaWxlLnBhdGgoQkFTRV9ESVIsICJpbnB1dHMiLCAiYXJjaGl2ZV9kYXRhXzl5cnMuY3N2IiksIHN0cmluZ3NBc0ZhY3RvcnMgPSBGQUxTRSkgICU+JSAKICBkcGx5cjo6ZmlsdGVyKCFleHBlcmltZW50ICVpbiUgYygic3NwNTM0LW92ZXIiLCAic3NwMjQ1IikpCnRhcmdldF9kYXRhXzkgPC0gcmVhZC5jc3YoZmlsZS5wYXRoKEJBU0VfRElSLCAiaW5wdXRzIiwgInRhcmdldF9kYXRhXzl5cnMuY3N2IiksIHN0cmluZ3NBc0ZhY3RvcnMgPSBGQUxTRSkKCiMgSW1wb3J0IHRoZSBkYXRhIHRvIGJlIHN0aWNoZWQgdG9nZXRoZXIuIAp0Z2F2X2Rh
dGEgPC0gcmVhZC5jc3YoImlucHV0cy9tYWluX3Jhd19wYXN0ZWRfdGdhdl9hbm9tYWx5X2FsbF9wYW5nZW9fbGlzdF9tb2RlbHMuY3N2IiwgCiAgICAgICAgICAgICAgICAgICAgICBzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UpICU+JSBkcGx5cjo6c2VsZWN0KC1YKQogIAogICNyZWFkLmNzdihmaWxlLnBhdGgoQkFTRV9ESVIsICJpbnB1dHMiLCAidGdhdl9kYXRhLmNzdiIpLCBzdHJpbmdzQXNGYWN0b3JzID0gRkFMU0UpCmBgYAoKIyBDb21wYXJlIHRoZSBpbnB1dHMgCgpXaGF0IGlzIHRoZSBkaWZmZXJlbmNlIGJldHdlZW4gdGhlIGZ4IGFuZCBkeCByZWxhdGlvbnNoaXAgZm9yIHRoZSB0d28gZGlmZmVyZW50IGNodW5rcz8gCgpgYGB7cn0KZ2dwbG90KCkgKyAKICBnZW9tX3BvaW50KGRhdGEgPSB0YXJnZXRfZGF0YV81LCBhZXMoZngsIGR4LCBjb2xvciA9ICI1IHlyIGNodW5rcyIpKSArIAogIGdlb21fcG9pbnQoZGF0YSA9IHRhcmdldF9kYXRhXzksIGFlcyhmeCwgZHgsIGNvbG9yID0gIjkgeXIgY2h1bmtzIikpICsgCiAgbGFicyh0aXRsZSA9ICJUYXJnZXQgRGF0YSIsIHN1YnRpdGxlID0gImNvbXBhcmluZyA1IGFuZCA5IHlyIGNodW5rcyIsIAogICAgICAgeSA9ICJkeCAocmF0ZSBvZiBjaGFuZ2UgcGVyIGNodW5rKSIsIHggPSAiZnggKG1lZGlhbiB2YWx1ZSBwZXIgY2h1bmspIikgKyAKICB0aGVtZV9idygpCmBgYAoKCmBgYHtyfQpnZ3Bsb3QoKSArIAogIGdlb21fcG9pbnQoZGF0YSA9IGFyY2hpdmVfZGF0YV81LCBhZXMoZngsIGR4LCBjb2xvciA9ICI1IHlyIGNodW5rcyIpKSArIAogIGdlb21fcG9pbnQoZGF0YSA9IGFyY2hpdmVfZGF0YV85LCBhZXMoZngsIGR4LCBjb2xvciA9ICI5IHlyIGNodW5rcyIpKSArIAogIGxhYnModGl0bGUgPSAiQXJjaGl2ZSBEYXRhIiwgc3VidGl0bGUgPSAiY29tcGFyaW5nIDUgYW5kIDkgeXIgY2h1bmtzIiwgCiAgICAgICB5ID0gImR4IChyYXRlIG9mIGNoYW5nZSBwZXIgY2h1bmspIiwgeCA9ICJmeCAobWVkaWFuIHZhbHVlIHBlciBjaHVuaykiKSArIAogIHRoZW1lX2J3KCkKYGBgCgoKIyMgTWF0Y2ggYXJjaGl2ZSBhbmQgdGFyZ2V0IGRhdGEgCgoKYGBge3J9CiMgTWF0Y2ggdGhlIHRhcmdldCBhbmQgYXJjaGl2ZSBkYXRhIHdpdGggb25lIGFub3RoZXIuIAptYXRjaF81IDwtIG1hdGNoX25lYXJlc3RfbmVpZ2hib3IodGFyZ2V0X2RhdGEgPSB0YXJnZXRfZGF0YV81LCBhcmNoaXZlX2RhdGEgPSBhcmNoaXZlX2RhdGFfNSkKbWF0Y2hfOSA8LSBtYXRjaF9uZWFyZXN0X25laWdoYm9yKHRhcmdldF9kYXRhID0gdGFyZ2V0X2RhdGFfOSwgYXJjaGl2ZV9kYXRhID0gYXJjaGl2ZV9kYXRhXzkpCgptYXRjaF81JGNodW5rIDwtICI1IgptYXRjaF85JGNodW5rIDwtICI5IgpgYGAKCgpgYGB7cn0KIyBKb2luIHRoZSBkYXRhIGZyYW1lcyB0b2dldGhlciAKZCA8LSBkcGx5cjo6YmluZF9yb3dzKG1hdGNoXzUsIG1hdGNoXzkpCgojIFBsb3QgCmdncGxvdCgpICsgCiAgIyBBZGQgdGhlIGRhdGEgZnJv
bSB0aGUgNSB5ciAKICBnZW9tX3BvaW50KGRhdGEgPSBkLCBhZXMoYXJjaGl2ZV9meCwgYXJjaGl2ZV9keCwgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb2xvciA9ICJtYXRjaGVkIGFyY2hpdmUgZGF0YSIpKSArIAogIGdlb21fcG9pbnQoZGF0YSA9IGQsIGFlcyh0YXJnZXRfZngsIHRhcmdldF9keCwgIAogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb2xvciA9ICJ0YXJnZXQgZGF0YSIpLCBhbHBoYSA9IDAuNCkgKyAKICBnZW9tX3NlZ21lbnQoZGF0YSA9IGQsIGFlcyh4ID0gdGFyZ2V0X2Z4LCB5ID0gdGFyZ2V0X2R4LCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICB4ZW5kID0gYXJjaGl2ZV9meCwgeWVuZCA9IGFyY2hpdmVfZHgpLCBhbHBoYSA9IDAuNCkgKwogIAogICMgU29tZSBhc3RoZXRpY3MgCiAgc2NhbGVfY29sb3JfbWFudWFsKHZhbHVlcyA9IGMoIm1hdGNoZWQgYXJjaGl2ZSBkYXRhIiA9ICJyZWQiLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICJ0YXJnZXQgZGF0YSIgPSAiYmx1ZSIpKSArCiAgZmFjZXRfd3JhcCgiY2h1bmsiKSArCiAgdGhlbWVfYncoKSArIAogIGxhYnMoeSA9ICJkeCAocmF0ZSBvZiBjaGFuZ2UgcGVyIGNodW5rKSIsIAogICAgICAgeCA9ICJmeCAodmFsdWUgb2YgbWVkaWFuIHRpbWUgcGVyIGNodW5rKSIsIAogICAgICAgdGl0bGUgPSAiQ29tcGFyZSB0YXJnZXQgd2l0aCBtYXRjaGVkIGFyY2hpdmUgdmFsdWVzIiwgCiAgICAgICBzdWJ0aXRsZSA9ICJjb21wYXJpbmcgNSBhbmQgOSB5ciBjaHVua3MiICkKYGBgCgpgYGB7cn0KcGl2b3RfbG9uZ2VyKGQsIGNvbHMgPSBjKCJkaXN0X2R4IiwgImRpc3RfZngiLCAiZGlzdF9sMiIpLCBuYW1lc190byA9ICJkaXN0YW5jZSIsIHZhbHVlc190byA9ICJ2YWx1ZSIpICU+JSAKICBnZ3Bsb3QoKSArIAogIGdlb21fZG90cGxvdChhZXModmFsdWUsIGZpbGwgPSBjaHVuayksIGFscGhhID0gMC41KSArIAogIHRoZW1lX2J3KCkgKyAKICBmYWNldF93cmFwKCJkaXN0YW5jZSIpICsgCiAgbGFicyh0aXRsZSA9ICJDb21wYXJlIE1hdGNoIEVycm9yIiwgeCA9ICJkaXN0YW5jZSIpCmBgYAoKIyBNYXRjaCBEYXRhIAoKYGBge3J9CgptYXRjaF9uZWFyZXN0X25laWdoYm9yKHRhcmdldF9kYXRhID0gdGFyZ2V0X2RhdGFfNSwgYXJjaGl2ZV9kYXRhID0gYXJjaGl2ZV9kYXRhXzUpICU+JSAKICBzdGl0Y2hfZ2xvYmFsX21lYW4obWF0Y2ggPSAuLCBkYXRhID0gdGdhdl9kYXRhKSAlPiUgCiAgbXV0YXRlKGNodW5rID0gIjUiKSAtPiAKICBvdXQ1CgptYXRjaF9uZWFyZXN0X25laWdoYm9yKHRhcmdldF9kYXRhID0gdGFyZ2V0X2RhdGFfOSwgYXJjaGl2ZV9kYXRhID0gYXJjaGl2ZV9kYXRhXzkpICU+JSAKICBzdGl0Y2hfZ2xvYmFsX21lYW4obWF0Y2ggPSAuLCBkYXRhID0gdGdhdl9kYXRhKSAlPiUgCiAgbXV0YXRlKGNodW5rID0gIjkiKSAtPiAK
ICBvdXQ5CgoKdGdhdl9kYXRhICU+JSAgCiAgZHBseXI6OmZpbHRlcihleHBlcmltZW50ID09ICJzc3AyNDUiKSAlPiUgIAogIGRwbHlyOjpmaWx0ZXIobW9kZWwgPT0gIkNhbkVTTTUiKSAlPiUgIAogIGRwbHlyOjpmaWx0ZXIoZW5zZW1ibGUgPT0gInIxaTFwMWYxIikgJT4lICAKICBkcGx5cjo6ZmlsdGVyKHllYXIgPj0gMjAwMCkgLT4KICBvcmlnaW5hbF9kYXRhCgpkcGx5cjo6YmluZF9yb3dzKG91dDUsIG91dDkpICU+JSAKICBkcGx5cjo6ZmlsdGVyKHllYXIgPj0gMjAwMCkgJT4lIAogIGdncGxvdChhZXMoeWVhciwgdmFsdWUsIGNvbG9yID0gY2h1bmspKSArIAogIGdlb21fbGluZShkYXRhID0gb3JpZ2luYWxfZGF0YSwgYWVzKHllYXIsIHZhbHVlLCBjb2xvciA9ICJvcmlnaW5hbCBkYXRhIikpICsgCiAgZ2VvbV9saW5lKCkgKyAKICB0aGVtZV9idygpICsKICBsYWJzKHRpdGxlID0gIlN0aXRjaGVkIERhdGEiLCBzdWJ0aXRsZSA9ICJmdXR1cmUgc2NlbmFyaW8gb25seSIsIHkgPSAiRGVnIEMiLCB4ID0gIlllYXIiKSArIAogIHNjYWxlX2NvbG9yX21hbnVhbCh2YWx1ZXMgPSBjKCJvcmlnaW5hbCBkYXRhIiA9ICJncmV5IiwgIjUiID0gInJlZCIsICI5IiA9ICJibHVlIikpCgpgYGAKCgoKCgo=