When reading the files you sent me, some data is missing.
source | sland | beta_gpp | beta_reco | nrf |
---|---|---|---|---|
CABLE | 3.875213 | 0.8191375 | 0.8025959 | 8.7791036 |
CLASS_CTEM | 3.204068 | 0.4930046 | 0.4030876 | 0.9971417 |
CLM | 2.240896 | 0.3903936 | 0.3515676 | 2.7268429 |
DLEM | 2.288555 | 0.6974570 | 0.6394701 | 2.3818175 |
ISAM | 2.291047 | 0.6654760 | 0.6007053 | 0.5101950 |
JSBACH | 3.038597 | 0.6165090 | 0.5579764 | 6.3021586 |
JULES | 2.731302 | 0.6043650 | 0.5205092 | -0.6808847 |
LPJ_GUESS | 2.063041 | 0.5558467 | NA | NA |
LPJ | 1.883699 | 0.4430937 | 0.4283084 | NA |
LPX | 1.226619 | 0.4430766 | 0.3732446 | 17.1920030 |
ORCHIDEE_MICT | 2.938975 | 0.5963677 | 0.5206025 | 4.6774682 |
ORCHIDEE | 2.498581 | 0.4696402 | 0.4305576 | 8.3658094 |
VEGAS | NA | 0.2253959 | 0.1916185 | NA |
VISIT | 3.047290 | 0.5818867 | 0.4834557 | 4.5331730 |
GCPresidualSink | 2.549365 | NA | NA | NA |
Olot original Beta-GPP vs the sink. Note that this displays the 95% confidence interval to be somewhat larger than in ED Fig. 1.
df %>%
ggplot(aes(sland, beta_gpp)) +
geom_point() +
geom_smooth(method = "lm", color = "red", level = 0.95) +
theme_classic()
Fit a linear regression model. Note that the R-squared is 0.95, not 0.99 as in the paper.
linmod <- lm(sland ~ beta_gpp + beta_reco * nrf,
data = df %>%
filter(source != "GCPresidualSink")
)
summary(linmod)
##
## Call:
## lm(formula = sland ~ beta_gpp + beta_reco * nrf, data = df %>%
## filter(source != "GCPresidualSink"))
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.23264 -0.09567 0.01719 0.04165 0.32497
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.65371 0.50724 7.203 0.000362 ***
## beta_gpp 12.23562 3.31821 3.687 0.010240 *
## beta_reco -15.96950 3.56665 -4.477 0.004204 **
## nrf -0.49814 0.05727 -8.698 0.000128 ***
## beta_reco:nrf 1.05961 0.12854 8.243 0.000172 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1961 on 6 degrees of freedom
## (3 observations deleted due to missingness)
## Multiple R-squared: 0.9507, Adjusted R-squared: 0.9178
## F-statistic: 28.91 on 4 and 6 DF, p-value: 0.0004625
Apply variance normalization on the sink
cf <- coef(linmod)
df <- df %>%
mutate(betareco_nrf = beta_reco * nrf) %>%
mutate(b_beta_reco = cf["beta_reco"] * beta_reco,
c_betareco_nrf = cf["beta_reco:nrf"] * betareco_nrf )
b_beta_reco_mean <- mean(df$b_beta_reco, na.rm = TRUE)
c_betareco_nrf_mean <- mean(df$c_betareco_nrf, na.rm = TRUE)
df <- df %>%
mutate(sland_norm = sland - ((b_beta_reco + c_betareco_nrf) - (b_beta_reco_mean + c_betareco_nrf_mean)))
Plot normalised sink vs beta-gpp. Doesn’t look the way it should…
df %>%
ggplot(aes(sland_norm, beta_gpp)) +
geom_point() +
geom_smooth(method = "lm", color = "red", level = 0.95) +
theme_classic()
Should it also be normalised for the non-respired flux (nrf)?
cf <- coef(linmod)
df <- df %>%
mutate(d_nrf = cf["nrf"] * nrf )
d_nrf_mean <- mean(df$d_nrf, na.rm = TRUE)
df <- df %>%
mutate(sland_norm2 = sland - ((b_beta_reco + c_betareco_nrf + d_nrf) - (b_beta_reco_mean + c_betareco_nrf_mean + d_nrf_mean)))
Yes!
df %>%
ggplot(aes(sland_norm2, beta_gpp)) +
geom_point() +
geom_smooth(method = "lm", color = "red", level = 0.95) +
theme_classic()
That’s the one that gets the R-squared of 0.99:
yardstick::rsq(df, sland_norm2, beta_gpp) %>% pull(.estimate)
## [1] 0.9901483
Are the means of the original and normalised (red) sinks different? No!!!
df %>%
ggplot() +
geom_density(aes(sland, ..density..)) +
geom_density(aes(sland_norm2, ..density..), color = "red") +
geom_vline(xintercept = mean(df$sland_norm, na.rm = TRUE), size = 2) +
geom_vline(xintercept = mean(df$sland_norm2, na.rm = TRUE), color = "red") +
theme_classic()
## Warning: Removed 1 rows containing non-finite values (stat_density).
## Warning: Removed 4 rows containing non-finite values (stat_density).
Now, perturb the distribution by removing a model (the first one: CABLE). Let’s make this a function.
normalize_sink <- function(df){
linmod <- lm(sland ~ beta_gpp + beta_reco * nrf,
data = df %>%
filter(source != "GCPresidualSink")
)
cf <- coef(linmod)
out <- df %>%
mutate(betareco_nrf = beta_reco * nrf) %>%
mutate(b_beta_reco = cf["beta_reco"] * beta_reco,
c_betareco_nrf = cf["beta_reco:nrf"] * betareco_nrf,
d_nrf = cf["nrf"] * nrf)
b_beta_reco_mean <- mean(out$b_beta_reco, na.rm = TRUE)
c_betareco_nrf_mean <- mean(out$c_betareco_nrf, na.rm = TRUE)
d_nrf_mean <- mean(out$d_nrf, na.rm = TRUE)
out <- out %>%
mutate(sland_norm2 = sland - ((b_beta_reco + c_betareco_nrf + d_nrf) - (b_beta_reco_mean + c_betareco_nrf_mean + d_nrf_mean)))
return(out)
}
df2 <- df %>%
slice(-1) %>% # perturb sample by removing one model
normalize_sink()
Plot both distributions and means: it does affect the mean (vertical lines).
ggplot() +
geom_density(aes(sland_norm2, ..density..), data = df) +
geom_density(aes(sland_norm2, ..density..), data = df2, color = "red") +
geom_vline(xintercept = mean(df$sland_norm2, na.rm = TRUE), lintetype = "dashed") +
geom_vline(xintercept = mean(df2$sland_norm2, na.rm = TRUE), color = "red", lintetype = "dashed") +
theme_classic()
## Warning: Ignoring unknown parameters: lintetype
## Warning: Ignoring unknown parameters: lintetype
## Warning: Removed 4 rows containing non-finite values (stat_density).
## Warning: Removed 4 rows containing non-finite values (stat_density).