library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(readr)
library(tidyr)

Read data

df_temp <- read_csv("~/data/n2o_Yunke/final_obs_dataset/obs_warming_dataset.csv") |> 
  
  # give it a unique ID - missing in the dataset. Also missing: an experiment name.
  mutate(id = 1:n()) |> 
  
  mutate(logrr = log(n2o_elv / n2o_amb)) |>
  mutate(logrr_norm = logrr / dT) |> 

  # log-transform - XXX not necessary, the errors should be normally distributed, not necessarily the predictors.
  mutate(PPFD_total_a = log(PPFD_total),
         vpd_a = log(vpd),
         orgc_a = log(ORGC),
         ndep_a = log(ndep)
         )
## Rows: 83 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (5): original_ref, location, pft, species, other_treatment
## dbl (18): lon, lat, n2o_amb, n2o_elv, dT, logr, Nfer_kgha, days_of_duration,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Explore data

Normalising the log response reduced the variation in the log response.

df_temp |> 
  pivot_longer(cols = c(logrr, logrr_norm), names_to = "var", values_to = "logrr") |> 
  ggplot(aes(x = var, y = logrr)) +
  geom_boxplot()

Issue:

df_temp |> 
  select(id, Nfer_kgha, other_treatment) |> 
  knitr::kable()
id Nfer_kgha other_treatment
1 0.0 elevated precipitation
2 70.0 elevated precipitation and N addition
3 70.0 N addition
4 0.0 NA
5 0.0 NA
6 0.0 N addition
7 5.0 CO 2 enrichment
8 5.0 without plant
9 110.0 NA
10 0.0 NA
11 0.0 NA
12 0.0 NA
13 0.0 NA
14 0.0 NA
15 64.0 N addition
16 0.0 without plant
17 0.0 NA
18 0.0 NA
19 70.0 biochar addition
20 70.0 NA
21 170.0 NA
22 100.0 NA
23 8.0 NA
24 0.0 NA
25 12.5 NA
26 12.5 drought
27 0.0 NA
28 12.5 CO 2 enrichment
29 0.0 CO 2 enrichment
30 0.0 CO 2 enrichment and drought
31 0.0 drought
32 90.0 CO 2 enrichment
33 90.0 CO 2 enrichment
34 90.0 NA
35 90.0 NA
36 0.0 drained
37 0.0 NA
38 0.0 NA
39 0.0 drained
40 0.0 undrained
41 0.0 undrained
42 0.0 NA
43 0.0 NA
44 0.0 NA
45 0.0 NA
46 0.0 NA
47 0.0 NA
48 40.0 N addition
49 0.0 NA
50 8.0 N addition
51 0.0 befor grazing
52 0.0 grazing
53 0.0 growing season, Grazing
54 0.0 No grazing
55 0.0 no growing season, grazing
56 0.0 growing season, No grazing
57 0.0 grazing and no fertilization
58 40.0 grazing and N addition
59 0.0 no grazing and no fertilization
60 0.0 no growing season,No grazing
61 40.0 no grazing N addition
62 0.0 NA
63 0.0 drying
64 0.0 NA
65 0.0 CO 2 enrichment
66 120.0 high irrigation
67 0.0 NA
68 315.0 N addition
69 120.0 NA
70 285.0 No tillage
71 285.0 Convential
tillage
72 0.0 rainfall redution
73 0.0 rainfall redution
74 0.0 NA
75 400.0 N addition
76 0.0 NA
77 0.0 NA
78 0.0 Richmond D
79 0.0 CO 2
enrichment, Richmond D
80 0.0 CO 2

enrichment, Armidale C | | 81| 0.0|CO 2 enrichment, Armidale R | | 82| 0.0|Armidale R | | 83| 0.0|NA |

We could be conservative, using only data from experiments with no other simultaneous treatment (assuming that this is the case when other_treatment == NA). This yields 36 experiments (out of 83 originally).

df_temp_sub <- df_temp |> 
  filter(is.na(other_treatment))

df_temp |> dim()
## [1] 83 30
df_temp_sub |> dim()
## [1] 36 30

This further reduces the variation.

ggplot() +
  geom_boxplot(aes(x = 1, y = logrr), data = df_temp) +
  geom_boxplot(aes(x = 2, y = logrr_norm), data = df_temp) +
  geom_boxplot(aes(x = 3, y = logrr_norm), data = df_temp_sub)

Issue:

Stepwise regression

Untransformed predictors

Let’s go ahead with all experiments and all treatements within experiments considered equally and using the normalised log response.

# following example: https://www.statology.org/stepwise-regression-r/ 
# subset, using un-transformed predictors
df <- df_temp |> 
  select(logrr_norm, min_fapar, max_fapar, PPFD_total, vpd, ORGC, ndep) |>   # xxx Nfer doesn't exist in that data
  drop_na()

# define intercept-only model
intercept_only <- lm(logrr_norm ~ 1, data = df)

#define model with all predictors
all <- lm(logrr_norm ~ ., data = df)

#perform forward stepwise regression
forward <- step(intercept_only, direction = 'forward', scope = formula(all), trace = 0)

#view results of forward stepwise regression
forward$anova
##   Step Df Deviance Resid. Df Resid. Dev       AIC
## 1      NA       NA        64   20.01275 -74.57116
#view final model
forward$coefficients
## (Intercept) 
## -0.04563139

No predictor selected.

Transformed predictors

# subset, using un-transformed predictors
df <- df_temp |> 
  select(logrr_norm, min_fapar, max_fapar, PPFD_total_a, vpd_a, orgc_a, ndep_a) |>  # xxx Nfer doesn't exist in that data
  drop_na()

# define intercept-only model
intercept_only <- lm(logrr_norm ~ 1, data = df)

#define model with all predictors
all <- lm(logrr_norm ~ ., data = df)

#perform forward stepwise regression
forward <- step(intercept_only, direction = 'forward', scope = formula(all), trace = 0)

#view results of forward stepwise regression
forward$anova
##       Step Df  Deviance Resid. Df Resid. Dev       AIC
## 1          NA        NA        64   20.01275 -74.57116
## 2 + orgc_a -1 0.8807562        63   19.13199 -75.49665
#view final model
forward$coefficients
## (Intercept)      orgc_a 
##  -0.2875957   0.1161154

Organic C selected!

Predict

For spatial upscaling, use a map of SOC and use predict(). Here, the logarithm of the organic matter content (variable ORG is, say, 1.5)

newdata <- data.frame(orgc_a = 1.5)

logrr_predicted <- predict(forward, newdata = newdata)

In this case, the log-response ratio is -0.11. Convert this to a response ratio:

exp(logrr_predicted)
##         1 
## 0.8927733

Yes, that’s a decline in the N2O emissions for this particular choice SOC content.

For spatial uscaling, you apply the predict() function as above to the SOC content for each pixel.