library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(readr)
library(tidyr)
df_temp <- read_csv("~/data/n2o_Yunke/final_obs_dataset/obs_warming_dataset.csv") |>
# give it a unique ID - missing in the dataset. Also missing: an experiment name.
mutate(id = 1:n()) |>
mutate(logrr = log(n2o_elv / n2o_amb)) |>
mutate(logrr_norm = logrr / dT) |>
# log-transform - XXX not necessary, the errors should be normally distributed, not necessarily the predictors.
mutate(PPFD_total_a = log(PPFD_total),
vpd_a = log(vpd),
orgc_a = log(ORGC),
ndep_a = log(ndep)
)
## Rows: 83 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): original_ref, location, pft, species, other_treatment
## dbl (18): lon, lat, n2o_amb, n2o_elv, dT, logr, Nfer_kgha, days_of_duration,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Normalising the log response reduced the variation in the log response.
df_temp |>
pivot_longer(cols = c(logrr, logrr_norm), names_to = "var", values_to = "logrr") |>
ggplot(aes(x = var, y = logrr)) +
geom_boxplot()
Issue:
df_temp |>
select(id, Nfer_kgha, other_treatment) |>
knitr::kable()
id | Nfer_kgha | other_treatment |
---|---|---|
1 | 0.0 | elevated precipitation |
2 | 70.0 | elevated precipitation and N addition |
3 | 70.0 | N addition |
4 | 0.0 | NA |
5 | 0.0 | NA |
6 | 0.0 | N addition |
7 | 5.0 | CO 2 enrichment |
8 | 5.0 | without plant |
9 | 110.0 | NA |
10 | 0.0 | NA |
11 | 0.0 | NA |
12 | 0.0 | NA |
13 | 0.0 | NA |
14 | 0.0 | NA |
15 | 64.0 | N addition |
16 | 0.0 | without plant |
17 | 0.0 | NA |
18 | 0.0 | NA |
19 | 70.0 | biochar addition |
20 | 70.0 | NA |
21 | 170.0 | NA |
22 | 100.0 | NA |
23 | 8.0 | NA |
24 | 0.0 | NA |
25 | 12.5 | NA |
26 | 12.5 | drought |
27 | 0.0 | NA |
28 | 12.5 | CO 2 enrichment |
29 | 0.0 | CO 2 enrichment |
30 | 0.0 | CO 2 enrichment and drought |
31 | 0.0 | drought |
32 | 90.0 | CO 2 enrichment |
33 | 90.0 | CO 2 enrichment |
34 | 90.0 | NA |
35 | 90.0 | NA |
36 | 0.0 | drained |
37 | 0.0 | NA |
38 | 0.0 | NA |
39 | 0.0 | drained |
40 | 0.0 | undrained |
41 | 0.0 | undrained |
42 | 0.0 | NA |
43 | 0.0 | NA |
44 | 0.0 | NA |
45 | 0.0 | NA |
46 | 0.0 | NA |
47 | 0.0 | NA |
48 | 40.0 | N addition |
49 | 0.0 | NA |
50 | 8.0 | N addition |
51 | 0.0 | befor grazing |
52 | 0.0 | grazing |
53 | 0.0 | growing season, Grazing |
54 | 0.0 | No grazing |
55 | 0.0 | no growing season, grazing |
56 | 0.0 | growing season, No grazing |
57 | 0.0 | grazing and no fertilization |
58 | 40.0 | grazing and N addition |
59 | 0.0 | no grazing and no fertilization |
60 | 0.0 | no growing season,No grazing |
61 | 40.0 | no grazing N addition |
62 | 0.0 | NA |
63 | 0.0 | drying |
64 | 0.0 | NA |
65 | 0.0 | CO 2 enrichment |
66 | 120.0 | high irrigation |
67 | 0.0 | NA |
68 | 315.0 | N addition |
69 | 120.0 | NA |
70 | 285.0 | No tillage |
71 | 285.0 | Convential |
tillage | ||
72 | 0.0 | rainfall redution |
73 | 0.0 | rainfall redution |
74 | 0.0 | NA |
75 | 400.0 | N addition |
76 | 0.0 | NA |
77 | 0.0 | NA |
78 | 0.0 | Richmond D |
79 | 0.0 | CO 2 |
enrichment, Richmond D | ||
80 | 0.0 | CO 2 |
enrichment, Armidale C | | 81| 0.0|CO 2 enrichment, Armidale R | | 82| 0.0|Armidale R | | 83| 0.0|NA |
We could be conservative, using only data from experiments with no
other simultaneous treatment (assuming that this is the case when
other_treatment == NA
). This yields 36 experiments (out of
83 originally).
df_temp_sub <- df_temp |>
filter(is.na(other_treatment))
df_temp |> dim()
## [1] 83 30
df_temp_sub |> dim()
## [1] 36 30
This further reduces the variation.
ggplot() +
geom_boxplot(aes(x = 1, y = logrr), data = df_temp) +
geom_boxplot(aes(x = 2, y = logrr_norm), data = df_temp) +
geom_boxplot(aes(x = 3, y = logrr_norm), data = df_temp_sub)
Issue:
Let’s go ahead with all experiments and all treatements within experiments considered equally and using the normalised log response.
# following example: https://www.statology.org/stepwise-regression-r/
# subset, using un-transformed predictors
df <- df_temp |>
select(logrr_norm, min_fapar, max_fapar, PPFD_total, vpd, ORGC, ndep) |> # xxx Nfer doesn't exist in that data
drop_na()
# define intercept-only model
intercept_only <- lm(logrr_norm ~ 1, data = df)
#define model with all predictors
all <- lm(logrr_norm ~ ., data = df)
#perform forward stepwise regression
forward <- step(intercept_only, direction = 'forward', scope = formula(all), trace = 0)
#view results of forward stepwise regression
forward$anova
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 NA NA 64 20.01275 -74.57116
#view final model
forward$coefficients
## (Intercept)
## -0.04563139
No predictor selected.
# subset, using un-transformed predictors
df <- df_temp |>
select(logrr_norm, min_fapar, max_fapar, PPFD_total_a, vpd_a, orgc_a, ndep_a) |> # xxx Nfer doesn't exist in that data
drop_na()
# define intercept-only model
intercept_only <- lm(logrr_norm ~ 1, data = df)
#define model with all predictors
all <- lm(logrr_norm ~ ., data = df)
#perform forward stepwise regression
forward <- step(intercept_only, direction = 'forward', scope = formula(all), trace = 0)
#view results of forward stepwise regression
forward$anova
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 NA NA 64 20.01275 -74.57116
## 2 + orgc_a -1 0.8807562 63 19.13199 -75.49665
#view final model
forward$coefficients
## (Intercept) orgc_a
## -0.2875957 0.1161154
Organic C selected!
For spatial upscaling, use a map of SOC and use
predict()
. Here, the logarithm of the organic matter
content (variable ORG
is, say, 1.5)
newdata <- data.frame(orgc_a = 1.5)
logrr_predicted <- predict(forward, newdata = newdata)
In this case, the log-response ratio is -0.11. Convert this to a response ratio:
exp(logrr_predicted)
## 1
## 0.8927733
Yes, that’s a decline in the N2O emissions for this particular choice SOC content.
For spatial uscaling, you apply the predict()
function
as above to the SOC content for each pixel.