library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(readr)
library(tidyr)
df_temp <- read_csv("~/data/n2o_Yunke/final_obs_dataset/obs_warming_dataset.csv") |>
# give it a unique ID - missing in the dataset. Also missing: an experiment name.
mutate(id = 1:n()) |>
mutate(logrr = log(n2o_elv / n2o_amb)) |>
mutate(logrr_norm = logrr / dT) |>
# log-transform - XXX not necessary, the errors should be normally distributed, not necessarily the predictors.
mutate(PPFD_total_a = log(PPFD_total),
vpd_a = log(vpd),
orgc_a = log(ORGC),
ndep_a = log(ndep)
)
## Rows: 83 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): original_ref, location, pft, species, other_treatment
## dbl (18): lon, lat, n2o_amb, n2o_elv, dT, logr, Nfer_kgha, days_of_duration,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Normalising the log response reduced the variation in the log response.
df_temp |>
pivot_longer(cols = c(logrr, logrr_norm), names_to = "var", values_to = "logrr") |>
ggplot(aes(x = var, y = logrr)) +
geom_boxplot()
Issue:
df_temp |>
select(id, Nfer_kgha, other_treatment) |>
knitr::kable()
| id | Nfer_kgha | other_treatment |
|---|---|---|
| 1 | 0.0 | elevated precipitation |
| 2 | 70.0 | elevated precipitation and N addition |
| 3 | 70.0 | N addition |
| 4 | 0.0 | NA |
| 5 | 0.0 | NA |
| 6 | 0.0 | N addition |
| 7 | 5.0 | CO 2 enrichment |
| 8 | 5.0 | without plant |
| 9 | 110.0 | NA |
| 10 | 0.0 | NA |
| 11 | 0.0 | NA |
| 12 | 0.0 | NA |
| 13 | 0.0 | NA |
| 14 | 0.0 | NA |
| 15 | 64.0 | N addition |
| 16 | 0.0 | without plant |
| 17 | 0.0 | NA |
| 18 | 0.0 | NA |
| 19 | 70.0 | biochar addition |
| 20 | 70.0 | NA |
| 21 | 170.0 | NA |
| 22 | 100.0 | NA |
| 23 | 8.0 | NA |
| 24 | 0.0 | NA |
| 25 | 12.5 | NA |
| 26 | 12.5 | drought |
| 27 | 0.0 | NA |
| 28 | 12.5 | CO 2 enrichment |
| 29 | 0.0 | CO 2 enrichment |
| 30 | 0.0 | CO 2 enrichment and drought |
| 31 | 0.0 | drought |
| 32 | 90.0 | CO 2 enrichment |
| 33 | 90.0 | CO 2 enrichment |
| 34 | 90.0 | NA |
| 35 | 90.0 | NA |
| 36 | 0.0 | drained |
| 37 | 0.0 | NA |
| 38 | 0.0 | NA |
| 39 | 0.0 | drained |
| 40 | 0.0 | undrained |
| 41 | 0.0 | undrained |
| 42 | 0.0 | NA |
| 43 | 0.0 | NA |
| 44 | 0.0 | NA |
| 45 | 0.0 | NA |
| 46 | 0.0 | NA |
| 47 | 0.0 | NA |
| 48 | 40.0 | N addition |
| 49 | 0.0 | NA |
| 50 | 8.0 | N addition |
| 51 | 0.0 | befor grazing |
| 52 | 0.0 | grazing |
| 53 | 0.0 | growing season, Grazing |
| 54 | 0.0 | No grazing |
| 55 | 0.0 | no growing season, grazing |
| 56 | 0.0 | growing season, No grazing |
| 57 | 0.0 | grazing and no fertilization |
| 58 | 40.0 | grazing and N addition |
| 59 | 0.0 | no grazing and no fertilization |
| 60 | 0.0 | no growing season,No grazing |
| 61 | 40.0 | no grazing N addition |
| 62 | 0.0 | NA |
| 63 | 0.0 | drying |
| 64 | 0.0 | NA |
| 65 | 0.0 | CO 2 enrichment |
| 66 | 120.0 | high irrigation |
| 67 | 0.0 | NA |
| 68 | 315.0 | N addition |
| 69 | 120.0 | NA |
| 70 | 285.0 | No tillage |
| 71 | 285.0 | Convential |
| tillage | ||
| 72 | 0.0 | rainfall redution |
| 73 | 0.0 | rainfall redution |
| 74 | 0.0 | NA |
| 75 | 400.0 | N addition |
| 76 | 0.0 | NA |
| 77 | 0.0 | NA |
| 78 | 0.0 | Richmond D |
| 79 | 0.0 | CO 2 |
| enrichment, Richmond D | ||
| 80 | 0.0 | CO 2 |
enrichment, Armidale C | | 81| 0.0|CO 2 enrichment, Armidale R | | 82| 0.0|Armidale R | | 83| 0.0|NA |
We could be conservative, using only data from experiments with no
other simultaneous treatment (assuming that this is the case when
other_treatment == NA). This yields 36 experiments (out of
83 originally).
df_temp_sub <- df_temp |>
filter(is.na(other_treatment))
df_temp |> dim()
## [1] 83 30
df_temp_sub |> dim()
## [1] 36 30
This further reduces the variation.
ggplot() +
geom_boxplot(aes(x = 1, y = logrr), data = df_temp) +
geom_boxplot(aes(x = 2, y = logrr_norm), data = df_temp) +
geom_boxplot(aes(x = 3, y = logrr_norm), data = df_temp_sub)
Issue:
Let’s go ahead with all experiments and all treatements within experiments considered equally and using the normalised log response.
# following example: https://www.statology.org/stepwise-regression-r/
# subset, using un-transformed predictors
df <- df_temp |>
select(logrr_norm, min_fapar, max_fapar, PPFD_total, vpd, ORGC, ndep) |> # xxx Nfer doesn't exist in that data
drop_na()
# define intercept-only model
intercept_only <- lm(logrr_norm ~ 1, data = df)
#define model with all predictors
all <- lm(logrr_norm ~ ., data = df)
#perform forward stepwise regression
forward <- step(intercept_only, direction = 'forward', scope = formula(all), trace = 0)
#view results of forward stepwise regression
forward$anova
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 NA NA 64 20.01275 -74.57116
#view final model
forward$coefficients
## (Intercept)
## -0.04563139
No predictor selected.
# subset, using un-transformed predictors
df <- df_temp |>
select(logrr_norm, min_fapar, max_fapar, PPFD_total_a, vpd_a, orgc_a, ndep_a) |> # xxx Nfer doesn't exist in that data
drop_na()
# define intercept-only model
intercept_only <- lm(logrr_norm ~ 1, data = df)
#define model with all predictors
all <- lm(logrr_norm ~ ., data = df)
#perform forward stepwise regression
forward <- step(intercept_only, direction = 'forward', scope = formula(all), trace = 0)
#view results of forward stepwise regression
forward$anova
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 NA NA 64 20.01275 -74.57116
## 2 + orgc_a -1 0.8807562 63 19.13199 -75.49665
#view final model
forward$coefficients
## (Intercept) orgc_a
## -0.2875957 0.1161154
Organic C selected!
For spatial upscaling, use a map of SOC and use
predict(). Here, the logarithm of the organic matter
content (variable ORG is, say, 1.5)
newdata <- data.frame(orgc_a = 1.5)
logrr_predicted <- predict(forward, newdata = newdata)
In this case, the log-response ratio is -0.11. Convert this to a response ratio:
exp(logrr_predicted)
## 1
## 0.8927733
Yes, that’s a decline in the N2O emissions for this particular choice SOC content.
For spatial uscaling, you apply the predict() function
as above to the SOC content for each pixel.