Trying to fit an equation to specific quantiles of the data to get a boundary line.
Using the package quantreg
See http://cran.r-project.org/web/packages/quantreg/vignettes/rq.pdf
Manual: http://cran.r-project.org/web/packages/quantreg/quantreg.pdf
A paper on quantile regression from an ecology perspective: http://www.econ.uiuc.edu/~roger/research/rq/QReco.pdf
Analysis of crop yield boundary lines using quantile regression Makowski et al. (2007) Agronomy for Sustainable Development: http://dx.doi.org/10.1051/agro%3A2006029
install.packages("quantreg")
# Quantile regression
df <- read.csv("Gas_Exchange.csv")
# relationship between conductance temperature
# manually defined, wide x-range
# see plot at the bottom
plot(Cond ~ Tleaf,
data = df,
xlim = c(-10, 50))
# equation to use
# just a simple quadratic approach
# y ~ a * x^2 + b * x + c
my.equation <- Cond ~ a * Tleaf^2 + b * Tleaf + c
# fit the equation to the data via "non-linear least squares"
nls.fit <- nls(my.equation,
data = df,
start = list(a = 2, b = 3, c = 5))
# look at the result
summary(nls.fit)
##
## Formula: Cond ~ a * Tleaf^2 + b * Tleaf + c
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a -0.000899 0.000276 -3.26 0.0011 **
## b 0.039850 0.012547 3.18 0.0015 **
## c 0.060984 0.138156 0.44 0.6590
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.285 on 920 degrees of freedom
##
## Number of iterations to convergence: 1
## Achieved convergence tolerance: 2.05e-06
# create a dummy range of that we use to predict leaf temperature from our fitted model
predict_range <- data.frame(Tleaf = seq(-10, 50, length = 250))
# calculate for each x-range value the corresponding y-range
my.line <- within(predict_range, Cond <- predict(nls.fit, newdata = predict_range))
# add the line to the existing graph
# This line represents the "mean" fit, no quantile regression involved
lines(Cond ~ Tleaf, data = my.line, col = "red")
library(quantreg)
## Loading required package: SparseM
##
## Attaching package: 'SparseM'
##
## The following object is masked from 'package:base':
##
## backsolve
# Non-linear quantile regression
# aiming for the upper 95% quantile
my.rq <- nlrq(my.equation,
data = df,
start = list(a = 2, b = 2, c = 5),
tau = .95)
summary(my.rq)
##
## Call: nlrq(formula = my.equation, data = df, start = list(a = 2, b = 2,
## c = 5), tau = 0.95, control = structure(list(maxiter = 100,
## k = 2, InitialStepSize = 1, big = 1e+20, eps = 1e-07, beta = 0.97), .Names = c("maxiter",
## "k", "InitialStepSize", "big", "eps", "beta")), trace = FALSE)
##
## tau: [1] 0.95
##
## Coefficients:
## Value Std. Error t value Pr(>|t|)
## a -0.00319 0.00055 -5.78948 0.00000
## b 0.15240 0.02303 6.61869 0.00000
## c -0.66404 0.21573 -3.07813 0.00214
# calculating the values from the model
my.line95 <- within(predict_range,
Cond <- predict(my.rq,
newdata = predict_range))
lines(Cond ~ Tleaf, data = my.line95, col = "blue")
# re-doing the 99% quantile
# nlrq can be supplied with a list of "tau" values, to get multiple quantiles in one go.
# For now I'm doing one by one only...
my.rq.99 <- nlrq(my.equation,
data = df,
start = list(a = 2, b = 2, c = 5),
tau = .99)
summary(my.rq.99)
##
## Call: nlrq(formula = my.equation, data = df, start = list(a = 2, b = 2,
## c = 5), tau = 0.99, control = structure(list(maxiter = 100,
## k = 2, InitialStepSize = 1, big = 1e+20, eps = 1e-07, beta = 0.97), .Names = c("maxiter",
## "k", "InitialStepSize", "big", "eps", "beta")), trace = FALSE)
##
## tau: [1] 0.99
##
## Coefficients:
## Value Std. Error t value Pr(>|t|)
## a -0.00371 0.00061 -6.09065 0.00000
## b 0.17750 0.02470 7.18658 0.00000
## c -0.79848 0.23541 -3.39184 0.00072
# the usual calculation of the corresponding line
my.line99 <- within(predict_range,
Cond <- predict(my.rq.99,
newdata = predict_range))
lines(Cond ~ Tleaf, data = my.line99, col = "green")
# Add a legend to the graph
legend(0, 1.2,
c("Mean", "95%", "99%"),
lty = c(rep("solid", 3)),
col = c("red", "blue", "green"),
cex = 0.8) # make the characters slightly smaller than standard text
# import the data
df <- read.csv("FC.csv")
plot(rel.gs ~ X.FC,
data = df,
xlim = c(0, 100))
my.equation <- rel.gs ~ a/(1+exp(-(X.FC-c)/b))
my.start <- list(a = 0.5, b = 1, c = 50)
nls.fit <- nls(my.equation,
data = df,
start = my.start)
summary(nls.fit)
##
## Formula: rel.gs ~ a/(1 + exp(-(X.FC - c)/b))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a 0.8321 0.0851 9.78 < 2e-16 ***
## b 18.7708 3.7912 4.95 1.6e-06 ***
## c 61.0794 6.2147 9.83 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.212 on 197 degrees of freedom
##
## Number of iterations to convergence: 8
## Achieved convergence tolerance: 1.82e-06
# create a dummy data frame that holds the predicted data form the model
predict_range <- data.frame(X.FC = seq(0, 100, length = 200))
my.line <- within(predict_range, rel.gs <- predict(nls.fit, newdata = predict_range))
# add the predicted line to the base plot
lines(rel.gs ~ X.FC, data = my.line, col = "blue")
# separate fits for AA and AM
nls.aneura <- nls(my.equation,
data = df[df$Species == "A. aneura",],
start = my.start)
summary(nls.aneura)
##
## Formula: rel.gs ~ a/(1 + exp(-(X.FC - c)/b))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a 0.7985 0.0504 15.83 < 2e-16 ***
## b 12.9359 3.2876 3.93 0.00016 ***
## c 49.9436 4.5582 10.96 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.193 on 97 degrees of freedom
##
## Number of iterations to convergence: 10
## Achieved convergence tolerance: 5.3e-06
nls.melanoxylon <- nls(my.equation,
data = df[df$Species == "A. melanoxylon", ],
start = my.start,
algorithm = "port",
trace = FALSE,
nls.control(maxiter = 100000, warnOnly = TRUE,
minFactor = 0.000000000009))
## Warning: Convergence failure: function evaluation limit reached without
## convergence (9)
# Does not work well with these data, unfortunately
summary(nls.melanoxylon)
##
## Formula: rel.gs ~ a/(1 + exp(-(X.FC - c)/b))
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## a 395.3 184135.8 0.00 0.998
## b 37.0 19.1 1.94 0.055 .
## c 333.4 17403.1 0.02 0.985
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.196 on 97 degrees of freedom
##
## Algorithm "port", convergence message: function evaluation limit reached without convergence (9)
# quantile regression for the upper 95%
library(quantreg)
my.rq <- nlrq(my.equation,
data = df,
start = list(a = 2, b = 2, c = 5),
tau = .95)
summary(my.rq)
##
## Call: nlrq(formula = my.equation, data = df, start = list(a = 2, b = 2,
## c = 5), tau = 0.95, control = structure(list(maxiter = 100,
## k = 2, InitialStepSize = 1, big = 1e+20, eps = 1e-07, beta = 0.97), .Names = c("maxiter",
## "k", "InitialStepSize", "big", "eps", "beta")), trace = FALSE)
##
## tau: [1] 0.95
##
## Coefficients:
## Value Std. Error t value Pr(>|t|)
## a 1.00532 0.01539 65.33274 0.00000
## b 12.71319 4.52469 2.80974 0.00546
## c 30.80014 5.12539 6.00932 0.00000
my.line95 <- within(predict_range,
rel.gs <- predict(my.rq,
newdata = predict_range))
lines(rel.gs ~ X.FC, data = my.line95, col = "green")
# separate code for AA and AM 95% quantile regression using nlrq
# deliberately not using split-apply or plyr to show the individual process
my.rq.95.aa <- nlrq(my.equation,
data = df[df$Species == "A. aneura",],
start = list(a = 2, b = 2, c = 5),
tau = .95)
summary(my.rq.95.aa)
##
## Call: nlrq(formula = my.equation, data = df[df$Species == "A. aneura",
## ], start = list(a = 2, b = 2, c = 5), tau = 0.95, control = structure(list(
## maxiter = 100, k = 2, InitialStepSize = 1, big = 1e+20, eps = 1e-07,
## beta = 0.97), .Names = c("maxiter", "k", "InitialStepSize",
## "big", "eps", "beta")), trace = FALSE)
##
## tau: [1] 0.95
##
## Coefficients:
## Value Std. Error t value Pr(>|t|)
## a 0.99849 0.01304 76.60056 0.00000
## b 9.45893 4.60588 2.05367 0.04270
## c 22.53856 5.97793 3.77030 0.00028
my.rq.95.am <- nlrq(my.equation,
data = df[df$Species == "A. melanoxylon",],
start = list(a = 2, b = 2, c = 5),
tau = .95)
summary(my.rq.95.am)
##
## Call: nlrq(formula = my.equation, data = df[df$Species == "A. melanoxylon",
## ], start = list(a = 2, b = 2, c = 5), tau = 0.95, control = structure(list(
## maxiter = 100, k = 2, InitialStepSize = 1, big = 1e+20, eps = 1e-07,
## beta = 0.97), .Names = c("maxiter", "k", "InitialStepSize",
## "big", "eps", "beta")), trace = FALSE)
##
## tau: [1] 0.95
##
## Coefficients:
## Value Std. Error t value Pr(>|t|)
## a 1.16485 0.23508 4.95510 0.00000
## b 27.76214 15.37038 1.80621 0.07399
## c 49.95251 15.62986 3.19597 0.00188
# get the data to put lines on the grpah
my.line95.aa <- within(predict_range,
rel.gs <- predict(my.rq.95.aa,
newdata = predict_range))
my.line95.aa$Species <- "A. aneura"
my.line95.am <- within(predict_range,
rel.gs <- predict(my.rq.95.am,
newdata = predict_range))
my.line95.am$Species <- "A. melanoxylon"
# combine the two data sets
quantile.reg <- rbind(my.line95.aa, my.line95.am)
quantile.reg$Species <- as.factor(quantile.reg$Species)
# ggplot Graph:
library(ggplot2)
p <- ggplot(df, aes(x = X.FC, y = rel.gs))
p <- p + geom_point(aes(colour = Species))
p <- p + geom_line(data = quantile.reg, aes(linetype = Species))
p <- p + theme_bw()
p