LAIDat <- read.csv("C:/Users/micayla.lakey/Desktop/R/data/ceptdata.csv")
str(LAIDat)
## 'data.frame': 79 obs. of 21 variables:
## $ QuadratRef: Factor w/ 79 levels "Q1(19)","Q10",..: 21 33 45 50 52 63 75 78 2 4 ...
## $ Date_Time : Factor w/ 79 levels "5/24/2017 14:05",..: 1 2 5 6 31 32 33 34 35 37 ...
## $ aaPAR : num 1858 1733 1758 1707 627 ...
## $ abPAR : num 930 816 1638 1473 149 ...
## $ Tau : num 0.506 0.475 0.916 0.863 0.238 0.708 0.588 0.7 0.721 0.26 ...
## $ LAI : num 1.54 1.47 0.16 0.26 1.73 0.45 0.93 0.57 0.49 2.44 ...
## $ Quadrat : Factor w/ 79 levels "Q10","Q11","Q12",..: 11 22 33 44 55 66 77 79 1 2 ...
## $ ClipDate : Factor w/ 24 levels "5/24/2017","5/24/2018",..: 1 1 3 3 11 11 12 12 12 13 ...
## $ SortDate : Factor w/ 39 levels "10/16/2017","10/18/2017",..: 5 7 7 7 20 20 20 20 20 20 ...
## $ C3L : num 43.5 44 18.4 21.6 101 ...
## $ C3D : num 83.5 90.35 4.35 1.6 26.35 ...
## $ C4L : num 0 0 2.05 1.2 0 0 0 0 0 0 ...
## $ C4D : num 0 40.5 1.35 0 0 0 0 0 0 0 ...
## $ FbL : num 1.4 0 2.4 2.45 0.55 ...
## $ FbD : num 6.3 1.2 1.4 1.35 0 0 0 0 0.7 0.85 ...
## $ WDY : num 0 0 0 0 17.8 0 0 0 0 9.65 ...
## $ LTR : num 49.7 67.3 11.8 17.7 70.6 ...
## $ TOTAL : num 184.4 243.4 41.7 45.9 216.2 ...
## $ VOR : int 2 4 1 1 4 1 3 2 2 5 ...
## $ UPRIGHT : int 2 2 1 2 2 2 1 2 2 3 ...
## $ LTRDEPTH : num 12.5 13.5 4 4 13 4 8 4 5.5 5.5 ...
names(LAIDat)
## [1] "QuadratRef" "Date_Time" "aaPAR" "abPAR" "Tau"
## [6] "LAI" "Quadrat" "ClipDate" "SortDate" "C3L"
## [11] "C3D" "C4L" "C4D" "FbL" "FbD"
## [16] "WDY" "LTR" "TOTAL" "VOR" "UPRIGHT"
## [21] "LTRDEPTH"
head(LAIDat)
## QuadratRef Date_Time aaPAR abPAR Tau LAI Quadrat ClipDate
## 1 Q2 5/24/2017 14:05 1858.3 930.0 0.506 1.54 Q2 5/24/2017
## 2 Q3 5/24/2017 14:42 1733.4 816.2 0.475 1.47 Q3 5/24/2017
## 3 Q4 5/25/2017 11:46 1757.8 1638.2 0.916 0.16 Q4 5/25/2017
## 4 Q5 5/25/2017 12:18 1707.3 1473.0 0.863 0.26 Q5 5/25/2017
## 5 Q6 6/7/2017 15:20 627.3 148.7 0.238 1.73 Q6 6/7/2017
## 6 Q7 6/7/2017 15:51 844.3 615.8 0.708 0.45 Q7 6/7/2017
## SortDate C3L C3D C4L C4D FbL FbD WDY LTR TOTAL VOR
## 1 5/25/2017 43.50 83.50 0.00 0.00 1.40 6.30 0.0 49.70 184.40 2
## 2 5/26/2017 44.05 90.35 0.00 40.50 0.00 1.20 0.0 67.35 243.45 4
## 3 5/26/2017 18.40 4.35 2.05 1.35 2.40 1.40 0.0 11.75 41.70 1
## 4 5/26/2017 21.55 1.60 1.20 0.00 2.45 1.35 0.0 17.70 45.85 1
## 5 6/9/2017 100.95 26.35 0.00 0.00 0.55 0.00 17.8 70.60 216.25 4
## 6 6/9/2017 30.45 2.50 0.00 0.00 0.35 0.00 0.0 17.90 51.20 1
## UPRIGHT LTRDEPTH
## 1 2 12.5
## 2 2 13.5
## 3 1 4.0
## 4 2 4.0
## 5 2 13.0
## 6 2 4.0
LAIDat$kg.ha<-with(LAIDat, TOTAL*4.325)
lai.gg <- ggplot(data=LAIDat, aes(x=LAI, y=kg.ha))+
geom_point(color="red") +
theme_bw(20)+
labs(x="Leaf Area Index (LAI)",
y="Actual Biomass(kg/ha)")
lai.gg +
geom_smooth(method = "lm",se=FALSE)+
annotate("text", x=1, y=1750, label="paste(R^2,\"=0.71\")",
parse=TRUE, size=4)
pacman::p_load(ggplot2,GGally, AICcmodavg,car)
LAIDat <- read.csv("C:/Users/micayla.lakey/Desktop/R/data/ceptdata.csv")
# Calculate Varaible Inflation Factors:
# Fit a complete model
vif(lm(TOTAL ~ C3L + C3D + C4L + C4D + FbL + FbD +
WDY + LTR + LAI + VOR + UPRIGHT + LTRDEPTH, LAIDat))
## Warning in summary.lm(object): essentially perfect fit: summary may be
## unreliable
## C3L C3D C4L C4D FbL FbD WDY LTR
## 4.554289 3.867755 2.113622 3.955374 2.406485 1.926446 1.461520 3.503516
## LAI VOR UPRIGHT LTRDEPTH
## 7.821081 5.028809 1.630785 1.806318
# Drop terms > 5, refit:
#vif(lm(mpg ~ hp + wt + qsec + drat, mtm))
# Fit full combination of variables
m.null <- lm(TOTAL ~ 1, LAIDat)
m.1a <- lm(TOTAL ~ LAI, LAIDat)
m.2a <- lm(TOTAL ~ LAI + LTR, LAIDat)
m.2b <- lm(TOTAL ~ LAI + LTRDEPTH, LAIDat)
m.3a <- lm(TOTAL ~ LAI + LTRDEPTH + VOR, LAIDat)
m.4a <- lm(TOTAL ~ C3D + C3L + LTR +LAI, LAIDat)
# Compare models with information criteria
# Construct candidate model set
# First store model names in a character string
cand.mod.names <- c("m.null", "m.1a", "m.2a", "m.2b", "m.3a", "m.4a")
# Define an empty list to put models
cand.mods <- list( )
# This function fills the list by model names
for(i in 1:length(cand.mod.names)) {
cand.mods[[i]] <- get(cand.mod.names[i]) }
# Function aictab does the AICc-based model comparison
print(aictab(cand.set = cand.mods,
modnames = cand.mod.names))
##
## Model selection based on AICc:
##
## K AICc Delta_AICc AICcWt Cum.Wt LL
## m.2a 4 828.62 0.00 0.88 0.88 -410.04
## m.4a 6 832.63 4.01 0.12 1.00 -409.73
## m.3a 5 875.91 47.28 0.00 1.00 -432.54
## m.2b 4 884.43 55.80 0.00 1.00 -437.94
## m.1a 3 899.89 71.27 0.00 1.00 -446.79
## m.null 2 994.89 166.26 0.00 1.00 -495.37
mod.tab <- aictab(cand.set = cand.mods,
modnames = cand.mod.names)
mod.tab <- as.data.frame(mod.tab)
# Are top two models different?
anova(m.2a, m.4a)
## Analysis of Variance Table
##
## Model 1: TOTAL ~ LAI + LTR
## Model 2: TOTAL ~ C3D + C3L + LTR + LAI
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 76 149102
## 2 74 147937 2 1164.3 0.2912 0.7482
anova(m.1a, m.2b)
## Analysis of Variance Table
##
## Model 1: TOTAL ~ LAI
## Model 2: TOTAL ~ LAI + LTRDEPTH
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 77 377995
## 2 76 302165 1 75830 19.073 3.925e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Calculate 95% confidence intervals for top 2 models
(m.2a.CIs <- as.data.frame(confint(m.2a) ) )
## 2.5 % 97.5 %
## (Intercept) 49.4351859 88.935923
## LAI 37.3325220 58.105806
## LTR 0.9118286 1.324112
(m.4a.CIs <- as.data.frame(confint(m.4a) ) )
## 2.5 % 97.5 %
## (Intercept) 46.4607580 89.7148959
## C3D -0.1480187 0.3274438
## C3L -0.3431623 0.3562115
## LTR 0.8778188 1.3700288
## LAI 33.4209963 57.6086889
# Get parameter estimates averaged across all models
terms <- c("(Intercept)", "LAI", "LTR", "C3L", "C3D", "VOR", "LTRDEPTH")
av.params <- as.data.frame(array(NA,c(length(terms),4)))
colnames(av.params)<-c("term","estimate","ciL","ciU")
for(i in 1:length(terms)) {
av <- modavg(parm = paste(terms[i]),
cand.set = cand.mods,
modnames = cand.mod.names)
av.params[i,1] <- terms[i]
av.params[i,2] <- round(av$Mod.avg.beta, 2)
av.params[i,3] <- round(av$Lower.CL, 3)
av.params[i,4] <- round(av$Upper.CL, 3) }
av.params
## term estimate ciL ciU
## 1 (Intercept) 69.06 49.379 88.731
## 2 LAI 47.46 36.930 57.985
## 3 LTR 1.12 0.911 1.327
## 4 C3L 0.01 -0.337 0.350
## 5 C3D 0.09 -0.144 0.324
## 6 VOR 19.06 7.788 30.324
## 7 LTRDEPTH 8.32 5.128 11.511
av.params$term <- plyr::revalue(av.params$term,
c("LAI" = "Leaf Area\nIndex",
"LTR" = "Actual Litter\nBiomass",
"C3L" = "Live C3\ngrass",
"C3D" = "Dead C3\ngrass",
"VOR" = "Visual\nObstruction\nReading",
"LTRDEPTH" = "Litter\nDepth"))
I don’t remember what all these numbers mean. Is av.params where I’m supposed to get the new coefficients from?
# Plotting confidence intervals
ggplot(subset(av.params, terms != "(Intercept)")) +
coord_flip() + theme_bw(16) +
geom_hline(yintercept = 0) +
geom_errorbar(aes(x=term,
ymin=ciL, ymax=ciU),
width=0.2, size=2, color="#377eb8") +
geom_point(aes(x=term, y=estimate), size=7, pch=21,
bg="#377eb8", color="white", stroke=2) +
labs(y = "regression parameter estimate (95% CI)", x = " ")+
theme(axis.text = element_text(color = "black",
size = 16))
OK now I’m stuck on the fit <- mle2 part. I think the error is in the method part. This is what it gives me for an error message: Error in optim(par = c(84.4, 84.79, NA, NA), fn = function (p) : non-finite value supplied by optim
# Maximum Likelihood Estimation for regression parameters
LAIDat <- read.csv("C:/Users/micayla.lakey/Desktop/R/data/ceptdata.csv")
# set data paramaters
y = "TOTAL"
x = "LAI"
mu = round(mean(x),0)
sigma = round(sd(x),1)
# Set regression parameters
# define and fit linear model
org.mod <- lm(TOTAL ~ LAI, LAIDat) # Training regression
# use regression coefficients as starting points for mle
B0 = round(as.numeric(coef(org.mod) [1] ),1)
B1 = round(as.numeric(coef(org.mod) [2] ),2)
# define log-likelihood function
LL <- function(beta0, beta1, Mu, Sigma) {
R = y - x * beta1 - beta0
R = suppressWarnings(dnorm(R, Mu, Sigma, log = TRUE))
-sum(R)
}
require(bbmle)
fit <- mle2(LL, start = list( beta0 = B0, beta1 = B1,
Mu = mu, Sigma = sigma),
method="L-BFGS-B")
mle.CIs <- suppressWarnings(confint(fit))
do <- data.frame(
slope.est = as.numeric(fit@coef[2] ),
slope.ciL = as.numeric(mle.CIs[2,1]),
slope.ciU = as.numeric(mle.CIs[2,2]) )
Val <- read.csv("C:/Users/micayla.lakey/Desktop/R/data/validpts.csv")
Val$kg.ha<-with(Val, biomass*4.325)
pvabio.gg <- ggplot(data=Val, aes(x=kg.ha, y=366.73*LAI+365.01))+
geom_point(color="orange") +
theme_bw(20)+
labs(x="Actual Biomass (kg/ha)",
y="Predicted Biomass (kg/ha)")
pvabio.gg +
geom_smooth(method = "lm",se=FALSE, color="green") +
ggtitle("Predicted Biomass vs. Actual biomass")+
annotate("text", x=850, y=700, label="paste(R^2,\"=0.70\")",
parse=TRUE, size=5)