REM paper stats and graphs

LAIDat <- read.csv("C:/Users/micayla.lakey/Desktop/R/data/ceptdata.csv")
str(LAIDat)

## 'data.frame':    79 obs. of  21 variables:
##  $ QuadratRef: Factor w/ 79 levels "Q1(19)","Q10",..: 21 33 45 50 52 63 75 78 2 4 ...
##  $ Date_Time : Factor w/ 79 levels "5/24/2017 14:05",..: 1 2 5 6 31 32 33 34 35 37 ...
##  $ aaPAR     : num  1858 1733 1758 1707 627 ...
##  $ abPAR     : num  930 816 1638 1473 149 ...
##  $ Tau       : num  0.506 0.475 0.916 0.863 0.238 0.708 0.588 0.7 0.721 0.26 ...
##  $ LAI       : num  1.54 1.47 0.16 0.26 1.73 0.45 0.93 0.57 0.49 2.44 ...
##  $ Quadrat   : Factor w/ 79 levels "Q10","Q11","Q12",..: 11 22 33 44 55 66 77 79 1 2 ...
##  $ ClipDate  : Factor w/ 24 levels "5/24/2017","5/24/2018",..: 1 1 3 3 11 11 12 12 12 13 ...
##  $ SortDate  : Factor w/ 39 levels "10/16/2017","10/18/2017",..: 5 7 7 7 20 20 20 20 20 20 ...
##  $ C3L       : num  43.5 44 18.4 21.6 101 ...
##  $ C3D       : num  83.5 90.35 4.35 1.6 26.35 ...
##  $ C4L       : num  0 0 2.05 1.2 0 0 0 0 0 0 ...
##  $ C4D       : num  0 40.5 1.35 0 0 0 0 0 0 0 ...
##  $ FbL       : num  1.4 0 2.4 2.45 0.55 ...
##  $ FbD       : num  6.3 1.2 1.4 1.35 0 0 0 0 0.7 0.85 ...
##  $ WDY       : num  0 0 0 0 17.8 0 0 0 0 9.65 ...
##  $ LTR       : num  49.7 67.3 11.8 17.7 70.6 ...
##  $ TOTAL     : num  184.4 243.4 41.7 45.9 216.2 ...
##  $ VOR       : int  2 4 1 1 4 1 3 2 2 5 ...
##  $ UPRIGHT   : int  2 2 1 2 2 2 1 2 2 3 ...
##  $ LTRDEPTH  : num  12.5 13.5 4 4 13 4 8 4 5.5 5.5 ...

names(LAIDat)

##  [1] "QuadratRef" "Date_Time"  "aaPAR"      "abPAR"      "Tau"       
##  [6] "LAI"        "Quadrat"    "ClipDate"   "SortDate"   "C3L"       
## [11] "C3D"        "C4L"        "C4D"        "FbL"        "FbD"       
## [16] "WDY"        "LTR"        "TOTAL"      "VOR"        "UPRIGHT"   
## [21] "LTRDEPTH"

head(LAIDat)

##   QuadratRef       Date_Time  aaPAR  abPAR   Tau  LAI Quadrat  ClipDate
## 1         Q2 5/24/2017 14:05 1858.3  930.0 0.506 1.54      Q2 5/24/2017
## 2         Q3 5/24/2017 14:42 1733.4  816.2 0.475 1.47      Q3 5/24/2017
## 3         Q4 5/25/2017 11:46 1757.8 1638.2 0.916 0.16      Q4 5/25/2017
## 4         Q5 5/25/2017 12:18 1707.3 1473.0 0.863 0.26      Q5 5/25/2017
## 5         Q6  6/7/2017 15:20  627.3  148.7 0.238 1.73      Q6  6/7/2017
## 6         Q7  6/7/2017 15:51  844.3  615.8 0.708 0.45      Q7  6/7/2017
##    SortDate    C3L   C3D  C4L   C4D  FbL  FbD  WDY   LTR  TOTAL VOR
## 1 5/25/2017  43.50 83.50 0.00  0.00 1.40 6.30  0.0 49.70 184.40   2
## 2 5/26/2017  44.05 90.35 0.00 40.50 0.00 1.20  0.0 67.35 243.45   4
## 3 5/26/2017  18.40  4.35 2.05  1.35 2.40 1.40  0.0 11.75  41.70   1
## 4 5/26/2017  21.55  1.60 1.20  0.00 2.45 1.35  0.0 17.70  45.85   1
## 5  6/9/2017 100.95 26.35 0.00  0.00 0.55 0.00 17.8 70.60 216.25   4
## 6  6/9/2017  30.45  2.50 0.00  0.00 0.35 0.00  0.0 17.90  51.20   1
##   UPRIGHT LTRDEPTH
## 1       2     12.5
## 2       2     13.5
## 3       1      4.0
## 4       2      4.0
## 5       2     13.0
## 6       2      4.0

LAIDat$kg.ha<-with(LAIDat, TOTAL*4.325)

lai.gg <- ggplot(data=LAIDat, aes(x=LAI, y=kg.ha))+
            geom_point(color="red") +
            theme_bw(20)+
            labs(x="Leaf Area Index (LAI)",
                 y="Actual Biomass(kg/ha)")

lai.gg +
geom_smooth(method = "lm",se=FALSE)+
  annotate("text", x=1, y=1750, label="paste(R^2,\"=0.71\")",
            parse=TRUE, size=4)

    pacman::p_load(ggplot2,GGally, AICcmodavg,car) 
    LAIDat <- read.csv("C:/Users/micayla.lakey/Desktop/R/data/ceptdata.csv")
    
      # Calculate Varaible Inflation Factors:
              # Fit a complete model
            vif(lm(TOTAL ~  C3L + C3D + C4L + C4D + FbL + FbD +
                     WDY + LTR + LAI + VOR + UPRIGHT + LTRDEPTH, LAIDat))

## Warning in summary.lm(object): essentially perfect fit: summary may be
## unreliable

##      C3L      C3D      C4L      C4D      FbL      FbD      WDY      LTR 
## 4.554289 3.867755 2.113622 3.955374 2.406485 1.926446 1.461520 3.503516 
##      LAI      VOR  UPRIGHT LTRDEPTH 
## 7.821081 5.028809 1.630785 1.806318

            # Drop terms > 5, refit:
            #vif(lm(mpg ~ hp + wt + qsec + drat, mtm))
        
    # Fit full combination of variables
        m.null <- lm(TOTAL ~ 1, LAIDat)
        m.1a <- lm(TOTAL ~ LAI, LAIDat)
    m.2a <- lm(TOTAL ~ LAI + LTR, LAIDat)
    m.2b <- lm(TOTAL ~ LAI + LTRDEPTH, LAIDat)
    m.3a <- lm(TOTAL ~ LAI + LTRDEPTH + VOR, LAIDat)
    m.4a <- lm(TOTAL ~ C3D + C3L + LTR +LAI, LAIDat)
    
    
    
    # Compare models with information criteria
      
      # Construct candidate model set
      # First store model names in a character string
      cand.mod.names <- c("m.null", "m.1a", "m.2a", "m.2b", "m.3a", "m.4a")
     
       # Define an empty list to put models
          cand.mods <- list( ) 
      
       # This function fills the list by model names
          for(i in 1:length(cand.mod.names)) {
            cand.mods[[i]] <- get(cand.mod.names[i]) }
      
      # Function aictab does the AICc-based model comparison
       print(aictab(cand.set = cand.mods, 
                    modnames = cand.mod.names))

## 
## Model selection based on AICc:
## 
##        K   AICc Delta_AICc AICcWt Cum.Wt      LL
## m.2a   4 828.62       0.00   0.88   0.88 -410.04
## m.4a   6 832.63       4.01   0.12   1.00 -409.73
## m.3a   5 875.91      47.28   0.00   1.00 -432.54
## m.2b   4 884.43      55.80   0.00   1.00 -437.94
## m.1a   3 899.89      71.27   0.00   1.00 -446.79
## m.null 2 994.89     166.26   0.00   1.00 -495.37

       mod.tab <- aictab(cand.set = cand.mods, 
                    modnames = cand.mod.names)
       mod.tab <- as.data.frame(mod.tab)
      # Are top two models different? 
        anova(m.2a, m.4a)

## Analysis of Variance Table
## 
## Model 1: TOTAL ~ LAI + LTR
## Model 2: TOTAL ~ C3D + C3L + LTR + LAI
##   Res.Df    RSS Df Sum of Sq      F Pr(>F)
## 1     76 149102                           
## 2     74 147937  2    1164.3 0.2912 0.7482

        anova(m.1a, m.2b)

## Analysis of Variance Table
## 
## Model 1: TOTAL ~ LAI
## Model 2: TOTAL ~ LAI + LTRDEPTH
##   Res.Df    RSS Df Sum of Sq      F    Pr(>F)    
## 1     77 377995                                  
## 2     76 302165  1     75830 19.073 3.925e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

    # Calculate 95% confidence intervals for top 2 models
        (m.2a.CIs <- as.data.frame(confint(m.2a) ) )

##                  2.5 %    97.5 %
## (Intercept) 49.4351859 88.935923
## LAI         37.3325220 58.105806
## LTR          0.9118286  1.324112

        (m.4a.CIs <- as.data.frame(confint(m.4a) ) )

##                  2.5 %     97.5 %
## (Intercept) 46.4607580 89.7148959
## C3D         -0.1480187  0.3274438
## C3L         -0.3431623  0.3562115
## LTR          0.8778188  1.3700288
## LAI         33.4209963 57.6086889

     # Get parameter estimates averaged across all models 
        terms <- c("(Intercept)", "LAI", "LTR", "C3L", "C3D", "VOR", "LTRDEPTH")
        av.params <- as.data.frame(array(NA,c(length(terms),4)))
        colnames(av.params)<-c("term","estimate","ciL","ciU")
       for(i in 1:length(terms)) {
        av <- modavg(parm = paste(terms[i]), 
                     cand.set = cand.mods, 
                     modnames = cand.mod.names)
            av.params[i,1] <- terms[i]
            av.params[i,2] <- round(av$Mod.avg.beta, 2)
            av.params[i,3] <- round(av$Lower.CL, 3) 
            av.params[i,4] <- round(av$Upper.CL, 3) }
        
        av.params

##          term estimate    ciL    ciU
## 1 (Intercept)    69.06 49.379 88.731
## 2         LAI    47.46 36.930 57.985
## 3         LTR     1.12  0.911  1.327
## 4         C3L     0.01 -0.337  0.350
## 5         C3D     0.09 -0.144  0.324
## 6         VOR    19.06  7.788 30.324
## 7    LTRDEPTH     8.32  5.128 11.511

        av.params$term <- plyr::revalue(av.params$term, 
                                        c("LAI" = "Leaf Area\nIndex", 
                                          "LTR" = "Actual Litter\nBiomass",
                                          "C3L" = "Live C3\ngrass",
                                          "C3D" = "Dead C3\ngrass",
                                          "VOR" = "Visual\nObstruction\nReading",
                                          "LTRDEPTH" = "Litter\nDepth"))

I don’t remember what all these numbers mean. Is av.params where I’m supposed to get the new coefficients from?

# Plotting confidence intervals
    
    ggplot(subset(av.params, terms != "(Intercept)")) + 
             coord_flip() + theme_bw(16) +
      geom_hline(yintercept = 0) + 
      geom_errorbar(aes(x=term,
                        ymin=ciL, ymax=ciU), 
                        width=0.2, size=2, color="#377eb8") +
      geom_point(aes(x=term, y=estimate), size=7, pch=21, 
                 bg="#377eb8", color="white", stroke=2) + 
      labs(y = "regression parameter estimate (95% CI)", x = " ")+
      theme(axis.text = element_text(color = "black", 
                                         size = 16))

OK now I’m stuck on the fit <- mle2 part. I think the error is in the method part. This is what it gives me for an error message: Error in optim(par = c(84.4, 84.79, NA, NA), fn = function (p) : non-finite value supplied by optim

# Maximum Likelihood Estimation for regression parameters 

    LAIDat <- read.csv("C:/Users/micayla.lakey/Desktop/R/data/ceptdata.csv")

# set data paramaters
    y = "TOTAL"
    x = "LAI"
    mu = round(mean(x),0)
    sigma = round(sd(x),1) 
    # Set regression parameters
      # define and fit linear model
    
        org.mod <- lm(TOTAL ~ LAI, LAIDat) # Training regression 
      # use regression coefficients as starting points for mle
        B0 = round(as.numeric(coef(org.mod) [1] ),1) 
        B1 = round(as.numeric(coef(org.mod) [2] ),2) 
    # define log-likelihood function 
    LL <- function(beta0, beta1, Mu, Sigma) {
              R =  y - x  * beta1 - beta0
              R = suppressWarnings(dnorm(R, Mu, Sigma, log = TRUE))
              -sum(R)
        }
    require(bbmle)
    fit <- mle2(LL, start = list( beta0 = B0, beta1 = B1, 
                                  Mu = mu, Sigma = sigma),
                method="L-BFGS-B")
    mle.CIs <- suppressWarnings(confint(fit)) 
    
        do <- data.frame( 
                    slope.est = as.numeric(fit@coef[2] ), 
                    slope.ciL = as.numeric(mle.CIs[2,1]), 
                    slope.ciU = as.numeric(mle.CIs[2,2]) )

Val <- read.csv("C:/Users/micayla.lakey/Desktop/R/data/validpts.csv")
Val$kg.ha<-with(Val, biomass*4.325)

pvabio.gg <- ggplot(data=Val, aes(x=kg.ha, y=366.73*LAI+365.01))+
  geom_point(color="orange") +
  theme_bw(20)+
  labs(x="Actual Biomass (kg/ha)",
       y="Predicted Biomass (kg/ha)")

pvabio.gg +
  geom_smooth(method = "lm",se=FALSE, color="green") +
  ggtitle("Predicted Biomass vs. Actual biomass")+
  annotate("text", x=850, y=700, label="paste(R^2,\"=0.70\")",
           parse=TRUE, size=5)

REM paper stats and graphs

Micayla Lakey

December 20, 2018