New Training and Test Datasets

# Validation points: read the CSV and derive kg.ha as biomass * 4.325.
Val <- read.csv("C:/Users/micayla.lakey/Desktop/R/data/validpts.csv") %>%
  mutate(kg.ha = biomass * 4.325)

# Rename any column called "actual" to "kg.ha".
# NOTE(review): kg.ha has just been created above, so if validpts.csv really
# has an "actual" column this leaves two columns named kg.ha -- confirm which
# of the two is meant to feed the selection below.
names(Val)[names(Val) == "actual"] <- "kg.ha"

# CEPT data: read the CSV and derive kg.ha as TOTAL * 4.325.
LAIDat <- read.csv("C:/Users/micayla.lakey/Desktop/R/data/ceptdata.csv") %>%
  mutate(kg.ha = TOTAL * 4.325)

# Keep only the two columns used downstream (LAI and kg.ha).
test <- select(Val, LAI, kg.ha)

train <- select(LAIDat, LAI, kg.ha)


# Pool the two data sets, sort by LAI, and re-split them systematically:
# every third row of the sorted data becomes test data, the rest training data.
comb <- rbind(train, test)

# comb.ord is the combined LAIDat + Val rows in ascending order of LAI

comb.ord <- comb[order(comb$LAI), ]

# Label the sorted rows 1, 2, 3, 1, 2, 3, ... The length comes from the data
# itself (not a hard-coded row count), so the split keeps working when the
# number of rows in either input file changes.
comb.ord$sequence <- rep(1:3, length.out = nrow(comb.ord))

# test.dat is the rows labelled 2 (drawn from both LAIDat and Val)

test.dat <- comb.ord[comb.ord$sequence == 2L, ]

# train.dat is the rows labelled 1 or 3 (drawn from both LAIDat and Val)

train.dat <- subset(comb.ord, sequence %in% c(1L, 3L))

Regression Lines

# regression line

# Scatter plot of the training data: kg.ha against LAI.
train.gg <- ggplot(data = train.dat, aes(x = LAI, y = kg.ha)) +
  geom_point(color = "red") +
  theme_bw(20) +
  labs(x = "Leaf Area Index (LAI)",
       y = "Actual Biomass(kg/ha)")

# Overlay a straight-line fit (default colour) and a quadratic fit (black).
# The formulas are passed as formula objects rather than character strings,
# and the linear one is spelled out instead of relying on the default.
train.gg +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  geom_smooth(method = "lm", formula = y ~ poly(x, 2), color = "black",
              se = FALSE)

#annotate("text", x=1, y=1750, label="paste(R^2,\"=0.71\")",
#parse=TRUE, size=4)

# Quadratic model of kg.ha on LAI. raw = TRUE requests raw (not orthogonal)
# polynomial terms, so the coefficients multiply LAI and LAI^2 directly.
poly.train <- lm(kg.ha ~ poly(LAI, 2, raw = TRUE), data = train.dat)

# Captured output from a previous run follows.
coef(poly.train)
##               (Intercept) poly(LAI, 2, raw = TRUE)1 
##                  53.70858                 720.76308 
## poly(LAI, 2, raw = TRUE)2 
##                 -79.31494

Predicted Biomass vs Actual Biomass

# predicted biomass vs actual biomass

# Add both model predictions to the test data as ordinary numeric columns
# (no I() wrapper, which would make them class "AsIs").
#   predicted: straight-line model with fixed intercept and slope.
#     NOTE(review): no linear fit is visible in this section -- confirm that
#     365.01 and 366.73 were estimated from the current train.dat.
#   pred.poly: taken from the fitted quadratic model poly.train, so it uses
#     the full-precision coefficients and follows the model if it is refitted,
#     instead of re-typing rounded values from printed output.
test.dat <- test.dat %>%
  mutate(
    predicted = 365.01 + LAI * 366.73,
    pred.poly = unname(predict(poly.train, newdata = data.frame(LAI = LAI)))
  )
## Warning: package 'bindrcpp' was built under R version 3.4.3

# Predicted (y) against actual (x) biomass: green = linear model,
# blue = quadratic model.
pvabio.gg <- ggplot(data = test.dat, aes(x = kg.ha)) +
  geom_point(aes(y = predicted), color = "green", alpha = 0.5) +
  geom_point(aes(y = pred.poly), color = "blue", alpha = 0.5) +
  theme_bw(20) +
  labs(x = "Actual Biomass (kg/ha)",
       y = "Predicted Biomass (kg/ha)")

# Add a linear trend through each set of predictions plus the 1:1 reference
# line (intercept 0, slope 1).
# NOTE(review): the blue smoother keeps its confidence band while the green
# one has se = FALSE -- confirm the asymmetry is intentional.
pvabio.gg +
  geom_smooth(aes(y = predicted), method = "lm", formula = y ~ x,
              se = FALSE, color = "green") +
  geom_smooth(aes(y = pred.poly), method = "lm", formula = y ~ x,
              color = "blue") +
  geom_abline(intercept = 0, slope = 1) +
  ggtitle("Predicted Biomass vs. Actual Biomass")

   #coord_cartesian(xlim = c(200,1050), ylim = c(200,1050))