## Loading required package: nlme
## This is mgcv 1.8-15. For overview type 'help("mgcv-package")'.
library(RgoogleMaps)
for(i in names(table(SiteInfo$Vegetation))){
  Lat <- SiteInfo$Lat[SiteInfo$Vegetation==i]
  Long <- SiteInfo$Long[SiteInfo$Vegetation==i]

## Plot sampling points on map ----
MyMap <- GetMap.bbox(lonR = range(SiteInfo$Long), latR = range(SiteInfo$Lat),size=c(640,640), maptype = "terrain")
# options for map type are "roadmap","mobile","satellite","terrain" and "hybrid"
PlotOnStaticMap(MyMap)

convert_points <- LatLon2XY.centered(MyMap, Lat, Long)
points(convert_points$newX, convert_points$newY, col = 'red', pch=19)
NAMES <- SiteInfo$Site[SiteInfo$Vegetation==i]
text(convert_points$newX, convert_points$newY+16,NAMES,cex=0.6)
text(min(SiteInfo$Lat),min(SiteInfo$Long),paste("Species is",i))  
}

Sites Information

length(SiteInfo$Vegetation)
## [1] 157
table(SiteInfo$Vegetation)
## 
## CRO CSH DBF EBF ENF GRA  MF OSH SAV WET WSA 
##  19   6  17   3  50  23   6  17   1  11   4
length(unique(SiteInfo$Vegetation))
## [1] 11
#EachSpecies <- names(sort(table(SiteInfo$Vegetation),decreasing = T))
# SiteInfo$MaintainYear <- SiteInfo$End-SiteInfo$Start +1
# TrainSelection <- sort(SiteInfo,f= ~ Vegetation - MaintainYear)

Plot of each species

EachSpecies <- names(sort(table(SiteInfo$Vegetation),decreasing = T))


for(i in EachSpecies){
  Index <- (Full.Data$species==i)
  SubData <- Full.Data[Index,]
  SortData <- sort(SubData,f= ~ elevation)
  SiteName <- unique(as.character(SortData$site))
  SiteNumber <-table(as.character(SortData$site))
  D = data.frame(SiteNumber)[rank(SiteName),]
  SiteMarker <- cumsum(D$Freq)
  X <- SortData$flux
  
  SubInfo <- SiteInfo[SiteInfo$Vegetation==i,]
  SortInfo <- sort(SubInfo, f=~ Elevation.m.)
  
  plot(X,pch=20,cex=0.3,main=i,ylab=i,ylim=c(-20,6))
  abline(v=SiteMarker,lty="dashed",col="grey")
  text(SiteMarker-0.5*SiteNumber[rank(SiteName)],4, SiteName)
  text(SiteMarker-0.5*SiteNumber[rank(SiteName)],-15,as.character(SortInfo$Elevation.m.))
  print(round(SortInfo$Elevation.m.,0))
}

Model 1

Mod.Train1 <- gam(flux ~ s(year,bs="cc",k=23)
                  +s(year,bs="cc",k=23,by=species,m=1)
                  +species+s(lat,bs="cr")
                  +s(long,bs="cr")
                  +indicator, data=Train.data)
summary(Mod.Train1)
PlotModRes(residuals(Mod.Train1),pch=20,cex=0.3)

Prediction of Model 1

Pred1 <- predict(Mod.Train1,Test.data[,-c(1,2)])
PlotPredRes(Pred1)

Model 2

Mod.Train2 <- gam(flux ~ s(year,bs="cc",k=23)
                  +s(year,bs="cc",k=23,by=species,m=1)
                  +species+s(lat,bs="cr")
                  +s(long,bs="cr")
                  +indicator
                  +s(airtemp,bs="cr",k=23),data=Train.data,
                  na.action = na.exclude)
summary(Mod.Train2)
PlotModRes(residuals(Mod.Train2))

Prediction of Model 2

Pred2 <- predict(Mod.Train2,Test.data[,-c(1,2)])
PlotPredRes(Pred=Pred2,Obs=Obs)

Meeting notes

Re-order the residuals by day of year — boxplot or others

Add elevation + solar day in the model

# Mod.Train3 <- gam(flux ~ s(year,bs="cs",k=5) + species
#                   + s(year,bs="cr",k=15,by=species,m=1)
#                   +s(lat,bs="cr")
#                   + long
#                   + indicator
#                   + elevation
#                   + s(dayyear,bs="cc",k=15)
#                   +s(airtemp,bs="cc",k=15),data=Train.data,
#                   na.action = na.exclude)

Mod.Train3 <- gam(flux ~ year 
                  + species 
                  + lat
                  + long
                  + indicator
                  + elevation
                  + dayyear
                  + airtemp
                  + year:species + year:lat + year:indicator + year:elevation + year:airtemp
                  + species:lat + species:long + species:indicator + species:elevation + species:airtemp
                  + lat:long + lat:indicator + lat:elevation + lat:dayyear + lat:airtemp
                  + long:indicator + long:elevation + long:airtemp
                  + indicator:elevation + indicator:dayyear + indicator:airtemp
                  + elevation:dayyear + elevation:airtemp,
                  data=Train.data,
                  na.action = na.exclude)

 summary(Mod.Train3)
PlotModRes(residuals(Mod.Train3))
Pred3 <- predict(Mod.Train3,Test.data[,-c(1,2)])

PlotPredRes(Obs,Pred3)

Mod 4 – smoothed version of model 3

Mod.Train4 <- gam(flux ~ s(year,bs="tp",k=10) 
                  + species
                  + s(lat,bs="tp",k=10)
                  + long
                  + indicator
                  + s(elevation,bs="tp",k=10)
                  + s(dayyear,bs="cc",k=10)
                  + s(airtemp,bs="tp",k=10)
                  + species:lat + s(long,by=species) + species:indicator + s(elevation,by=species) + s(airtemp,by=species)
                  + long:elevation 
                  + indicator:airtemp
                  + elevation:airtemp,
                  data=Train.data,
                  na.action = na.exclude)

summary(Mod.Train4)
 PlotModRes(residuals(Mod.Train4))
 Pred4 <- predict(Mod.Train4,Test.data[,-c(1,2)])
 PlotPredRes(Obs,Pred4)

Model 5

Mod.Train5 <- gam(flux ~ s(year,bs="tp",k=10) 
                  + s(lat,k=10) 
                  + s(long, k=10)
                  + s(elevation,k=10)
                  + s(airtemp,by=species,k=10) # use full
                  + s(dayyear,bs="cc",k=10)
                  + s(airtemp,k=10),
                  data=Train.data,
                  na.action = na.exclude)

plot(Mod.Train5)
summary(Mod.Train5)

Model 6

Mod.Train6 <- gam(flux ~ 
                  #+ s(dayyear,bs="cc",k=10)
                   s(elevation,bs="tp")
                  + s(airtemp,bs="tp")
                  + ti(elevation,airtemp) # to use this the main effect has to be smoothed?
                  + s(lat,long,bs="ts") # increased 3%
                  # + s(airtemp,by=species,k=10) # EBF:weired
                  ,
                  data=Train.data,
                  na.action = na.exclude)

plot(Mod.Train6)

summary(Mod.Train6)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## flux ~ s(elevation, bs = "tp") + s(airtemp, bs = "tp") + ti(elevation, 
##     airtemp) + s(lat, long, bs = "ts")
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.97110    0.01875  -51.79   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##                          edf Ref.df      F p-value    
## s(elevation)           8.898  8.995  57.16  <2e-16 ***
## s(airtemp)             8.818  8.990 421.55  <2e-16 ***
## ti(elevation,airtemp) 15.776 15.975  78.14  <2e-16 ***
## s(lat,long)           28.496 29.000  94.83  <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.409   Deviance explained = 41.2%
## GCV = 3.2333  Scale est. = 3.2191    n = 14337
 Pred6 <- predict(Mod.Train6,Test.data[,-c(1,2)])
 PlotPredRes(Obs,Pred6)

 Res.train <- residuals(Mod.Train6) 
 Res.test <- Pred6 - Test.data$flux
 PlotResTo(Res.train,Res.test)

Model 7 – Data without EBF

Test.Site.noEBF <- c("US-Bo1","US-Ne1","US-ARM","US-Ro1", # CRO
                "US-Ced", # CSH
                "US-Ha1","US-WCr","US-MMS", # DBF
                "US-Ho1","US-Wrc","US-NR1","CA-TP1","US-GLE","US-Me2","US-SP1", # ENF
                "US-Var","US-Wkg","US-FPe", # GRA
                "US-PFa", # MF
                "US-Whs","US-Ses","US-SRC", # OSH
                "US-Los","US-Brw", # WET
                "US-Ton" # WSA
                )
Full.Data.noEBF <- Full.Data[-which(Full.Data$species=="EBF"),]


Train.data.noEBF <- subset(Full.Data.noEBF,!(site %in% Test.Site.noEBF)) 
Test.data.noEBF <- subset(Full.Data.noEBF,(site %in% Test.Site.noEBF))
Obs.noEBF <- Test.data.noEBF[,2]
Mod.Train7 <- gam(flux ~ #s(year,bs="tp",k=10) 
                  # + s(dayyear,bs="cc",k=10)
                  # + s(elevation,bs="tp")
                  # + s(airtemp,bs="tp")
                  # + species
                  # to use this the main effect has to be smoothed? No
                  + ti(elevation,airtemp) 
                  + s(lat,long,bs="tp") # "tp": isotropy 
                  + s(airtemp,by=species,k=10) # EBF:weired
                  ,
                  data=Train.data.noEBF,
                  na.action = na.exclude)

plot(Mod.Train7)
summary(Mod.Train7)

Meeting on May 12 2017

To check whether the points is in the interior area

Project 4D to 3D

3D plot of lat, long, airtemp

You must enable Javascript to view this page properly.

3D plot of lat, long, elevation

You must enable Javascript to view this page properly.

3D plot of lat, elevation, airtemp

You must enable Javascript to view this page properly.

3D plot of long, elevation, airtemp

You must enable Javascript to view this page properly.

Fit model locally

Iter.Weighted <- function(Spe,K,tolerance=0.01,n=30){
  Train.sub <- Train.data[Train.data$species==Spe,]
  
  Mod.Train <- gam(flux ~ s(elevation,bs="tp",k=K)
                  + s(airtemp,bs="tp")
                  + ti(elevation,airtemp,k=K) 
                  + s(lat,long,bs="ts",k=K),
                  data=Train.sub,
                  na.action = na.exclude)
  Res.Train <- residuals(Mod.Train)
  SD.current0 <- sd(Res.Train,na.rm=T)
  SD.Total0=SD.current0

  Train.sub1 <- Train.sub
  Dim <- dim(Train.sub1[,2:5])
  Iter <- matrix(c(0,0,SD.current0,SD.Total0),nrow=2,byrow = T)
  
    i = 1
    while(i < n && abs(Iter[i+1,1] - Iter[i,1])>tolerance){
    Mod.Train1 <- gam(flux ~ s(elevation,bs="tp",k=K)
                  + s(airtemp,bs="tp")
                  + ti(elevation,airtemp,k=K) 
                  + s(lat,long,bs="ts",k=K),
                  weights = rep(1/Iter[i+1,2],Dim[1]),
                  data=Train.sub1,
                  na.action = na.exclude)
    Res.train1 <- residuals(Mod.Train1)
    SD.current <- sd(Res.train1,na.rm=T)
    SD.Total <- SD.current*Iter[i+1,2]
    Iter <- rbind(Iter,c(SD.current,SD.Total))
    i <- i+1
  }
  
  R <- rbind(range(Res.train,na.rm = T),range(Res.train1,na.rm = T))  
  y.l <- min(R[,1])
  y.u <- max(R[,2]) 
  par(mfrow=c(2,2))
  plot(Iter[-1,1],type = "b",main="sd(residuals) of each model")
  plot(Iter[-1,2],type="b",main="Accumulate sd(residuals)")
  plot(residuals(Mod.Train),pch=20,main="Original Model",ylim=c(y.l,y.u))
  abline(h=0,col=2)
  plot(residuals(Mod.Train1),pch=20,main="Weighted Model",ylim=c(y.l,y.u))
  abline(h=0,col=2)
  return(tail(Iter))
  

}

## 
Iter.Weighted("CRO",7) # w=7.565 # differences are noticiable 
Iter.Weighted("CSH",4) # w=0.82
Iter.Weighted("DBF",7) # w=2.239
Iter.Weighted("ENF",7) # w=1.775 
Iter.Weighted("GRA",10) # w=1.54 
Iter.Weighted("MF",4)  # w=0.91
Iter.Weighted("OSH",7) # w=0.411
Iter.Weighted("WET",5) # w=1.58

# A term has fewer unique covariate combinations than specified maximum degrees of freedom
Iter.Weighted("WSA",3)  ## 4 sites
Iter.Weighted("SAV",1)  ## 1 site
Iter.Weighted("EBF",3)  ## 3 sites 
Species.Weight <- c("WSA","SAV","EBF")
D.Weight <- subset(Train.data, !(species %in% Species.Weight))

Mod.NoWeight <- gam(flux ~ s(elevation,bs="tp")
                  + s(airtemp,bs="tp")
                  + ti(elevation,airtemp) 
                  + s(lat,long,bs="ts"),
                  data=D.Weight,
                  na.action = na.exclude)

WEIGHTS <- c(7.565, 0.82, 2.239, 1.775, 1.54, 0.91, 0.411, 1.58)
TIMES <- table(as.character(D.Weight$species))
Mod.Weight <- gam(flux ~ s(elevation,bs="tp")
                  + s(airtemp,bs="tp")
                  + ti(elevation,airtemp) 
                  + s(lat,long,bs="ts"),
                  weights = rep(WEIGHTS,TIMES),
                  data=D.Weight,
                  na.action = na.exclude)

par(mfrow=c(1,2))
plot(residuals(Mod.NoWeight),ylim=c(-20,14),pch=20)
abline(h=0,col=2)
plot(residuals(Mod.Weight),ylim=c(-20,14),pch=20)
abline(h=0,col=2)

Meeting Notes of May 23 2017

  1. All weights = 1
  2. Fit the model with all species.
  3. Calculate the sd by spedcies.
  4. Reweight by species. Repeat 1 - 3 until all weighted sd are close to 1. If that doesn’t happen, then abandon the bad ones.

Plot 11 curves together.

K=7
Rep.Spe <- table(Train.data$species)
Train.data.sort.spe <- sort(Train.data,f=~ species)

Mod.Train.all <- gam(flux ~ s(elevation,bs="tp",k=K)
                  + s(airtemp,bs="tp")
                  + ti(elevation,airtemp,k=K) 
                  + s(lat,long,bs="ts",k=K),
                  data=Train.data.sort.spe,
                  na.action = na.exclude)
  Res.Train <- residuals(Mod.Train.all)
  SD.all <- aggregate(Res.Train,list(Train.data$species),sd,na.rm=T)
  all(abs(SD.all$x-1)<0.2)
## [1] FALSE
  SD.all.current <- SD.all$x
  SD.current.mat <- cbind(SD.all[,1],1,SD.all[,2])
  SD.cumulate.mat <- cbind(SD.all[,1],1,SD.all[,2])
  
  i = 1
  while(any(abs(SD.current.mat[,i+2]-1)>0.1) && i < 10){
    Wei <- 1/(SD.cumulate.mat[,i+2])
    #Wei <- SD.cumulate.mat[,i+2]
    Mod.Train.all1 <- gam(flux ~ s(elevation,bs="tp",k=K)
                  + s(airtemp,bs="tp")
                  + ti(elevation,airtemp,k=K) 
                  + s(lat,long,bs="ts",k=K),
                  data=Train.data.sort.spe,
                  weights = rep(Wei,Rep.Spe),
                  na.action = na.exclude)
    SD.current1 <- aggregate(residuals(Mod.Train.all1),list(Train.data.sort.spe$species),sd,na.rm=T)$x
    SD.current.mat <- cbind(SD.current.mat,SD.current1)
    SD.cumulate.mat <- cbind(SD.cumulate.mat,SD.cumulate.mat[,i+2]*SD.current1) 
    i = i+1
  }
  
  rownames(SD.current.mat) <- SD.all$Group.1
  rownames(SD.cumulate.mat) <- SD.all$Group.1
  
  SD.current.mat
##                    SD.current1 SD.current1 SD.current1 SD.current1
## CRO  1 1 1.8807877   2.2815558   1.5365464   1.2539004   1.1287779
## CSH  2 1 1.6996088   0.9102803   0.9045778   0.9219108   0.9424108
## DBF  3 1 1.4350301   1.6584936   1.3298278   1.1814243   1.1045327
## EBF  4 1 2.6872641   0.6986765   0.7949216   0.8729979   0.9255682
## ENF  5 1 1.9332676   1.1306433   1.0454558   1.0150689   1.0041313
## GRA  6 1 1.4949151   1.3119855   1.1118459   1.0372933   1.0089011
## MF   7 1 2.2843708   0.8589128   0.9125857   0.9502833   0.9731040
## OSH  8 1 2.2841389   0.6483971   0.7674227   0.8480988   0.9018695
## SAV  9 1 0.6801692   0.5535177   0.7389239   0.8577099   0.9252731
## WET 10 1 1.8153329   1.0449904   1.0383341   1.0318825   1.0253295
## WSA 11 1 1.1057546   0.8346410   0.9006926   0.9463136   0.9738830
##     SD.current1
## CRO   1.0681855
## CSH   0.9597870
## DBF   1.0618256
## EBF   0.9577808
## ENF   1.0004263
## GRA   0.9987603
## MF    0.9859413
## OSH   0.9371762
## SAV   0.9614874
## WET   1.0192159
## WSA   0.9890670
  SD.cumulate.mat
##     [,1] [,2]      [,3]      [,4]      [,5]      [,6]      [,7]      [,8]
## CRO    1    1 1.8807877 4.2911221 6.5935081 8.2676024 9.3322872 9.9686136
## CSH    2    1 1.6996088 1.5471205 1.3994908 1.2902057 1.2159038 1.1670087
## DBF    3    1 1.4350301 2.3799883 3.1649747 3.7391781 4.1300444 4.3853871
## EBF    4    1 2.6872641 1.8775283 1.4924879 1.3029388 1.2059587 1.1550441
## ENF    5    1 1.9332676 2.1858361 2.2851951 2.3196305 2.3292135 2.3302065
## GRA    6    1 1.4949151 1.9613069 2.1806710 2.2619954 2.2821296 2.2793005
## MF     7    1 2.2843708 1.9620752 1.7905617 1.7015409 1.6557762 1.6324981
## OSH    8    1 2.2841389 1.4810289 1.1365753 0.9639281 0.8693373 0.8147222
## SAV    9    1 0.6801692 0.3764857 0.2781943 0.2386100 0.2207794 0.2122767
## WET   10    1 1.8153329 1.8970053 1.9697253 2.0325250 2.0840079 2.1240540
## WSA   11    1 1.1057546 0.9229082 0.8312566 0.7866294 0.7660850 0.7577093
  ## Compare residuals
  par(mfrow=c(1,2))
  plot(Res.Train,pch=20,cex=0.3)
  abline(h=0, col=2)
  abline(v=cumsum(Rep.Spe),col="pink",lty="dashed")
  
  plot(residuals(Mod.Train.all1),pch=20,cex=0.3)
  abline(h=0, col=2)
  abline(v=cumsum(Rep.Spe),col="pink",lty="dashed")

  ## Show the current sd and acumulated sd step by step
  Col = rainbow(11)
  
  N <- dim(SD.current.mat)[2]-2
  plot(1:N,SD.current.mat[1,-c(1,2)],type="l",ylim=c(0,3.5),xlab="Iteration Time",ylab="Standard Error",main="Current SE of Each Species",col=Col[1])
  abline(h=1,col=1,lty="dashed")
  for(i in 2:11){
    lines(1:N,SD.current.mat[i,-c(1,2)],col=Col[i])
  }
  
  legend("topright",legend = SD.all$Group.1, col=Col,pch=20,cex=0.7)

  plot(1:N,SD.cumulate.mat[1,-c(1,2)],type="l",ylim=c(0,10),
       xlab="Iteration Time",ylab="Standard Error",
       main="Acumulate SE of Each Species",col=Col[1])
  abline(h=1,col=1,lty="dashed")
  for(i in 2:11){
    lines(1:N,SD.cumulate.mat[i,-c(1,2)],col=Col[i])
  }
  
  legend("topleft",legend = SD.all$Group.1, col=Col,pch=20,cex=0.7)

Comparisons

Method A: Modeling each species locally and iteratively modify the weights.

Method B: Modeling all the species together, calculate the s.e. by each species, and iteratively modify the weights.

Method.A <- c(7.565,0.82,2.239,NA,1.775,1.54,0.91,0.411,NA,1.58,NA) 
Method.B <- SD.cumulate.mat[,dim(SD.cumulate.mat)[2]]

SE <- cbind(Method.A,Method.B)
SE
##     Method.A  Method.B
## CRO    7.565 9.9686136
## CSH    0.820 1.1670087
## DBF    2.239 4.3853871
## EBF       NA 1.1550441
## ENF    1.775 2.3302065
## GRA    1.540 2.2793005
## MF     0.910 1.6324981
## OSH    0.411 0.8147222
## SAV       NA 0.2122767
## WET    1.580 2.1240540
## WSA       NA 0.7577093