\[ \]
There are 5 spots which have been selected.
\[ \]
Lat: 53.6289 / Long: -106.1978
Time range: 2003 – 2008
Lat: 53.9163 / Long: -104.6920
Time range: 2003 – 2008
Lat: 53.9872 / Long: -105.1178
Time range: 2003 – 2008
Lat: 53.8758 / Long: -104.6453
Time range: 2004 – 2006
Lat:49.26708 / Long:-74.03653
Time range: 2003 – 2010
\[ \]
NDVI <- OldAspen_NDVI
Indi0 <- c(rep("W",5),rep("S",15),rep("W",3))
Indi <- rep(Indi0,6)
Time <- rep(seq(1,366,by=16),6)
OldAspen.m <- gam(OldAspen_16day ~ s(Time, bs="cc", k=12) + NDVI + Indi)
gam.check(OldAspen.m)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 7 iterations.
## The RMS GCV score gradiant at convergence was 4.033404e-05 .
## The Hessian was positive definite.
## The estimated model rank was 13 (maximum possible: 13)
## Model rank = 13 / 13
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(Time) 10.00 9.39 1.20 0.98
#summary(OldAspen.m)
#OldAspen_var <- var(OldAspen.m$residuals)
OldAspen_var <- var(residuals(OldAspen.m))
#plot(OldAspen.m,residuals = TRUE,pch=19)
#plot(OldAspen.m$fitted.values,pch=19)
#lines(OldAspen.m$fitted.values)
NDVI <- OldJackPine_NDVI
Indi0 <- c(rep("W",5),rep("S",15),rep("W",3))
Indi <- rep(Indi0,6)
Time <- rep(seq(1,366,by=16),6)
OldJackPine.m <- gam(OldJackPine_16day ~ s(Time, bs="cc", k=12) + NDVI + Indi)
gam.check(OldJackPine.m)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 6 iterations.
## The RMS GCV score gradiant at convergence was 3.086343e-07 .
## The Hessian was positive definite.
## The estimated model rank was 13 (maximum possible: 13)
## Model rank = 13 / 13
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(Time) 10.00 8.05 1.13 0.94
#summary(OldJackPine.m)
OldJackPine_var <- var(residuals(OldJackPine.m))
NDVI <- Southern_NDVI
Indi0 <- c(rep("W",5),rep("S",15),rep("W",3))
Indi <- rep(Indi0,6)
Time <- rep(seq(1,366,by=16),6)
Southern.m <- gam(Southern_16day ~ s(Time, bs="cc", k=12)+NDVI +Indi)
gam.check(Southern.m)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 6 iterations.
## The RMS GCV score gradiant at convergence was 4.179887e-07 .
## The Hessian was positive definite.
## The estimated model rank was 13 (maximum possible: 13)
## Model rank = 13 / 13
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(Time) 10.00 7.83 1.17 0.99
#summary(Southern.m)
Southern_var <- var(residuals(Southern.m))
NDVI <- YoungJackPine_NDVI
Indi0 <- c(rep("W",5),rep("S",15),rep("W",3))
Indi <- rep(Indi0,3)
Time <- rep(seq(1,366,by=16),3)
YoungJackPine.m <- gam(YoungJackPine_16day ~ s(Time, bs="cc", k=6) + NDVI + Indi)
gam.check(YoungJackPine.m)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 7 iterations.
## The RMS GCV score gradiant at convergence was 8.30027e-07 .
## The Hessian was positive definite.
## The estimated model rank was 7 (maximum possible: 7)
## Model rank = 7 / 7
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(Time) 4.00 3.59 1.07 0.66
#summary(YoungJackPine.m)
#YoungJackPine_var <- var(YoungJackPine.m$residuals)
YoungJackPine_var <- var(residuals(YoungJackPine.m))
NDVI <- QC2000_NDVI
Indi0 <- c(rep("W",5),rep("S",15),rep("W",3))
Indi <- rep(Indi0,8)
Time <- rep(seq(1,366,by=16),8)
QC2000.m <- gam(QC2000_16day ~ s(Time, bs="cc", k=12) + NDVI + Indi)
gam.check(QC2000.m)
##
## Method: GCV Optimizer: magic
## Smoothing parameter selection converged after 10 iterations.
## The RMS GCV score gradiant at convergence was 1.120602e-07 .
## The Hessian was positive definite.
## The estimated model rank was 13 (maximum possible: 13)
## Model rank = 13 / 13
##
## Basis dimension (k) checking results. Low p-value (k-index<1) may
## indicate that k is too low, especially if edf is close to k'.
##
## k' edf k-index p-value
## s(Time) 1.00e+01 9.19e-09 1.16e+00 0.98
#summary(QC2000.m)
#QC2000_var <- var(QC2000.m$residuals)
QC2000_var <- var(residuals(QC2000.m))
FlxTwr <- c(OldAspen_16day,OldJackPine_16day,Southern_16day,YoungJackPine_16day,QC2000_16day)
ndvi <- c(OldAspen_NDVI,OldJackPine_NDVI,Southern_NDVI,YoungJackPine_NDVI,QC2000_NDVI)
time <- rep(seq(1,365,by=16),29)
Indi0 <- c(rep("W",5),rep("S",15),rep("W",3))
indi <- rep(Indi0,29)
spe <- c(rep("OldAspen",length(OldAspen_NDVI)),rep("OldJackPine",length(OldJackPine_NDVI)),
rep("BlackSpr",length(Southern_NDVI)),rep("YoungJackPine",length(YoungJackPine_NDVI)),rep("BlackSpr",length(QC2000_NDVI)) )
Weight <- c(rep(1/OldAspen_var,138),rep(1/OldJackPine_var,138),rep(1/Southern_var,138),rep(1/YoungJackPine_var,69),rep(1/QC2000_var,184))
w <- Weight/mean(Weight)
D <- data.frame(Flx=FlxTwr,Time=time,NDVI=ndvi,Spe=spe,W=w,Indi=indi)
rownames(D) <- paste0("row",1:nrow(D))
Mix <- gam(Flx ~ s(Time,bs="cc",k=23)+s(Time,bs="cc",k=23,by=Spe)+ NDVI + Indi ,weights = W,data = D)
Diff <- D[setdiff(rownames(D),names(predict(Mix))),]
Diff.row <- as.numeric(substr(rownames(Diff),4,6))
Mplot <- function(data,name){
label <- c("Jan 1","Jan 17","Feb 2","Feb 18","Mar 6","Mar 22","April 7","April 23","May 9","May 25","Jun 10","Jun 26","July 12","July 28","Aug 13","Aug 29","Sep 14","Sep 30","Oct 16","Nov 1","Nov 17","Dec 3","Dec 19")
monthplot(data,phase = seq_along(data)%%23+1,labels=label,ylab="FlxTwr")
abline(v=9,lty="dashed")
title(name)
}
daytime <- seq(1,365,by=16)
OA <- matrix(OldAspen_16day,nrow=23)
OAbasis <- create.bspline.basis(c(1,365),nbasis=13,norder=4,breaks=c(1,120,130,140,170,200,220,250,260,270,365))
OAbasismat <- eval.basis(daytime, OAbasis)
OAcoef <- solve(crossprod(OAbasismat,OAbasismat))%*%crossprod(OAbasismat,OA)
everyday <- 1:365
yhat <- eval.basis(everyday,OAbasis)%*%OAcoef
## Plot the 1st derivative for each year
Plot_1st_deri <- function(){
OAbasismat_1st <- eval.basis(everyday,OAbasis,1)
y_1st <- OAbasismat_1st%*%OAcoef
for(i in 1:6){
plot(yhat[,i],type="l",ylim=c(-6,3),main=paste("First derivative of Year",i))
points(y_1st[,i]*10)
abline(h=0,col=2)
}
}
## Mapping based on T* and G's
App <- function(TStars,Gpoints,eval.points=everyday){
approx(c(1,TStars,365),c(1,Gpoints,365),eval.points)$y
}
## Find the registed value of the curve
Regi <- function(Ti,yi,basis=OAbasis,basismat=OAbasismat){
OAcoef <- solve(crossprod(basismat,basismat))%*%crossprod(basismat,yi)
eval.basis(Ti,basis)%*%OAcoef
}
#Gmat <- Gmat_3landmarks # read the landmarks located by locator()
Gmat0 <- read.csv("/Users/fhe7/Documents/School/Oct_15 Gammon/G3landmarks.csv",header=T)
Gmat <- Gmat0[,-1]
## T*
TStars <- apply(Gmat,2,mean)
## Use "Map" to find the True time corresponding to the time in T* scale
Tmat <- matrix(NA,nrow=6,ncol=365)
for(i in 1:6){
Tmat[i,] <- App(TStars,Gmat[i,])
}
## Get the registed curve value based on the True time calulated above
Regimat <- matrix(NA,nrow=6,ncol=365)
for(i in 1:6){
Regimat[i,] <- Regi(Tmat[i,],OA[,i])
}
[1] Pick some of the T* (let’s say \(n\) of them, \(n = 10\) or more).
Evaluate each year at T*\(_1\) till T*\(_n\).
[2] Choose \(m\) landmarks.
[3] Each curve now has \(m+n\) values.
[4] Find the joint distribution.
[4-1] obs number \(= (n+m) \times 6\), as for the unobserved year 7, sample from multivariate normal
[4-2] Raw points \(\rightarrow\) Regi curve (shift) \(\rightarrow\) Fit again (fewer knots?) \(\rightarrow\) using the coefs?
TStarBody <- c(1,40,80,120.8694,137,154,174.1377,200,225,250,270.3034,300,330,365)
As we can see from above, that the simulated curves only catch some of the feature of the original ones.
The problem probably lies in that the samples that I chose from T* is no enough.
Then more points get sampled as below:
TSB1 <- sort(c(seq(1,365,by=16),TStars,365))
Details:
[1] Input is the one vector of sampled data from T*.
[2] Using the same ``map" to find the True time under T* scale
[3] Then using multivariate normal to find the registed value for each the sampled data
NextYear <- function(SampleOfTstars){
Tmat.new <- matrix(NA,nrow=6,ncol=length(SampleOfTstars))
for(i in 1:6){
Tmat.new[i,] <- App(TStars,Gmat[i,],SampleOfTstars)
}
Regimat.new <- matrix(NA,nrow=6,ncol=length(SampleOfTstars))
for(i in 1:6){
Regimat.new[i,] <- Regi(Tmat.new[i,],OA[,i]) ## looking at line 468
}
mean.vector <- apply(Regimat.new,2,mean)
var.mat <- var(Regimat.new)
mvrnorm(n=1,mu=mean.vector,Sigma=var.mat)
}
year7 <- NULL
pred.x <- 1:365
for(i in 1:100){
year7 <- rbind(year7,NextYear(pred.x))
}
Example:
## SEED = 18,20,21,57,59
## 2 low vallis 48,51
set.seed(48)
Year7 <- NextYear(pred.x)
plot(pred.x,Year7,type="l",ylim=c(-7,5), main="Special simulation of year 7",xlab="Registed time of the year",axes = FALSE,ylab="FlxTwr CO2 Data (umol/m2/s)")
labRegi <- c("Jan","Feb","March","April","May","June","July","Aug","Sep","Oct","Nov","Dec")
axis(side=1, at=c(1,32,60,91,121,152,182,213,244,274,305,335), labels= labRegi, las=1)
axis(side=2, at=seq(-7,5,by=2), lab=seq(-7,5,by=2), las=2)
abline(v=TSB1[c(9,13,20)],col=2)
NewExtrems <- c(120.1702,173.5792,255.1763)
TYear7 <- App(TStars,NewExtrems,eval.points = pred.x)
Bmat <- eval.basis(pred.x,OAbasis)
RegiYear7 <- Regi(TYear7,Year7,basis=OAbasis,basismat=Bmat)
plot(pred.x,RegiYear7,type="l",ylim=c(-7,5), main="Registration of Special simulation of year 7",xlab="Registed time of the year",axes = FALSE,ylab="FlxTwr CO2 Data (umol/m2/s)")
labRegi <- c("Jan","Feb","March","April","May","June","July","Aug","Sep","Oct","Nov","Dec")
axis(side=1, at=c(1,32,60,91,121,152,182,213,244,274,305,335), labels= labRegi, las=1)
axis(side=2, at=seq(-7,5,by=2), lab=seq(-7,5,by=2), las=2)
abline(v=TSB1[c(9,13,20)],col=2)
Why the curve has not been registed as expected.
[1] The simulated curve is not create by b-spline regression fit, but a simulation of multivariate distribution. Before, the 6 years curve is created by b-spline regression.
[2] If keep using the Regi() function to produce the registed curve it is not apply to the multivarite simulation but the red circles showed below.
OAcoef <- solve(crossprod(Bmat,Bmat))%*%crossprod(Bmat,Year7)
y <- eval.basis(pred.x,OAbasis)%*%OAcoef
plot(pred.x,Year7,type="l",ylim=c(-7,5), main="Special simulation of year 7",xlab="Registed time of the year",axes = FALSE,ylab="FlxTwr CO2 Data (umol/m2/s)")
labRegi <- c("Jan","Feb","March","April","May","June","July","Aug","Sep","Oct","Nov","Dec")
axis(side=1, at=c(1,32,60,91,121,152,182,213,244,274,305,335), labels= labRegi, las=1)
axis(side=2, at=seq(-7,5,by=2), lab=seq(-7,5,by=2), las=2)
abline(v=TSB1[c(9,13,20)],col=2)
points(y,col=2,cex=0.5)
One way to solve this problem is linear interpolation
plot(pred.x,approx(pred.x,Year7,TYear7)$y,main="Registration of Special simulation of year 7",xlab="Registed time of the year",axes = FALSE,ylab="FlxTwr CO2 Data (umol/m2/s)")
labRegi <- c("Jan","Feb","March","April","May","June","July","Aug","Sep","Oct","Nov","Dec")
axis(side=1, at=c(1,32,60,91,121,152,182,213,244,274,305,335), labels= labRegi, las=1)
axis(side=2, at=seq(-7,5,by=2), lab=seq(-7,5,by=2), las=2)
abline(v=TSB1[c(9,13,20)],col=2)
Data saved in Regimat: 6x365
[1] Smooth all 6 curves
[2] Register 6 curves, save the registed curves in matrix – Regimat (6x365)
[3] Smooth again based on the registed curves (sample the value at 1,17,…)
[4] Get the re-smoothed curves, save the coef in matrix – Regicoef (13x6)
[5] Generate next year through matrix Regicoef
#daytime <- seq(1,365,by=16)
OAbasis <- create.bspline.basis(c(1,365),nbasis=13,norder=4,breaks=c(1,120,130,140,170,200,220,250,260,270,365))
Regibasismat <- eval.basis(daytime, OAbasis)
Regicoef <- solve(crossprod(OAbasismat,OAbasismat))%*%crossprod(OAbasismat,t(Regimat[,daytime]))
everyday <- 1:365
yRegihat <- eval.basis(everyday,OAbasis)%*%Regicoef
coef.predict <- mvrnorm(n=1,mu=apply(Regicoef,1,mean),Sigma=var(t(Regicoef)))
y.predict <- eval.basis(everyday,OAbasis)%*%coef.predict
landmark_sim <-mvrnorm(n=10000,mu=TStars,Sigma=var(Gmat))
hist(landmark_sim[,1],xlim=c(0,365),main="Histogram of 3 simulated landmarks \n rep = 10000",col=1,breaks=30,xlab="3 landmarks")
hist(landmark_sim[,2],add=T,col=2,breaks=30)
hist(landmark_sim[,3],add=T,col=4,breaks=30)
sum(landmark_sim[,1]>landmark_sim[,2])
## [1] 3
backto_T <- NULL
for(i in 1:50){
backto_T <- rbind(backto_T,App(TStars,landmark_sim[i,],1:365))
}
[1] Regicoef (13 x 6)
[2] Gmat (6 x 3)
[3] Combine these 2 matrices together and simulate data from MVN
[4] Then use the same way (only different number) back to real time scale.
CoefGmat <- cbind(t(Regicoef),Gmat)
Toge_sim <-mvrnorm(n=50,mu=apply(CoefGmat,2,mean),Sigma=var(CoefGmat))
## Toge_sim (50 x (13 + 3)) will be used in the Together() as input
Together <- function(data){
y.predict <- eval.basis(everyday,OAbasis)%*%data[1:13]
backto_T <- App(TStars,data[14:16],1:365)
list(pred=y.predict,time=backto_T)
}
plot(yhat[,1],type="l",ylim=c(-7,5),main="15 simulated years",
axes = FALSE,xlab="Real time of the year",ylab="FlxTwr CO2 Data (umol/m2/s)")
labRegi <- c("Jan","Feb","March","April","May","June","July","Aug","Sep","Oct","Nov","Dec")
axis(side=1, at=c(1,32,60,91,121,152,182,213,244,274,305,335), labels= labRegi, las=1)
axis(side=2, at=seq(-7,5,by=2), lab=seq(-7,5,by=2), las=2)
for(i in 2:6){
lines(yhat[,i])
}
for(i in 1:15){
simulation <- Together(Toge_sim[i,])
lines(simulation[[2]],simulation[[1]],col=i,lty="dashed")
}