## Loading required package: nlme
## This is mgcv 1.8-15. For overview type 'help("mgcv-package")'.
library(RgoogleMaps)
for(i in names(table(SiteInfo$Vegetation))){
Lat <- SiteInfo$Lat[SiteInfo$Vegetation==i]
Long <- SiteInfo$Long[SiteInfo$Vegetation==i]
## Plot sampling points on map ----
MyMap <- GetMap.bbox(lonR = range(SiteInfo$Long), latR = range(SiteInfo$Lat),size=c(640,640), maptype = "terrain")
# options for map type are "roadmap","mobile","satellite","terrain" and "hybrid"
PlotOnStaticMap(MyMap)
convert_points <- LatLon2XY.centered(MyMap, Lat, Long)
points(convert_points$newX, convert_points$newY, col = 'red', pch=19)
NAMES <- SiteInfo$Site[SiteInfo$Vegetation==i]
text(convert_points$newX, convert_points$newY+16,NAMES,cex=0.6)
text(min(SiteInfo$Lat),min(SiteInfo$Long),paste("Species is",i))
}
length(SiteInfo$Vegetation)
## [1] 157
table(SiteInfo$Vegetation)
##
## CRO CSH DBF EBF ENF GRA MF OSH SAV WET WSA
## 19 6 17 3 50 23 6 17 1 11 4
length(unique(SiteInfo$Vegetation))
## [1] 11
#EachSpecies <- names(sort(table(SiteInfo$Vegetation),decreasing = T))
# SiteInfo$MaintainYear <- SiteInfo$End-SiteInfo$Start +1
# TrainSelection <- sort(SiteInfo,f= ~ Vegetation - MaintainYear)
EachSpecies <- names(sort(table(SiteInfo$Vegetation),decreasing = T))
for(i in EachSpecies){
Index <- (Full.Data$species==i)
SubData <- Full.Data[Index,]
SortData <- sort(SubData,f= ~ elevation)
SiteName <- unique(as.character(SortData$site))
SiteNumber <-table(as.character(SortData$site))
D = data.frame(SiteNumber)[rank(SiteName),]
SiteMarker <- cumsum(D$Freq)
X <- SortData$flux
SubInfo <- SiteInfo[SiteInfo$Vegetation==i,]
SortInfo <- sort(SubInfo, f=~ Elevation.m.)
plot(X,pch=20,cex=0.3,main=i,ylab=i,ylim=c(-20,6))
abline(v=SiteMarker,lty="dashed",col="grey")
text(SiteMarker-0.5*SiteNumber[rank(SiteName)],4, SiteName)
text(SiteMarker-0.5*SiteNumber[rank(SiteName)],-15,as.character(SortInfo$Elevation.m.))
print(round(SortInfo$Elevation.m.,0))
}
Mod.Train1 <- gam(flux ~ s(year,bs="cc",k=23)
+s(year,bs="cc",k=23,by=species,m=1)
+species+s(lat,bs="cr")
+s(long,bs="cr")
+indicator, data=Train.data)
summary(Mod.Train1)
PlotModRes(residuals(Mod.Train1),pch=20,cex=0.3)
Pred1 <- predict(Mod.Train1,Test.data[,-c(1,2)])
PlotPredRes(Pred1)
Mod.Train2 <- gam(flux ~ s(year,bs="cc",k=23)
+s(year,bs="cc",k=23,by=species,m=1)
+species+s(lat,bs="cr")
+s(long,bs="cr")
+indicator
+s(airtemp,bs="cr",k=23),data=Train.data,
na.action = na.exclude)
summary(Mod.Train2)
PlotModRes(residuals(Mod.Train2))
Pred2 <- predict(Mod.Train2,Test.data[,-c(1,2)])
PlotPredRes(Pred=Pred2,Obs=Obs)
# Mod.Train3 <- gam(flux ~ s(year,bs="cs",k=5) + species
# + s(year,bs="cr",k=15,by=species,m=1)
# +s(lat,bs="cr")
# + long
# + indicator
# + elevation
# + s(dayyear,bs="cc",k=15)
# +s(airtemp,bs="cc",k=15),data=Train.data,
# na.action = na.exclude)
Mod.Train3 <- gam(flux ~ year
+ species
+ lat
+ long
+ indicator
+ elevation
+ dayyear
+ airtemp
+ year:species + year:lat + year:indicator + year:elevation + year:airtemp
+ species:lat + species:long + species:indicator + species:elevation + species:airtemp
+ lat:long + lat:indicator + lat:elevation + lat:dayyear + lat:airtemp
+ long:indicator + long:elevation + long:airtemp
+ indicator:elevation + indicator:dayyear + indicator:airtemp
+ elevation:dayyear + elevation:airtemp,
data=Train.data,
na.action = na.exclude)
summary(Mod.Train3)
PlotModRes(residuals(Mod.Train3))
Pred3 <- predict(Mod.Train3,Test.data[,-c(1,2)])
PlotPredRes(Obs,Pred3)
Mod.Train4 <- gam(flux ~ s(year,bs="tp",k=10)
+ species
+ s(lat,bs="tp",k=10)
+ long
+ indicator
+ s(elevation,bs="tp",k=10)
+ s(dayyear,bs="cc",k=10)
+ s(airtemp,bs="tp",k=10)
+ species:lat + s(long,by=species) + species:indicator + s(elevation,by=species) + s(airtemp,by=species)
+ long:elevation
+ indicator:airtemp
+ elevation:airtemp,
data=Train.data,
na.action = na.exclude)
summary(Mod.Train4)
PlotModRes(residuals(Mod.Train4))
Pred4 <- predict(Mod.Train4,Test.data[,-c(1,2)])
PlotPredRes(Obs,Pred4)
Mod.Train5 <- gam(flux ~ s(year,bs="tp",k=10)
+ s(lat,k=10)
+ s(long, k=10)
+ s(elevation,k=10)
+ s(airtemp,by=species,k=10) # use full
+ s(dayyear,bs="cc",k=10)
+ s(airtemp,k=10),
data=Train.data,
na.action = na.exclude)
plot(Mod.Train5)
summary(Mod.Train5)
Mod.Train6 <- gam(flux ~
#+ s(dayyear,bs="cc",k=10)
s(elevation,bs="tp")
+ s(airtemp,bs="tp")
+ ti(elevation,airtemp) # to use this the main effect has to be smoothed?
+ s(lat,long,bs="ts") # increased 3%
# + s(airtemp,by=species,k=10) # EBF:weired
,
data=Train.data,
na.action = na.exclude)
plot(Mod.Train6)
summary(Mod.Train6)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## flux ~ s(elevation, bs = "tp") + s(airtemp, bs = "tp") + ti(elevation,
## airtemp) + s(lat, long, bs = "ts")
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.97110 0.01875 -51.79 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(elevation) 8.898 8.995 57.16 <2e-16 ***
## s(airtemp) 8.818 8.990 421.55 <2e-16 ***
## ti(elevation,airtemp) 15.776 15.975 78.14 <2e-16 ***
## s(lat,long) 28.496 29.000 94.83 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.409 Deviance explained = 41.2%
## GCV = 3.2333 Scale est. = 3.2191 n = 14337
Pred6 <- predict(Mod.Train6,Test.data[,-c(1,2)])
PlotPredRes(Obs,Pred6)
Res.train <- residuals(Mod.Train6)
Res.test <- Pred6 - Test.data$flux
PlotResTo(Res.train,Res.test)
Test.Site.noEBF <- c("US-Bo1","US-Ne1","US-ARM","US-Ro1", # CRO
"US-Ced", # CSH
"US-Ha1","US-WCr","US-MMS", # DBF
"US-Ho1","US-Wrc","US-NR1","CA-TP1","US-GLE","US-Me2","US-SP1", # ENF
"US-Var","US-Wkg","US-FPe", # GRA
"US-PFa", # MF
"US-Whs","US-Ses","US-SRC", # OSH
"US-Los","US-Brw", # WET
"US-Ton" # WSA
)
Full.Data.noEBF <- Full.Data[-which(Full.Data$species=="EBF"),]
Train.data.noEBF <- subset(Full.Data.noEBF,!(site %in% Test.Site.noEBF))
Test.data.noEBF <- subset(Full.Data.noEBF,(site %in% Test.Site.noEBF))
Obs.noEBF <- Test.data.noEBF[,2]
Mod.Train7 <- gam(flux ~ #s(year,bs="tp",k=10)
# + s(dayyear,bs="cc",k=10)
# + s(elevation,bs="tp")
# + s(airtemp,bs="tp")
# + species
# to use this the main effect has to be smoothed? No
+ ti(elevation,airtemp)
+ s(lat,long,bs="tp") # "tp": isotropy
+ s(airtemp,by=species,k=10) # EBF:weired
,
data=Train.data.noEBF,
na.action = na.exclude)
plot(Mod.Train7)
summary(Mod.Train7)
You must enable Javascript to view this page properly.
You must enable Javascript to view this page properly.
You must enable Javascript to view this page properly.
You must enable Javascript to view this page properly.
Iter.Weighted <- function(Spe,K,tolerance=0.01,n=30){
Train.sub <- Train.data[Train.data$species==Spe,]
Mod.Train <- gam(flux ~ s(elevation,bs="tp",k=K)
+ s(airtemp,bs="tp")
+ ti(elevation,airtemp,k=K)
+ s(lat,long,bs="ts",k=K),
data=Train.sub,
na.action = na.exclude)
Res.Train <- residuals(Mod.Train)
SD.current0 <- sd(Res.Train,na.rm=T)
SD.Total0=SD.current0
Train.sub1 <- Train.sub
Dim <- dim(Train.sub1[,2:5])
Iter <- matrix(c(0,0,SD.current0,SD.Total0),nrow=2,byrow = T)
i = 1
while(i < n && abs(Iter[i+1,1] - Iter[i,1])>tolerance){
Mod.Train1 <- gam(flux ~ s(elevation,bs="tp",k=K)
+ s(airtemp,bs="tp")
+ ti(elevation,airtemp,k=K)
+ s(lat,long,bs="ts",k=K),
weights = rep(1/Iter[i+1,2],Dim[1]),
data=Train.sub1,
na.action = na.exclude)
Res.train1 <- residuals(Mod.Train1)
SD.current <- sd(Res.train1,na.rm=T)
SD.Total <- SD.current*Iter[i+1,2]
Iter <- rbind(Iter,c(SD.current,SD.Total))
i <- i+1
}
R <- rbind(range(Res.train,na.rm = T),range(Res.train1,na.rm = T))
y.l <- min(R[,1])
y.u <- max(R[,2])
par(mfrow=c(2,2))
plot(Iter[-1,1],type = "b",main="sd(residuals) of each model")
plot(Iter[-1,2],type="b",main="Accumulate sd(residuals)")
plot(residuals(Mod.Train),pch=20,main="Original Model",ylim=c(y.l,y.u))
abline(h=0,col=2)
plot(residuals(Mod.Train1),pch=20,main="Weighted Model",ylim=c(y.l,y.u))
abline(h=0,col=2)
return(tail(Iter))
}
##
Iter.Weighted("CRO",7) # w=7.565 # differences are noticiable
Iter.Weighted("CSH",4) # w=0.82
Iter.Weighted("DBF",7) # w=2.239
Iter.Weighted("ENF",7) # w=1.775
Iter.Weighted("GRA",10) # w=1.54
Iter.Weighted("MF",4) # w=0.91
Iter.Weighted("OSH",7) # w=0.411
Iter.Weighted("WET",5) # w=1.58
# A term has fewer unique covariate combinations than specified maximum degrees of freedom
Iter.Weighted("WSA",3) ## 4 sites
Iter.Weighted("SAV",1) ## 1 site
Iter.Weighted("EBF",3) ## 3 sites
Species.Weight <- c("WSA","SAV","EBF")
D.Weight <- subset(Train.data, !(species %in% Species.Weight))
Mod.NoWeight <- gam(flux ~ s(elevation,bs="tp")
+ s(airtemp,bs="tp")
+ ti(elevation,airtemp)
+ s(lat,long,bs="ts"),
data=D.Weight,
na.action = na.exclude)
WEIGHTS <- c(7.565, 0.82, 2.239, 1.775, 1.54, 0.91, 0.411, 1.58)
TIMES <- table(as.character(D.Weight$species))
Mod.Weight <- gam(flux ~ s(elevation,bs="tp")
+ s(airtemp,bs="tp")
+ ti(elevation,airtemp)
+ s(lat,long,bs="ts"),
weights = rep(WEIGHTS,TIMES),
data=D.Weight,
na.action = na.exclude)
par(mfrow=c(1,2))
plot(residuals(Mod.NoWeight),ylim=c(-20,14),pch=20)
abline(h=0,col=2)
plot(residuals(Mod.Weight),ylim=c(-20,14),pch=20)
abline(h=0,col=2)
Plot 11 curves together.
K=7
Rep.Spe <- table(Train.data$species)
Train.data.sort.spe <- sort(Train.data,f=~ species)
Mod.Train.all <- gam(flux ~ s(elevation,bs="tp",k=K)
+ s(airtemp,bs="tp")
+ ti(elevation,airtemp,k=K)
+ s(lat,long,bs="ts",k=K),
data=Train.data.sort.spe,
na.action = na.exclude)
Res.Train <- residuals(Mod.Train.all)
SD.all <- aggregate(Res.Train,list(Train.data$species),sd,na.rm=T)
all(abs(SD.all$x-1)<0.2)
## [1] FALSE
SD.all.current <- SD.all$x
SD.current.mat <- cbind(SD.all[,1],1,SD.all[,2])
SD.cumulate.mat <- cbind(SD.all[,1],1,SD.all[,2])
i = 1
while(any(abs(SD.current.mat[,i+2]-1)>0.1) && i < 10){
Wei <- 1/(SD.cumulate.mat[,i+2])
#Wei <- SD.cumulate.mat[,i+2]
Mod.Train.all1 <- gam(flux ~ s(elevation,bs="tp",k=K)
+ s(airtemp,bs="tp")
+ ti(elevation,airtemp,k=K)
+ s(lat,long,bs="ts",k=K),
data=Train.data.sort.spe,
weights = rep(Wei,Rep.Spe),
na.action = na.exclude)
SD.current1 <- aggregate(residuals(Mod.Train.all1),list(Train.data.sort.spe$species),sd,na.rm=T)$x
SD.current.mat <- cbind(SD.current.mat,SD.current1)
SD.cumulate.mat <- cbind(SD.cumulate.mat,SD.cumulate.mat[,i+2]*SD.current1)
i = i+1
}
rownames(SD.current.mat) <- SD.all$Group.1
rownames(SD.cumulate.mat) <- SD.all$Group.1
SD.current.mat
## SD.current1 SD.current1 SD.current1 SD.current1
## CRO 1 1 1.8807877 2.2815558 1.5365464 1.2539004 1.1287779
## CSH 2 1 1.6996088 0.9102803 0.9045778 0.9219108 0.9424108
## DBF 3 1 1.4350301 1.6584936 1.3298278 1.1814243 1.1045327
## EBF 4 1 2.6872641 0.6986765 0.7949216 0.8729979 0.9255682
## ENF 5 1 1.9332676 1.1306433 1.0454558 1.0150689 1.0041313
## GRA 6 1 1.4949151 1.3119855 1.1118459 1.0372933 1.0089011
## MF 7 1 2.2843708 0.8589128 0.9125857 0.9502833 0.9731040
## OSH 8 1 2.2841389 0.6483971 0.7674227 0.8480988 0.9018695
## SAV 9 1 0.6801692 0.5535177 0.7389239 0.8577099 0.9252731
## WET 10 1 1.8153329 1.0449904 1.0383341 1.0318825 1.0253295
## WSA 11 1 1.1057546 0.8346410 0.9006926 0.9463136 0.9738830
## SD.current1
## CRO 1.0681855
## CSH 0.9597870
## DBF 1.0618256
## EBF 0.9577808
## ENF 1.0004263
## GRA 0.9987603
## MF 0.9859413
## OSH 0.9371762
## SAV 0.9614874
## WET 1.0192159
## WSA 0.9890670
SD.cumulate.mat
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8]
## CRO 1 1 1.8807877 4.2911221 6.5935081 8.2676024 9.3322872 9.9686136
## CSH 2 1 1.6996088 1.5471205 1.3994908 1.2902057 1.2159038 1.1670087
## DBF 3 1 1.4350301 2.3799883 3.1649747 3.7391781 4.1300444 4.3853871
## EBF 4 1 2.6872641 1.8775283 1.4924879 1.3029388 1.2059587 1.1550441
## ENF 5 1 1.9332676 2.1858361 2.2851951 2.3196305 2.3292135 2.3302065
## GRA 6 1 1.4949151 1.9613069 2.1806710 2.2619954 2.2821296 2.2793005
## MF 7 1 2.2843708 1.9620752 1.7905617 1.7015409 1.6557762 1.6324981
## OSH 8 1 2.2841389 1.4810289 1.1365753 0.9639281 0.8693373 0.8147222
## SAV 9 1 0.6801692 0.3764857 0.2781943 0.2386100 0.2207794 0.2122767
## WET 10 1 1.8153329 1.8970053 1.9697253 2.0325250 2.0840079 2.1240540
## WSA 11 1 1.1057546 0.9229082 0.8312566 0.7866294 0.7660850 0.7577093
## Compare residuals
par(mfrow=c(1,2))
plot(Res.Train,pch=20,cex=0.3)
abline(h=0, col=2)
abline(v=cumsum(Rep.Spe),col="pink",lty="dashed")
plot(residuals(Mod.Train.all1),pch=20,cex=0.3)
abline(h=0, col=2)
abline(v=cumsum(Rep.Spe),col="pink",lty="dashed")
## Show the current sd and acumulated sd step by step
Col = rainbow(11)
N <- dim(SD.current.mat)[2]-2
plot(1:N,SD.current.mat[1,-c(1,2)],type="l",ylim=c(0,3.5),xlab="Iteration Time",ylab="Standard Error",main="Current SE of Each Species",col=Col[1])
abline(h=1,col=1,lty="dashed")
for(i in 2:11){
lines(1:N,SD.current.mat[i,-c(1,2)],col=Col[i])
}
legend("topright",legend = SD.all$Group.1, col=Col,pch=20,cex=0.7)
plot(1:N,SD.cumulate.mat[1,-c(1,2)],type="l",ylim=c(0,10),
xlab="Iteration Time",ylab="Standard Error",
main="Acumulate SE of Each Species",col=Col[1])
abline(h=1,col=1,lty="dashed")
for(i in 2:11){
lines(1:N,SD.cumulate.mat[i,-c(1,2)],col=Col[i])
}
legend("topleft",legend = SD.all$Group.1, col=Col,pch=20,cex=0.7)
Method A: Modeling each species locally and iteratively modify the weights.
Method B: Modeling all the species together, calculate the s.e. by each species, and iteratively modify the weights.
Method.A <- c(7.565,0.82,2.239,NA,1.775,1.54,0.91,0.411,NA,1.58,NA)
Method.B <- SD.cumulate.mat[,dim(SD.cumulate.mat)[2]]
SE <- cbind(Method.A,Method.B)
SE
## Method.A Method.B
## CRO 7.565 9.9686136
## CSH 0.820 1.1670087
## DBF 2.239 4.3853871
## EBF NA 1.1550441
## ENF 1.775 2.3302065
## GRA 1.540 2.2793005
## MF 0.910 1.6324981
## OSH 0.411 0.8147222
## SAV NA 0.2122767
## WET 1.580 2.1240540
## WSA NA 0.7577093