Set WD
#setwd("C:/Users/roloff/Desktop/Projects/SaultTribeMarten")
setwd("~/Michigan_State/Marten2/MartScale_090219")
Load Libraries
Needed.packages = c("rgdal", "raster", "corrplot", "spdep",
"rgeos","maptools","mapproj","GISTools",
"sp","ggplot2","ggthemes","plyr","dplyr",
"kableExtra")
for(p in Needed.packages){
if(!require(p, character.only = TRUE)) install.packages(p)
suppressMessages(library(p, character.only = TRUE))
}
## Loading required package: rgdal
## Loading required package: sp
## rgdal: version: 1.4-4, (SVN revision 833)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.2.3, released 2017/11/20
## Path to GDAL shared files: C:/Program Files/R/R-3.5.3/library/rgdal/gdal
## GDAL binary built with GEOS: TRUE
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: C:/Program Files/R/R-3.5.3/library/rgdal/proj
## Linking to sp version: 1.3-1
## Loading required package: raster
## Loading required package: corrplot
## corrplot 0.84 loaded
## Loading required package: spdep
## Loading required package: spData
## To access larger datasets in this package, install the spDataLarge
## package with: `install.packages('spDataLarge',
## repos='https://nowosad.github.io/drat/', type='source')`
## Loading required package: sf
## Linking to GEOS 3.6.1, GDAL 2.2.3, PROJ 4.9.3
## Loading required package: rgeos
## rgeos version: 0.5-1, (SVN revision 614)
## GEOS runtime version: 3.6.1-CAPI-1.10.1
## Linking to sp version: 1.3-1
## Polygon checking: TRUE
## Loading required package: maptools
## Checking rgeos availability: TRUE
## Loading required package: mapproj
## Loading required package: maps
## Loading required package: GISTools
## Loading required package: RColorBrewer
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following objects are masked from 'package:raster':
##
## area, select
##
## Attaching package: 'GISTools'
## The following object is masked from 'package:maps':
##
## map.scale
## Loading required package: ggplot2
## Loading required package: ggthemes
## Loading required package: plyr
##
## Attaching package: 'plyr'
## The following object is masked from 'package:maps':
##
## ozone
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:rgeos':
##
## intersect, setdiff, union
## The following objects are masked from 'package:raster':
##
## intersect, select, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Loading required package: kableExtra
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
#INLA not on Cran
if(!require("INLA", character.only = TRUE)) install.packages("INLA", repos=c(getOption("repos"), INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE)
## Loading required package: INLA
## Loading required package: Matrix
## This is INLA_18.07.12 built 2018-07-12 11:05:18 UTC.
## See www.r-inla.org/contact-us for how to get help.
suppressMessages(library("INLA", character.only = TRUE))
Load Data
#martenscl <- read.csv("Marten_Scale.csv", header=T)
martenscl = read.csv("Marten_Modeling.csv", header=T)
head(martenscl)
## FID_1 Field1 GMT_Time Duration DOP Satellites id Longitude Latitude
## 1 0 26816 4 6.0 4 5516m 658223.8 5094504
## 2 1 26916 4 7.2 4 5516m 657887.5 5094635
## 3 2 26716 68 4.8 4 5516m 658432.3 5094552
## 4 7 21121 35 3.2 4 5516m 656835.5 5095063
## 5 3 6947 25 7.6 4 5516m 656981.3 5094983
## 6 4 11666 23 7.0 4 5516m 657008.5 5095027
## RASTERVALU Area Area_MC90 Area_HO90 Area_YC90 Area_HO120
## 1 0.69615316 0.45248816 0.00000 0 0.0000 0
## 2 0.53853881 0.99558746 2053.13217 0 0.0000 0
## 3 0.74680561 1.00000000 0.00000 0 0.0000 0
## 4 0.04130336 0.08082647 97.77849 0 240.2753 0
## 5 0.05860979 0.94163814 0.00000 0 0.0000 0
## 6 0.04371836 0.94163814 0.00000 0 0.0000 0
## Area_YC120 Area_MC120 Area_HO150 Area_YC150 Area_MC150 Dist_River
## 1 0.0000 855.1882 1040.88852 0.000 0.0000 934.0391
## 2 0.0000 1887.5735 93.76815 0.000 3487.1855 774.2151
## 3 0.0000 0.0000 0.00000 0.000 0.0000 872.2185
## 4 652.9674 616.2176 0.00000 1618.857 248.5517 338.2444
## 5 0.0000 0.0000 0.00000 0.000 0.0000 385.0158
## 6 0.0000 0.0000 0.00000 0.000 0.0000 435.7748
## Dist_Paved Dist_Unpav Area_AC90 Area_AH90 Area_AM90 Area_AC120
## 1 429.95079 3013.808 0.0000 191.5467 0 855.1882
## 2 330.92218 2684.197 2053.1315 144.6394 0 2062.2441
## 3 597.21109 3135.752 0.0000 0.0000 0 0.0000
## 4 15.92333 1719.551 502.5996 690.6044 0 1446.2554
## 5 42.97241 1859.207 0.0000 0.0000 0 0.0000
## 6 93.98086 1835.544 0.0000 0.0000 0 0.0000
## Area_AH120 Area_AM120 Area_AC150 Area_AH150 Area_AM150 Area_YH90
## 1 367.6649 0.0000 0.000 0.000 0.0000 0
## 2 1970.0284 191.5466 3602.865 1718.269 191.5466 0
## 3 0.0000 0.0000 0.000 0.000 0.0000 0
## 4 2889.3007 0.0000 2189.179 9165.870 0.0000 0
## 5 188.1937 0.0000 0.000 1556.017 0.0000 0
## 6 188.1937 0.0000 0.000 1556.017 0.0000 0
## Area_MH90 Area_MH120 Area_YH120 Area_MH150 Area_YH150
## 1 191.5467 367.6649 0 0.000 0
## 2 144.6394 1970.0284 0 1718.269 0
## 3 0.0000 0.0000 0 0.000 0
## 4 690.6044 2889.3007 0 9165.867 0
## 5 0.0000 188.1937 0 1556.017 0
## 6 0.0000 188.1937 0 1556.017 0
length(unique(martenscl$id))
## [1] 13
unique(martenscl$id)
## [1] 5516m 6977m 5529m 8217m houdini 5509m 5517m 1114m
## [9] 5538m 7004m 2254m 5496m 5064f
## 13 Levels: 1114m 2254m 5064f 5496m 5509m 5516m 5517m 5529m 5538m ... houdini
Extract month and year from GMT_Time field
# Julian date variable
mart_time <- read.csv("mart_TimeFormat.csv", header=T)
colnames(mart_time)[1] <- "Field1"
marten <- merge(martenscl, mart_time, by=c("Field1"))
marten$moda <- as.character(substr(marten$GMT.Time,1,5))
marten$moda <- as.Date(marten$moda, format="%m/%d")
marten$jdate <- as.numeric(strftime(marten$moda, format="%j"))
# Year for random effect
marten$year <- as.character(substr(marten$GMT.Time,7,10))
unique(marten$year)
## [1] "2016" "2017"
Scale and Center environmental variables (columns 12:38)
Cov.scale = marten[,12:38]
head(Cov.scale)
## Area_MC90 Area_HO90 Area_YC90 Area_HO120 Area_YC120 Area_MC120
## 1 1056.9237 0 0 0 0 1056.9237
## 2 1416.1906 0 0 0 0 172.8011
## 3 0.0000 0 0 0 0 172.8011
## 4 0.0000 0 0 0 0 172.8011
## 5 0.0000 0 0 0 0 172.8011
## 6 448.0824 0 0 0 0 907.9136
## Area_HO150 Area_YC150 Area_MC150 Dist_River Dist_Paved Dist_Unpav
## 1 0 0 3125.662 552.8939 9.986318 185.04661
## 2 0 0 1662.822 484.9658 69.843462 129.56449
## 3 0 0 0.000 428.3420 136.272916 59.43180
## 4 0 0 0.000 426.5506 136.003795 60.37133
## 5 0 0 0.000 437.2093 134.323876 59.44017
## 6 0 0 2332.427 870.4423 144.553752 338.78825
## Area_AC90 Area_AH90 Area_AM90 Area_AC120 Area_AH120 Area_AM120
## 1 1056.9245 5915.042 0.0000 1056.9237 11261.336 0.000
## 2 1416.1899 1795.211 207.1842 172.8011 9522.005 1290.248
## 3 0.0000 5721.567 316.7537 172.8011 9522.005 1290.248
## 4 0.0000 5721.567 316.7537 172.8011 9522.005 1290.248
## 5 0.0000 5721.567 316.7537 172.8011 9522.005 1290.248
## 6 448.0834 6769.907 209.8804 907.9136 11238.664 1228.853
## Area_AC150 Area_AH150 Area_AM150 Area_YH90 Area_MH90 Area_MH120
## 1 3125.662 11137.01 0.00 0 5915.042 11261.336
## 2 1662.822 12773.32 0.00 0 1795.211 9522.004
## 3 0.000 17457.06 0.00 0 5722.009 9522.004
## 4 0.000 17457.06 0.00 0 5722.009 9522.004
## 5 0.000 17457.06 0.00 0 5722.009 9522.004
## 6 2332.427 16405.54 2225.36 0 6769.907 11238.664
## Area_YH120 Area_MH150 Area_YH150
## 1 0 11137.01 0.00000
## 2 0 11349.86 94.82599
## 3 0 16711.75 0.00000
## 4 0 16711.75 0.00000
## 5 0 16711.75 0.00000
## 6 0 16405.54 0.00000
for(i in 1:ncol(Cov.scale)){
Cov.scale[,i] = as.numeric(scale(Cov.scale[,i], scale = T, center = T))
}
names(Cov.scale) = paste("s_", names(Cov.scale), sep="")
#Cov.scale$s_jdate = as.numeric(scale(marten$jdate, scale = T, center = T))
#Add scaled version to data frame with an "s" in front of column name
Mod.cov.df = cbind(marten, Cov.scale)
head(Mod.cov.df)
## Field1 FID_1 GMT_Time Duration.x DOP.x Satellites.x id.x Longitude.x
## 1 18 8044 59 2.4 4 2254m 675505.0
## 2 19 8074 6 1.8 4 2254m 675562.8
## 3 20 8053 3 1.8 4 2254m 675631.1
## 4 21 8054 2 2.2 4 2254m 675630.5
## 5 22 8055 2 2.8 4 2254m 675630.1
## 6 23 7806 9 3.4 4 2254m 675362.8
## Latitude.x RASTERVALU Area Area_MC90 Area_HO90 Area_YC90 Area_HO120
## 1 5136966 0.12328015 0.9583898 1056.9237 0 0 0
## 2 5137027 0.19138694 1.0000000 1416.1906 0 0 0
## 3 5136975 0.21412432 0.6456566 0.0000 0 0 0
## 4 5136985 0.21412432 0.6456566 0.0000 0 0 0
## 5 5136948 0.21412432 0.6456566 0.0000 0 0 0
## 6 5136545 0.02193207 1.0000000 448.0824 0 0 0
## Area_YC120 Area_MC120 Area_HO150 Area_YC150 Area_MC150 Dist_River
## 1 0 1056.9237 0 0 3125.662 552.8939
## 2 0 172.8011 0 0 1662.822 484.9658
## 3 0 172.8011 0 0 0.000 428.3420
## 4 0 172.8011 0 0 0.000 426.5506
## 5 0 172.8011 0 0 0.000 437.2093
## 6 0 907.9136 0 0 2332.427 870.4423
## Dist_Paved Dist_Unpav Area_AC90 Area_AH90 Area_AM90 Area_AC120
## 1 9.986318 185.04661 1056.9245 5915.042 0.0000 1056.9237
## 2 69.843462 129.56449 1416.1899 1795.211 207.1842 172.8011
## 3 136.272916 59.43180 0.0000 5721.567 316.7537 172.8011
## 4 136.003795 60.37133 0.0000 5721.567 316.7537 172.8011
## 5 134.323876 59.44017 0.0000 5721.567 316.7537 172.8011
## 6 144.553752 338.78825 448.0834 6769.907 209.8804 907.9136
## Area_AH120 Area_AM120 Area_AC150 Area_AH150 Area_AM150 Area_YH90
## 1 11261.336 0.000 3125.662 11137.01 0.00 0
## 2 9522.005 1290.248 1662.822 12773.32 0.00 0
## 3 9522.005 1290.248 0.000 17457.06 0.00 0
## 4 9522.005 1290.248 0.000 17457.06 0.00 0
## 5 9522.005 1290.248 0.000 17457.06 0.00 0
## 6 11238.664 1228.853 2332.427 16405.54 2225.36 0
## Area_MH90 Area_MH120 Area_YH120 Area_MH150 Area_YH150 GMT.Time
## 1 5915.042 11261.336 0 11137.01 0.00000 03/05/2016 19:30
## 2 1795.211 9522.004 0 11349.86 94.82599 03/05/2016 19:45
## 3 5722.009 9522.004 0 16711.75 0.00000 03/05/2016 20:00
## 4 5722.009 9522.004 0 16711.75 0.00000 03/05/2016 20:15
## 5 5722.009 9522.004 0 16711.75 0.00000 03/05/2016 20:30
## 6 6769.907 11238.664 0 16405.54 0.00000 03/05/2016 20:45
## Duration.y DOP.y Satellites.y id.y Longitude.y Latitude.y moda
## 1 59 2.4 4 2254m 675505.0 5136966 2019-03-05
## 2 6 1.8 4 2254m 675562.8 5137027 2019-03-05
## 3 3 1.8 4 2254m 675631.1 5136975 2019-03-05
## 4 2 2.2 4 2254m 675630.5 5136985 2019-03-05
## 5 2 2.8 4 2254m 675630.1 5136948 2019-03-05
## 6 9 3.4 4 2254m 675362.8 5136545 2019-03-05
## jdate year s_Area_MC90 s_Area_HO90 s_Area_YC90 s_Area_HO120 s_Area_YC120
## 1 64 2016 -0.4264822 -0.2371896 -0.2464141 -0.2452936 -0.2643398
## 2 64 2016 -0.3046249 -0.2371896 -0.2464141 -0.2452936 -0.2643398
## 3 64 2016 -0.7849728 -0.2371896 -0.2464141 -0.2452936 -0.2643398
## 4 64 2016 -0.7849728 -0.2371896 -0.2464141 -0.2452936 -0.2643398
## 5 64 2016 -0.7849728 -0.2371896 -0.2464141 -0.2452936 -0.2643398
## 6 64 2016 -0.6329908 -0.2371896 -0.2464141 -0.2452936 -0.2643398
## s_Area_MC120 s_Area_HO150 s_Area_YC150 s_Area_MC150 s_Dist_River
## 1 -0.6041284 -0.2731015 -0.2862721 -0.4192066 -0.31499693
## 2 -0.7792977 -0.2731015 -0.2862721 -0.6114912 -0.39484946
## 3 -0.7792977 -0.2731015 -0.2862721 -0.8300625 -0.46141313
## 4 -0.7792977 -0.2731015 -0.2862721 -0.8300625 -0.46351898
## 5 -0.7792977 -0.2731015 -0.2862721 -0.8300625 -0.45098930
## 6 -0.6336514 -0.2731015 -0.2862721 -0.5234742 0.05829504
## s_Dist_Paved s_Dist_Unpav s_Area_AC90 s_Area_AH90 s_Area_AM90
## 1 -1.1602711 -1.190270 -0.5225447 1.31680824 -0.1746501
## 2 -1.0155286 -1.202675 -0.4097534 -0.05017898 0.4627365
## 3 -0.8548934 -1.218356 -0.8543658 1.25261200 0.7998185
## 4 -0.8555442 -1.218146 -0.8543658 1.25261200 0.7998185
## 5 -0.8596064 -1.218354 -0.8543658 1.25261200 0.7998185
## 6 -0.8348692 -1.155896 -0.7136902 1.60045785 0.4710309
## s_Area_AC120 s_Area_AH120 s_Area_AM120 s_Area_AC150 s_Area_AH150
## 1 -0.6994525 1.504999 -0.2199738 -0.5333788 0.7262574
## 2 -0.8592917 1.172298 2.4522865 -0.7066839 0.9305782
## 3 -0.8592917 1.172298 2.4522865 -0.9036811 1.5154190
## 4 -0.8592917 1.172298 2.4522865 -0.9036811 1.5154190
## 5 -0.8592917 1.172298 2.4522865 -0.9036811 1.5154190
## 6 -0.7263918 1.500662 2.3251307 -0.6273546 1.3841197
## s_Area_AM150 s_Area_YH90 s_Area_MH90 s_Area_MH120 s_Area_YH120
## 1 -0.2181024 -0.131986 1.36682603 1.557246 -0.1477767
## 2 -0.2181024 -0.131986 -0.01808274 1.220917 -0.1477767
## 3 -0.2181024 -0.131986 1.30193660 1.220917 -0.1477767
## 4 -0.2181024 -0.131986 1.30193660 1.220917 -0.1477767
## 5 -0.2181024 -0.131986 1.30193660 1.220917 -0.1477767
## 6 3.4899749 -0.131986 1.65419432 1.552862 -0.1477767
## s_Area_MH150 s_Area_YH150
## 1 0.7737631 -0.16403638
## 2 0.8006350 -0.08693928
## 3 1.4775715 -0.16403638
## 4 1.4775715 -0.16403638
## 5 1.4775715 -0.16403638
## 6 1.4389131 -0.16403638
View Correlation Table
Cov.cor = cor(Mod.cov.df[,49:75])
corrplot(Cov.cor,
tl.cex=0.7,
number.cex = 0.6,
method = "number")
Convert to Spatial Points
nProj = "+proj=utm +zone=16 +datum=NAD83 +units=m +no_defs +ellps=GRS80 +towgs84=0,0,0"
Marten.pnts = SpatialPointsDataFrame(Mod.cov.df[,c("Longitude.x", "Latitude.x")], Mod.cov.df)
proj4string(Marten.pnts) = nProj
Load County Boundaries
Counties = readOGR(dsn = "./Counties_v17a",
layer = "Counties_v17a",
stringsAsFactors = FALSE)
## OGR data source with driver: ESRI Shapefile
## Source: "C:\Users\humph173\Documents\Michigan_State\Marten2\MartScale_090219\Counties_v17a", layer: "Counties_v17a"
## with 83 features
## It has 15 fields
## Integer64 fields read as strings: OBJECTID FIPSNUM
Counties = spTransform(Counties, proj4string(Marten.pnts))
Plot Points
wmap_df = fortify(Counties)
## Regions defined for each Polygons
tmp.df = Marten.pnts@data
ggplot(wmap_df, aes(long,lat, group=group)) +
geom_polygon(fill = "lightgray", col="darkgray") +
geom_point(data=tmp.df,
aes(Longitude.x, Latitude.x,
group=NULL,
fill=NULL,
col = "red")) +
xlab("Longitude") +
ylab("Latitude") +
ggtitle("Marten Locations") +
coord_equal() +
theme(panel.grid.minor = element_blank(),
panel.grid.major = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
legend.title = element_blank(),
legend.position = "none",
axis.title.x = element_text(size=22, face="bold"),
axis.title.y = element_text(size=22, face="bold"),
plot.title = element_text(size=24, face="bold", hjust = 0.5))
Calculate Nearest Neighbor Distance by Marten ID
library(spatstat)
## Loading required package: spatstat.data
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:dplyr':
##
## collapse
## The following object is masked from 'package:raster':
##
## getData
## Loading required package: rpart
##
## spatstat 1.60-1 (nickname: 'Swinging Sixties')
## For an introduction to spatstat, type 'beginner'
##
## Attaching package: 'spatstat'
## The following object is masked from 'package:MASS':
##
## area
## The following objects are masked from 'package:raster':
##
## area, rotate, shift
Mart.ids = levels(as.factor(Marten.pnts$id.x))
for(i in 1:length(Mart.ids)){ #for all marten IDs
Mart.tmp = subset(Marten.pnts, id.x == Mart.ids[i]) #choose one marten at a time
P.pp = as.ppp(Mart.tmp) #convert to ppp object
Mart.tmp@data$NN = as.numeric(nndist(P.pp, k = 1)) #find nearest neighboring point
if(i == 1){Marten.df = Mart.tmp@data #re-assemble data frame
} else{Marten.df = rbind(Marten.df, Mart.tmp@data)}
}
#large range
range(Marten.df$NN)
## [1] 0.000 1984.527
#Scale neighbor distance
Marten.df$NNs = Marten.df$NN/100
#Convert back to points
Marten.pnts = SpatialPointsDataFrame(Marten.df[,c("Longitude.x", "Latitude.x")], Marten.df)
proj4string(Marten.pnts) = nProj
detach("package:spatstat", unload=TRUE)
Looping through each covariate one at a time with marten ID as a grouping random effect and nearest neigbor distance (within individual marten) to account for spatial bias.
Covariates.list = levels(factor(names(Marten.pnts@data[,49:75]))) #Get names of all covariates
Covariates.list
Marten.data = Marten.pnts@data #copy of the data
for(i in 1:length(Covariates.list)){ #for each covariate
Cov.lst = list(list(intercept1 = rep(1, dim(Marten.data)[1])), #intercept = 1
list(Covariate.X = Marten.data[,Covariates.list[i]], #Choose 1 covariate at a time
NN = Marten.data[,"NNs"], #Scaled neighbor distance
ID = Marten.data[,"id.x"], #Marten ID
JDATE = Marten.data[,"jdate"], #Day
YEAR = Marten.data[,"year"])) #Year
Data.stk = inla.stack(data = list( #Organizing data as a list
RASTERVALU = Marten.data$RASTERVALU), #Response/dependent variable
A = list(1,1), #Not used, empty space
effects = Cov.lst, #Individual covariate, neighbor, ID, Year
tag = "M1.0") #Just a label
nPrior = list(theta=list(prior = "normal", param=c(0, 5))) #flat prior
Formula = RASTERVALU ~ -1 + intercept1 + #response and intercept
f(ID, #grouping effect for individual marten
model="iid",
constr = TRUE, #enforce a zero mean (average)
hyper = nPrior) +
f(inla.group(NN, #distance as a smooth effect
n = 20, #binning distances because some distances are very close
method = "cut"),
model="rw1",
constr = TRUE,
scale.model = TRUE,
hyper = nPrior) +
f(YEAR, #Year as grouping variable
model="iid",
constr = TRUE,
hyper = nPrior) +
f(JDATE, #DOY as grouping variable
model="iid",
constr = TRUE,
hyper = nPrior) +
Covariate.X #Individual covariate of interest
Model.X = inla(Formula, #Run model iteration
data = inla.stack.data(Data.stk),
family = "gamma",
verbose = TRUE,
control.fixed = list(prec = 0.001,
prec.intercept = 0.0001),
control.predictor = list(
A = inla.stack.A(Data.stk),
compute = TRUE,
link = 1),
control.inla = list(strategy="gaussian",
int.strategy = "eb"),
control.results = list(return.marginals.random = TRUE,
return.marginals.predictor = TRUE),
control.compute=list(dic = TRUE, cpo = TRUE, waic = TRUE))
#Create a data frame to store results
Fixed.effects = Model.X$summary.fixed[,c(1:3,5)]
names(Fixed.effects) = c("Mean", "sd", "Q.025", "Q.975")
#Add comparison metrics DIC and WAIC
Fixed.effects$DIC = Model.X$dic$dic
Fixed.effects$WAIC = Model.X$waic$waic
#Add label to identify actual covariate name
Fixed.effects$Covariate = Covariates.list[i]
#Get estimates for ID effect
ID.effect = Model.X$summary.random$ID[,c(1:4,6)]
names(ID.effect) = c("ID", "Mean", "sd", "Q.025", "Q.975")
ID.effect$Covariate = Covariates.list[i]
#Get estimates for NN distance effect
NN.effect = Model.X$summary.random$`inla.group(NN, n = 20, method = "cut")`[,c(1:4,6)]
names(NN.effect) = c("Distance", "Mean", "sd", "Q.025", "Q.975")
NN.effect$Covariate = Covariates.list[i]
#Get estimates for JDATE effect
JDate.effect = Model.X$summary.random$JDATE[,c(1:4,6)]
names(JDate.effect) = c("JDATE", "Mean", "sd", "Q.025", "Q.975")
JDate.effect$Covariate = Covariates.list[i]
#Get estimates for YEAR effect
Year.effect = Model.X$summary.random$YEAR[,c(1:4,6)]
names(Year.effect) = c("YEAR", "Mean", "sd", "Q.025", "Q.975")
Year.effect$Covariate = Covariates.list[i]
#Combine data for all iterations
if(i == 1){
Fixed.effects.all = Fixed.effects
ID.effect.all = ID.effect
NN.effect.all = NN.effect
JDate.effect.all = JDate.effect
Year.effect.all = Year.effect
} else{
Fixed.effects.all = rbind(Fixed.effects.all, Fixed.effects)
ID.effect.all = rbind(ID.effect.all, ID.effect)
NN.effect.all = rbind(NN.effect.all, NN.effect)
JDate.effect.all = rbind(JDate.effect.all,JDate.effect)
Year.effect.all = rbind(Year.effect.all, Year.effect)}
}
#Determine which are signficant as judged by credible interval
Fixed.effects.all$Credible = ifelse(Fixed.effects.all$Q.025 >= 0 & Fixed.effects.all$Q.975 >= 0, "Inside",
ifelse(Fixed.effects.all$Q.025 <= 0 & Fixed.effects.all$Q.975 <= 0, "Inside",
"Outside"))
#Are row values the covariate or intercept?
Fixed.effects.all$Type = paste(substr(rownames(Fixed.effects.all), 1, 3))
Save copies to WD
write.csv(Fixed.effects.all, "./Ind_fixed_effects.csv") #fixed effects
write.csv(ID.effect.all, "./Ind_ID_effect.csv")
write.csv(NN.effect.all, "./Ind_Distance_effect.csv")
write.csv(Year.effect.all, "./Year_effect.csv")
Covariate Selection
Looks like s_Area_AC150, s_Area_AH120, s_Area_AM150, s_Area_HO150, s_Area_MC150, s_Area_MH120, s_Dist_Paved, s_Dist_River, s_Dist_Unpav are the winners with good agreement between the DIC and WAIC.
#Remove those "outside" credible interval and intercepts
Fixed.effects.all2 = Fixed.effects.all %>% filter(Credible == "Inside" & Type == "Cov")
#How many dropped
dim(Fixed.effects.all)[1] - dim(Fixed.effects.all2)[1]
## [1] 29
Fixed.effects.all2$Var = gsub("\\d", "", Fixed.effects.all2$Covariate)
Fixed.effects.all2$Var = gsub("s_Area_", "", Fixed.effects.all2$Var)
Fixed.effects.all2 = arrange(Fixed.effects.all2, desc(DIC))
Fixed.effects.all2$Dup = duplicated(Fixed.effects.all2[,"Var"])
Fixed.effects.all3 = Fixed.effects.all2 %>%
filter(Dup != TRUE)
#Top of each type
Fixed.effects.all3
## Mean sd Q.025 Q.975 DIC WAIC
## 1 0.05764240 0.01031198 0.037396506 0.07787141 -19102.83 -18995.75
## 2 0.02718557 0.01202695 0.003572613 0.05077882 -19114.41 -19015.14
## 3 0.09367091 0.01778514 0.058752675 0.12856000 -19136.45 -19038.62
## 4 -0.04174204 0.01525609 -0.071694900 -0.01181419 -19156.99 -19079.14
## 5 0.02930576 0.01117848 0.007358630 0.05123458 -19157.15 -19078.96
## 6 0.05138328 0.01579765 0.020367173 0.08237350 -19159.75 -19078.15
## 7 0.05881514 0.01550618 0.028371266 0.08923360 -19163.88 -19083.97
## 8 -0.40615566 0.06568606 -0.535119547 -0.27729941 -19191.21 -19113.68
## 9 -0.11121076 0.01670017 -0.143998817 -0.07845006 -19198.94 -19121.13
## 10 -0.12847749 0.01672153 -0.161307489 -0.09567490 -19213.38 -19131.17
## 11 -0.24248898 0.02856258 -0.298566962 -0.18645779 -19220.77 -19141.57
## Covariate Credible Type Var Dup
## 1 s_Area_AM150 Inside Cov AM FALSE
## 2 s_Area_YC120 Inside Cov YC FALSE
## 3 s_Area_AC120 Inside Cov AC FALSE
## 4 s_Area_HO150 Inside Cov HO FALSE
## 5 s_Area_YH120 Inside Cov YH FALSE
## 6 s_Dist_Paved Inside Cov s_Dist_Paved FALSE
## 7 s_Area_MC120 Inside Cov MC FALSE
## 8 s_Dist_Unpav Inside Cov s_Dist_Unpav FALSE
## 9 s_Area_AH120 Inside Cov AH FALSE
## 10 s_Area_MH120 Inside Cov MH FALSE
## 11 s_Dist_River Inside Cov s_Dist_River FALSE
write.csv(Fixed.effects.all3, "./Top_sig_effects.csv")
Check correlation between selected covariates
Best.Cov = Marten.pnts@data %>%
dplyr::select(s_Area_AC120, s_Area_AH120, s_Area_AM150, s_Area_HO150,
s_Area_MC120, s_Area_MH120, s_Area_YC120, s_Area_YH120,
s_Dist_Paved, s_Dist_River, s_Dist_Unpav)
Cov.cor2 = cor(Best.Cov)
corrplot(Cov.cor2,
tl.cex=0.7,
number.cex = 0.6,
method = "number")
#The AC, AH, MC, and MH variables look highly correlated. Let's test with a different method:
library(perturb)
##
## Attaching package: 'perturb'
## The following object is masked from 'package:raster':
##
## reclassify
CI = colldiag(Cov.cor2)
CI #Last value in the Index column needs to be less than 30; it's still too high
## Condition
## Index Variance Decomposition Proportions
## intercept s_Area_AC120 s_Area_AH120 s_Area_AM150 s_Area_HO150
## 1 1.000 0.000 0.000 0.000 0.000 0.000
## 2 1.329 0.000 0.000 0.000 0.001 0.000
## 3 1.466 0.000 0.000 0.000 0.001 0.001
## 4 1.847 0.000 0.000 0.000 0.000 0.000
## 5 2.066 0.000 0.000 0.000 0.000 0.001
## 6 2.151 0.000 0.000 0.000 0.013 0.003
## 7 2.525 0.000 0.000 0.000 0.005 0.005
## 8 2.743 0.000 0.000 0.000 0.002 0.000
## 9 4.584 0.000 0.000 0.001 0.000 0.001
## 10 31.563 0.181 0.002 0.020 0.194 0.140
## 11 363.507 0.819 0.998 0.979 0.784 0.849
## s_Area_MC120 s_Area_MH120 s_Area_YC120 s_Area_YH120 s_Dist_Paved
## 1 0.000 0.000 0.000 0.000 0.000
## 2 0.000 0.000 0.000 0.000 0.002
## 3 0.000 0.000 0.000 0.000 0.000
## 4 0.000 0.000 0.000 0.014 0.001
## 5 0.000 0.000 0.000 0.004 0.007
## 6 0.000 0.000 0.000 0.000 0.003
## 7 0.000 0.000 0.000 0.001 0.001
## 8 0.000 0.000 0.000 0.004 0.021
## 9 0.000 0.002 0.000 0.001 0.011
## 10 0.000 0.049 0.003 0.309 0.163
## 11 1.000 0.948 0.996 0.665 0.792
## s_Dist_River s_Dist_Unpav
## 1 0.000 0.000
## 2 0.000 0.000
## 3 0.001 0.002
## 4 0.000 0.001
## 5 0.000 0.001
## 6 0.000 0.000
## 7 0.004 0.003
## 8 0.001 0.003
## 9 0.020 0.007
## 10 0.122 0.115
## 11 0.852 0.869
#dropping the MH150 and re-assessing
Best.Cov2 = Best.Cov %>% dplyr::select(-c("s_Area_MH120", "s_Area_MC120")) #Have the high individual values
Cov.cor3 = cor(Best.Cov2)
CI = colldiag(Cov.cor3)
CI #Now less than 30
## Condition
## Index Variance Decomposition Proportions
## intercept s_Area_AC120 s_Area_AH120 s_Area_AM150 s_Area_HO150
## 1 1.000 0.075 0.027 0.023 0.010 0.002
## 2 1.104 0.609 0.000 0.010 0.002 0.042
## 3 1.228 0.094 0.000 0.028 0.122 0.077
## 4 1.558 0.007 0.000 0.000 0.139 0.040
## 5 1.781 0.001 0.003 0.033 0.010 0.528
## 6 1.839 0.007 0.015 0.000 0.414 0.004
## 7 2.195 0.160 0.010 0.103 0.289 0.086
## 8 2.491 0.000 0.051 0.073 0.004 0.140
## 9 4.967 0.048 0.893 0.730 0.010 0.081
## s_Area_YC120 s_Area_YH120 s_Dist_Paved s_Dist_River s_Dist_Unpav
## 1 0.037 0.002 0.012 0.026 0.002
## 2 0.010 0.029 0.056 0.001 0.046
## 3 0.011 0.036 0.003 0.034 0.060
## 4 0.077 0.428 0.022 0.012 0.023
## 5 0.083 0.120 0.000 0.000 0.131
## 6 0.010 0.018 0.321 0.047 0.070
## 7 0.014 0.003 0.302 0.196 0.032
## 8 0.500 0.272 0.003 0.009 0.341
## 9 0.257 0.091 0.281 0.673 0.294
Organize data
Marten.data = Marten.pnts@data
Global.lst = list(list(intercept1 = rep(1, dim(Marten.data)[1])),
list(s_Area_AC150 = Marten.data[,"s_Area_AC120"],
s_Area_AH120 = Marten.data[,"s_Area_AH120"],
s_Area_AM150 = Marten.data[,"s_Area_AM150"],
s_Area_HO150 = Marten.data[,"s_Area_HO150"],
s_Area_YC120 = Marten.data[,"s_Area_YC120"],
s_Area_YH120 = Marten.data[,"s_Area_YH120"],
s_Dist_Paved = Marten.data[,"s_Dist_Paved"],
s_Dist_River = Marten.data[,"s_Dist_River"],
s_Dist_Unpav = Marten.data[,"s_Dist_Unpav"],
JDATE = Marten.data[,"jdate"],
NN = round(Marten.data[,"NNs"],3),
ID = Marten.data[,"id.x"],
YEAR = Marten.data[,"year"]))
Global.stk = inla.stack(data = list(RASTERVALU = Marten.data$RASTERVALU),
A = list(1,1),
effects = Global.lst,
tag = "G1.0")
Run Full Model 1
nPrior = list(theta=list(prior = "normal", param=c(0, 5))) #flat prior
Formula = RASTERVALU ~ -1 + intercept1 + #response and intercept
f(ID, #grouping effect for individual marten
model="iid",
constr = TRUE,
hyper = nPrior) +
f(inla.group(NN, #distance as a smooth effect
n = 20, #
method = "cut"),
model="rw1",
constr = TRUE,
scale.model = TRUE,
hyper = nPrior ) +
f(JDATE,
model="iid",
constr = TRUE,
hyper = nPrior) +
f(YEAR,
model="iid",
constr = TRUE,
hyper = nPrior) +
s_Area_AC150 + s_Area_AH120 + s_Area_AM150 +
s_Area_HO150 + s_Area_YC120 + s_Area_YH120 +
s_Dist_Paved + s_Dist_River + s_Dist_Unpav
GModel.1 = inla(Formula,
data = inla.stack.data(Global.stk),
family = "gamma",
verbose = TRUE,
control.fixed = list(prec = 0.001,
prec.intercept = 0.0001),
control.predictor = list(
A = inla.stack.A(Global.stk),
compute = TRUE,
link = 1),
control.inla = list(strategy="gaussian",
int.strategy = "eb"),
control.results = list(return.marginals.random = TRUE,
return.marginals.predictor = TRUE),
control.compute=list(dic = TRUE, cpo = TRUE, waic = TRUE))
summary(GModel.1)
##
## Call:
## c("inla(formula = Formula, family = \"gamma\", data = inla.stack.data(Global.stk), ", " verbose = TRUE, control.compute = list(dic = TRUE, cpo = TRUE, ", " waic = TRUE), control.predictor = list(A = inla.stack.A(Global.stk), ", " compute = TRUE, link = 1), control.inla = list(strategy = \"gaussian\", ", " int.strategy = \"eb\"), control.results = list(return.marginals.random = TRUE, ", " return.marginals.predictor = TRUE), control.fixed = list(prec = 0.001, ", " prec.intercept = 1e-04))")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 2.4709 61.0098 0.7193 64.2001
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## intercept1 -0.3551 0.2136 -0.7744 -0.3551 0.0638 -0.3551 0
## s_Area_AC150 0.0681 0.0230 0.0230 0.0681 0.1132 0.0681 0
## s_Area_AH120 -0.0798 0.0199 -0.1188 -0.0798 -0.0408 -0.0798 0
## s_Area_AM150 0.0697 0.0109 0.0483 0.0697 0.0911 0.0697 0
## s_Area_HO150 -0.0345 0.0159 -0.0658 -0.0345 -0.0033 -0.0345 0
## s_Area_YC120 0.0280 0.0129 0.0028 0.0280 0.0533 0.0280 0
## s_Area_YH120 0.0435 0.0114 0.0212 0.0435 0.0658 0.0435 0
## s_Dist_Paved 0.0449 0.0157 0.0141 0.0449 0.0757 0.0449 0
## s_Dist_River -0.2405 0.0287 -0.2967 -0.2405 -0.1843 -0.2405 0
## s_Dist_Unpav -0.4389 0.0664 -0.5693 -0.4389 -0.3086 -0.4389 0
##
## Random effects:
## Name Model
## ID IID model
## inla.group(NN, n = 20, method = "cut") RW1 model
## JDATE IID model
## YEAR IID model
##
## Model hyperparameters:
## mean sd
## Precision parameter for the Gamma observations 1.2028 0.0168
## Precision for ID 0.7375 0.2203
## Precision for inla.group(NN, n = 20, method = "cut") 0.6228 0.2254
## Precision for JDATE 0.9353 0.1198
## Precision for YEAR 1.1529 0.5593
## 0.025quant 0.5quant
## Precision parameter for the Gamma observations 1.1695 1.2030
## Precision for ID 0.3903 0.7097
## Precision for inla.group(NN, n = 20, method = "cut") 0.3068 0.5807
## Precision for JDATE 0.7143 0.9310
## Precision for YEAR 0.4458 1.0281
## 0.975quant mode
## Precision parameter for the Gamma observations 1.235 1.2039
## Precision for ID 1.247 0.6569
## Precision for inla.group(NN, n = 20, method = "cut") 1.177 0.5072
## Precision for JDATE 1.184 0.9256
## Precision for YEAR 2.579 0.8303
##
## Expected number of effective parameters(std dev): 144.41(0.00)
## Number of equivalent replicates : 58.38
##
## Deviance Information Criterion (DIC) ...............: -19369.17
## Deviance Information Criterion (DIC, saturated) ....: -6.846e+59
## Effective number of parameters .....................: 145.58
##
## Watanabe-Akaike information criterion (WAIC) ...: -19291.15
## Effective number of parameters .................: 200.56
##
## Marginal log-Likelihood: 9365.77
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
Here we re-run without the spatial (NN) effect.
Run Non Spatial Model Dropping NN.
nPrior = list(theta=list(prior = "normal", param=c(0, 5)))
Formula = RASTERVALU ~ -1 + intercept1 + #response and intercept
f(ID, #grouping effect for individual marten
model="iid",
constr = TRUE,
hyper = nPrior) +
f(JDATE,
model="iid",
constr = TRUE,
hyper = nPrior) +
f(YEAR,
model="iid",
constr = TRUE,
hyper = nPrior) +
s_Area_AC150 + s_Area_AH120 + s_Area_AM150 +
s_Area_HO150 + s_Area_YC120 + s_Area_YH120 +
s_Dist_Paved + s_Dist_River + s_Dist_Unpav
Non.Spatial.mod = inla(Formula,
data = inla.stack.data(Global.stk),
family = "gamma",
verbose = TRUE,
control.fixed = list(prec = 0.001,
prec.intercept = 0.0001),
control.predictor = list(
A = inla.stack.A(Global.stk),
compute = TRUE,
link = 1),
control.inla = list(strategy="gaussian",
int.strategy = "eb"),
control.results = list(return.marginals.random = TRUE,
return.marginals.predictor = TRUE),
control.compute=list(dic = TRUE, cpo = TRUE, waic = TRUE))
summary(Non.Spatial.mod)
##
## Call:
## c("inla(formula = Formula, family = \"gamma\", data = inla.stack.data(Global.stk), ", " verbose = TRUE, control.compute = list(dic = TRUE, cpo = TRUE, ", " waic = TRUE), control.predictor = list(A = inla.stack.A(Global.stk), ", " compute = TRUE, link = 1), control.inla = list(strategy = \"gaussian\", ", " int.strategy = \"eb\"), control.results = list(return.marginals.random = TRUE, ", " return.marginals.predictor = TRUE), control.fixed = list(prec = 0.001, ", " prec.intercept = 1e-04))")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 1.8998 38.9248 1.0682 41.8928
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## intercept1 -1.5139 0.1274 -1.7641 -1.5139 -1.2639 -1.5139 0
## s_Area_AC150 0.0552 0.0233 0.0095 0.0552 0.1009 0.0552 0
## s_Area_AH120 -0.0822 0.0201 -0.1217 -0.0822 -0.0428 -0.0822 0
## s_Area_AM150 0.0650 0.0110 0.0434 0.0650 0.0866 0.0650 0
## s_Area_HO150 -0.0259 0.0158 -0.0570 -0.0259 0.0052 -0.0259 0
## s_Area_YC120 0.0248 0.0130 -0.0006 0.0248 0.0503 0.0248 0
## s_Area_YH120 0.0450 0.0115 0.0225 0.0450 0.0676 0.0450 0
## s_Dist_Paved 0.0390 0.0158 0.0079 0.0390 0.0701 0.0390 0
## s_Dist_River -0.2459 0.0285 -0.3019 -0.2459 -0.1900 -0.2459 0
## s_Dist_Unpav -0.4202 0.0668 -0.5514 -0.4202 -0.2891 -0.4202 0
##
## Random effects:
## Name Model
## ID IID model
## JDATE IID model
## YEAR IID model
##
## Model hyperparameters:
## mean sd 0.025quant
## Precision parameter for the Gamma observations 1.1536 0.0161 1.1216
## Precision for ID 0.7489 0.2255 0.3976
## Precision for JDATE 0.9077 0.1244 0.6651
## Precision for YEAR 1.1722 0.5372 0.4398
## 0.5quant 0.975quant mode
## Precision parameter for the Gamma observations 1.1539 1.185 1.1550
## Precision for ID 0.7187 1.277 0.6619
## Precision for JDATE 0.9095 1.149 0.9236
## Precision for YEAR 1.0699 2.506 0.8858
##
## Expected number of effective parameters(std dev): 137.30(0.00)
## Number of equivalent replicates : 61.40
##
## Deviance Information Criterion (DIC) ...............: -18934.87
## Deviance Information Criterion (DIC, saturated) ....: -1.114e+65
## Effective number of parameters .....................: 138.08
##
## Watanabe-Akaike information criterion (WAIC) ...: -18786.01
## Effective number of parameters .................: 243.95
##
## Marginal log-Likelihood: 9164.42
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
Run spatial model without grouping by ID Day, or Year effects
nPrior = list(theta=list(prior = "normal", param=c(0, 5))) #flat prior
Formula = RASTERVALU ~ -1 + intercept1 +
f(inla.group(NN, #distance as a smooth effect
n = 20, #
method = "cut"),
model="rw1",
constr = TRUE,
scale.model = TRUE,
hyper = nPrior ) +
s_Area_AC150 + s_Area_AH120 + s_Area_AM150 +
s_Area_HO150 + s_Area_YC120 + s_Area_YH120 +
s_Dist_Paved + s_Dist_River + s_Dist_Unpav
No.ID.mod = inla(Formula,
data = inla.stack.data(Global.stk),
family = "gamma",
verbose = TRUE,
control.fixed = list(prec = 0.001,
prec.intercept = 0.0001),
control.predictor = list(
A = inla.stack.A(Global.stk),
compute = TRUE,
link = 1),
control.inla = list(strategy="gaussian",
int.strategy = "eb"),
control.results = list(return.marginals.random = TRUE,
return.marginals.predictor = TRUE),
control.compute=list(dic = TRUE, cpo = TRUE, waic = TRUE))
summary(No.ID.mod)
##
## Call:
## c("inla(formula = Formula, family = \"gamma\", data = inla.stack.data(Global.stk), ", " verbose = TRUE, control.compute = list(dic = TRUE, cpo = TRUE, ", " waic = TRUE), control.predictor = list(A = inla.stack.A(Global.stk), ", " compute = TRUE, link = 1), control.inla = list(strategy = \"gaussian\", ", " int.strategy = \"eb\"), control.results = list(return.marginals.random = TRUE, ", " return.marginals.predictor = TRUE), control.fixed = list(prec = 0.001, ", " prec.intercept = 1e-04))")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 1.8346 20.5495 0.7207 23.1048
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## intercept1 -0.1345 0.2258 -0.5778 -0.1345 0.3083 -0.1345 0
## s_Area_AC150 0.6415 0.0211 0.6000 0.6415 0.6829 0.6415 0
## s_Area_AH120 0.4439 0.0191 0.4065 0.4439 0.4813 0.4439 0
## s_Area_AM150 0.1576 0.0161 0.1261 0.1576 0.1892 0.1576 0
## s_Area_HO150 0.0941 0.0181 0.0586 0.0941 0.1295 0.0941 0
## s_Area_YC120 0.0109 0.0177 -0.0238 0.0109 0.0456 0.0109 0
## s_Area_YH120 0.0569 0.0165 0.0245 0.0569 0.0893 0.0569 0
## s_Dist_Paved 0.3277 0.0181 0.2923 0.3277 0.3632 0.3277 0
## s_Dist_River -0.0745 0.0156 -0.1051 -0.0745 -0.0440 -0.0745 0
## s_Dist_Unpav 0.3489 0.0167 0.3161 0.3489 0.3817 0.3489 0
##
## Random effects:
## Name Model
## inla.group(NN, n = 20, method = "cut") RW1 model
##
## Model hyperparameters:
## mean sd
## Precision parameter for the Gamma observations 0.5209 0.0067
## Precision for inla.group(NN, n = 20, method = "cut") 1.3766 0.5469
## 0.025quant 0.5quant
## Precision parameter for the Gamma observations 0.5080 0.5209
## Precision for inla.group(NN, n = 20, method = "cut") 0.5922 1.2832
## 0.975quant mode
## Precision parameter for the Gamma observations 0.5341 0.5208
## Precision for inla.group(NN, n = 20, method = "cut") 2.7090 1.1120
##
## Expected number of effective parameters(std dev): 15.18(0.00)
## Number of equivalent replicates : 555.31
##
## Deviance Information Criterion (DIC) ...............: -9991.53
## Deviance Information Criterion (DIC, saturated) ....: -5.521e+161
## Effective number of parameters .....................: 15.58
##
## Watanabe-Akaike information criterion (WAIC) ...: -9995.66
## Effective number of parameters .................: 10.36
##
## Marginal log-Likelihood: 4914.97
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
Run non-spatial model without grouping by ID or Year effects (just a linear model)
nPrior = list(theta=list(prior = "normal", param=c(0, 5)))
Formula = RASTERVALU ~ -1 + intercept1 +
s_Area_AC150 + s_Area_AH120 + s_Area_AM150 +
s_Area_HO150 + s_Area_YC120 + s_Area_YH120 +
s_Dist_Paved + s_Dist_River + s_Dist_Unpav
No.Space.ID.mod = inla(Formula,
data = inla.stack.data(Global.stk),
family = "gamma",
verbose = TRUE,
control.fixed = list(prec = 0.001,
prec.intercept = 0.0001),
control.predictor = list(
A = inla.stack.A(Global.stk),
compute = TRUE,
link = 1),
control.inla = list(strategy="gaussian",
int.strategy = "eb"),
control.results = list(return.marginals.random = TRUE,
return.marginals.predictor = TRUE),
control.compute=list(dic = TRUE, cpo = TRUE, waic = TRUE))
summary(No.Space.ID.mod)
##
## Call:
## c("inla(formula = Formula, family = \"gamma\", data = inla.stack.data(Global.stk), ", " verbose = TRUE, control.compute = list(dic = TRUE, cpo = TRUE, ", " waic = TRUE), control.predictor = list(A = inla.stack.A(Global.stk), ", " compute = TRUE, link = 1), control.inla = list(strategy = \"gaussian\", ", " int.strategy = \"eb\"), control.results = list(return.marginals.random = TRUE, ", " return.marginals.predictor = TRUE), control.fixed = list(prec = 0.001, ", " prec.intercept = 1e-04))")
##
## Time used:
## Pre-processing Running inla Post-processing Total
## 1.4554 15.4077 1.1255 17.9886
##
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## intercept1 -1.3801 0.0154 -1.4104 -1.3801 -1.3499 -1.3801 0
## s_Area_AC150 0.6383 0.0212 0.5966 0.6383 0.6800 0.6383 0
## s_Area_AH120 0.4465 0.0192 0.4088 0.4465 0.4841 0.4465 0
## s_Area_AM150 0.1518 0.0162 0.1199 0.1518 0.1836 0.1518 0
## s_Area_HO150 0.1118 0.0180 0.0764 0.1118 0.1472 0.1118 0
## s_Area_YC120 0.0075 0.0177 -0.0273 0.0075 0.0423 0.0075 0
## s_Area_YH120 0.0597 0.0166 0.0271 0.0597 0.0923 0.0597 0
## s_Dist_Paved 0.3110 0.0180 0.2757 0.3110 0.3463 0.3110 0
## s_Dist_River -0.0763 0.0158 -0.1073 -0.0763 -0.0453 -0.0763 0
## s_Dist_Unpav 0.3287 0.0167 0.2959 0.3287 0.3615 0.3287 0
##
## The model has no random effects
##
## Model hyperparameters:
## mean sd 0.025quant
## Precision parameter for the Gamma observations 0.5111 0.0065 0.4984
## 0.5quant 0.975quant mode
## Precision parameter for the Gamma observations 0.511 0.524 0.5109
##
## Expected number of effective parameters(std dev): 10.03(0.00)
## Number of equivalent replicates : 840.67
##
## Deviance Information Criterion (DIC) ...............: -9762.58
## Deviance Information Criterion (DIC, saturated) ....: -6.822e+163
## Effective number of parameters .....................: 10.05
##
## Watanabe-Akaike information criterion (WAIC) ...: -9765.49
## Effective number of parameters .................: 7.093
##
## Marginal log-Likelihood: 4807.11
## CPO and PIT are computed
##
## Posterior marginals for linear predictor and fitted values computed
Compare Model Metrics. The two global models look comparable, but, dropping the one non-significant covariate increases both the DIC and WAIC slightly. The Linear model without any spatial or grouping effects performs the worst.
Models = c("Global 1", "Non-Spatial",
"No ID", "Linear Model")
#Deviance information criteria
S1.DICs = c(GModel.1$dic$dic, Non.Spatial.mod$dic$dic,
No.ID.mod$dic$dic, No.Space.ID.mod$dic$dic)
S1.WAICs = c(GModel.1$waic$waic, Non.Spatial.mod$waic$waic,
No.ID.mod$waic$waic, No.Space.ID.mod$waic$waic)
Model_out = as.data.frame(cbind(Model = Models,
DIC = round(S1.DICs, 2),
WAIC = round(S1.WAICs, 2)))
kable(Model_out, caption = "Model Comparison") %>%
kable_styling("striped", full_width = F) %>%
row_spec(0, font_size = 20)
Model | DIC | WAIC |
---|---|---|
Global 1 | -19369.17 | -19291.15 |
Non-Spatial | -18934.87 | -18786.01 |
No ID | -9991.53 | -9995.66 |
Linear Model | -9762.58 | -9765.49 |
Global Model 1 (GModel.2) Fixed Effect Estimates
FE.table = GModel.1$summary.fixed[,c(1:3,5)]
names(FE.table) = c("Mean", "sd", "Q0.025", "Q0.975")
kable(FE.table, caption = "Fixed Effects") %>%
kable_styling("striped", full_width = F) %>%
row_spec(0, font_size = 20)
Mean | sd | Q0.025 | Q0.975 | |
---|---|---|---|---|
intercept1 | -0.3551082 | 0.2135645 | -0.7744073 | 0.0638409 |
s_Area_AC150 | 0.0681244 | 0.0229835 | 0.0230001 | 0.1132111 |
s_Area_AH120 | -0.0797869 | 0.0198875 | -0.1188329 | -0.0407736 |
s_Area_AM150 | 0.0696989 | 0.0109028 | 0.0482930 | 0.0910869 |
s_Area_HO150 | -0.0344976 | 0.0159236 | -0.0657611 | -0.0032602 |
s_Area_YC120 | 0.0280427 | 0.0128646 | 0.0027852 | 0.0532790 |
s_Area_YH120 | 0.0434901 | 0.0113554 | 0.0211956 | 0.0657660 |
s_Dist_Paved | 0.0449110 | 0.0156998 | 0.0140869 | 0.0757094 |
s_Dist_River | -0.2404680 | 0.0286515 | -0.2967205 | -0.1842625 |
s_Dist_Unpav | -0.4389190 | 0.0664246 | -0.5693330 | -0.3086139 |
ID Effect by Marten ID
mic.d = as.data.frame(GModel.1$summary.random$ID)
names(mic.d) = c("ID", "Mean", "sd", "Q025", "Q50", "Q975", "mode", "kld")
ggplot(mic.d, aes(factor(ID), y=Mean)) +
geom_point(size=2, pch=19, col = "red") +
geom_linerange(aes(ymin=Q025, ymax=Q975), colour="black") +
geom_hline(yintercept = 0,
linetype = "solid",
col = "darkgray",
size = 0.5) +
theme_classic() +
xlab("Marten ID") +
ylab("RASTERVALU (log)") +
ggtitle("Marten ID Effect") +
theme(plot.title = element_text(hjust = 0.5),
axis.title.y = element_text(face="bold", size=18),
axis.title.x = element_text(face="bold", size=18),
title = element_text(face="bold", size=18, hjust=0.5),
strip.text.x = element_text(face="bold", size = 14, colour = "black"),
axis.text.y = element_text(face="bold", size=14),
axis.text.x = element_text(face="bold", size=14, vjust=0.5,
hjust=0.5, angle=90))
Effect by Year
mic.d = as.data.frame(GModel.1$summary.random$YEAR)
names(mic.d) = c("YEAR", "Mean", "sd", "Q025", "Q50", "Q975", "mode", "kld")
ggplot(mic.d, aes(factor(YEAR), y=Mean)) +
geom_point(size=2, pch=19, col = "red") +
geom_linerange(aes(ymin=Q025, ymax=Q975), colour="black") +
geom_hline(yintercept = 0,
linetype = "solid",
col = "darkgray",
size = 0.5) +
theme_classic() +
xlab("Year") +
ylab("RASTERVALU (log)") +
ggtitle("Year Effect") +
theme(plot.title = element_text(hjust = 0.5),
axis.title.y = element_text(face="bold", size=18),
axis.title.x = element_text(face="bold", size=18),
title = element_text(face="bold", size=18, hjust=0.5),
strip.text.x = element_text(face="bold", size = 14, colour = "black"),
axis.text.y = element_text(face="bold", size=14),
axis.text.x = element_text(face="bold", size=14, vjust=0.5,
hjust=0.5, angle=90))
JDate Effect
mic.d = as.data.frame(GModel.1$summary.random$JDATE)
names(mic.d) = c("Day", "Mean", "sd", "Q025", "Q50", "Q975", "mode", "kld")
ggplot(mic.d, aes(Day, y=Mean)) +
geom_point(size=2, pch=19, col = "red") +
geom_linerange(aes(ymin=Q025, ymax=Q975), colour="black") +
geom_hline(yintercept = 0,
linetype = "solid",
col = "darkgray",
size = 0.5) +
scale_x_continuous(breaks = seq(64, 325, 5), labels=seq(64, 325, 5)) +
theme_classic() +
xlab("JDATE") +
ylab("RASTERVALU (log)") +
ggtitle("Year Effect") +
theme(plot.title = element_text(hjust = 0.5),
axis.title.y = element_text(face="bold", size=18),
axis.title.x = element_text(face="bold", size=18),
title = element_text(face="bold", size=18, hjust=0.5),
strip.text.x = element_text(face="bold", size = 14, colour = "black"),
axis.text.y = element_text(face="bold", size=14),
axis.text.x = element_text(face="bold", size=14, vjust=0.5,
hjust=0.5, angle=90))
Spatial Effect (Neighbor distance)
mic.d = as.data.frame(GModel.1$summary.random$`inla.group(NN, n = 20, method = "cut")`)
names(mic.d) = c("ID", "Mean", "sd", "Q025", "Q50", "Q975", "mode", "kld")
mic.d$ID = mic.d$ID*100 #Back to original scale, meters
ggplot(mic.d, aes(ID, Mean), lab) +
geom_smooth(col = "black",
linetype= "solid",
method = "loess",
span = 0.9,
se = FALSE,
lwd = 1) +
geom_smooth(data = mic.d, aes(ID, Q025),
col = "grey",
method = "loess",
span = 0.9,
se = FALSE,
linetype= "dashed") +
geom_smooth(data = mic.d, aes(ID, Q975),
col = "grey",
method = "loess",
span = 0.9,
se = FALSE,
linetype= "dashed") +
geom_hline(yintercept = 0,
linetype = "solid",
col = "darkgray",
size = 0.5) +
ylab("RASTERVALU (log)") +
xlab("Distance (meters)") +
ggtitle("Spatial Effect") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5),
title = element_text(face="bold", size=18, hjust=0.5),
axis.text=element_text(size=16),
strip.text = element_text(face="bold", size = 20),
axis.title.y = element_text(face="bold", size = 20),
axis.text.x = element_text(face="bold", size=14, vjust=0.5, angle=90),
axis.title.x = element_text(face="bold", size = 20))
Select records from each marten.
#Testing
set.seed(1976)
test.data = as.data.frame(
Marten.pnts@data %>%
group_by(id.x) %>%
sample_frac(0.20, replace=FALSE))
dim(test.data)
## [1] 1686 77
#Training set 80%
train.data = anti_join(Marten.pnts@data, test.data)
## Joining, by = c("Field1", "FID_1", "GMT_Time", "Duration.x", "DOP.x", "Satellites.x", "id.x", "Longitude.x", "Latitude.x", "RASTERVALU", "Area", "Area_MC90", "Area_HO90", "Area_YC90", "Area_HO120", "Area_YC120", "Area_MC120", "Area_HO150", "Area_YC150", "Area_MC150", "Dist_River", "Dist_Paved", "Dist_Unpav", "Area_AC90", "Area_AH90", "Area_AM90", "Area_AC120", "Area_AH120", "Area_AM120", "Area_AC150", "Area_AH150", "Area_AM150", "Area_YH90", "Area_MH90", "Area_MH120", "Area_YH120", "Area_MH150", "Area_YH150", "GMT.Time", "Duration.y", "DOP.y", "Satellites.y", "id.y", "Longitude.y", "Latitude.y", "moda", "jdate", "year", "s_Area_MC90", "s_Area_HO90", "s_Area_YC90", "s_Area_HO120", "s_Area_YC120", "s_Area_MC120", "s_Area_HO150", "s_Area_YC150", "s_Area_MC150", "s_Dist_River", "s_Dist_Paved", "s_Dist_Unpav", "s_Area_AC90", "s_Area_AH90", "s_Area_AM90", "s_Area_AC120", "s_Area_AH120", "s_Area_AM120", "s_Area_AC150", "s_Area_AH150", "s_Area_AM150", "s_Area_YH90", "s_Area_MH90", "s_Area_MH120", "s_Area_YH120", "s_Area_MH150", "s_Area_YH150", "NN", "NNs")
dim(train.data)
## [1] 6744 77
Organize training data to fit the predictive model. Using 80 percent to create a data set called Train.stk.
Train.lst = list(list(intercept1 = rep(1, dim(train.data)[1])),
list(s_Area_AC150 = train.data[,"s_Area_AC120"],
s_Area_AH120 = train.data[,"s_Area_AH120"],
s_Area_AM150 = train.data[,"s_Area_AM150"],
s_Area_HO150 = train.data[,"s_Area_HO150"],
s_Area_YC120 = train.data[,"s_Area_YC120"],
s_Area_YH120 = train.data[,"s_Area_YH120"],
s_Dist_Paved = train.data[,"s_Dist_Paved"],
s_Dist_River = train.data[,"s_Dist_River"],
s_Dist_Unpav = train.data[,"s_Dist_Unpav"],
JDATE = train.data[,"jdate"],
NN = round(train.data[,"NNs"],3),
ID = train.data[,"id.x"],
YEAR = train.data[,"year"]))
Train.stk = inla.stack(data = list(RASTERVALU = train.data$RASTERVALU),
A = list(1,1),
effects = Train.lst,
tag = "train.0")
Organize testing data to be predicted by trained model model. Using 20 percent to create a data set called Test.stk.
Test.lst = list(list(intercept1 = rep(1, dim(test.data)[1])),
list(s_Area_AC150 = test.data[,"s_Area_AC120"],
s_Area_AH120 = test.data[,"s_Area_AH120"],
s_Area_AM150 = test.data[,"s_Area_AM150"],
s_Area_HO150 = test.data[,"s_Area_HO150"],
s_Area_YC120 = test.data[,"s_Area_YC120"],
s_Area_YH120 = test.data[,"s_Area_YH120"],
s_Dist_Paved = test.data[,"s_Dist_Paved"],
s_Dist_River = test.data[,"s_Dist_River"],
s_Dist_Unpav = test.data[,"s_Dist_Unpav"],
JDATE = test.data[,"jdate"],
NN = round(test.data[,"NNs"],3),
ID = test.data[,"id.x"],
YEAR = test.data[,"year"]))
test.data$NA_RASTERVALU = NA #Remove the RASTERVALU in the testing set so it can't be used; these NA values will be predicted.
Test.stk = inla.stack(data = list(RASTERVALU = test.data$NA_RASTERVALU), #NA values to be predicted
A = list(1,1),
effects = Test.lst,
tag = "test.0") #This label will be used to pull results later
Join training and testing data to feed into model.
Validation.stk = inla.stack(Train.stk, Test.stk)
Run formula from best Global model from above.
nPrior = list(theta=list(prior = "normal", param=c(0, 5))) #flat prior
Formula = RASTERVALU ~ -1 + intercept1 + #response and intercept
f(ID, #grouping effect for individual marten
model="iid",
constr = TRUE,
hyper = nPrior) +
f(inla.group(NN, #distance as a smooth effect
n = 20, #
method = "cut"),
model="rw1",
constr = TRUE,
scale.model = TRUE,
hyper = nPrior ) +
f(JDATE,
model="iid",
constr = TRUE,
hyper = nPrior) +
f(YEAR,
model="iid",
constr = TRUE,
hyper = nPrior) +
s_Area_AC150 + s_Area_AH120 + s_Area_AM150 +
s_Area_HO150 + s_Area_YC120 + s_Area_YH120 +
s_Dist_Paved + s_Dist_River + s_Dist_Unpav
Validation.mod = inla(Formula,
data = inla.stack.data(Validation.stk), #Validation data: 80% to train, 20% to be predicted.
family = "gamma",
verbose = TRUE,
control.fixed = list(prec = 0.001,
prec.intercept = 0.0001),
control.predictor = list(
A = inla.stack.A(Validation.stk), #Need to provide the data twice
compute = TRUE,
link = 1),
control.inla = list(strategy="gaussian",
int.strategy = "eb"),
control.results = list(return.marginals.random = TRUE,
return.marginals.predictor = TRUE),
control.compute=list(dic = TRUE, cpo = TRUE, waic = TRUE))
Put predicted values in new column to compare to RASTERVALU
idat = inla.stack.index(Validation.stk, "test.0")$data #Index to identify testing data locations from full data set
test.data$Prediction = Validation.mod$summary.fitted.values$mean[idat] #Fitted values for tetsing data only
test.data$Low = Validation.mod$summary.fitted.values$`0.025quant`[idat]
test.data$High = Validation.mod$summary.fitted.values$`0.975quant`[idat]
Compare predicted to actual RASTERVALU
#Correlation test
cor.test(test.data$Prediction, test.data$RASTERVALU)
##
## Pearson's product-moment correlation
##
## data: test.data$Prediction and test.data$RASTERVALU
## t = 35.407, df = 1684, p-value < 2.2e-16
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## 0.6250124 0.6797989
## sample estimates:
## cor
## 0.65326
#Calculate error
test.data$Error = test.data$RASTERVALU - test.data$Prediction
#Root Mean Squared Error
rmse = function(error)
{
sqrt(mean(error^2))
}
rmse(test.data$Error)
## [1] 0.2255065
#Mean Absolute Error
mae = function(error)
{
mean(abs(error))
}
mae(test.data$Error)
## [1] 0.1525502
#Plot
range(test.data$RASTERVALU)
## [1] 0.0003245893 0.9887089729
range(test.data$Prediction)
## [1] 0.001138274 2.653417988
length(which(test.data$Prediction > 1)) # few high values excluded from plot
## [1] 19
ggplot(test.data, aes(RASTERVALU, Prediction)) +
geom_point(shape = 19,
col="lightgray",
size = 1) +
geom_smooth(col = "black",
linetype= "solid",
method = "lm",
se = FALSE,
lwd = 1) +
xlim(0,1) +
ylim(0,1) +
ylab("Actual") +
xlab("Predicted") +
ggtitle(" ") +
theme_classic() +
theme(plot.title = element_text(hjust = 0.5),
title = element_text(face="bold", size=18, hjust=0.5),
axis.text=element_text(size=16),
strip.text = element_text(face="bold", size = 20),
axis.title.y = element_text(face="bold", size = 20),
axis.text.x = element_text(face="bold", size=14, vjust=0.5, angle=90),
axis.title.x = element_text(face="bold", size = 20))
## Warning: Removed 19 rows containing non-finite values (stat_smooth).
## Warning: Removed 19 rows containing missing values (geom_point).