Exercise. My budget can no longer support the costs of monitoring a full network. I can reduce costs by limiting the number of plots, the sizes of plots, or both. For this study it is important that the network can provide estimates of the effects of predictors on tree abundance. Using the FIA aggregated data as a model, determine the impact of sample size \(n\) versus effort \(E\) (plot area) on estimates of effects from standAge, meanTemp, and annualPrec in xdataCluster for a species of choice.

I might do this:

Here is the full fitted model:

load( 'xdataCluster.rdata' )
load( 'densPlus15cluster.rdata' )
y     <- round( densPlus15cluster[,'liriodenTulipife'] )
xdata <- data.frame( xdataCluster )
full  <- glm( y ~ standAge + meanTemp + moist + annualDef, data = xdata, family = poisson )

Here is the effect of changing \(E\) and \(n\), for the same \(S = nE\) (fit2 versus fit3) and for different \(S\) (fit1 versus the other two fits):

fraction <- .01

form <- as.formula( z ~ standAge + meanTemp + moist + annualDef )

# sample a fraction of the plots
n      <- length(y)
w      <- sort( sample( n, n*fraction) )     # S = n*E will be the same as for next model
xsmall <- xdata[w, ]
z      <- y[w]
smalln <- glm( form, data = xsmall, family = poisson)

# half the plot size (few stems per plot)
z      <- rbinom( n, y, fraction )           # each plot has a fraction of the no. trees
smallE <- glm( form, data = xdata, family = poisson,
               offset = rep(log(fraction), n) )

fit1 <- summary( full )$coefficients[-1,1:2]
fit2 <- summary( smalln )$coefficients[-1,1:2]
fit3 <- summary( smallE )$coefficients[-1,1:2]

cint <- c(-1.96,-1, 0, 1, 1.96)           # confidence
cint <- matrix( cint, nrow(fit1), length(cint), byrow=T)

xsd <- apply(xdata, 2, sd, na.rm = T)     # parameters on proportionate scale due to log link
## Warning in var(if (is.vector(x) || is.factor(x)) x else as.double(x), na.rm =
## na.rm): NAs introduced by coercion
st1 <- fit1*xsd[rownames(fit1)]           
st2 <- fit2*xsd[rownames(fit2)]
st3 <- fit3*xsd[rownames(fit3)]

s1   <- st1[,1] + cint*st1[,2] 
s2   <- st2[,1] + cint*st2[,2] 
s3   <- st3[,1] + cint*st3[,2] 
colnames(s1) <- colnames(s2) <- colnames(s3) <- cint[1,]
rownames(s1) <- rownames(s2) <- rownames(s3) <- rownames(st1)
stats <- cbind( t(s1), t(s2), t(s3) )
s     <- list( stats = stats, group = 1:ncol(stats), names = colnames(stats) )

# four parameters, each with it's own color
lc <- rep( c('#1c9099', '#636363', '#f03b20', 'black'), 3 )
fc <- rep( c('#a6bddb', '#bdbdbd', '#feb24c', 'grey'), 3 )

par(mfrow=c(1,1.2), bty = 'n')
plot( NULL, xlab = 'Standardized estimates', ylab = '',
      xlim = c(-2,2), ylim = c(0, 12), yaxt = 'n')
abline(v = stats[3,1:4], lty=2, lwd = 2, col = fc[1:4])  # median parameter estimate, full model

bxp( s, outline=FALSE, horizontal = T,
     pars = list(medcol = lc, staplecol = lc, boxcol = lc, boxfill = fc,
                 whiskcol = lc, boxwex = .5, yaxt = 'n'), add = T )

abline(h = c(4, 8, 12)+.5, lty=2, col = 'grey' )         # separate, label three models
par( xpd = F )
text( rep(max(stats), 3), c(4, 8, 12) - 2, c('full (big S)', 'smalln', 'smallE'), pos=2 )

legend( 'bottomleft', legend = colnames(stats)[4:1], text.col = lc[4:1], 
        cex = .8, bty='n')

par( xpd = T )

Note that the widths of parameter quantiles are similar for the upper two fits, which have the same \(S\), but different \(n\) and \(E\).