Connectivity Score Analysis

EIS_raw <- read.csv("EIS_raw.csv")

EIS_clean <- EIS_raw[,-(13:15)] %>% 
  mutate(CEAA_typ = factor(CEAA_typ)) %>% 
  mutate(province = factor(province)) %>% 
  mutate(proj_asso = factor(proj_asso)) %>% 
  mutate(type_road = factor(type_road)) %>% 
  mutate(material = factor(material)) %>% 
  mutate(prep_by = factor(prep_by)) %>% 
  select(site, score, sub_yr, sub_dur, num_lane, length_km, province, CEAA_typ, proj_asso, type_road, material, prep_by)

PCA_main <- prcomp(EIS_clean[, 2:6], scale = T) 
summary(PCA_main)

## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5
## Standard deviation     1.4948 1.1453 0.9512 0.68937 0.27197
## Proportion of Variance 0.4469 0.2624 0.1809 0.09505 0.01479
## Cumulative Proportion  0.4469 0.7092 0.8902 0.98521 1.00000

autoplot(PCA_main, data = EIS_clean, color = "CEAA_typ", main = "PCA Plot of Quantitative Predictors", loadings.label=TRUE, loadings = TRUE, loadings.label.size = 4, loadings.colour = "blue", label.size=5) +
  geom_text(vjust = -1, label = rownames(EIS_clean)) +
  theme_classic()

var_explained = data.frame(PC = paste0("PC",1:5),
                               var_explained=(PCA_main$sdev)^2/sum((PCA_main$sdev)^2))

ggplot(data = var_explained, aes(x=PC,y=var_explained, group=1))+
  geom_point(size=4)+
  geom_line()+
  labs(title="Screeplot of Quantitative Predictors", y = "Variation Explained") +
  theme_classic()

options(na.action = na.fail)

full_model <- lmer(log(score) ~ sub_yr + sub_dur + num_lane + length_km + (1|province) + (1|CEAA_typ) + (1|proj_asso) + (1|type_road) + (1|material) + (1|prep_by), data = EIS_clean)

fixed_only <- lm(log(score) ~ sub_yr + sub_dur + num_lane + length_km, data = EIS_clean)

reduced_model <- lmer(log(score) ~ (1|province) + (1|CEAA_typ) + (1|proj_asso) + (1|type_road) + (1|material) + (1|prep_by), data = EIS_clean)

## boundary (singular) fit: see help('isSingular')

LRT <- anova(full_model, fixed_only, reduced_model)

## refitting model(s) with ML (instead of REML)

LRT

## Data: EIS_clean
## Models:
## fixed_only: log(score) ~ sub_yr + sub_dur + num_lane + length_km
## reduced_model: log(score) ~ (1 | province) + (1 | CEAA_typ) + (1 | proj_asso) + (1 | type_road) + (1 | material) + (1 | prep_by)
## full_model: log(score) ~ sub_yr + sub_dur + num_lane + length_km + (1 | province) + (1 | CEAA_typ) + (1 | proj_asso) + (1 | type_road) + (1 | material) + (1 | prep_by)
##               npar     AIC     BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## fixed_only       6  4.7640  3.5146  3.6180  -7.2360                         
## reduced_model    8 14.9618 13.2959  0.5191  -1.0382  0.000  2          1    
## full_model      12 -2.2096 -4.7085 13.1048 -26.2096 25.171  4  4.647e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

dredge_full <- dredge(full_model, eval=TRUE,rank="AICc")

## Warning in dredge(full_model, eval = TRUE, rank = "AICc"): comparing models
## fitted by REML

## Fixed term is "(Intercept)"
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 3 negative eigenvalues

## boundary (singular) fit: see help('isSingular')

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge with max|grad| = 0.00213263 (tol = 0.002, component 1)

## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')
## boundary (singular) fit: see help('isSingular')

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues

## boundary (singular) fit: see help('isSingular')

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## unable to evaluate scaled gradient

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
## Model failed to converge: degenerate Hessian with 1 negative eigenvalues

dredge_full

## Global model call: lmer(formula = log(score) ~ sub_yr + sub_dur + num_lane + length_km + 
##     (1 | province) + (1 | CEAA_typ) + (1 | proj_asso) + (1 | 
##     type_road) + (1 | material) + (1 | prep_by), data = EIS_clean)
## ---
## Model selection table 
##        (Int)     lng_km num_lan   sub_dur   sub_yr df  logLik  AICc delta
## 1    -0.9071                                        8  -0.598 -30.8  0.00
## 3    -1.5610             0.1972                     9   0.009 -27.0  3.79
## 9    73.3100                              -0.03681  9  -2.623 -21.8  9.05
## 5    -0.8367                    -0.034500           9  -3.060 -20.9  9.92
## 11  257.9000             0.1979           -0.12890 10  -2.853 -18.3 12.51
## 7    -1.5210             0.1810  0.018590          10  -3.965 -16.1 14.73
## 2    -0.8561 -0.0017530                             9  -5.952 -15.1 15.71
## 13  114.1000                     0.002716 -0.05709 10  -5.055 -13.9 16.91
## 4    -1.6230  0.0008742  0.1976                    10  -5.422 -13.2 17.65
## 15  158.7000             0.1979  0.022380 -0.07962 11  -5.515 -11.0 19.83
## 10  150.5000 -0.0016330                   -0.07512 10  -7.557  -8.9 21.92
## 6    -0.8409 -0.0008856         -0.019780          10  -8.249  -7.5 23.30
## 12 -339.5000  0.0008726  0.1976            0.16780 11  -7.936  -6.1 24.67
## 8    -1.5140 -0.0017280  0.1960  0.021090          11  -8.828  -4.3 26.46
## 14  127.0000 -0.0017440         -0.006318 -0.06346 11  -9.842  -2.3 28.49
## 16  161.0000 -0.0010860  0.1632  0.016170 -0.08067 12 -10.514   0.5 31.26
##    weight
## 1   0.853
## 3   0.129
## 9   0.009
## 5   0.006
## 11  0.002
## 7   0.001
## 2   0.000
## 13  0.000
## 4   0.000
## 15  0.000
## 10  0.000
## 6   0.000
## 12  0.000
## 8   0.000
## 14  0.000
## 16  0.000
## Models ranked by AICc(x) 
## Random terms (all models): 
##   1 | province, 1 | CEAA_typ, 1 | proj_asso, 1 | type_road, 1 | material, 
##   1 | prep_by

anova(full_model)

## Analysis of Variance Table
##           npar   Sum Sq  Mean Sq F value
## sub_yr       1 0.010860 0.010860  0.3194
## sub_dur      1 0.000605 0.000605  0.0178
## num_lane     1 0.045640 0.045640  1.3424
## length_km    1 0.003290 0.003290  0.0968

rand_p_list <- plot_model(full_model, type = "re", show.values = TRUE)

random_p_list_clean <- lapply(rand_p_list[1:5], function(plot) {
  plot + theme(axis.title.x = element_blank(),
               axis.title.y = element_blank(),
               axis.text.x = element_blank(),
               axis.line = element_blank(),
               axis.ticks = element_blank(),
               plot.title = element_blank(),
               panel.border = element_blank(),
               panel.grid = element_blank())
})

last_plot <- lapply(rand_p_list[6], function(plot) {
  plot + theme(axis.title.x = element_blank(),
               axis.title.y = element_blank(),
               axis.line = element_blank(),
               axis.ticks = element_blank(),
               plot.title = element_blank(),
               panel.border = element_blank(),
               panel.grid = element_blank())

})

combined_list <- append(random_p_list_clean, last_plot)

combined_plot <- plot_grid(plotlist = combined_list, ncol = 1, align = "v", rel_heights = c(5,3,2,2,2,2.7))

title_for_combined_plot <- 
  ggdraw() +
  draw_label("Random Effects on Score")

combined_plot_w_title <- plot_grid(title_for_combined_plot, combined_plot, ncol = 1, rel_heights = c(0.05, 0.95))

combined_plot_w_title

my.plotcorr <- function (corr, outline = FALSE, col = "grey", upper.panel = c("ellipse", "number", "none"), lower.panel = c("ellipse", "number", "none"), diag = c("none", "ellipse", "number"), digits = 2, bty = "n", axes = FALSE, xlab = "", ylab = "", asp = 1, cex.lab = par("cex.lab"), cex = 0.75 * par("cex"), mar = 0.1 + c(2, 2, 4, 2), ...)
{
# this is a modified version of the plotcorr function from the ellipse package
# this prints numbers and ellipses on the same plot but upper.panel and lower.panel changes what is displayed
# diag now specifies what to put in the diagonal (numbers, ellipses, nothing)
# digits specifies the number of digits after the . to round to
# unlike the original, this function will always print x_i by x_i correlation rather than being able to drop it
# modified by Esteban Buz
  if (!require('ellipse', quietly = TRUE, character = TRUE)) {
    stop("Need the ellipse library")
  }
  savepar <- par(pty = "s", mar = mar)
  on.exit(par(savepar))
  if (is.null(corr))
    return(invisible())
  if ((!is.matrix(corr)) || (round(min(corr, na.rm = TRUE), 6) < -1) || (round(max(corr, na.rm = TRUE), 6) > 1))
    stop("Need a correlation matrix")
  plot.new()
  par(new = TRUE)
  rowdim <- dim(corr)[1]
  coldim <- dim(corr)[2]
  rowlabs <- dimnames(corr)[[1]]
  collabs <- dimnames(corr)[[2]]
  if (is.null(rowlabs))
    rowlabs <- 1:rowdim
  if (is.null(collabs))
    collabs <- 1:coldim
  rowlabs <- as.character(rowlabs)
  collabs <- as.character(collabs)
  col <- rep(col, length = length(corr))
  dim(col) <- dim(corr)
  upper.panel <- match.arg(upper.panel)
  lower.panel <- match.arg(lower.panel)
  diag <- match.arg(diag)
  cols <- 1:coldim
  rows <- 1:rowdim
  maxdim <- max(length(rows), length(cols))
  plt <- par("plt")
  xlabwidth <- max(strwidth(rowlabs[rows], units = "figure", cex = cex.lab))/(plt[2] - plt[1])
  xlabwidth <- xlabwidth * maxdim/(1 - xlabwidth)
  ylabwidth <- max(strwidth(collabs[cols], units = "figure", cex = cex.lab))/(plt[4] - plt[3])
  ylabwidth <- ylabwidth * maxdim/(1 - ylabwidth)
  plot(c(-xlabwidth - 0.5, maxdim + 0.5), c(0.5, maxdim + 1 + ylabwidth), type = "n", bty = bty, axes = axes, xlab = "", ylab = "", asp = asp, cex.lab = cex.lab, ...)
  text(rep(0, length(rows)), length(rows):1, labels = rowlabs[rows], adj = 1, cex = cex.lab)
  text(cols, rep(length(rows) + 1, length(cols)), labels = collabs[cols], srt = 90, adj = 0, cex = cex.lab)
  mtext(xlab, 1, 0)
  mtext(ylab, 2, 0)
  mat <- diag(c(1, 1))
  plotcorrInternal <- function() {
    if (i == j){ #diag behavior
      if (diag == 'none'){
        return()
      } else if (diag == 'number'){
        text(j + 0.3, length(rows) + 1 - i, round(corr[i, j], digits=digits), adj = 1, cex = cex)
      } else if (diag == 'ellipse') {
        mat[1, 2] <- corr[i, j]
        mat[2, 1] <- mat[1, 2]
        ell <- ellipse(mat, t = 0.43)
        ell[, 1] <- ell[, 1] + j
        ell[, 2] <- ell[, 2] + length(rows) + 1 - i
        polygon(ell, col = col[i, j])
        if (outline)
          lines(ell)
      }
    } else if (i >= j){ #lower half of plot
      if (lower.panel == 'ellipse') { #check if ellipses should go here
        mat[1, 2] <- corr[i, j]
        mat[2, 1] <- mat[1, 2]
        ell <- ellipse(mat, t = 0.43)
        ell[, 1] <- ell[, 1] + j
        ell[, 2] <- ell[, 2] + length(rows) + 1 - i
        polygon(ell, col = col[i, j])
        if (outline)
          lines(ell)
      } else if (lower.panel == 'number') { #check if ellipses should go here
        text(j + 0.3, length(rows) + 1 - i, round(corr[i, j], digits=digits), adj = 1, cex = cex)
      } else {
        return()
      }
    } else { #upper half of plot
      if (upper.panel == 'ellipse') { #check if ellipses should go here
        mat[1, 2] <- corr[i, j]
        mat[2, 1] <- mat[1, 2]
        ell <- ellipse(mat, t = 0.43)
        ell[, 1] <- ell[, 1] + j
        ell[, 2] <- ell[, 2] + length(rows) + 1 - i
        polygon(ell, col = col[i, j])
        if (outline)
          lines(ell)
      } else if (upper.panel == 'number') { #check if ellipses should go here
        text(j + 0.3, length(rows) + 1 - i, round(corr[i, j], digits=digits), adj = 1, cex = cex)
      } else {
        return()
      }
    }
  }
  for (i in 1:dim(corr)[1]) {
    for (j in 1:dim(corr)[2]) {
      plotcorrInternal()
    }
  }
  invisible()
}


full_corr <- cor(EIS_clean[2:6])

colnames(full_corr) = c("Score", "Submission Yr", "Duration", "# Lanes", "Length (km)")
rownames(full_corr) = colnames(full_corr)

colsc = c(rgb(241, 54, 23, maxColorValue=255), 'white', rgb(0, 61, 104, maxColorValue=255))

colramp = colorRampPalette(colsc, space='Lab')
 
colors = colramp(100)

my.plotcorr(full_corr, col=colors[((full_corr + 1)/2) * 100], diag='ellipse', upper.panel="number", main='Quantitative Predictor Correlations', mar=c(0,0,2,0))

## Warning: package 'ellipse' was built under R version 4.2.3

## 
## Attaching package: 'ellipse'

## The following object is masked from 'package:graphics':
## 
##     pairs

Connectivity Score Analysis - EEB428

Matthew Ho