Things that still need to be addressed

library(here)
library(dplyr)
library(reshape2)
library(ggplot2)
library(DescTools)
library(knitr)
library(dplyr)
library(lubridate)
library(pracma)
library(ordinal)
library(MASS)
library(ppcor)
library(lme4)
library(bbmle)
xy<-read.csv(here::here("allSitesDetailsCoordinates.csv"))
hab<-read.csv(here::here("allSitesHabitat.csv"))
area<-read.csv(here::here("siteAreaAndPerimeter.csv"))
temp<-read.csv(here::here("temperature.csv"))
garden<-read.csv(here::here("garden100m.csv"))
colnames(garden)[3]<-"Area_Garden"
open<-read.csv(here::here("openSpace100m.csv"))
colnames(open)[3]<-"Area_Open"

df<-readRDS("all_days_feature_extractions_2019_09_02.RDS")

df<-df %>% 
      dplyr::select(site, day, steepest_anth, steepest_bio, steepest_time_anth, steepest_time_bio, auc_anthro, auc_bio, n_peaks_anth, n_peaks_bio, auc_an_min_bio, peak_anth, peak_bio, peak_anth_time, peak_bio_time, steepest_dawn_anth, steepest_dawn_bio, auc_dawn_anthro, auc_dawn_bio, dawn_auc_an_min_bio, peak_dawn_anth, peak_dawn_bio, steepest_dusk_anth, steepest_dusk_bio, auc_dusk_anthro, auc_dusk_bio, dusk_auc_an_min_bio, peak_dusk_anth, peak_dusk_bio) %>% 
    distinct()

df_hab<-merge(df,hab, by.x = "site", by.y="Site" )
df_hab<-merge(df_hab, temp, by.x = "site", by.y = "Site")
df_hab<-merge(df_hab, xy[,c("SiteCode", "StartDate", "EndDate")], by.x = "site", by.y = "SiteCode")
df_hab<-merge(df_hab, open[,c(2,3)], by.x = "site", by.y = "SiteCode")
df_hab<-merge(df_hab, garden[,c(2,3)], by.x = "site", by.y = "SiteCode")

df_hab$n_peaks_anth[is.na(df_hab$n_peaks_anth)]<-0
df_hab$n_peaks_bio[is.na(df_hab$n_peaks_bio)]<-0

df_hab$tempRange<-df_hab$maxTemp - df_hab$minTemp

Remove start day and end day as these are incomplete days

df_hab$StartDate<-as.Date(df_hab$StartDate, format = "%d-%m-%y")
df_hab$EndDate<-as.Date(df_hab$EndDate, format = "%d-%m-%y")

df_hab<-df_hab %>% 
          group_by(site) %>% 
          filter(day != day(StartDate) & day != day(EndDate)) %>% 
          ungroup()
ggplot(df_hab, aes(x = Complexity, y = auc_an_min_bio, group = site,colour = site))+
  geom_jitter(width = 0.2)+ 
  theme(legend.position = "none")

ggplot(df_hab, aes(x = siteDiversity, y = auc_an_min_bio, colour = site))+
  geom_jitter(width = 0.2)+
  theme(legend.position = "none")

ggplot(df_hab, aes(x =Area_Open, y = auc_an_min_bio, colour = site))+
  geom_jitter(width = 0.2)+
  theme(legend.position = "none")

ggplot(df_hab, aes(x =Area_Garden, y = auc_an_min_bio, colour = site))+
  geom_jitter(width = 0.2)+
  theme(legend.position = "none")

ggplot(df_hab, aes(x =Area_Open+Area_Garden, y = auc_an_min_bio))+
  geom_smooth()+
  geom_jitter(width = 0.2)+
  theme(legend.position = "none")

ggplot(df_hab, aes(x =Area_Open+Area_Garden, y = peak_bio))+
  geom_smooth()+
  ylim(0,1)+
  geom_jitter(width = 0.2)+
  theme(legend.position = "none")

ggplot(df_hab, aes(x =Area_Open+Area_Garden, y = peak_anth))+
  geom_smooth()+
  ylim(0,1)+
  geom_jitter(width = 0.2)+
  theme(legend.position = "none")

plot(df_hab[,c(9:13,21,22,36,43)])

vars<-colnames(df_hab[,c(9:13,21,22,36,43)])

df_hab_sc<-scale(df_hab[,vars])

df_hab_sc<-data.frame(Complexity = df_hab$Complexity, siteDiversity = df_hab$siteDiversity, site = df_hab$site,day = df_hab$day,Area_Garden = df_hab$Area_Garden, Area_Open = df_hab$Area_Open, df_hab_sc)

Ordinal logistic regression

df_hab_sc$Complexity<-ordered(as.factor(df_hab_sc$Complexity), levels = c("0","1", "2", "3", "4"))


m1<-polr(Complexity ~ auc_an_min_bio + peak_bio + peak_anth, data  =  df_hab_sc)
m2<-polr(Complexity ~ peak_bio + peak_anth, data  =  df_hab_sc)
m3<-polr(Complexity ~ peak_bio, data  =  df_hab_sc)
m4<-polr(Complexity ~ peak_anth, data  =  df_hab_sc)
m5<-polr(Complexity ~ auc_an_min_bio, data  =  df_hab_sc)

AIC(m1,m2,m3,m4,m5)
##    df      AIC
## m1  7 514.9236
## m2  6 514.9482
## m3  5 516.6499
## m4  5 514.1652
## m5  5 511.0033

Cumulative link mixed models from the ordinal package

df_hab_sc$site<-as.factor(df_hab_sc$site)
df_hab_sc$meanTemp_f<-as.factor(df_hab_sc$meanTemp)

glob_clm<-clmm(Complexity ~ n_peaks_anth + n_peaks_bio + auc_an_min_bio + peak_anth + peak_bio + peak_dawn_anth + peak_dawn_bio + (1|site), data = df_hab_sc)

summary(glob_clm)
## Cumulative Link Mixed Model fitted with the Laplace approximation
## 
## formula: 
## Complexity ~ n_peaks_anth + n_peaks_bio + auc_an_min_bio + peak_anth +  
##     peak_bio + peak_dawn_anth + peak_dawn_bio + (1 | site)
## data:    df_hab_sc
## 
##  link  threshold nobs logLik AIC   niter       max.grad cond.H
##  logit flexible  261  -21.59 67.17 1241(27815) 1.45e+02 NaN   
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  site   (Intercept) 13430    115.9   
## Number of groups:  site 45 
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)
## n_peaks_anth    0.11605         NA      NA       NA
## n_peaks_bio    -0.18960         NA      NA       NA
## auc_an_min_bio -0.38687         NA      NA       NA
## peak_anth      -0.25221         NA      NA       NA
## peak_bio        0.15131         NA      NA       NA
## peak_dawn_anth  0.34113         NA      NA       NA
## peak_dawn_bio  -0.06812         NA      NA       NA
## 
## Threshold coefficients:
##     Estimate Std. Error z value
## 0|1   -58.23         NA      NA
## 1|2   -36.96         NA      NA
## 2|3   -16.46         NA      NA
## 3|4    16.02         NA      NA
gc1<-update(glob_clm, ~ . -n_peaks_anth)
gc2<-update(gc1, ~ . -n_peaks_bio)
gc3<-update(gc2, ~ . -peak_anth)
gc4<-update(gc3, ~ . -peak_dawn_anth)
gc5<-update(gc4, ~ . -peak_dawn_bio)
gc6<-update(gc5, ~ . -peak_bio)

AIC(glob_clm, gc1, gc2, gc3, gc4, gc5, gc6)
##          df      AIC
## glob_clm 12 67.17094
## gc1      11 63.88252
## gc2      10 63.52190
## gc3       9 64.64833
## gc4       8 59.43398
## gc5       7 57.80092
## gc6       6 55.94991
# 
# summary(gc4)
# summary(gc5)
# summary(gc6)
global<-glmer(siteDiversity ~ n_peaks_anth + n_peaks_bio + auc_an_min_bio + peak_anth + peak_bio + peak_dawn_anth + peak_dawn_bio + (1|site) , data = df_hab_sc, family = poisson)

glmm1<-glmer(siteDiversity ~ auc_an_min_bio+peak_bio+peak_anth + (1|site), data  =  df_hab_sc, family = poisson)
glmm2<-glmer(siteDiversity ~ peak_bio + peak_anth + (1|site), data  =  df_hab_sc, family = poisson)
glmm3<-glmer(siteDiversity ~ peak_bio + (1|site), data  =  df_hab_sc, family = poisson)
glmm4<-glmer(siteDiversity ~ peak_anth + (1|site), data  =  df_hab_sc, family = poisson)
glmm5<-glmer(siteDiversity ~ auc_an_min_bio + (1|site), data  =  df_hab_sc, family = poisson)

AIC(global, glmm1,glmm2,glmm3,glmm4,glmm5)
##        df      AIC
## global  9 1692.893
## glmm1   5 1684.971
## glmm2   4 1683.286
## glmm3   3 1681.414
## glmm4   3 1681.306
## glmm5   3 1681.094
library(bbmle)

#df_hab_sc<-scale(df_hab[,vars])

df_hab_sc$siteDiversity <-as.numeric(df_hab_sc$siteDiversity )

global<-glmer(siteDiversity  ~ n_peaks_anth + n_peaks_bio + auc_an_min_bio + peak_anth + peak_bio + peak_dawn_anth + peak_dawn_bio + (1|site), data = df_hab_sc, family = poisson)

glmm1<-glmer(siteDiversity ~ auc_an_min_bio+peak_bio+peak_anth + (1|site), data  =  df_hab_sc, family = poisson)
glmm2<-glmer(siteDiversity ~ peak_bio + peak_anth + (1|site), data  =  df_hab_sc, family = poisson)
glmm3<-glmer(siteDiversity ~ peak_bio + (1|site), data  =  df_hab_sc, family = poisson)
glmm4<-glmer(siteDiversity ~ peak_anth + (1|site), data  =  df_hab_sc, family = poisson)
glmm5<-glmer(siteDiversity ~ auc_an_min_bio + (1|site), data  =  df_hab_sc, family = poisson)

AICtab(global, glmm1,glmm2,glmm3,glmm4,glmm5)
##        dAIC df
## glmm5   0.0 3 
## glmm4   0.2 3 
## glmm3   0.3 3 
## glmm2   2.2 4 
## glmm1   3.9 5 
## global 11.8 9

Area Garden

Here we use the beta distribution as the response variable is a proportion - value between 0 and 1.

library(glmmTMB)
library(betareg)
library(glmmADMB)

df_hab$Area_Garden_Prop<-df_hab$Area_Garden/(pi*(100^2))
df_hab_sc$Area_Garden_Prop<-df_hab_sc$Area_Garden/(pi*(100^2))


df_hab$Area_Open_Prop<-df_hab$Area_Open/(pi*(100^2))
df_hab_sc$Area_Open_Prop<-df_hab_sc$Area_Open/(pi*(100^2))

df_hab$Area_Open_Garden_Prop<-df_hab$Area_Open_Prop+ df_hab$Area_Garden_Prop
df_hab_sc$Area_Open_Garden_Prop<-df_hab_sc$Area_Open_Prop+ df_hab_sc$Area_Garden_Prop

global_garden_admb<-glmmadmb(Area_Open_Garden_Prop ~   meanTemp +auc_an_min_bio + (1|site),  data = df_hab_sc, family="beta", link = "logit")

g1<-glmmadmb(Area_Open_Garden_Prop ~  auc_an_min_bio + (1|site),  data = df_hab_sc, family="beta")

gnull<-glmmadmb(Area_Open_Garden_Prop ~  meanTemp + (1|site),  data = df_hab_sc, family="beta")

AICtab(global_garden_admb, g1, gnull)
##                    dAIC df
## g1                 0.0  4 
## gnull              1.2  4 
## global_garden_admb 2.0  5
exp(cbind(OR = coef(g1), confint(g1)))
##                       OR     2.5 %   97.5 %
## (Intercept)    1.0702469 0.8215450 1.394237
## auc_an_min_bio 0.9472734 0.8615721 1.041499
exp(cbind(OR = coef(global_garden_admb), confint(global_garden_admb)))
##                       OR     2.5 %   97.5 %
## (Intercept)    1.0684402 0.8197724 1.392538
## meanTemp       1.0256206 0.7889417 1.333302
## auc_an_min_bio 0.9472105 0.8615150 1.041430

#global_garden_tmb<-glmmTMB(Area_Open_Garden_Prop ~ meanTemp + auc_an_min_bio , data = df_hab_sc, family = list(family = “beta”, link = “logit”))

b1<-betareg(Area_Open_Garden_Prop ~ auc_an_min_bio +meanTemp, data = df_hab_sc, link = "logit")
b2<-betareg(Area_Open_Garden_Prop ~ auc_an_min_bio , data = df_hab_sc, link = "logit")
bnull<-betareg(Area_Open_Garden_Prop ~ meanTemp , data = df_hab_sc, link = "logit")


AICtab(b1, b2, bnull)
##       dAIC df
## b2     0.0 3 
## b1     2.0 4 
## bnull 76.4 3
exp(cbind(OR = coef(b2), confint(b2)))
##                          OR       2.5 %       97.5 %
## (Intercept)       1.0866832   1.0012290 1.179431e+00
## auc_an_min_bio    0.6752435   0.6209519 7.342818e-01
## (phi)          2833.5387623 780.0411911 1.029297e+04
ran_gg<-as.data.frame(ranef(global_garden_admb))

ran_gg<-as.data.frame(ranef(g1))

ran_gg$site<-row.names(ran_gg)

ran_gg %>% 
  arrange(X.Intercept.)
##    X.Intercept.    site
## 1   -1.89392814 SW1W0QP
## 2   -1.84739539 WC2H8LG
## 3   -1.60705147  NW10TA
## 4   -1.39616323   W84LA
## 5   -1.23677727  CR01SG
## 6   -1.04504426  W112NN
## 7   -0.98116338 SW112PN
## 8   -0.58305085   E29RR
## 9   -0.56966930     CR0
## 10  -0.46897784     SE6
## 11  -0.45885810  NW33RY
## 12  -0.45649080   W42PH
## 13  -0.40453676 SW154JY
## 14  -0.32154583  RM25EL
## 15  -0.30713423  RM41LD
## 16  -0.30479404     NW1
## 17  -0.29593926  E140EY
## 18  -0.28249446   E47EN
## 19  -0.24505140  SE84EA
## 20  -0.14059216 SE116DN
## 21  -0.07031144  NW32BZ
## 22  -0.02994089  TW76ER
## 23   0.01883176  NW23SH
## 24   0.04805794   N41ES
## 25   0.05959987     CR8
## 26   0.08185608  TW76BE
## 27   0.09483646 SE154EE
## 28   0.16665050  E105JP
## 29   0.21277665  SE41SA
## 30   0.24510562     SE3
## 31   0.33160232  HA53AA
## 32   0.33584900 RM143YB
## 33   0.35363084  RM41PL
## 34   0.37051816  BR67US
## 35   0.57227152   W55EQ
## 36   0.66528280 SE232NZ
## 37   0.71704710  CR05EF
## 38   0.74936703 TN147QB
## 39   0.77721440  BR20EG
## 40   0.82593374 SW154LA
## 41   1.07684917   E10NR
## 42   1.19412978  BR28LB
## 43   1.47016386     BR4
## 44   1.83736600  HA86RB
## 45   2.73621610  SE64PL
df<-unique(merge(ran_gg, df_hab_sc[,c("site", "Area_Open_Garden_Prop")], by = "site"))



plot(df$X.Intercept., df$Area_Open_Garden_Prop)

# dotplot()
# 
# r.squaredGLMM(global_garden)

#Open and Garden together

df_hab_sc$meanTemp_fac<-as.factor(df_hab_sc$meanTemp)

global_garden_rt<-glmmadmb(Area_Garden_Prop ~   auc_an_min_bio +(1|meanTemp_fac) ,  data = df_hab_sc, family="beta")


summary(global_garden_rt)
## 
## Call:
## glmmadmb(formula = Area_Garden_Prop ~ auc_an_min_bio + (1 | meanTemp_fac), 
##     data = df_hab_sc, family = "beta")
## 
## AIC: -558.3 
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)     -1.7084     0.1525  -11.20  < 2e-16 ***
## auc_an_min_bio  -0.3296     0.0726   -4.54  5.7e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Number of observations: total=261, meanTemp_fac=34 
## Random effect variance(s):
## Group=meanTemp_fac
##             Variance StdDev
## (Intercept)   0.7129 0.8443
## 
## Beta dispersion parameter: 16.837 (std. err.: 1.6157)
## 
## Log-likelihood: 283.167
glmm1<-glmer(siteDiversity ~ auc_an_min_bio+peak_bio+peak_anth + (1|site), data  =  df_hab_sc, family = poisson)
glmm2<-glmer(siteDiversity ~ peak_bio + peak_anth + (1|site)+ (1|meanTemp)+ (1|tempRange), data  =  df_hab_sc, family = poisson)
glmm3<-glmer(siteDiversity ~ peak_bio + (1|site)+ (1|meanTemp)+ (1|tempRange), data  =  df_hab_sc, family = poisson)
glmm4<-glmer(siteDiversity ~ peak_anth + (1|site)+ (1|meanTemp)+ (1|tempRange), data  =  df_hab_sc, family = poisson)
glmm5<-glmer(siteDiversity ~ auc_an_min_bio + (1|site)+ (1|meanTemp)+ (1|tempRange), data  =  df_hab_sc, family = poisson)

AIC(global, glmm1,glmm2,glmm3,glmm4,glmm5)
##        df      AIC
## global  9 1692.893
## glmm1   5 1684.971
## glmm2   6 1687.180
## glmm3   5 1685.304
## glmm4   5 1685.197
## glmm5   5 1684.984
df_hab_sc %>% 
  group_by(site) %>% 
  summarise(mean_auc_diff = mean(auc_an_min_bio), mean_area_green = mean(Area_Open_Garden_Prop)) %>% 
  ggplot(aes(x = mean_area_green, y = mean_auc_diff))+
  geom_point()+
  geom_smooth()

sum_df_hab_sc<-df_hab_sc %>% 
  group_by(site) %>% 
  summarise(mean_auc_diff = mean(auc_an_min_bio), mean_area_green = mean(Area_Open_Garden_Prop))


bmean<-betareg(sum_df_hab_sc$mean_area_green ~  sum_df_hab_sc$mean_auc_diff )


summary(bmean)
## 
## Call:
## betareg(formula = sum_df_hab_sc$mean_area_green ~ sum_df_hab_sc$mean_auc_diff)
## 
## Standardized weighted residuals 2:
##     Min      1Q  Median      3Q     Max 
## -1.9716 -0.6436 -0.0762  0.4321  2.5833 
## 
## Coefficients (mean model with logit link):
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                  0.08729    0.10362   0.842 0.399533    
## sum_df_hab_sc$mean_auc_diff -0.41304    0.10970  -3.765 0.000166 ***
## 
## Phi coefficients (precision model with identity link):
##       Estimate Std. Error z value Pr(>|z|)    
## (phi)    7.484      1.488   5.031 4.89e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
## 
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 17.36 on 3 Df
## Pseudo R-squared: 0.2433
## Number of iterations: 13 (BFGS) + 2 (Fisher scoring)
exp(cbind(OR = coef(bmean), confint(bmean)))
##                                       OR      2.5 %       97.5 %
## (Intercept)                    1.0912185  0.8906571 1.336943e+00
## sum_df_hab_sc$mean_auc_diff    0.6616375  0.5336343 8.203449e-01
## (phi)                       1780.2127413 96.3992012 3.287535e+04