Things that still need to be addressed
library(here)
library(dplyr)
library(reshape2)
library(ggplot2)
library(DescTools)
library(knitr)
library(dplyr)
library(lubridate)
library(pracma)
library(ordinal)
library(MASS)
library(ppcor)
library(lme4)
library(bbmle)
xy<-read.csv(here::here("allSitesDetailsCoordinates.csv"))
hab<-read.csv(here::here("allSitesHabitat.csv"))
area<-read.csv(here::here("siteAreaAndPerimeter.csv"))
temp<-read.csv(here::here("temperature.csv"))
garden<-read.csv(here::here("garden100m.csv"))
colnames(garden)[3]<-"Area_Garden"
open<-read.csv(here::here("openSpace100m.csv"))
colnames(open)[3]<-"Area_Open"
df<-readRDS("all_days_feature_extractions_2019_09_02.RDS")
df<-df %>%
dplyr::select(site, day, steepest_anth, steepest_bio, steepest_time_anth, steepest_time_bio, auc_anthro, auc_bio, n_peaks_anth, n_peaks_bio, auc_an_min_bio, peak_anth, peak_bio, peak_anth_time, peak_bio_time, steepest_dawn_anth, steepest_dawn_bio, auc_dawn_anthro, auc_dawn_bio, dawn_auc_an_min_bio, peak_dawn_anth, peak_dawn_bio, steepest_dusk_anth, steepest_dusk_bio, auc_dusk_anthro, auc_dusk_bio, dusk_auc_an_min_bio, peak_dusk_anth, peak_dusk_bio) %>%
distinct()
df_hab<-merge(df,hab, by.x = "site", by.y="Site" )
df_hab<-merge(df_hab, temp, by.x = "site", by.y = "Site")
df_hab<-merge(df_hab, xy[,c("SiteCode", "StartDate", "EndDate")], by.x = "site", by.y = "SiteCode")
df_hab<-merge(df_hab, open[,c(2,3)], by.x = "site", by.y = "SiteCode")
df_hab<-merge(df_hab, garden[,c(2,3)], by.x = "site", by.y = "SiteCode")
df_hab$n_peaks_anth[is.na(df_hab$n_peaks_anth)]<-0
df_hab$n_peaks_bio[is.na(df_hab$n_peaks_bio)]<-0
df_hab$tempRange<-df_hab$maxTemp - df_hab$minTemp
Remove start day and end day as these are incomplete days
df_hab$StartDate<-as.Date(df_hab$StartDate, format = "%d-%m-%y")
df_hab$EndDate<-as.Date(df_hab$EndDate, format = "%d-%m-%y")
df_hab<-df_hab %>%
group_by(site) %>%
filter(day != day(StartDate) & day != day(EndDate)) %>%
ungroup()
ggplot(df_hab, aes(x = Complexity, y = auc_an_min_bio, group = site,colour = site))+
geom_jitter(width = 0.2)+
theme(legend.position = "none")
ggplot(df_hab, aes(x = siteDiversity, y = auc_an_min_bio, colour = site))+
geom_jitter(width = 0.2)+
theme(legend.position = "none")
ggplot(df_hab, aes(x =Area_Open, y = auc_an_min_bio, colour = site))+
geom_jitter(width = 0.2)+
theme(legend.position = "none")
ggplot(df_hab, aes(x =Area_Garden, y = auc_an_min_bio, colour = site))+
geom_jitter(width = 0.2)+
theme(legend.position = "none")
ggplot(df_hab, aes(x =Area_Open+Area_Garden, y = auc_an_min_bio))+
geom_smooth()+
geom_jitter(width = 0.2)+
theme(legend.position = "none")
ggplot(df_hab, aes(x =Area_Open+Area_Garden, y = peak_bio))+
geom_smooth()+
ylim(0,1)+
geom_jitter(width = 0.2)+
theme(legend.position = "none")
ggplot(df_hab, aes(x =Area_Open+Area_Garden, y = peak_anth))+
geom_smooth()+
ylim(0,1)+
geom_jitter(width = 0.2)+
theme(legend.position = "none")
plot(df_hab[,c(9:13,21,22,36,43)])
vars<-colnames(df_hab[,c(9:13,21,22,36,43)])
df_hab_sc<-scale(df_hab[,vars])
df_hab_sc<-data.frame(Complexity = df_hab$Complexity, siteDiversity = df_hab$siteDiversity, site = df_hab$site,day = df_hab$day,Area_Garden = df_hab$Area_Garden, Area_Open = df_hab$Area_Open, df_hab_sc)
Ordinal logistic regression
df_hab_sc$Complexity<-ordered(as.factor(df_hab_sc$Complexity), levels = c("0","1", "2", "3", "4"))
m1<-polr(Complexity ~ auc_an_min_bio + peak_bio + peak_anth, data = df_hab_sc)
m2<-polr(Complexity ~ peak_bio + peak_anth, data = df_hab_sc)
m3<-polr(Complexity ~ peak_bio, data = df_hab_sc)
m4<-polr(Complexity ~ peak_anth, data = df_hab_sc)
m5<-polr(Complexity ~ auc_an_min_bio, data = df_hab_sc)
AIC(m1,m2,m3,m4,m5)
## df AIC
## m1 7 514.9236
## m2 6 514.9482
## m3 5 516.6499
## m4 5 514.1652
## m5 5 511.0033
Cumulative link mixed models from the ordinal package
df_hab_sc$site<-as.factor(df_hab_sc$site)
df_hab_sc$meanTemp_f<-as.factor(df_hab_sc$meanTemp)
glob_clm<-clmm(Complexity ~ n_peaks_anth + n_peaks_bio + auc_an_min_bio + peak_anth + peak_bio + peak_dawn_anth + peak_dawn_bio + (1|site), data = df_hab_sc)
summary(glob_clm)
## Cumulative Link Mixed Model fitted with the Laplace approximation
##
## formula:
## Complexity ~ n_peaks_anth + n_peaks_bio + auc_an_min_bio + peak_anth +
## peak_bio + peak_dawn_anth + peak_dawn_bio + (1 | site)
## data: df_hab_sc
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 261 -21.59 67.17 1241(27815) 1.45e+02 NaN
##
## Random effects:
## Groups Name Variance Std.Dev.
## site (Intercept) 13430 115.9
## Number of groups: site 45
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## n_peaks_anth 0.11605 NA NA NA
## n_peaks_bio -0.18960 NA NA NA
## auc_an_min_bio -0.38687 NA NA NA
## peak_anth -0.25221 NA NA NA
## peak_bio 0.15131 NA NA NA
## peak_dawn_anth 0.34113 NA NA NA
## peak_dawn_bio -0.06812 NA NA NA
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0|1 -58.23 NA NA
## 1|2 -36.96 NA NA
## 2|3 -16.46 NA NA
## 3|4 16.02 NA NA
gc1<-update(glob_clm, ~ . -n_peaks_anth)
gc2<-update(gc1, ~ . -n_peaks_bio)
gc3<-update(gc2, ~ . -peak_anth)
gc4<-update(gc3, ~ . -peak_dawn_anth)
gc5<-update(gc4, ~ . -peak_dawn_bio)
gc6<-update(gc5, ~ . -peak_bio)
AIC(glob_clm, gc1, gc2, gc3, gc4, gc5, gc6)
## df AIC
## glob_clm 12 67.17094
## gc1 11 63.88252
## gc2 10 63.52190
## gc3 9 64.64833
## gc4 8 59.43398
## gc5 7 57.80092
## gc6 6 55.94991
#
# summary(gc4)
# summary(gc5)
# summary(gc6)
global<-glmer(siteDiversity ~ n_peaks_anth + n_peaks_bio + auc_an_min_bio + peak_anth + peak_bio + peak_dawn_anth + peak_dawn_bio + (1|site) , data = df_hab_sc, family = poisson)
glmm1<-glmer(siteDiversity ~ auc_an_min_bio+peak_bio+peak_anth + (1|site), data = df_hab_sc, family = poisson)
glmm2<-glmer(siteDiversity ~ peak_bio + peak_anth + (1|site), data = df_hab_sc, family = poisson)
glmm3<-glmer(siteDiversity ~ peak_bio + (1|site), data = df_hab_sc, family = poisson)
glmm4<-glmer(siteDiversity ~ peak_anth + (1|site), data = df_hab_sc, family = poisson)
glmm5<-glmer(siteDiversity ~ auc_an_min_bio + (1|site), data = df_hab_sc, family = poisson)
AIC(global, glmm1,glmm2,glmm3,glmm4,glmm5)
## df AIC
## global 9 1692.893
## glmm1 5 1684.971
## glmm2 4 1683.286
## glmm3 3 1681.414
## glmm4 3 1681.306
## glmm5 3 1681.094
library(bbmle)
#df_hab_sc<-scale(df_hab[,vars])
df_hab_sc$siteDiversity <-as.numeric(df_hab_sc$siteDiversity )
global<-glmer(siteDiversity ~ n_peaks_anth + n_peaks_bio + auc_an_min_bio + peak_anth + peak_bio + peak_dawn_anth + peak_dawn_bio + (1|site), data = df_hab_sc, family = poisson)
glmm1<-glmer(siteDiversity ~ auc_an_min_bio+peak_bio+peak_anth + (1|site), data = df_hab_sc, family = poisson)
glmm2<-glmer(siteDiversity ~ peak_bio + peak_anth + (1|site), data = df_hab_sc, family = poisson)
glmm3<-glmer(siteDiversity ~ peak_bio + (1|site), data = df_hab_sc, family = poisson)
glmm4<-glmer(siteDiversity ~ peak_anth + (1|site), data = df_hab_sc, family = poisson)
glmm5<-glmer(siteDiversity ~ auc_an_min_bio + (1|site), data = df_hab_sc, family = poisson)
AICtab(global, glmm1,glmm2,glmm3,glmm4,glmm5)
## dAIC df
## glmm5 0.0 3
## glmm4 0.2 3
## glmm3 0.3 3
## glmm2 2.2 4
## glmm1 3.9 5
## global 11.8 9
Area Garden
Here we use the beta distribution as the response variable is a proportion - value between 0 and 1.
library(glmmTMB)
library(betareg)
library(glmmADMB)
df_hab$Area_Garden_Prop<-df_hab$Area_Garden/(pi*(100^2))
df_hab_sc$Area_Garden_Prop<-df_hab_sc$Area_Garden/(pi*(100^2))
df_hab$Area_Open_Prop<-df_hab$Area_Open/(pi*(100^2))
df_hab_sc$Area_Open_Prop<-df_hab_sc$Area_Open/(pi*(100^2))
df_hab$Area_Open_Garden_Prop<-df_hab$Area_Open_Prop+ df_hab$Area_Garden_Prop
df_hab_sc$Area_Open_Garden_Prop<-df_hab_sc$Area_Open_Prop+ df_hab_sc$Area_Garden_Prop
global_garden_admb<-glmmadmb(Area_Open_Garden_Prop ~ meanTemp +auc_an_min_bio + (1|site), data = df_hab_sc, family="beta", link = "logit")
g1<-glmmadmb(Area_Open_Garden_Prop ~ auc_an_min_bio + (1|site), data = df_hab_sc, family="beta")
gnull<-glmmadmb(Area_Open_Garden_Prop ~ meanTemp + (1|site), data = df_hab_sc, family="beta")
AICtab(global_garden_admb, g1, gnull)
## dAIC df
## g1 0.0 4
## gnull 1.2 4
## global_garden_admb 2.0 5
exp(cbind(OR = coef(g1), confint(g1)))
## OR 2.5 % 97.5 %
## (Intercept) 1.0702469 0.8215450 1.394237
## auc_an_min_bio 0.9472734 0.8615721 1.041499
exp(cbind(OR = coef(global_garden_admb), confint(global_garden_admb)))
## OR 2.5 % 97.5 %
## (Intercept) 1.0684402 0.8197724 1.392538
## meanTemp 1.0256206 0.7889417 1.333302
## auc_an_min_bio 0.9472105 0.8615150 1.041430
#global_garden_tmb<-glmmTMB(Area_Open_Garden_Prop ~ meanTemp + auc_an_min_bio , data = df_hab_sc, family = list(family = “beta”, link = “logit”))
b1<-betareg(Area_Open_Garden_Prop ~ auc_an_min_bio +meanTemp, data = df_hab_sc, link = "logit")
b2<-betareg(Area_Open_Garden_Prop ~ auc_an_min_bio , data = df_hab_sc, link = "logit")
bnull<-betareg(Area_Open_Garden_Prop ~ meanTemp , data = df_hab_sc, link = "logit")
AICtab(b1, b2, bnull)
## dAIC df
## b2 0.0 3
## b1 2.0 4
## bnull 76.4 3
exp(cbind(OR = coef(b2), confint(b2)))
## OR 2.5 % 97.5 %
## (Intercept) 1.0866832 1.0012290 1.179431e+00
## auc_an_min_bio 0.6752435 0.6209519 7.342818e-01
## (phi) 2833.5387623 780.0411911 1.029297e+04
ran_gg<-as.data.frame(ranef(global_garden_admb))
ran_gg<-as.data.frame(ranef(g1))
ran_gg$site<-row.names(ran_gg)
ran_gg %>%
arrange(X.Intercept.)
## X.Intercept. site
## 1 -1.89392814 SW1W0QP
## 2 -1.84739539 WC2H8LG
## 3 -1.60705147 NW10TA
## 4 -1.39616323 W84LA
## 5 -1.23677727 CR01SG
## 6 -1.04504426 W112NN
## 7 -0.98116338 SW112PN
## 8 -0.58305085 E29RR
## 9 -0.56966930 CR0
## 10 -0.46897784 SE6
## 11 -0.45885810 NW33RY
## 12 -0.45649080 W42PH
## 13 -0.40453676 SW154JY
## 14 -0.32154583 RM25EL
## 15 -0.30713423 RM41LD
## 16 -0.30479404 NW1
## 17 -0.29593926 E140EY
## 18 -0.28249446 E47EN
## 19 -0.24505140 SE84EA
## 20 -0.14059216 SE116DN
## 21 -0.07031144 NW32BZ
## 22 -0.02994089 TW76ER
## 23 0.01883176 NW23SH
## 24 0.04805794 N41ES
## 25 0.05959987 CR8
## 26 0.08185608 TW76BE
## 27 0.09483646 SE154EE
## 28 0.16665050 E105JP
## 29 0.21277665 SE41SA
## 30 0.24510562 SE3
## 31 0.33160232 HA53AA
## 32 0.33584900 RM143YB
## 33 0.35363084 RM41PL
## 34 0.37051816 BR67US
## 35 0.57227152 W55EQ
## 36 0.66528280 SE232NZ
## 37 0.71704710 CR05EF
## 38 0.74936703 TN147QB
## 39 0.77721440 BR20EG
## 40 0.82593374 SW154LA
## 41 1.07684917 E10NR
## 42 1.19412978 BR28LB
## 43 1.47016386 BR4
## 44 1.83736600 HA86RB
## 45 2.73621610 SE64PL
df<-unique(merge(ran_gg, df_hab_sc[,c("site", "Area_Open_Garden_Prop")], by = "site"))
plot(df$X.Intercept., df$Area_Open_Garden_Prop)
# dotplot()
#
# r.squaredGLMM(global_garden)
#Open and Garden together
df_hab_sc$meanTemp_fac<-as.factor(df_hab_sc$meanTemp)
global_garden_rt<-glmmadmb(Area_Garden_Prop ~ auc_an_min_bio +(1|meanTemp_fac) , data = df_hab_sc, family="beta")
summary(global_garden_rt)
##
## Call:
## glmmadmb(formula = Area_Garden_Prop ~ auc_an_min_bio + (1 | meanTemp_fac),
## data = df_hab_sc, family = "beta")
##
## AIC: -558.3
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.7084 0.1525 -11.20 < 2e-16 ***
## auc_an_min_bio -0.3296 0.0726 -4.54 5.7e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Number of observations: total=261, meanTemp_fac=34
## Random effect variance(s):
## Group=meanTemp_fac
## Variance StdDev
## (Intercept) 0.7129 0.8443
##
## Beta dispersion parameter: 16.837 (std. err.: 1.6157)
##
## Log-likelihood: 283.167
glmm1<-glmer(siteDiversity ~ auc_an_min_bio+peak_bio+peak_anth + (1|site), data = df_hab_sc, family = poisson)
glmm2<-glmer(siteDiversity ~ peak_bio + peak_anth + (1|site)+ (1|meanTemp)+ (1|tempRange), data = df_hab_sc, family = poisson)
glmm3<-glmer(siteDiversity ~ peak_bio + (1|site)+ (1|meanTemp)+ (1|tempRange), data = df_hab_sc, family = poisson)
glmm4<-glmer(siteDiversity ~ peak_anth + (1|site)+ (1|meanTemp)+ (1|tempRange), data = df_hab_sc, family = poisson)
glmm5<-glmer(siteDiversity ~ auc_an_min_bio + (1|site)+ (1|meanTemp)+ (1|tempRange), data = df_hab_sc, family = poisson)
AIC(global, glmm1,glmm2,glmm3,glmm4,glmm5)
## df AIC
## global 9 1692.893
## glmm1 5 1684.971
## glmm2 6 1687.180
## glmm3 5 1685.304
## glmm4 5 1685.197
## glmm5 5 1684.984
df_hab_sc %>%
group_by(site) %>%
summarise(mean_auc_diff = mean(auc_an_min_bio), mean_area_green = mean(Area_Open_Garden_Prop)) %>%
ggplot(aes(x = mean_area_green, y = mean_auc_diff))+
geom_point()+
geom_smooth()
sum_df_hab_sc<-df_hab_sc %>%
group_by(site) %>%
summarise(mean_auc_diff = mean(auc_an_min_bio), mean_area_green = mean(Area_Open_Garden_Prop))
bmean<-betareg(sum_df_hab_sc$mean_area_green ~ sum_df_hab_sc$mean_auc_diff )
summary(bmean)
##
## Call:
## betareg(formula = sum_df_hab_sc$mean_area_green ~ sum_df_hab_sc$mean_auc_diff)
##
## Standardized weighted residuals 2:
## Min 1Q Median 3Q Max
## -1.9716 -0.6436 -0.0762 0.4321 2.5833
##
## Coefficients (mean model with logit link):
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.08729 0.10362 0.842 0.399533
## sum_df_hab_sc$mean_auc_diff -0.41304 0.10970 -3.765 0.000166 ***
##
## Phi coefficients (precision model with identity link):
## Estimate Std. Error z value Pr(>|z|)
## (phi) 7.484 1.488 5.031 4.89e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Type of estimator: ML (maximum likelihood)
## Log-likelihood: 17.36 on 3 Df
## Pseudo R-squared: 0.2433
## Number of iterations: 13 (BFGS) + 2 (Fisher scoring)
exp(cbind(OR = coef(bmean), confint(bmean)))
## OR 2.5 % 97.5 %
## (Intercept) 1.0912185 0.8906571 1.336943e+00
## sum_df_hab_sc$mean_auc_diff 0.6616375 0.5336343 8.203449e-01
## (phi) 1780.2127413 96.3992012 3.287535e+04