#Statistics for Box Turtle Manuscript:
#save.image(file = "Manuscript.RData") #Saved 8-5-2016
#load("Manuscript.RData")
#Packages
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
library(ggplot2)
library(scales)
#library(gridExtra)
library(reshape)
##
## Attaching package: 'reshape'
## The following objects are masked from 'package:plyr':
##
## rename, round_any
#library(data.table)
#library(doBy)
library(plyr)
#library(splitstackshape)
library(ade4)
#library(adehabitatHR)
#library(adehabitatHS)
library(adehabitatLT)
## Loading required package: sp
## Loading required package: adehabitatMA
##
## Attaching package: 'adehabitatMA'
## The following object is masked from 'package:plyr':
##
## join
## Loading required package: CircStats
## Loading required package: MASS
## Loading required package: boot
##
## Attaching package: 'boot'
## The following object is masked from 'package:lattice':
##
## melanoma
##
## Attaching package: 'adehabitatLT'
## The following object is masked from 'package:plyr':
##
## id
#library(adehabitatMA)
#library(geosphere)
Overall Goal: Investigate the effects of temperature on daily movement patterns and habitat occupancy, and #determine how thermal variation influences thermoregulatory performance.
Overall Objective: To test the relationship between habitat and thermoregulatory performance. Determined the extent to which thermal habitat influences daily movement and habitat use of box turtles.
getwd()
## [1] "E:/R - Research Files/Manuscript Files"
setwd("E:/R - Research Files/Manuscript Files")
options(warn=-1)
Objective 1: Temperature-Movement Relationship (1) Movement would be positively related to body temperature
#Upload file
dist<-read.csv('Temp-Movement.csv', #1665 observations of 11 variables
sep=',',
header=T)
str(dist)
## 'data.frame': 1665 obs. of 11 variables:
## $ X : int 1 2 3 4 5 6 7 8 9 10 ...
## $ turtle : Factor w/ 12 levels "R3L1","R3L11",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ datehour: Factor w/ 1143 levels "2015-05-23 07:00:00",..: 14 15 16 17 18 19 20 21 22 23 ...
## $ N : int 12 12 12 12 12 12 12 12 12 12 ...
## $ Tb : num 14.2 16.9 18.7 23 23 ...
## $ sd : num 0.681 0.524 1.711 0.319 0.223 ...
## $ se : num 0.1967 0.1512 0.4938 0.0922 0.0645 ...
## $ ci : num 0.433 0.333 1.087 0.203 0.142 ...
## $ date : Factor w/ 1159 levels "5/23/2015 10:00",..: 25 26 27 14 15 16 17 18 19 20 ...
## $ dist.km : num 3.27e-04 2.81e-04 1.04e-04 2.65e-05 2.43e-05 ...
## $ dist.m : num 32.71 28.14 10.36 2.65 2.43 ...
head(dist)
## X turtle datehour N Tb sd se ci
## 1 1 R3L1 2015-05-24 07:00:00 12 14.20900 0.6814383 0.19671430 0.4329653
## 2 2 R3L1 2015-05-24 08:00:00 12 16.87950 0.5238004 0.15120814 0.3328069
## 3 3 R3L1 2015-05-24 09:00:00 12 18.72508 1.7106342 0.49381756 1.0868851
## 4 4 R3L1 2015-05-24 10:00:00 12 23.00417 0.3193037 0.09217505 0.2028759
## 5 5 R3L1 2015-05-24 11:00:00 12 23.01992 0.2234248 0.06449718 0.1419573
## 6 6 R3L1 2015-05-24 12:00:00 12 25.20750 2.1428533 0.61858846 1.3615040
## date dist.km dist.m
## 1 5/24/2015 7:00 0.000327107 32.710697
## 2 5/24/2015 8:00 0.000281428 28.142782
## 3 5/24/2015 9:00 0.000103645 10.364502
## 4 5/24/2015 10:00 0.000026500 2.653639
## 5 5/24/2015 11:00 0.000024300 2.425634
## 6 5/24/2015 12:00 0.000162664 16.266407
#Correcting for time
dist$datehour<-as.POSIXct(as.character(as.factor(dist$datehour)),format="%Y-%m-%d %H:%M:%S")
dist$time<-format(as.POSIXct(dist$datehour) ,format = "%H")
head(dist)
## X turtle datehour N Tb sd se ci
## 1 1 R3L1 2015-05-24 07:00:00 12 14.20900 0.6814383 0.19671430 0.4329653
## 2 2 R3L1 2015-05-24 08:00:00 12 16.87950 0.5238004 0.15120814 0.3328069
## 3 3 R3L1 2015-05-24 09:00:00 12 18.72508 1.7106342 0.49381756 1.0868851
## 4 4 R3L1 2015-05-24 10:00:00 12 23.00417 0.3193037 0.09217505 0.2028759
## 5 5 R3L1 2015-05-24 11:00:00 12 23.01992 0.2234248 0.06449718 0.1419573
## 6 6 R3L1 2015-05-24 12:00:00 12 25.20750 2.1428533 0.61858846 1.3615040
## date dist.km dist.m time
## 1 5/24/2015 7:00 0.000327107 32.710697 07
## 2 5/24/2015 8:00 0.000281428 28.142782 08
## 3 5/24/2015 9:00 0.000103645 10.364502 09
## 4 5/24/2015 10:00 0.000026500 2.653639 10
## 5 5/24/2015 11:00 0.000024300 2.425634 11
## 6 5/24/2015 12:00 0.000162664 16.266407 12
dist$time<-as.factor(as.character(dist$time))
#mm2 <- mm[mm[,1]!=2,]
#Removing hour 19 because it's the distance between 7pm and 7am (time when GPS was off)
dist<-dist[dist[,"time"]!=19,] #removes data based on condition in column/row
dist.sum<-summarySE(dist,
measurevar="dist.m",
groupvar="time")
#View(dist.sum) viewing data set
#dist.sum<-dist.sum[-c(13),] #hour 19 removed from data set
#Linear Model for Body Temperature and Movement
lm1<-lm(dist.m~Tb, data=dist)
summary(lm1)
##
## Call:
## lm(formula = dist.m ~ Tb, data = dist)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.555 -10.442 -4.320 4.778 104.179
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.94591 1.91502 10.415 <2e-16 ***
## Tb -0.10750 0.08476 -1.268 0.205
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.94 on 1544 degrees of freedom
## Multiple R-squared: 0.001041, Adjusted R-squared: 0.0003937
## F-statistic: 1.608 on 1 and 1544 DF, p-value: 0.2049
Graphing average hour body temperature and distance moved per hour To see how it looks
ggplot(dist, aes(x=Tb,
y=dist.m)) +
xlab(expression(paste("Body Temperature (°C)"))) +
ylab(expression(paste("Distance (m)"))) +
#labs(title=(expression(paste("Body Temperature-Movement")))) +
xlim(10,35)+
ylim(0,130)+
theme(legend.position='none',
panel.grid.major = element_blank(),
panel.grid.minor =element_blank(),
panel.background =element_blank(),
panel.border =element_rect(color = "black",
fill = NA,
size=NA),
legend.background= element_rect(fill=FALSE),
legend.key = element_blank(),
legend.title = element_blank(),
axis.text = element_text("Times","bold","black"),
axis.line.x = element_line(color="black", size = 0.5),
axis.line.y = element_line(color="black", size = 0.5)) +
geom_point() +
#eom_point(aes(colour=turtle), size=1) +
stat_smooth(method=lm, level=0.95,
fill="grey", colour="black", size=0)
#I don't listen to ggplot warnings
Distance and Time of Day graphing
limits <- aes(ymax = dist.m + se, ymin=dist.m - se)
ggplot(dist.sum, aes(x = time, y = dist.m)) +
xlab(expression(paste("Time of Day (hour)"))) +
ylab(expression(paste("Distance (m)"))) +
#labs(title=(expression(paste("Time of Day-Movement")))) +
#ylim(0,40)+
theme(legend.position='none',
panel.grid.major = element_blank(),
panel.grid.minor =element_blank(),
panel.background =element_blank(),
panel.border =element_rect(color = "black",
fill = NA,
size=NA),
legend.background= element_rect(fill=FALSE),
legend.key = element_blank(),
legend.title = element_blank(),
axis.text = element_text("Times","bold","black"),
axis.line.x = element_line(color="black", size = 0.5),
axis.line.y = element_line(color="black", size = 0.5))+
geom_bar(stat="identity", position=position_dodge())+
geom_errorbar(limits, width=0.25) +
scale_y_continuous(expand = c(0, 0),
limits = c(0, 35),
breaks =c(0,5,10,15,20,25,30,35))
head(dist)
## X turtle datehour N Tb sd se ci
## 1 1 R3L1 2015-05-24 07:00:00 12 14.20900 0.6814383 0.19671430 0.4329653
## 2 2 R3L1 2015-05-24 08:00:00 12 16.87950 0.5238004 0.15120814 0.3328069
## 3 3 R3L1 2015-05-24 09:00:00 12 18.72508 1.7106342 0.49381756 1.0868851
## 4 4 R3L1 2015-05-24 10:00:00 12 23.00417 0.3193037 0.09217505 0.2028759
## 5 5 R3L1 2015-05-24 11:00:00 12 23.01992 0.2234248 0.06449718 0.1419573
## 6 6 R3L1 2015-05-24 12:00:00 12 25.20750 2.1428533 0.61858846 1.3615040
## date dist.km dist.m time
## 1 5/24/2015 7:00 0.000327107 32.710697 07
## 2 5/24/2015 8:00 0.000281428 28.142782 08
## 3 5/24/2015 9:00 0.000103645 10.364502 09
## 4 5/24/2015 10:00 0.000026500 2.653639 10
## 5 5/24/2015 11:00 0.000024300 2.425634 11
## 6 5/24/2015 12:00 0.000162664 16.266407 12
qqnorm(dist$dist.m) #right skewed
qqline(dist$dist.m)
dist$log.dist.m<-log(dist$dist.m) #correct for skewness in data
qqnorm(dist$log.dist.m) #corrected data
qqline(dist$log.dist.m)
lm2<-lm(log.dist.m~time, dist)
summary(lm2)
##
## Call:
## lm(formula = log.dist.m ~ time, data = dist)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.6875 -0.5477 0.0727 0.5782 2.2351
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.508220 0.078942 31.773 <2e-16 ***
## time08 0.206633 0.110994 1.862 0.0628 .
## time09 0.063802 0.111207 0.574 0.5662
## time10 0.161363 0.110994 1.454 0.1462
## time11 0.056257 0.110994 0.507 0.6123
## time12 -0.087876 0.111207 -0.790 0.4295
## time13 0.009031 0.111207 0.081 0.9353
## time14 -0.049207 0.110994 -0.443 0.6576
## time15 -0.069338 0.111207 -0.624 0.5330
## time16 -0.030018 0.110994 -0.270 0.7869
## time17 -0.091858 0.112086 -0.820 0.4126
## time18 -0.161229 0.111422 -1.447 0.1481
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8896 on 1534 degrees of freedom
## Multiple R-squared: 0.01326, Adjusted R-squared: 0.006181
## F-statistic: 1.874 on 11 and 1534 DF, p-value: 0.03856
A1<-aov(dist$log.dist.m~dist$time)
A1 #Estimated effects may be unbalanced
## Call:
## aov(formula = dist$log.dist.m ~ dist$time)
##
## Terms:
## dist$time Residuals
## Sum of Squares 16.3108 1214.0651
## Deg. of Freedom 11 1534
##
## Residual standard error: 0.8896277
## Estimated effects may be unbalanced
TukeyHSD(A1)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = dist$log.dist.m ~ dist$time)
##
## $`dist$time`
## diff lwr upr p adj
## 08-07 0.206632556 -0.1566634 0.569928561 0.7828420
## 09-07 0.063801588 -0.3001896 0.427792771 0.9999899
## 10-07 0.161363327 -0.2019327 0.524659332 0.9523482
## 11-07 0.056256564 -0.3070394 0.419552569 0.9999972
## 12-07 -0.087875846 -0.4518670 0.276115337 0.9997481
## 13-07 0.009030644 -0.3549605 0.373021827 1.0000000
## 14-07 -0.049206549 -0.4125026 0.314089456 0.9999993
## 15-07 -0.069337897 -0.4333291 0.294653286 0.9999762
## 16-07 -0.030018181 -0.3933142 0.333277824 1.0000000
## 17-07 -0.091857753 -0.4587267 0.275011234 0.9996418
## 18-07 -0.161229408 -0.5259253 0.203466459 0.9538980
## 09-08 -0.142830968 -0.5046996 0.219037671 0.9801825
## 10-08 -0.045269230 -0.4064386 0.315900146 0.9999997
## 11-08 -0.150375993 -0.5115454 0.210793383 0.9702104
## 12-08 -0.294508402 -0.6563770 0.067360237 0.2455429
## 13-08 -0.197601912 -0.5594706 0.164266728 0.8250427
## 14-08 -0.255839105 -0.6170085 0.105330271 0.4636465
## 15-08 -0.275970453 -0.6378391 0.085898187 0.3429839
## 16-08 -0.236650737 -0.5978201 0.124518639 0.5904231
## 17-08 -0.298490309 -0.6632535 0.066272880 0.2379733
## 18-08 -0.367861964 -0.7304394 -0.005284515 0.0431335
## 10-09 0.097561739 -0.2643069 0.459430378 0.9992759
## 11-09 -0.007545024 -0.3694137 0.354323615 1.0000000
## 12-09 -0.151677434 -0.5142440 0.210889121 0.9691349
## 13-09 -0.054770944 -0.4173375 0.307795611 0.9999978
## 14-09 -0.113008137 -0.4748768 0.248860503 0.9972152
## 15-09 -0.133139484 -0.4957060 0.229427070 0.9888648
## 16-09 -0.093819769 -0.4556884 0.268048871 0.9994999
## 17-09 -0.155659341 -0.5211149 0.209796236 0.9647523
## 18-09 -0.225030996 -0.5883050 0.138243007 0.6744849
## 11-10 -0.105106763 -0.4662761 0.256062613 0.9985276
## 12-10 -0.249239173 -0.6111078 0.112629467 0.5100461
## 13-10 -0.152332682 -0.5142013 0.209535957 0.9676723
## 14-10 -0.210569876 -0.5717393 0.150599500 0.7541443
## 15-10 -0.230701223 -0.5925699 0.131167416 0.6324909
## 16-10 -0.191381508 -0.5525509 0.169787868 0.8521616
## 17-10 -0.253221080 -0.6179843 0.111542110 0.4970600
## 18-10 -0.322592735 -0.6851702 0.039984714 0.1374953
## 12-11 -0.144132410 -0.5060010 0.217736230 0.9787371
## 13-11 -0.047225920 -0.4090946 0.314642720 0.9999995
## 14-11 -0.105463113 -0.4666325 0.255706263 0.9984812
## 15-11 -0.125594460 -0.4874631 0.236274179 0.9930341
## 16-11 -0.086274745 -0.4474441 0.274894631 0.9997728
## 17-11 -0.148114317 -0.5128775 0.216648873 0.9753431
## 18-11 -0.217485972 -0.5800634 0.145091477 0.7183834
## 13-12 0.096906490 -0.2656601 0.459473045 0.9993327
## 14-12 0.038669297 -0.3231993 0.400537937 0.9999999
## 15-12 0.018537949 -0.3440286 0.381104504 1.0000000
## 16-12 0.057857665 -0.3040110 0.419726305 0.9999961
## 17-12 -0.003981907 -0.3694375 0.361473670 1.0000000
## 18-12 -0.073353562 -0.4366276 0.289920440 0.9999571
## 14-13 -0.058237193 -0.4201058 0.303631446 0.9999958
## 15-13 -0.078368541 -0.4409351 0.284198014 0.9999151
## 16-13 -0.039048825 -0.4009175 0.322819814 0.9999999
## 17-13 -0.100888397 -0.4663440 0.264567180 0.9990961
## 18-13 -0.170260052 -0.5335341 0.193013950 0.9310727
## 15-14 -0.020131348 -0.3820000 0.341737292 1.0000000
## 16-14 0.019188368 -0.3419810 0.380357744 1.0000000
## 17-14 -0.042651204 -0.4074144 0.322111985 0.9999999
## 18-14 -0.112022859 -0.4746003 0.250554590 0.9974690
## 16-15 0.039319716 -0.3225489 0.401188355 0.9999999
## 17-15 -0.022519856 -0.3879754 0.342935721 1.0000000
## 18-15 -0.091891511 -0.4551655 0.271382491 0.9996048
## 17-16 -0.061839572 -0.4266028 0.302923617 0.9999928
## 18-16 -0.131211227 -0.4937887 0.231366222 0.9901246
## 18-17 -0.069371655 -0.4355291 0.296785788 0.9999775
Only statistical difference is between 18:00 and 08:00 where at 8am turtles move more than at 6pm
Thermal Preference Prep:
ZPTsel<-read.table("ZPTsel.dat",
sep="\t",
header=TRUE) #Pedro's data - 11 male box turtles
dateRef<-Sys.Date()
dateRefn<-Sys.Date() + 1
######################################
#Constant Thermal Preference (Tsel-C)#
######################################
#meanTselC<-mean(ZPTselDynamic$value, na.rm=TRUE)
#quantile(ZPTselDynamic$value, na.rm=TRUE)
#0% 25% 50% 75% 100%
#6.0 25.5 29.3 31.3 36.6
TselC.min<-25.5
TselC.max<-31.3
# New Tsel
date_time<-seq(as.POSIXct("2014-08-01 07:00:00"),
as.POSIXct("2014-08-03 07:00:00"),
length.out=289)
ZPTsel$date<-date_time[1:287]
ZPTselDynamic<-melt(ZPTsel, id.vars=c('date'))
meanTselD<-aggregate(ZPTselDynamic$value,
list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")),
mean, na.rm=TRUE)
colnames(meanTselD)<-c("hour","mTsel")
medianTselD<-aggregate(ZPTselDynamic$value,
list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")),
median, na.rm=TRUE)
colnames(medianTselD)<-c("hour","medianTsel")
maxTselD<-aggregate(ZPTselDynamic$value,
list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")),
max, na.rm=TRUE)
colnames(maxTselD)<-c("hour","maxTsel")
minTselD<-aggregate(ZPTselDynamic$value,
list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")),
min, na.rm=TRUE)
colnames(minTselD)<-c("hour","minTsel")
sdTselD<-aggregate(ZPTselDynamic$value,
list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")),
sd, na.rm=TRUE)
colnames(sdTselD)<-c("hour","sdTsel")
nTselD<-aggregate(ZPTselDynamic$value,
list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")),
length)
colnames(nTselD)<-c("hour","nTsel")
errorTselD<-1.96*(sdTselD$sdTsel/sqrt(nTselD$nTsel))
ZPTselD<-cbind(nTselD, meanTselD, medianTselD, minTselD, maxTselD, sdTselD, errorTselD)
ZPTselD<-ZPTselD[,-c(3,5,7,9,11)]
#View(ZPTselD)
ZPTselD$hour<-as.POSIXlt(ZPTselD$hour,format="%H:%M:%S")
ZPTselD$hour.day<-format(round(ZPTselD$hour, "hours"), "%H")
When given the opportunity, box turtles select warmer temperatures at night (when we incorporate time into the Tsel).
LULC.Habitat<-read.table(file = "clipboard",
sep = "\t",
header=TRUE)
View(LULC.Habitat)
Habitat Preference is based on Neu et al. 1972, using chi-square analysis. Based on the total area (456m buffer), the null hypothesis is that turtles would use each habitat proportionally. Proportionally higher indicates preference, proportionally lower indicidates avoidance.
Box turtles prefer deciduous and evergreen forest, as well as grassland and herbaceous. Shrub/scrub habitat is used as expected, and all other habitat types are used proportionally less (indicating avoidance).
#Upload file
box.turtle.data<-read.csv("2014 2015 Tb Tex db CORRECTED.csv",
header=TRUE,
sep="\t")
operant.data<-read.csv("2014 2015 Te de CORRECTED.csv",
header=TRUE,
sep=",")
#Time Correction for data.frames
box.turtle.data$date<-as.POSIXct(as.character(box.turtle.data$date,
format= "%Y-%m-%d %H:%M:%S"))
operant.data$date<-as.POSIXct(as.character(operant.data$date,
format= "%Y-%m-%d %H:%M:%S"))
str(box.turtle.data)
## 'data.frame': 89103 obs. of 12 variables:
## $ date : POSIXct, format: "2014-06-06 00:00:00" "2014-06-06 00:05:00" ...
## $ turtle : Factor w/ 26 levels "L1","L2","R1",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ Tb : num 18.4 18.3 18.3 18.3 18.2 ...
## $ Tex : num 17.3 17.2 17.2 17.1 17 ...
## $ season.year : Factor w/ 6 levels "fall2014","fall2015",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ year : int 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 ...
## $ season : Factor w/ 3 levels "fall","spring",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ db.NEW.signs: num -11.4 -11.5 -11.6 -11.6 -11.6 ...
## $ db.OLD.signs: num -10.9 -11 -11 -11 -11.1 ...
## $ db.OLD : num 7.09 7.16 7.22 7.22 7.28 ...
## $ hour.day : int 0 0 0 0 0 0 1 1 1 1 ...
## $ db.NEW : num 10.7 10.7 10.8 10.8 10.9 ...
str(operant.data)
## 'data.frame': 658009 obs. of 11 variables:
## $ date : POSIXct, format: "2014-06-06 00:00:00" "2014-06-06 00:00:00" ...
## $ probe : Factor w/ 90 levels "CDE-01","CDE-02",..: 71 72 73 74 75 76 77 78 79 80 ...
## $ Te : num 18.3 16.6 16.2 16.1 16.8 ...
## $ habitat : Factor w/ 2 levels "closed","open": 2 2 2 2 2 2 2 2 2 2 ...
## $ year : int 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 ...
## $ season : Factor w/ 3 levels "fall","spring",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ hour.day : int 0 0 0 0 0 0 0 0 0 0 ...
## $ de.NEW.signs: num -11.5 -13.3 -13.6 -13.8 -13.1 ...
## $ de.OLD.signs: num -11 -12.7 -13.1 -13.2 -12.6 ...
## $ de.NEW : num 10.8 12.5 12.9 13 12.3 ...
## $ de.OLD : num 7.16 8.93 9.29 9.42 8.75 ...
Thermal exploitation (Ex) is typically calculated as the following: (Time Tb in Tsel)/(Time Te in Tsel) This is represented in a 24-hour period calculate Ex for each day for each turtle
#Body Temperatures (Tb)
meanTb<-aggregate(list(mTb = box.turtle.data$Tb),
list(date = cut(box.turtle.data$date, "1 hour"),
turtle = box.turtle.data$turtle),
data=box.turtle.data,
mean)
head(meanTb)
## date turtle mTb
## 1 2014-06-10 00:00:00 L1 17.37267
## 2 2014-06-10 01:00:00 L1 17.08025
## 3 2014-06-10 02:00:00 L1 16.84533
## 4 2014-06-10 03:00:00 L1 16.71975
## 5 2014-06-10 04:00:00 L1 16.74075
## 6 2014-06-10 05:00:00 L1 16.80900
meanTb$date<-as.POSIXct(as.character(meanTb$date,
format="%Y-%m-%d %H:%M:%S"))
meanTb$hour.day<-format(round(meanTb$date, "hours"), "%H")
#View(meanTb)
#Shell Temperature
meanTex<-aggregate(list(mTex = box.turtle.data$Tex),
list(date = cut(box.turtle.data$date, "1 hour"),
turtle = box.turtle.data$turtle),
data=box.turtle.data,
mean)
head(meanTex)
## date turtle mTex
## 1 2014-06-10 00:00:00 L1 16.63283
## 2 2014-06-10 01:00:00 L1 16.14217
## 3 2014-06-10 02:00:00 L1 15.94992
## 4 2014-06-10 03:00:00 L1 15.89592
## 5 2014-06-10 04:00:00 L1 16.11475
## 6 2014-06-10 05:00:00 L1 16.25725
meanTex$date<-as.POSIXct(as.character(meanTex$date,
format="%Y-%m-%d %H:%M:%S"))
#View(meanTex)
meanTex$hour.day<-format(round(meanTex$date, "hours"), "%H")
#Operant Temperatures (Te)
meanTe<-aggregate(list(mTe = operant.data$Te),
list(date = cut(operant.data$date, "1 hour"),
year = operant.data$year,
season = operant.data$season),
data=operant.data,
mean)
meanTe$date<-as.POSIXct(as.character(meanTe$date,
format="%Y-%m-%d %H:%M:%S"))
#View(meanTe)
meanTe$hour.day<-format(round(meanTe$date, "hours"), "%H")
#Habitat Types
Open.Close.Te<-aggregate(list(mTe = operant.data$Te),
list(date = cut(operant.data$date, "1 hour"),
year = operant.data$year,
habitat = operant.data$habitat,
season = operant.data$season),
data=operant.data,
mean)
Open.Close.Te$date<-as.POSIXct(as.character(Open.Close.Te$date,
format="%Y-%m-%d %H:%M:%S"))
Open.Close.Te$hour.day<-format(round(Open.Close.Te$date, "hours"), "%H")
#Exploitation Index
#Constant (Tsel-C) Tb
Tsel.min<-25.5 #bottom 25%
Tsel.mid<-29.3 #mid 50%
Tsel.max<-31.3 #top 75%
C.Tb<-ifelse(meanTb$mTb > Tsel.max, "above", #ifelse(condition, yes, no)
ifelse(meanTb$mTb < Tsel.min, "below",
"inside"))
head(C.Tb)
## [1] "below" "below" "below" "below" "below" "below"
meanTb$Tsel.C<-C.Tb
head(meanTb)
## date turtle mTb hour.day Tsel.C
## 1 2014-06-10 00:00:00 L1 17.37267 00 below
## 2 2014-06-10 01:00:00 L1 17.08025 01 below
## 3 2014-06-10 02:00:00 L1 16.84533 02 below
## 4 2014-06-10 03:00:00 L1 16.71975 03 below
## 5 2014-06-10 04:00:00 L1 16.74075 04 below
## 6 2014-06-10 05:00:00 L1 16.80900 05 below
#Dynamic (Tsel-D) Tb
D.Tb<-ifelse(meanTb$mTb[format(round(meanTb$date, "hours"), "%H")%in%meanTb$hour.day] < (ZPTselD$mTsel[as.numeric(meanTb$hour.day)+1] - ZPTselD$errorTsel[as.numeric(meanTb$hour.day)+1]), "below",
ifelse(meanTb$mTb[format(round(meanTb$date, "hours"), "%H")%in%meanTb$hour.day] > (ZPTselD$mTsel[as.numeric(meanTb$hour.day)+1] + ZPTselD$errorTsel[as.numeric(meanTb$hour.day)+1]), "above",
"inside"))
head(D.Tb)
## [1] "below" "below" "below" "below" "below" "below"
meanTb$Tsel.D<-D.Tb
#Environmental Temperatures
#Constant (Tsel-C) Te
Tsel.min<-25.5 #bottom 25%
Tsel.mid<-29.3 #mid 50%
Tsel.max<-31.3 #top 75%
C.Te<-ifelse(meanTe$mTe > Tsel.max, "above", #ifelse(condition, yes, no)
ifelse(meanTe$mTe < Tsel.min, "below",
"inside"))
head(C.Te)
## [1] "below" "below" "below" "below" "below" "below"
meanTe$Tsel.C<-C.Te
head(meanTe)
## date year season mTe hour.day Tsel.C
## 1 2014-09-22 00:00:00 2014 fall 13.47166 00 below
## 2 2014-09-22 01:00:00 2014 fall 12.81672 01 below
## 3 2014-09-22 02:00:00 2014 fall 12.20028 02 below
## 4 2014-09-22 03:00:00 2014 fall 11.49313 03 below
## 5 2014-09-22 04:00:00 2014 fall 10.92334 04 below
## 6 2014-09-22 05:00:00 2014 fall 10.37895 05 below
#Dynamic (Tsel-D)
D.Te<-ifelse(meanTe$mTe[format(round(meanTe$date, "hours"), "%H")%in%meanTe$hour.day] < (ZPTselD$mTsel[as.numeric(meanTe$hour.day)+1] - ZPTselD$errorTsel[as.numeric(meanTe$hour.day)+1]), "below",
ifelse(meanTe$mTe[format(round(meanTe$date, "hours"), "%H")%in%meanTe$hour.day] > (ZPTselD$mTsel[as.numeric(meanTe$hour.day)+1] + ZPTselD$errorTsel[as.numeric(meanTe$hour.day)+1]), "above",
"inside"))
head(D.Te)
## [1] "below" "below" "below" "below" "below" "below"
meanTe$Tsel.D<-D.Te
#Open-Closed Habitat Comparison
#Constant (Tsel-C)
Tsel.min<-25.5 #bottom 25%
Tsel.mid<-29.3 #mid 50%
Tsel.max<-31.3 #top 75%
C.TeOC<-ifelse(Open.Close.Te$mTe > Tsel.max, "above", #ifelse(condition, yes, no)
ifelse(Open.Close.Te$mTe < Tsel.min, "below",
"inside"))
head(C.TeOC)
## [1] "below" "below" "below" "below" "below" "below"
Open.Close.Te$Tsel.C<-C.TeOC
head(Open.Close.Te)
## date year habitat season mTe hour.day Tsel.C
## 1 2014-09-22 00:00:00 2014 closed fall 13.95541 00 below
## 2 2014-09-22 01:00:00 2014 closed fall 13.31886 01 below
## 3 2014-09-22 02:00:00 2014 closed fall 12.74671 02 below
## 4 2014-09-22 03:00:00 2014 closed fall 12.09879 03 below
## 5 2014-09-22 04:00:00 2014 closed fall 11.49964 04 below
## 6 2014-09-22 05:00:00 2014 closed fall 10.94159 05 below
#Dynamic (Tsel-D)
D.TeOC<-ifelse(Open.Close.Te$mTe[format(round(Open.Close.Te$date, "hours"), "%H")%in%Open.Close.Te$hour.day] < (ZPTselD$mTsel[as.numeric(Open.Close.Te$hour.day)+1] - ZPTselD$errorTsel[as.numeric(Open.Close.Te$hour.day)+1]), "below",
ifelse(Open.Close.Te$mTe[format(round(Open.Close.Te$date, "hours"), "%H")%in%Open.Close.Te$hour.day] > (ZPTselD$mTsel[as.numeric(Open.Close.Te$hour.day)+1] + ZPTselD$errorTsel[as.numeric(Open.Close.Te$hour.day)+1]), "above",
"inside"))
head(D.TeOC)
## [1] "below" "below" "below" "below" "below" "below"
Open.Close.Te$Tsel.D<-D.TeOC
#Aggregation of data
#Tb in Tsel
Tb.Ex.C<-aggregate(Tsel.C ~ cut(meanTb$date, "1 day") + meanTb$turtle,
data=meanTb,
function(x) sum(x==c("inside")))
Tb.Ex.D<-aggregate(Tsel.D ~ cut(meanTb$date, "1 day") + meanTb$turtle,
data=meanTb,
function(x) sum(x==c("inside")))
Tb.Ex.C<-rename(Tb.Ex.C, c('cut(meanTb$date, "1 day")' = "date"))
Tb.Ex.D<-rename(Tb.Ex.D, c('cut(meanTb$date, "1 day")' = "date"))
head(Tb.Ex.C)
## date meanTb$turtle Tsel.C
## 1 2014-06-10 00:00:00 L1 0
## 2 2014-06-11 00:00:00 L1 0
## 3 2014-06-12 00:00:00 L1 2
## 4 2014-06-13 00:00:00 L1 3
## 5 2014-06-14 00:00:00 L1 2
## 6 2014-06-15 00:00:00 L1 4
head(Tb.Ex.D)
## date meanTb$turtle Tsel.D
## 1 2014-06-10 00:00:00 L1 0
## 2 2014-06-11 00:00:00 L1 0
## 3 2014-06-12 00:00:00 L1 1
## 4 2014-06-13 00:00:00 L1 1
## 5 2014-06-14 00:00:00 L1 1
## 6 2014-06-15 00:00:00 L1 2
#Te in Tsel
Te.Ex.C<-aggregate(Tsel.C ~ cut(meanTe$date, "1 day") + meanTe$season + meanTe$year,
data=meanTe,
function(x) sum(x==c("inside")))
Te.Ex.D<-aggregate(Tsel.D ~ cut(meanTe$date, "1 day") + meanTe$season + meanTe$year,
data=meanTe,
function(x) sum(x==c("inside")))
Te.Ex.C<-rename(Te.Ex.C, c('cut(meanTe$date, "1 day")' = "date"))
Te.Ex.D<-rename(Te.Ex.D, c('cut(meanTe$date, "1 day")' = "date"))
head(Te.Ex.C)
## date meanTe$season meanTe$year Tsel.C
## 1 2014-09-22 00:00:00 fall 2014 0
## 2 2014-09-23 00:00:00 fall 2014 0
## 3 2014-09-24 00:00:00 fall 2014 0
## 4 2014-09-25 00:00:00 fall 2014 0
## 5 2014-09-26 00:00:00 fall 2014 2
## 6 2014-09-27 00:00:00 fall 2014 1
head(Te.Ex.D)
## date meanTe$season meanTe$year Tsel.D
## 1 2014-09-22 00:00:00 fall 2014 0
## 2 2014-09-23 00:00:00 fall 2014 0
## 3 2014-09-24 00:00:00 fall 2014 0
## 4 2014-09-25 00:00:00 fall 2014 0
## 5 2014-09-26 00:00:00 fall 2014 0
## 6 2014-09-27 00:00:00 fall 2014 0
#Habitat Types in Tsel
Open.Close.Te.Ex.C<-aggregate(Tsel.C ~ cut(Open.Close.Te$date, "1 day") + Open.Close.Te$season + Open.Close.Te$year + Open.Close.Te$habitat,
data=Open.Close.Te,
function(x) sum(x==c("inside")))
Open.Close.Te.Ex.D<-aggregate(Tsel.D ~ cut(Open.Close.Te$date, "1 day") + Open.Close.Te$season + Open.Close.Te$year + Open.Close.Te$habitat,
data=Open.Close.Te,
function(x) sum(x==c("inside")))
Open.Close.Te.Ex.C<-rename(Open.Close.Te.Ex.C, c('cut(Open.Close.Te$date, "1 day")' = "date"))
Open.Close.Te.Ex.D<-rename(Open.Close.Te.Ex.D, c('cut(Open.Close.Te$date, "1 day")' = "date"))
head(Open.Close.Te.Ex.C)
## date Open.Close.Te$season Open.Close.Te$year
## 1 2014-09-22 00:00:00 fall 2014
## 2 2014-09-23 00:00:00 fall 2014
## 3 2014-09-24 00:00:00 fall 2014
## 4 2014-09-25 00:00:00 fall 2014
## 5 2014-09-26 00:00:00 fall 2014
## 6 2014-09-27 00:00:00 fall 2014
## Open.Close.Te$habitat Tsel.C
## 1 closed 0
## 2 closed 0
## 3 closed 0
## 4 closed 0
## 5 closed 0
## 6 closed 0
head(Open.Close.Te.Ex.D)
## date Open.Close.Te$season Open.Close.Te$year
## 1 2014-09-22 00:00:00 fall 2014
## 2 2014-09-23 00:00:00 fall 2014
## 3 2014-09-24 00:00:00 fall 2014
## 4 2014-09-25 00:00:00 fall 2014
## 5 2014-09-26 00:00:00 fall 2014
## 6 2014-09-27 00:00:00 fall 2014
## Open.Close.Te$habitat Tsel.D
## 1 closed 0
## 2 closed 0
## 3 closed 0
## 4 closed 0
## 5 closed 0
## 6 closed 0
#Percent Te in Tsel for Habitat Types
Open.Close.Te.Ex.C$Te.Prnct.C<-Open.Close.Te.Ex.C$Tsel.C/24
Open.Close.Te.Ex.D$Te.Prnct.D<-Open.Close.Te.Ex.D$Tsel.D/24
Open.Close.Te.Ex.C<-rename(Open.Close.Te.Ex.C,c('Open.Close.Te$year' = 'year',
'Open.Close.Te$habitat' = 'habitat',
'Open.Close.Te$season' = 'season'))
Open.Close.Te.Ex.D<-rename(Open.Close.Te.Ex.D,c('Open.Close.Te$year' = 'year',
'Open.Close.Te$habitat' = 'habitat',
'Open.Close.Te$season' = 'season'))
Habitat.Prcnt.C<-aggregate(list( Mean = Open.Close.Te.Ex.C$Te.Prnct.C),
list(habitat = Open.Close.Te.Ex.C$habitat,
year = Open.Close.Te.Ex.C$year,
season = Open.Close.Te.Ex.C$season),
mean)
head(Habitat.Prcnt.C)
## habitat year season Mean
## 1 closed 2014 fall 0.000000000
## 2 open 2014 fall 0.098958333
## 3 closed 2015 fall 0.006666667
## 4 open 2015 fall 0.123333333
## 5 closed 2014 spring 0.044444444
## 6 open 2014 spring 0.163888889
Habitat.Prcnt.D<-aggregate(list( Mean = Open.Close.Te.Ex.D$Te.Prnct.D),
list(habitat = Open.Close.Te.Ex.D$habitat,
year = Open.Close.Te.Ex.D$year,
season = Open.Close.Te.Ex.D$season),
mean)
head(Habitat.Prcnt.D)
## habitat year season Mean
## 1 closed 2014 fall 0.00000000
## 2 open 2014 fall 0.03385417
## 3 closed 2015 fall 0.01000000
## 4 open 2015 fall 0.05666667
## 5 closed 2014 spring 0.03888889
## 6 open 2014 spring 0.05833333
#Open-Closed Habitat Comparison
qqnorm(Open.Close.Te.Ex.C$Te.Prnct.C) #bimodal possibly?
qqline(Open.Close.Te.Ex.C$Te.Prnct.C)
#Tsel-C Comparison of Te in Tsel
t.test(Te.Prnct.C ~ habitat, Open.Close.Te.Ex.C)
##
## Welch Two Sample t-test
##
## data: Te.Prnct.C by habitat
## t = -13.639, df = 238.62, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1842277 -0.1377268
## sample estimates:
## mean in group closed mean in group open
## 0.02439693 0.18537415
qqnorm(Open.Close.Te.Ex.D$Te.Prnct.D) #More bimodal possibly?
qqline(Open.Close.Te.Ex.D$Te.Prnct.D)
#Tsel-C Comparison of Te in Tsel
t.test(Te.Prnct.D ~ habitat, Open.Close.Te.Ex.D)
##
## Welch Two Sample t-test
##
## data: Te.Prnct.D by habitat
## t = -6.5141, df = 292.35, p-value = 3.189e-10
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.07151030 -0.03832531
## sample estimates:
## mean in group closed mean in group open
## 0.0312500 0.0861678
In general, closed habitats had less time percentage of Te in Tsel. Furthermore, there is a significant difference between time Te in Tsel for both calculation. So why do box turtles select forested habitat? Could it be due to the conditions at the northern limit of their distribution?
#Merging Data for Thermal Exploitation
#Data:
#Tb.Ex.C
#Tb.Ex.D
#Te.Ex.C
#Te.Ex.D
#Constant Calculation
Tb.Ex.C<-rename(Tb.Ex.C, c('cut(meanTb$date, "1 day")' = "date"))
Te.Ex.C<-rename(Te.Ex.C, c('cut(meanTe$date, "1 day")' = "date"))
Ex.Constant<-merge(Tb.Ex.C, Te.Ex.C, by='date')
head(Ex.Constant)
## date meanTb$turtle Tsel.C.x meanTe$season meanTe$year
## 1 2014-06-06 00:00:00 R1 4 spring 2014
## 2 2014-06-07 00:00:00 R1 3 spring 2014
## 3 2014-06-07 00:00:00 R10 0 spring 2014
## 4 2014-06-08 00:00:00 R1 0 spring 2014
## 5 2014-06-08 00:00:00 R3 0 spring 2014
## 6 2014-06-08 00:00:00 R10 0 spring 2014
## Tsel.C.y
## 1 5
## 2 6
## 3 6
## 4 0
## 5 0
## 6 0
Ex.Constant<-rename(Ex.Constant, c("Tsel.C.x" = "Tsel.Tb",
'meanTe$season' = 'season',
'meanTe$year' = 'year',
'Tsel.C.y' = 'Tsel.Te'))
Ex.Constant$Tb.Prcnt<-Ex.Constant$Tsel.Tb/24
Ex.Constant$Te.Prcnt<-Ex.Constant$Tsel.Te/24
Ex.Constant$Ex<-Ex.Constant$Tb.Prcnt/Ex.Constant$Te.Prcnt
#Ex.Constant
#Dynamic Calculation
Tb.Ex.D<-rename(Tb.Ex.D, c('cut(meanTb$date, "1 day")' = "date"))
Te.Ex.D<-rename(Te.Ex.D, c('cut(meanTe$date, "1 day")' = "date"))
Ex.Dynamic<-merge(Tb.Ex.D, Te.Ex.D, by='date')
head(Ex.Dynamic)
## date meanTb$turtle Tsel.D.x meanTe$season meanTe$year
## 1 2014-06-06 00:00:00 R1 1 spring 2014
## 2 2014-06-07 00:00:00 R1 1 spring 2014
## 3 2014-06-07 00:00:00 R10 0 spring 2014
## 4 2014-06-08 00:00:00 R1 0 spring 2014
## 5 2014-06-08 00:00:00 R3 0 spring 2014
## 6 2014-06-08 00:00:00 R10 0 spring 2014
## Tsel.D.y
## 1 0
## 2 4
## 3 4
## 4 0
## 5 0
## 6 0
Ex.Dynamic<-rename(Ex.Dynamic, c("Tsel.D.x" = "Tsel.Tb",
'meanTe$season' = 'season',
'meanTe$year' = 'year',
'Tsel.D.y' = 'Tsel.Te'))
Ex.Dynamic$Tb.Prcnt<-Ex.Dynamic$Tsel.Tb/24
Ex.Dynamic$Te.Prcnt<-Ex.Dynamic$Tsel.Te/24
Ex.Dynamic$Ex<-Ex.Dynamic$Tb.Prcnt/Ex.Dynamic$Te.Prcnt
#Ex.Dynamic
#View(Ex.Dynamic)
Here’s where it gets a little tricky with the Thermal exploitation. Issues that we run into are: 1. Division by zero, where Te in Tsel is not observed in a 24-h period yet Tb in Tsel is (possibly finding thermal niches that the models weren’t placed in? Don’t know, we placed a lot of models) 2. Tb in Tsel being greater than Te in Tsel (values over 100%)
In general, habitat selected doesn’t reflect thermal preference because open habitats are significantly higher (%Te in Tsel) than closed habitats.
Box turtles would rely on thermoconformity given their limited capacity to seek out basking sites.
I. How accurate are box turtles at maintaining thermal preference (thermal accuracy (db))?
*Report the mean +/- SE of db to compare with other studies of turtles
Statistics done by Hank, with some modifications
library(dplyr) # data manipulation
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:adehabitatLT':
##
## id
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:reshape':
##
## rename
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lattice) # graphics
library(ggplot2) # graphics
library(lme4) # stats for mixed models
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:reshape':
##
## expand
#library(R2jags) # Bayesian analysis
df <- read.csv("2014 2015 Tb Tex db CORRECTED.csv",
header=T,
sep='\t')
## Assess only the complete data set
df <- df[complete.cases(df), ]
# transform and relabel for convenience
df$y <- df$year - 2014 # this will be useful when I do the Bayesian model
df$yf <- as.factor(df$y)
df$season <- factor(df$season, levels=c("spring", "summer", "fall"))
# check structure
# with(df, table(season,turtle, year))
# Note: there are different turtles in different seasons and years. We have to assume that turtles do not vary systematically across years and seasons.
# Convert time and date into variables that are more useful
t <- strptime(df$date, "%Y-%m-%d %H:%M:%S")
## samples is the relative time (H/M) within a d day
df$samples <- as.numeric(format(t, "%H")) + as.numeric(format(t, "%M"))/60 +
as.numeric(format(t, "%S"))/(60*60)
## day is the date
df$day <- as.numeric(format(t, "%y")) +
as.numeric(format(t, "%m"))/12 + as.numeric(format(t, "%d")) / 365
# xyplot( Tb ~ Tex|turtle, groups = day, data=df , type="smooth")
# log ratio
# > 0 means body temp > shell
# < 0 mean body temp < shell
df$log.ratio <- log( df$Tb/df$Tex )
#A few graphs to get the picture of the times series.
# one day, one turtle
xyplot(db.NEW.signs ~ samples, groups = turtle, data = df, type="l",
subset = day == min(day) )
# one turtle
r1l3 <- subset(df, turtle == "R1L3")
xyplot(db.NEW.signs ~ samples, groups = day, data = r1l3, type="l")
#
# ggplot(data=r1l3, aes(samples, db.NEW.setp,
# colour = as.factor(day),
# group=as.factor(day))) + geom_path()
#Aggregate within turtles and days
#This will make summarize turtle teps for each day,
#including average dbdb and standard deviation of body
#and shell temperatures.
a1 <- select(df, turtle, season, yf, samples, day, Tb, Tex,
db.NEW.signs, db.OLD.signs, log.ratio)
# It was suggested we not use fall data...
# a1s <- filter(a1, season != "fall")
a1s <- a1
a2 <- group_by(a1s, turtle, day, season, yf)
a3 <- summarise(a2,
mn.b = mean(Tb),
mn.x = mean(Tex),
sd.b = sd(Tb),
sd.x = sd( Tex ),
sd.xb = sd.x - sd.b,
mn.db.new = mean( abs( db.NEW.signs ) ),
mn.db.old = mean( abs( db.OLD.signs ) ),
sd.db.new = sd(db.NEW.signs ),
sd.db.old = sd( db.OLD.signs )
)
Here we address this question using a graph and stats.
ggplot(a3, aes(season, mn.b, colour = yf) ) + geom_boxplot()
#Stats.
m0 <- lmer(mn.b ~ season * yf + (1|turtle), data = a3)
m1 <- update(m0, . ~ . - season:yf)
anova(m0, m1)
## refitting model(s) with ML (instead of REML)
## Data: a3
## Models:
## m1: mn.b ~ season + yf + (1 | turtle)
## m0: mn.b ~ season * yf + (1 | turtle)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1 6 1465.5 1488.0 -726.75 1453.5
## m0 8 1448.5 1478.6 -716.25 1432.5 20.998 2 2.757e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## refitting model(s) with ML (instead of REML)
## Data: a3
## Models:
## m1: mn.b ~ season + yf + (1 | turtle)
## m0: mn.b ~ season * yf + (1 | turtle)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1 6 1465.5 1488.0 -726.75 1453.5
## m0 8 1448.5 1478.6 -716.25 1432.5 20.998 2 2.757e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Depends on the interaction.
# Create a variable that includes each season x year combo as a different factor levels (3 seasons x 2 y).
sxy <- interaction(a3$season, a3$yf, drop=TRUE)
# Set fall of the first year as the control. This allows all the model coefficients to test whether the other levels differ from fall 2014
sxy <- relevel(sxy, "fall.0")
# test whether some year-seasons differ from fall 2014
m0b <- lmer(mn.b ~ sxy + (1|turtle), data = a3)
bm0 <- confint(m0b, method = "boot")
## Computing bootstrap confidence intervals ...
## Computing bootstrap confidence intervals ...
bm0
## 2.5 % 97.5 %
## .sig01 0.2527804 1.204139
## .sigma 2.0592846 2.446191
## (Intercept) 14.5447035 16.321692
## sxyspring.0 4.1881884 6.698315
## sxysummer.0 3.7343195 6.448430
## sxyspring.1 3.1211458 5.800662
## sxysummer.1 5.0479929 7.696827
## sxyfall.1 2.7615059 6.116983
#All season:year combos are greater than fall 2014
#Does daily dbdb vary by season, year, or the interaction?
#Similar to body temp approach, above.
ggplot(a3, aes(season, mn.db.new, colour = yf) ) + geom_boxplot()
m0x <- lmer(mn.db.new ~ season * yf + (1|turtle), data = a3)
qplot( fitted(m0x), resid(m0x)) + geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
qqnorm(resid(m0x))
qqline(resid(m0x))
#Data conform reasonably well to model assumptions. Test season by year effect.
m1x <- update(m0x, . ~ . - season:yf)
anova(m0x, m1x)
## refitting model(s) with ML (instead of REML)
## Data: a3
## Models:
## m1x: mn.db.new ~ season + yf + (1 | turtle)
## m0x: mn.db.new ~ season * yf + (1 | turtle)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1x 6 1439.7 1462.2 -713.85 1427.7
## m0x 8 1419.8 1449.9 -701.92 1403.8 23.863 2 6.579e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Yes, it depends on the interaction
# test whether some year-seasons differ from fall 2014
m0c <- lmer(mn.db.new ~ sxy + (1|turtle), data = a3)
dm0 <- confint(m0b, method = "boot")
## Computing bootstrap confidence intervals ...
## Computing bootstrap confidence intervals ...
dm0
## 2.5 % 97.5 %
## .sig01 0.1463693 1.157309
## .sigma 2.0822360 2.431359
## (Intercept) 14.6145243 16.213159
## sxyspring.0 4.1745865 6.845189
## sxysummer.0 3.7300530 6.243952
## sxyspring.1 3.2033061 5.643689
## sxysummer.1 5.2543491 7.517060
## sxyfall.1 2.7459781 5.943480
The db in all seasons differs from db of fall 2014.
Seems like fall 2014 was a cold-ass season.
Environmental data management.
env <- read.table("2014 2015 Te de CORRECTED.csv", header=T, sep=",")
str(env)
## 'data.frame': 658009 obs. of 11 variables:
## $ date : Factor w/ 33245 levels "2014-06-06 00:00:00",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ probe : Factor w/ 90 levels "CDE-01","CDE-02",..: 71 72 73 74 75 76 77 78 79 80 ...
## $ Te : num 18.3 16.6 16.2 16.1 16.8 ...
## $ habitat : Factor w/ 2 levels "closed","open": 2 2 2 2 2 2 2 2 2 2 ...
## $ year : int 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 ...
## $ season : Factor w/ 3 levels "fall","spring",..: 2 2 2 2 2 2 2 2 2 2 ...
## $ hour.day : int 0 0 0 0 0 0 0 0 0 0 ...
## $ de.NEW.signs: num -11.5 -13.3 -13.6 -13.8 -13.1 ...
## $ de.OLD.signs: num -11 -12.7 -13.1 -13.2 -12.6 ...
## $ de.NEW : num 10.8 12.5 12.9 13 12.3 ...
## $ de.OLD : num 7.16 8.93 9.29 9.42 8.75 ...
env$yf <- as.factor( env$year - 2014 )
env$season <- factor(env$season, levels=c("spring", "summer", "fall"))
t <- strptime(env$date, "%Y-%m-%d %H:%M:%S")
env$samples <- as.numeric(format(t, "%H")) +
as.numeric(format(t, "%M"))/60 +
as.numeric(format(t, "%S"))/60^2
env$day <- as.numeric(format(t, "%y")) +
as.numeric(format(t, "%m"))/12 + as.numeric(format(t, "%d")) / 365
summary(env)
## date probe Te
## 2014-06-06 00:00:00: 57 Closed1 : 14544 Min. : 2.583
## 2014-06-06 00:15:00: 57 Closed10: 14544 1st Qu.:17.431
## 2014-06-06 00:30:00: 57 Closed11: 14544 Median :20.314
## 2014-06-06 00:45:00: 57 Closed12: 14544 Mean :20.345
## 2014-06-06 01:00:00: 57 Closed13: 14544 3rd Qu.:23.085
## 2014-06-06 01:15:00: 57 Closed2 : 14544 Max. :50.813
## (Other) :657667 (Other) :570745
## habitat year season hour.day
## closed:362747 Min. :2014 spring:238607 Min. : 0.00
## open :295262 1st Qu.:2014 summer:235658 1st Qu.: 5.00
## Median :2015 fall :183744 Median :11.00
## Mean :2015 Mean :11.47
## 3rd Qu.:2015 3rd Qu.:17.00
## Max. :2015 Max. :23.00
##
## de.NEW.signs de.OLD.signs de.NEW de.OLD
## Min. :-26.170 Min. :-26.717 Min. : 0.000 Min. : 0.000
## 1st Qu.:-10.796 1st Qu.:-11.869 1st Qu.: 3.475 1st Qu.: 2.659
## Median : -7.571 Median : -8.986 Median : 6.858 Median : 5.252
## Mean : -7.362 Mean : -8.955 Mean : 7.176 Mean : 5.833
## 3rd Qu.: -3.969 3rd Qu.: -6.215 3rd Qu.:10.051 3rd Qu.: 8.126
## Max. : 21.792 Max. : 21.513 Max. :25.447 Max. :22.917
##
## yf samples day
## 0:239978 Min. : 0.000 Min. :14.52
## 1:418031 1st Qu.: 5.833 1st Qu.:14.81
## Median :11.833 Median :15.54
## Mean :11.872 Mean :15.29
## 3rd Qu.:17.833 3rd Qu.:15.66
## Max. :23.983 Max. :15.84
##
#Take a peak at the two different ways of assessing environmental quality.
env14 <- subset( env, year == 2014 & day == min(day) )
dim(env14)
## [1] 5472 14
names(env14)
## [1] "date" "probe" "Te" "habitat"
## [5] "year" "season" "hour.day" "de.NEW.signs"
## [9] "de.OLD.signs" "de.NEW" "de.OLD" "yf"
## [13] "samples" "day"
xyplot(de.NEW.signs ~ samples, groups = probe, data = env14, type="l")
xyplot(de.OLD.signs ~ samples, groups = probe, data = env14, type="l")
xyplot(de.NEW.signs ~ de.OLD.signs, groups = probe, data = env14, type="p")
#Aggregate within day and operant model ('probe').
e1 <- select(env, probe, season, yf, habitat, samples, day,
Te, de.NEW.signs, de.OLD.signs)
e2 <- group_by(e1, probe, day, season, yf)
e3 <- summarise(e2,
habitat = unique(habitat),
mn.e = mean(Te),
mn.de.new = mean(abs(de.NEW.signs)),
mn.de.old = mean(abs(de.OLD.signs)),
sd.de.new = sd(de.NEW.signs),
sd.de.old = sd(de.OLD.signs)
)
ggplot(e3, aes(season, mn.e, colour = yf) ) + geom_boxplot()
ggplot(e3, aes(season, mn.de.new, colour = yf) ) + geom_boxplot()
ggplot(e3, aes(season, mn.de.old, colour = yf) ) + geom_boxplot()
#Does daily temperature quality vary by season, year, or the interaction?
m0 <- lmer(log(mn.de.new) ~ season * yf + (1|probe), data = e3)
qplot(fitted(m0), resid(m0)) + geom_smooth()
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
qqnorm(resid(m0)); qqline(resid(m0))
# assumptions reasonably met.
m1 <- update(m0, . ~ . - season:yf)
anova(m0, m1)
## refitting model(s) with ML (instead of REML)
## Data: e3
## Models:
## m1: log(mn.de.new) ~ season + yf + (1 | probe)
## m0: log(mn.de.new) ~ season * yf + (1 | probe)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## m1 6 1353.2 1392.8 -670.61 1341.2
## m0 8 1088.6 1141.4 -536.32 1072.6 268.58 2 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Depends on the interaction. Thermal quality de depends on season x year interaction.
Pending.
db=|Tse-Tb| de=|Tsel-Te| E’=de-db
#Use only operants from closed habitat. We need to justify this.
#justification:
#Box turtles remain in closed habitats according to chi-square of LULC values.
e4 <- group_by(e1, day, season, habitat, yf)
e5 <- summarise(e4,
mn.e = mean(Te),
sd.e = sd(Te),
mn.de.new = mean(abs(de.NEW.signs)),
mn.de.old = mean(abs(de.OLD.signs)),
sd.de.new = sd(de.NEW.signs),
sd.de.old = sd(de.OLD.signs)
)
# DAILY MEAN TEMPS
ggplot(e5, aes(season, mn.e, colour = yf) ) +
geom_boxplot() +
facet_grid(.~habitat)
# DAILY STANDARD DEV OF TEMPS
ggplot(e5, aes(season, sd.e, colour = yf) ) +
geom_boxplot() +
facet_grid(.~habitat)
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
#How does this (or something else) justify not using operants from the open?
e1ss <- filter(e1, habitat == "closed")
e2a <- group_by(e1ss, day, season, yf)
e3a <- summarise(e2a,
mn.e = mean(Te),
mn.de.new = mean(abs(de.NEW.signs)),
mn.de.old = mean(abs(de.OLD.signs)),
sd.de.new = sd(de.NEW.signs),
sd.de.old = sd(de.OLD.signs)
)
a4 <- select(a3, turtle, day, season, yf, mn.db.new, mn.db.old, sd.db.new, sd.db.old)
e4 <- subset(e3a, select=c(day, mn.de.new, mn.de.old, sd.de.new, sd.de.old))
ae4 <- left_join(a4, e4, by = "day")
#ae3 <- merge(a4, e4, by = "day", keep.x=TRUE)
dim(a4)
## [1] 317 8
## [1] 317 8
dim(ae4)
## [1] 317 12
## [1] 317 12
ae4$E.old <- (ae4$mn.de.old - ae4$mn.db.old)
ae4$V.old <- (ae4$sd.de.old - ae4$sd.db.old)
ae4$E.new <- (ae4$mn.de.new - ae4$mn.db.new)
ae4$V.new <- (ae4$sd.de.new - ae4$sd.db.new)
hist(ae4$E.new, main = "Efficiency based on closed habitat probes")
hist(ae4$E.old, main = "Efficiency based on closed habitat probes")
hist(ae4$V.new)
hist(ae4$V.old)
names(ae4)
## [1] "turtle" "day" "season" "yf" "mn.db.new"
## [6] "mn.db.old" "sd.db.new" "sd.db.old" "mn.de.new" "mn.de.old"
## [11] "sd.de.new" "sd.de.old" "E.old" "V.old" "E.new"
## [16] "V.new"
## [1] "turtle" "day" "season" "yf" "mn.db.new"
## [6] "mn.db.old" "sd.db.new" "sd.db.old" "mn.de.new" "mn.de.old"
## [11] "sd.de.new" "sd.de.old" "E.old" "V.old" "E.new"
## [16] "V.new"
ggplot(data=ae4, aes(season, E.new, colour=yf)) + geom_boxplot()
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
em1 <- lmer(E.new ~ 1 + (1|turtle), data = ae4)
# check assumptions.
plot(fitted(em1), resid(em1))
abline(h=0, lty=2, col=2)
qqnorm(resid(em1))
qqline(resid(em1))
# Assumptions met
# Test
summary(em1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: E.new ~ 1 + (1 | turtle)
## Data: ae4
##
## REML criterion at convergence: 806.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.1881 -0.5127 0.0150 0.6137 2.6902
##
## Random effects:
## Groups Name Variance Std.Dev.
## turtle (Intercept) 0.1735 0.4165
## Residual 0.6654 0.8157
## Number of obs: 316, groups: turtle, 26
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.57005 0.09393 6.069
emV1 <- lmer(V.new ~ 1 + (1|turtle), data = ae4)
# check assumptions.
plot(fitted(emV1), resid(emV1))
abline(h=0, lty=2, col=2)
qqnorm(resid(emV1))
qqline(resid(emV1))
# Assumptions met
# Test
summary(emV1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: V.new ~ 1 + (1 | turtle)
## Data: ae4
##
## REML criterion at convergence: 971
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.7669 -0.6502 0.0590 0.5881 3.5229
##
## Random effects:
## Groups Name Variance Std.Dev.
## turtle (Intercept) 0.7782 0.8821
## Residual 1.0453 1.0224
## Number of obs: 316, groups: turtle, 26
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.4652 0.1825 -2.549
Test Microclimate Envelope Hypothesis {Work in progress} Turtles may keep their body temp more constrained than shell temp, that is, regulate their body temps. The following response variables are the daily difference between the variation of shell temp minus variation of the body temp. If they are grea ter than zero, then shell temps are more variable than body temps.
Estimating the probability that the scientific hypothesis is true. Here we estimate the probability that turtles regulate their body temperature toward an opitmum. That is, we estimate (precisely) the probability that our scientific hypothesis is true, given the data, i.e., P(HA|D)P(HA|D).
Random effects model (Lynch 2007, pages 246-7).
Assume:
An overall mean, ?????N(m,s2)?????N(m,s2) A random effect of each turtle, ??i???N(??,??2)??i???N(??,??2) Each observation, yy, is a normal random variate, where yij???N(??i,??2)yij???N(??i,??2) The variance among turtles is ??2???IG(a,b)??2???IG(a,b) The variance among days with turtles (error variance) is ??2???IG(c,d)??2???IG(c,d) We should make sure that the results are robust to choices of priors (e.g., uniform vs. inverse gamma precision, Gelman and Hill 2007).
em1.mat <- model.matrix(em1)
em1.mat[1:3,]
## 1 2 3
## 1 1 1
fe <- fixef(em1)
turtle <- as.numeric( em1 @ frame $ turtle )
re <- ranef(em1)$turtle[,1]
(nre <- length(re) )
## [1] 26
y <- em1 @ resp $ y
N <- nrow(em1.mat)
dat <- list("y", "turtle", "nre", "N")
em.model <- function() {
malpha ~ dnorm(0, 0.001)
tau2 <- 1/sqrt(tau2inv)
#tau2inv ~ dgamma(.01, .01)
tau2inv ~ dunif(0, 10)
sigma2 <- 1/sqrt(sigma2inv)
#sigma2inv ~ dgamma(.01, .01)
sigma2inv ~ dunif(0, 10)
for( i in 1:nre) {
alphai[i] ~ dnorm(malpha, tau2inv)
}
for(i in 1:N) {
y[i] ~ dnorm(yhat[i], sigma2inv)
yhat[i] <- alphai[turtle[i]]
}
}
param <- c("malpha", "alphai", "tau2", "sigma2")
inits <- function(){list( "malpha" = rnorm(1), "alphai" = rnorm(length(re)),
"tau2inv" = runif(1), "sigma2inv" = runif(1) ) }
jem1 <- jags(data = dat, inits, param, model.file=em.model,
n.iter = 1e4)
## module glm loaded
## Compiling model graph
## Resolving undeclared variables
## Allocating nodes
## Graph Size: 671
##
## Initializing model
plot(jem1)
Probability that the hypothesis is correct
es <- jem1$BUGSoutput$sims.list$malpha
qplot(es, geom="density", main = "Pr( H | D )", xlab = "Climate temp variation - Body temp variation")
HPDinterval(as.mcmc(es))[1,1:2]
## lower upper
## -1.474450 -1.054253
For better, or for worse, and for the measure used here (E'???de???dbE'???de???db), box turtles do not appear to thermoregulate under these conditions (95% CI -1.474, -1.054).
CB <- read.csv("Cost-Benefit Data.csv", sep="\t", header=T)
head(CB) #26 turtles, 2 years, 3 seasons.
## turtle N de.NEW db.NEW de.NEW.closed year yf
## 1 L1 3168 6.542747 6.107920 6.542747 2014 2014
## 2 L2 2592 6.475342 5.415826 6.135750 2014 2014
## 3 R1 2704 7.124625 6.968546 7.392318 2014 2014
## 4 R10 4031 6.992030 6.998910 7.272656 2014 2014
## 5 R1L1 3232 6.969302 7.012011 7.217561 2014 2014
## 6 R1L3 4096 6.703190 6.894829 6.979912 2014 2014
str(CB)
## 'data.frame': 26 obs. of 7 variables:
## $ turtle : Factor w/ 26 levels "L1","L2","R1",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ N : int 3168 2592 2704 4031 3232 4096 3520 3808 4096 4096 ...
## $ de.NEW : num 6.54 6.48 7.12 6.99 6.97 ...
## $ db.NEW : num 6.11 5.42 6.97 7 7.01 ...
## $ de.NEW.closed: num 6.54 6.14 7.39 7.27 7.22 ...
## $ year : int 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 ...
## $ yf : int 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 ...
#Year to factor
CB$yf <- factor(CB$year)
#Thermal Quality (TQ or de) is based on when the turtles were in the field, all
#other corresponding values have been omitted. Thermal accuracy is (TA or db).
str(CB)
## 'data.frame': 26 obs. of 7 variables:
## $ turtle : Factor w/ 26 levels "L1","L2","R1",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ N : int 3168 2592 2704 4031 3232 4096 3520 3808 4096 4096 ...
## $ de.NEW : num 6.54 6.48 7.12 6.99 6.97 ...
## $ db.NEW : num 6.11 5.42 6.97 7 7.01 ...
## $ de.NEW.closed: num 6.54 6.14 7.39 7.27 7.22 ...
## $ year : int 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 ...
## $ yf : Factor w/ 2 levels "2014","2015": 1 1 1 1 1 1 1 1 1 1 ...
#factor-integer-numeric-numeric-numeric-factor
qplot(de.NEW, db.NEW, data=CB, colour=yf)
lm.CB<-lm(db.NEW~de.NEW + yf +de.NEW:yf, data=CB)
summary(lm.CB)
##
## Call:
## lm(formula = db.NEW ~ de.NEW + yf + de.NEW:yf, data = CB)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8457 -0.4323 0.1466 0.3128 0.9740
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.631693 0.471569 1.340 0.194
## de.NEW 0.869422 0.051461 16.895 4.38e-14 ***
## yf2015 0.496164 0.776467 0.639 0.529
## de.NEW:yf2015 -0.002778 0.109677 -0.025 0.980
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5049 on 22 degrees of freedom
## Multiple R-squared: 0.9529, Adjusted R-squared: 0.9465
## F-statistic: 148.4 on 3 and 22 DF, p-value: 9.523e-15
summary.aov(lm.CB)
## Df Sum Sq Mean Sq F value Pr(>F)
## de.NEW 1 112.42 112.42 441.051 4.79e-16 ***
## yf 1 1.09 1.09 4.274 0.0507 .
## de.NEW:yf 1 0.00 0.00 0.001 0.9800
## Residuals 22 5.61 0.25
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
No difference in slopes (2014 = 2015 for TA v. TQ slope)
#ANCOVA testing against a null slope of m=1
lm.CB2<-lm(db.NEW~de.NEW+year, data=CB)
lm.CB3<-lm(db.NEW~de.NEW, data=CB)
#Google "R test whether slope differs from a constant"
# parameter estimates and their standard errors
coefs <- coef( summary(lm.CB) )
coefs
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.631692525 0.4715693 1.33955391 1.940652e-01
## de.NEW 0.869422382 0.0514605 16.89494470 4.383610e-14
## yf2015 0.496163525 0.7764672 0.63900127 5.294196e-01
## de.NEW:yf2015 -0.002778403 0.1096769 -0.02533262 9.800180e-01
# note here that the slope is about 0.87 (std. error = 0.05)
# looks as though it differs from 1.0
B1 <- coefs["de.NEW", "Estimate"]
B1.se <- coefs["de.NEW", "Std. Error"]
# Use the definition of a t-test
t <- ( B1 - 1 )/B1.se
t
## [1] -2.537434
# degrees of freedom = n - p, where p = no. of parameters
df2 <- summary(lm.CB)$df[2]
# Use the cumulative t-distribution
pt(t, df2)
## [1] 0.009382075
#[1] 0.009382075
yes it does differ from one.
another look.
summary(lm.CB)
##
## Call:
## lm(formula = db.NEW ~ de.NEW + yf + de.NEW:yf, data = CB)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8457 -0.4323 0.1466 0.3128 0.9740
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.631693 0.471569 1.340 0.194
## de.NEW 0.869422 0.051461 16.895 4.38e-14 ***
## yf2015 0.496164 0.776467 0.639 0.529
## de.NEW:yf2015 -0.002778 0.109677 -0.025 0.980
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5049 on 22 degrees of freedom
## Multiple R-squared: 0.9529, Adjusted R-squared: 0.9465
## F-statistic: 148.4 on 3 and 22 DF, p-value: 9.523e-15
summary.aov(lm.CB)
## Df Sum Sq Mean Sq F value Pr(>F)
## de.NEW 1 112.42 112.42 441.051 4.79e-16 ***
## yf 1 1.09 1.09 4.274 0.0507 .
## de.NEW:yf 1 0.00 0.00 0.001 0.9800
## Residuals 22 5.61 0.25
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qplot(de.NEW, db.NEW, data=CB, colour=yf) + geom_smooth(method="lm") +
geom_abline(intercept = 0, slope = 1, lty=3)
qplot(de.NEW, db.NEW, data=CB) + geom_smooth(method="lm") +
geom_abline(intercept = 0, slope = 1, lty=3) +
geom_point(aes(colour = yf))