Box Turtle Eco-Physiology Stats Updated (8-14-2016)

#Statistics for Box Turtle Manuscript:
#save.image(file = "Manuscript.RData") #Saved 8-5-2016
#load("Manuscript.RData")
#Packages
library(Rmisc)
## Warning: package 'Rmisc' was built under R version 3.0.3
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.0.3
## Loading required package: plyr
## Warning: package 'plyr' was built under R version 3.0.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.0.3
library(scales)
## Warning: package 'scales' was built under R version 3.0.3
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.0.3
## Loading required package: grid
library(reshape)
## Warning: package 'reshape' was built under R version 3.0.3
## 
## Attaching package: 'reshape'
## 
## The following objects are masked from 'package:plyr':
## 
##     rename, round_any
#library(data.table)
#library(doBy)
library(plyr)
#library(splitstackshape)
library(ade4)
## Warning: package 'ade4' was built under R version 3.0.3
#library(adehabitatHR)
#library(adehabitatHS)
library(adehabitatLT)
## Warning: package 'adehabitatLT' was built under R version 3.0.3
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.0.3
## Loading required package: adehabitatMA
## Warning: package 'adehabitatMA' was built under R version 3.0.3
## 
## Attaching package: 'adehabitatMA'
## 
## The following object is masked from 'package:plyr':
## 
##     join
## 
## Loading required package: CircStats
## Warning: package 'CircStats' was built under R version 3.0.3
## Loading required package: MASS
## Loading required package: boot
## 
## Attaching package: 'boot'
## 
## The following object is masked from 'package:lattice':
## 
##     melanoma
## 
## 
## Attaching package: 'adehabitatLT'
## 
## The following object is masked from 'package:plyr':
## 
##     id
#library(adehabitatMA)
#library(geosphere)

#Overall Goal: Investigate the effects of temperature on daily movement patterns and habitat occupancy, and determine how thermal variation influences thermoregulatory performance.

#Overall Objective: To test the relationship between habitat and thermoregulatory performance. Determine the extent to which thermal habitat influences daily movement and habitat use of box turtles.

getwd()
## [1] "G:/R - Research Files/Manuscript Files"
setwd("G:/R - Research Files/Manuscript Files")

options(warn=-1)

#Objective 1: Temperature-Movement Relationship (1) Movement would be positively related to body temperature

#Upload file
dist<-read.csv('Temp-Movement.csv', #1665 observations of 11 variables
                        sep=',',
                        header=T)
#str(dist)
#head(dist)

#Correcting for time
dist$datehour<-as.POSIXct(as.character(as.factor(dist$datehour)),format="%Y-%m-%d %H:%M:%S")

dist$time<-format(as.POSIXct(dist$datehour) ,format = "%H")
#head(dist)

dist$time<-as.factor(as.character(dist$time))

#mm2 <- mm[mm[,1]!=2,]
#Removing hour 19 because it's the distance between 7pm and 7am (time when GPS was off)
dist<-dist[dist[,"time"]!=19,] #removes data based on condition in column/row

dist.sum<-summarySE(dist, 
               measurevar="dist.m",
               groupvar="time")

#View(dist.sum) viewing data set
#dist.sum<-dist.sum[-c(13),] #hour 19 removed from data set

#Linear Model for Body Temperature and Movement
lm1<-lm(dist.m~Tb, data=dist)
summary(lm1)
## 
## Call:
## lm(formula = dist.m ~ Tb, data = dist)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -17.55 -10.44  -4.32   4.78 104.18 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  19.9459     1.9150   10.42   <2e-16 ***
## Tb           -0.1075     0.0848   -1.27      0.2    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.9 on 1544 degrees of freedom
## Multiple R-squared:  0.00104,    Adjusted R-squared:  0.000394 
## F-statistic: 1.61 on 1 and 1544 DF,  p-value: 0.205

#Body Temperature and Movement Graphing average hour body temperature and distance moved per hour to see how it looks

ggplot(dist, aes(x=Tb,
                      y=dist.m)) +
  xlab(expression(paste("Body Temperature (°C)"))) +
  ylab(expression(paste("Distance (m)"))) +
  #labs(title=(expression(paste("Body Temperature-Movement")))) +
  xlim(10,35)+
  ylim(0,130)+ 
  theme_classic() +
  geom_point() +
  #geom_point(aes(colour=turtle), size=1) +
  stat_smooth(method=lm, level=0.95,
              fill="grey", colour="black", size=0) 

plot of chunk unnamed-chunk-4

#I don't listen to ggplot warnings

#Distance and Time of Day graphing Graphing to determine whether time of day could influence movement

limits <- aes(ymax = dist.m + se, ymin=dist.m - se)

ggplot(dist.sum, aes(x = time, y = dist.m)) +
  xlab(expression(paste("Time of Day (hour)"))) +
  ylab(expression(paste("Distance (m)"))) +
  #labs(title=(expression(paste("Time of Day-Movement")))) +
  #ylim(0,40)+ 
  theme_classic()+
  geom_bar(stat="identity", position=position_dodge())+
  geom_errorbar(limits, width=0.25) +
  scale_y_continuous(expand = c(0, 0), 
                     limits = c(0, 35),
                     breaks =c(0,5,10,15,20,25,30,35))

plot of chunk unnamed-chunk-5

##ANOVA Testing

#head(dist) 

qqnorm(dist$dist.m) #right skewed
qqline(dist$dist.m) 

plot of chunk unnamed-chunk-6

hist(dist$dist.m, breaks = 50, main = "Distance (m), breaks = 50")

plot of chunk unnamed-chunk-6

#Normality
#Null-hypothesis of this test is that the population is normally distributed.
shapiro.test(dist$dist.m)
## 
##  Shapiro-Wilk normality test
## 
## data:  dist$dist.m
## W = 0.7834, p-value < 2.2e-16
#Reject null in favor of alternative. Histogram also shows skewness in data.

#Without correcting
lm2<-lm(dist$dist.m~dist$time)
A2<-anova(lm2)
A2 #Non-significant
## Analysis of Variance Table
## 
## Response: dist$dist.m
##             Df Sum Sq Mean Sq F value Pr(>F)
## dist$time   11   3653     332    1.31   0.21
## Residuals 1534 389138     254
Aov2<-aov(dist$dist.m~dist$time)

TukeyHSD(Aov2) #only works with aov() function, both are ANOVA functions. 
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = dist$dist.m ~ dist$time)
## 
## $`dist$time`
##           diff     lwr    upr  p adj
## 08-07  3.93570  -2.568 10.440 0.7068
## 09-07  1.47347  -5.043  7.990 0.9999
## 10-07  3.33453  -3.170  9.839 0.8782
## 11-07  1.70912  -4.795  8.213 0.9994
## 12-07 -0.68067  -7.197  5.836 1.0000
## 13-07  1.53101  -4.986  8.048 0.9998
## 14-07  1.08548  -5.419  7.590 1.0000
## 15-07  0.70270  -5.814  7.219 1.0000
## 16-07  0.54738  -5.957  7.052 1.0000
## 17-07 -1.09431  -7.662  5.474 1.0000
## 18-07 -1.19607  -7.725  5.333 1.0000
## 09-08 -2.46223  -8.941  4.016 0.9853
## 10-08 -0.60117  -7.067  5.865 1.0000
## 11-08 -2.22658  -8.693  4.240 0.9935
## 12-08 -4.61637 -11.095  1.862 0.4539
## 13-08 -2.40469  -8.883  4.074 0.9878
## 14-08 -2.85023  -9.316  3.616 0.9548
## 15-08 -3.23301  -9.712  3.246 0.8967
## 16-08 -3.38832  -9.854  3.078 0.8614
## 17-08 -5.03001 -11.560  1.500 0.3273
## 18-08 -5.13177 -11.623  1.360 0.2875
## 10-09  1.86106  -4.618  8.340 0.9987
## 11-09  0.23565  -6.243  6.714 1.0000
## 12-09 -2.15414  -8.645  4.337 0.9953
## 13-09  0.05753  -6.434  6.549 1.0000
## 14-09 -0.38800  -6.867  6.091 1.0000
## 15-09 -0.77078  -7.262  5.720 1.0000
## 16-09 -0.92609  -7.405  5.553 1.0000
## 17-09 -2.56778  -9.111  3.975 0.9810
## 18-09 -2.66954  -9.173  3.834 0.9732
## 11-10 -1.62541  -8.091  4.841 0.9996
## 12-10 -4.01520 -10.494  2.463 0.6738
## 13-10 -1.80352  -8.282  4.675 0.9990
## 14-10 -2.24906  -8.715  4.217 0.9929
## 15-10 -2.63184  -9.110  3.847 0.9753
## 16-10 -2.78715  -9.253  3.679 0.9616
## 17-10 -4.42884 -10.959  2.102 0.5354
## 18-10 -4.53060 -11.022  1.961 0.4882
## 12-11 -2.38979  -8.868  4.089 0.9884
## 13-11 -0.17812  -6.657  6.300 1.0000
## 14-11 -0.62365  -7.090  5.842 1.0000
## 15-11 -1.00643  -7.485  5.472 1.0000
## 16-11 -1.16174  -7.628  5.304 1.0000
## 17-11 -2.80343  -9.334  3.727 0.9627
## 18-11 -2.90519  -9.396  3.586 0.9497
## 13-12  2.21167  -4.279  8.703 0.9940
## 14-12  1.76614  -4.712  8.245 0.9992
## 15-12  1.38336  -5.108  7.874 0.9999
## 16-12  1.22805  -5.251  7.707 1.0000
## 17-12 -0.41364  -6.956  6.129 1.0000
## 18-12 -0.51540  -7.019  5.988 1.0000
## 14-13 -0.44553  -6.924  6.033 1.0000
## 15-13 -0.82831  -7.319  5.663 1.0000
## 16-13 -0.98362  -7.462  5.495 1.0000
## 17-13 -2.62531  -9.168  3.918 0.9775
## 18-13 -2.72708  -9.231  3.777 0.9686
## 15-14 -0.38278  -6.861  6.096 1.0000
## 16-14 -0.53809  -7.004  5.928 1.0000
## 17-14 -2.17978  -8.710  4.351 0.9950
## 18-14 -2.28154  -8.773  4.210 0.9923
## 16-15 -0.15531  -6.634  6.323 1.0000
## 17-15 -1.79700  -8.340  4.746 0.9991
## 18-15 -1.89876  -8.403  4.605 0.9985
## 17-16 -1.64169  -8.172  4.889 0.9996
## 18-16 -1.74345  -8.235  4.748 0.9993
## 18-17 -0.10176  -6.657  6.454 1.0000
#Non-parametric Approach
kruskal.test(dist$dist.m~dist$time, data=dist)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  dist$dist.m by dist$time
## Kruskal-Wallis chi-squared = 21.75, df = 11, p-value = 0.02637
#Kruskal Wallis = Significant

#Correcting Data
dist$log.dist.m<-log(dist$dist.m) #correct for skewness in data

qqnorm(dist$log.dist.m) #corrected data
qqline(dist$log.dist.m) 

plot of chunk unnamed-chunk-6

lm1<-lm(dist$log.dist.m~dist$time)
A1<-anova(lm1)
A1 #significant
## Analysis of Variance Table
## 
## Response: dist$log.dist.m
##             Df Sum Sq Mean Sq F value Pr(>F)  
## dist$time   11     16   1.483    1.87  0.039 *
## Residuals 1534   1214   0.791                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Aov1<-aov(dist$log.dist.m~dist$time)

TukeyHSD(Aov1)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = dist$log.dist.m ~ dist$time)
## 
## $`dist$time`
##            diff     lwr       upr  p adj
## 08-07  0.206633 -0.1567  0.569929 0.7828
## 09-07  0.063802 -0.3002  0.427793 1.0000
## 10-07  0.161363 -0.2019  0.524659 0.9523
## 11-07  0.056257 -0.3070  0.419553 1.0000
## 12-07 -0.087876 -0.4519  0.276115 0.9997
## 13-07  0.009031 -0.3550  0.373022 1.0000
## 14-07 -0.049207 -0.4125  0.314089 1.0000
## 15-07 -0.069338 -0.4333  0.294653 1.0000
## 16-07 -0.030018 -0.3933  0.333278 1.0000
## 17-07 -0.091858 -0.4587  0.275011 0.9996
## 18-07 -0.161229 -0.5259  0.203466 0.9539
## 09-08 -0.142831 -0.5047  0.219038 0.9802
## 10-08 -0.045269 -0.4064  0.315900 1.0000
## 11-08 -0.150376 -0.5115  0.210793 0.9702
## 12-08 -0.294508 -0.6564  0.067360 0.2455
## 13-08 -0.197602 -0.5595  0.164267 0.8250
## 14-08 -0.255839 -0.6170  0.105330 0.4636
## 15-08 -0.275970 -0.6378  0.085898 0.3430
## 16-08 -0.236651 -0.5978  0.124519 0.5904
## 17-08 -0.298490 -0.6633  0.066273 0.2380
## 18-08 -0.367862 -0.7304 -0.005285 0.0431
## 10-09  0.097562 -0.2643  0.459430 0.9993
## 11-09 -0.007545 -0.3694  0.354324 1.0000
## 12-09 -0.151677 -0.5142  0.210889 0.9691
## 13-09 -0.054771 -0.4173  0.307796 1.0000
## 14-09 -0.113008 -0.4749  0.248861 0.9972
## 15-09 -0.133139 -0.4957  0.229427 0.9889
## 16-09 -0.093820 -0.4557  0.268049 0.9995
## 17-09 -0.155659 -0.5211  0.209796 0.9648
## 18-09 -0.225031 -0.5883  0.138243 0.6745
## 11-10 -0.105107 -0.4663  0.256063 0.9985
## 12-10 -0.249239 -0.6111  0.112629 0.5100
## 13-10 -0.152333 -0.5142  0.209536 0.9677
## 14-10 -0.210570 -0.5717  0.150600 0.7541
## 15-10 -0.230701 -0.5926  0.131167 0.6325
## 16-10 -0.191382 -0.5526  0.169788 0.8522
## 17-10 -0.253221 -0.6180  0.111542 0.4971
## 18-10 -0.322593 -0.6852  0.039985 0.1375
## 12-11 -0.144132 -0.5060  0.217736 0.9787
## 13-11 -0.047226 -0.4091  0.314643 1.0000
## 14-11 -0.105463 -0.4666  0.255706 0.9985
## 15-11 -0.125594 -0.4875  0.236274 0.9930
## 16-11 -0.086275 -0.4474  0.274895 0.9998
## 17-11 -0.148114 -0.5129  0.216649 0.9753
## 18-11 -0.217486 -0.5801  0.145091 0.7184
## 13-12  0.096906 -0.2657  0.459473 0.9993
## 14-12  0.038669 -0.3232  0.400538 1.0000
## 15-12  0.018538 -0.3440  0.381105 1.0000
## 16-12  0.057858 -0.3040  0.419726 1.0000
## 17-12 -0.003982 -0.3694  0.361474 1.0000
## 18-12 -0.073354 -0.4366  0.289920 1.0000
## 14-13 -0.058237 -0.4201  0.303631 1.0000
## 15-13 -0.078369 -0.4409  0.284198 0.9999
## 16-13 -0.039049 -0.4009  0.322820 1.0000
## 17-13 -0.100888 -0.4663  0.264567 0.9991
## 18-13 -0.170260 -0.5335  0.193014 0.9311
## 15-14 -0.020131 -0.3820  0.341737 1.0000
## 16-14  0.019188 -0.3420  0.380358 1.0000
## 17-14 -0.042651 -0.4074  0.322112 1.0000
## 18-14 -0.112023 -0.4746  0.250555 0.9975
## 16-15  0.039320 -0.3225  0.401188 1.0000
## 17-15 -0.022520 -0.3880  0.342936 1.0000
## 18-15 -0.091892 -0.4552  0.271382 0.9996
## 17-16 -0.061840 -0.4266  0.302924 1.0000
## 18-16 -0.131211 -0.4938  0.231366 0.9901
## 18-17 -0.069372 -0.4355  0.296786 1.0000
#18-08  p adj = 0.0431335.
#Pretty negligable if you ask me. 

Only statistical difference is between 18:00 and 08:00 where at 8am turtles move more than at 6pm. P-value is 0.043, which is on the cusp of not being significant [in my opinion].

#Objective 2: Habitat Selection and Preferred Body Temperature (2) habitat selected would reflect preferred body temperatures

Thermal Preference Prep:

ZPTsel<-read.table("ZPTsel.dat", 
                   sep="\t", 
                   header=TRUE) #Pedro's data - 11 male box turtles

dateRef<-Sys.Date()
dateRefn<-Sys.Date() + 1

######################################
#Constant Thermal Preference (Tsel-C)#
######################################

#meanTselC<-mean(ZPTselDynamic$value, na.rm=TRUE)

#quantile(ZPTselDynamic$value, na.rm=TRUE)
#0%  25%  50%  75% 100% 
#6.0 25.5 29.3 31.3 36.6 

TselC.min<-25.5
TselC.max<-31.3


# New Tsel

date_time<-seq(as.POSIXct("2014-08-01 07:00:00"),
               as.POSIXct("2014-08-03 07:00:00"), 
               length.out=289)

ZPTsel$date<-date_time[1:287]

ZPTselDynamic<-melt(ZPTsel, id.vars=c('date'))

meanTselD<-aggregate(ZPTselDynamic$value, 
                     list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")),
                     mean, na.rm=TRUE)

colnames(meanTselD)<-c("hour","mTsel")

medianTselD<-aggregate(ZPTselDynamic$value,
                       list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")), 
                       median, na.rm=TRUE)

colnames(medianTselD)<-c("hour","medianTsel")

maxTselD<-aggregate(ZPTselDynamic$value, 
                    list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")), 
                    max, na.rm=TRUE)

colnames(maxTselD)<-c("hour","maxTsel")

minTselD<-aggregate(ZPTselDynamic$value,
                    list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")),
                    min, na.rm=TRUE)

colnames(minTselD)<-c("hour","minTsel")

sdTselD<-aggregate(ZPTselDynamic$value, 
                   list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")),
                   sd, na.rm=TRUE)

colnames(sdTselD)<-c("hour","sdTsel")

nTselD<-aggregate(ZPTselDynamic$value,
                  list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")), 
                  length)

colnames(nTselD)<-c("hour","nTsel")

errorTselD<-1.96*(sdTselD$sdTsel/sqrt(nTselD$nTsel))

ZPTselD<-cbind(nTselD, meanTselD, medianTselD, minTselD, maxTselD, sdTselD, errorTselD)
ZPTselD<-ZPTselD[,-c(3,5,7,9,11)]
#View(ZPTselD)

ZPTselD$hour<-as.POSIXlt(ZPTselD$hour,format="%H:%M:%S")
ZPTselD$hour.day<-format(round(ZPTselD$hour, "hours"), "%H")  

#Plotting Thermal Preference (Dynamic, Tsel-D; Constant, Tsel-C) Dynamic Calculation

ggplot(ZPTselD, aes(x=hour, y=mTsel))+
  xlab("Hour of day") +
  ylab(expression(paste("Thermal Preference (°C)"))) +
  #labs(title=(expression(paste("Combined E"^"' "," (closed models de)")))) +
  #xlim(0,23)+
  ylim(15,35)+
theme_classic() + #Praise baby jebus
geom_ribbon(aes(ymin=c(mTsel-errorTselD), ymax=c(mTsel+errorTselD)), data=ZPTselD)+
  scale_x_datetime(breaks = "2 hour", labels =date_format("%H"))

plot of chunk unnamed-chunk-8

When given the opportunity, box turtles select warmer temperatures at night (when we incorporate time into the Tsel).

Constant Calculation

TselC.min<-25.5
TselC.max<-31.3

ggplot(ZPTselD, aes(x=hour, y=mTsel))+
  xlab("Hour of day") +
  ylab(expression(paste("Thermal Preference (°C)"))) +
  #labs(title=(expression(paste("Combined E"^"' "," (closed models de)")))) +
  #xlim(0,23)+
  ylim(15,35)+
theme_classic() + #Praise baby jebus
geom_ribbon(aes(ymin=TselC.min, ymax=TselC.max))+
  scale_x_datetime(breaks = "2 hour", labels =date_format("%H"))

plot of chunk unnamed-chunk-9

Thermal preference for box turtles ranges from 25.5°C to 31.3°C.

#Habitat Preference


```r
#LULC.Habitat<-read.table(file = "clipboard",  
#               sep = "\t", 
#               header=TRUE)

#write.table(LULC.Habitat, file="LULC Habitat.csv", sep="\t")               
LULC.Habitat<-read.csv("LULC Habitat.csv", 
                       header=T,
                       sep="\t")
LULC.Habitat<-LULC.Habitat[,-c(6,7,8,9,10)]

Calculated slightly different than a regular chi-square analysis. For starters, the Z-crit is calculated as the following: z(1-??/2k) … NOT z(1-??/2)

In this analysis, k represents the number of simultaneous estimates being made. Should probably put that in the methods…

head(LULC.Habitat)
##          LULC_Type Area_m2 Perc_Area Obs_Count_Turtle Expected
## 1   Developed Area   30212    0.1413               94      269
## 2       Open Water    4044    0.0189                0       36
## 3 Deciduous Forest  101648    0.4754             1034      906
## 4 Evergreen Forest   12816    0.0599              435      114
## 5     Mixed Forest    5604    0.0262                6       50
## 6      Shrub/Scrub    1608    0.0075               14       14

LULC Type is based the GIS layer land-use / land-cover.

Area_m2 is Area (m2) of LULC values that are within the 456m buffer radius from turtle start locations (ranges were dissolved so there's no duplicate LULC values for total potential habitat occupied) Perc_Area is the percentage of area for each LULC value Obs_Count_Turtle is observations (GPS coordinates) of turtles in each of the 10 LULC types Expected is based on the total 1906 (2014 and 2015) GPS coordinates and applied to the percent area. It is assumed that box turtles use each habitat type proportionally, relative to other habitat types.

#(1-??/2k), where k=10 simultaneous comparisons.
z<- abs(qnorm((1-0.95)/(2*(10))))
z
## [1] 2.807
LULC.Habitat$Perc_Turt_Hab<-(LULC.Habitat$Obs_Count_Turtle/1906) #obtaining percentage of Turtle observations in each habitat type

#Confidence interval is calculated as: z(1-??/2k)*???(pi(1-pi))/n

LULC.Habitat$Conf.Int<-(2.807034*(sqrt(LULC.Habitat$Perc_Turt_Hab*(1-LULC.Habitat$Perc_Turt_Hab)/1906))) 

LULC.Habitat$Preference<-ifelse(LULC.Habitat$Perc_Turt_Hab-LULC.Habitat$Conf.Int > LULC.Habitat$Perc_Area, "Preferred",ifelse(LULC.Habitat$Perc_Turt_Hab+LULC.Habitat$Conf.Int < LULC.Habitat$Perc_Area, "Avoid","Expected"))

LULC.Habitat
##                       LULC_Type  Area_m2 Perc_Area Obs_Count_Turtle
## 1                Developed Area  30211.7    0.1413               94
## 2                    Open Water   4044.5    0.0189                0
## 3              Deciduous Forest 101647.8    0.4754             1034
## 4              Evergreen Forest  12815.6    0.0599              435
## 5                  Mixed Forest   5603.8    0.0262                6
## 6                   Shrub/Scrub   1608.0    0.0075               14
## 7          Grassland/Herbaceous   2387.7    0.0112              141
## 8                   Pasture/Hay  29237.2    0.1367              178
## 9              Cultivated Crops  26118.5    0.1222                4
## 10 Emergent Herbaceous Wetlands    146.2    0.0007                0
##    Expected Perc_Turt_Hab Conf.Int Preference
## 1       269      0.049318 0.013922      Avoid
## 2        36      0.000000 0.000000      Avoid
## 3       906      0.542497 0.032032  Preferred
## 4       114      0.228227 0.026984  Preferred
## 5        50      0.003148 0.003602      Avoid
## 6        14      0.007345 0.005490   Expected
## 7        21      0.073977 0.016828  Preferred
## 8       261      0.093389 0.018709      Avoid
## 9       233      0.002099 0.002942      Avoid
## 10        1      0.000000 0.000000      Avoid
Habitat Preference is based on Neu et al. 1972, using chi-square analysis. Based on the total area (456m buffer), the null hypothesis is that turtles would use each habitat proportionally. Proportionally higher indicates preference, proportionally lower indicidates avoidance.

Box turtles prefer deciduous and evergreen forest, as well as grassland and herbaceous. Shrub/scrub habitat is used as expected, and all other habitat types are used proportionally less (indicating avoidance).

Canopy Cover and Slope between Potential habitat (456m buffer) and occupied habitat (KDE estimate):

```r
Canopy<-read.csv("Canopy Cover.csv",
                 sep='\t',
                 header=T)

t.test(Canopy$buff_cc, Canopy$kde_cc, paired=TRUE)
## 
##  Paired t-test
## 
## data:  Canopy$buff_cc and Canopy$kde_cc
## t = -2.34, df = 18, p-value = 0.03101
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -23.912  -1.287
## sample estimates:
## mean of the differences 
##                   -12.6
Slope<-read.csv("Slope.csv",
                sep='\t',
                header=T)

t.test(Slope$kde_slope, Slope$buff_slope, paired=TRUE)
## 
##  Paired t-test
## 
## data:  Slope$kde_slope and Slope$buff_slope
## t = 0.6699, df = 21, p-value = 0.5102
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.8971  1.7497
## sample estimates:
## mean of the differences 
##                  0.4263
#Upload file
box.turtle.data<-read.csv("2014 2015 Tb Tex db CORRECTED.csv", 
               header=TRUE,
               sep="\t")

operant.data<-read.csv("2014 2015 Te de CORRECTED.csv", 
                          header=TRUE,
                          sep=",")

#Time Correction for data.frames

box.turtle.data$date<-as.POSIXct(as.character(box.turtle.data$date,
                                  format= "%Y-%m-%d %H:%M:%S"))

operant.data$date<-as.POSIXct(as.character(operant.data$date,
                                              format= "%Y-%m-%d %H:%M:%S"))

#str(box.turtle.data)
#str(operant.data)

Thermal exploitation (Ex) is typically calculated as the following: (Time Tb in Tsel)/(Time Te in Tsel) This is represented in a 24-hour period calculate Ex for each day for each turtle

Body Temperature:

meanTb<-aggregate(list(mTb = box.turtle.data$Tb), 
          list(date = cut(box.turtle.data$date, "1 hour"), 
               turtle = box.turtle.data$turtle), 
          data=box.turtle.data,
          mean)
head(meanTb)
##                  date turtle   mTb
## 1 2014-06-10 00:00:00     L1 17.37
## 2 2014-06-10 01:00:00     L1 17.08
## 3 2014-06-10 02:00:00     L1 16.85
## 4 2014-06-10 03:00:00     L1 16.72
## 5 2014-06-10 04:00:00     L1 16.74
## 6 2014-06-10 05:00:00     L1 16.81
meanTb$date<-as.POSIXct(as.character(meanTb$date,
                                     format="%Y-%m-%d %H:%M:%S"))

meanTb$hour.day<-format(round(meanTb$date, "hours"), "%H") 

#Exploitation Index

#Constant (Tsel-C) Tb
Tsel.min<-25.5 #bottom 25%
Tsel.mid<-29.3 #mid 50%
Tsel.max<-31.3 #top 75%

C.Tb<-ifelse(meanTb$mTb > Tsel.max, "above", #ifelse(condition, yes, no)
           ifelse(meanTb$mTb < Tsel.min, "below",
                  "inside"))

#head(C.Tb)
meanTb$Tsel.C_Tb<-C.Tb
#head(meanTb)

#Dynamic (Tsel-D) Tb

D.Tb<-ifelse(meanTb$mTb[format(round(meanTb$date, "hours"), "%H")%in%meanTb$hour.day] < (ZPTselD$mTsel[as.numeric(meanTb$hour.day)+1] - ZPTselD$errorTsel[as.numeric(meanTb$hour.day)+1]), "below",
       ifelse(meanTb$mTb[format(round(meanTb$date, "hours"), "%H")%in%meanTb$hour.day] > (ZPTselD$mTsel[as.numeric(meanTb$hour.day)+1] + ZPTselD$errorTsel[as.numeric(meanTb$hour.day)+1]), "above",
       "inside"))

#head(D.Tb)
meanTb$Tsel.D_Tb<-D.Tb

Shell Temperature

meanTex<-aggregate(list(mTex = box.turtle.data$Tex), 
                  list(date = cut(box.turtle.data$date, "1 hour"), 
                       turtle = box.turtle.data$turtle), 
                  data=box.turtle.data,
                  mean)
#head(meanTex)

meanTex$date<-as.POSIXct(as.character(meanTex$date,
                                     format="%Y-%m-%d %H:%M:%S"))
#View(meanTex)
meanTex$hour.day<-format(round(meanTex$date, "hours"), "%H") 

Shell.C<-ifelse(meanTex$mTex > Tsel.max, "above", #ifelse(condition, yes, no)
           ifelse(meanTex$mTex < Tsel.min, "below",
                  "inside"))

meanTex$Tsel.C_Shell<-Shell.C

#Dynamic (Tsel-D)

Shell.D<-ifelse(meanTex$mTex[format(round(meanTex$date, "hours"), "%H")%in%meanTex$hour.day] < (ZPTselD$mTsel[as.numeric(meanTex$hour.day)+1] - ZPTselD$errorTsel[as.numeric(meanTex$hour.day)+1]), "below",
           ifelse(meanTex$mTex[format(round(meanTex$date, "hours"), "%H")%in%meanTex$hour.day] > (ZPTselD$mTsel[as.numeric(meanTex$hour.day)+1] + ZPTselD$errorTsel[as.numeric(meanTex$hour.day)+1]), "above",
                  "inside"))


meanTex$Tsel.D_Shell<-Shell.D

What we are interested in were the following conditions: #(1) When Tshell is above Tsel and Tb is inside Tsel, #(2) When Tshell is below Tsel and Tb is inside Tsel, #(3) When shell is inside Tsel and Tb is inside Tsel.

Merging data.frames (the long way):

head(meanTb)
##                  date turtle   mTb hour.day Tsel.C_Tb Tsel.D_Tb
## 1 2014-06-10 00:00:00     L1 17.37       00     below     below
## 2 2014-06-10 01:00:00     L1 17.08       01     below     below
## 3 2014-06-10 02:00:00     L1 16.85       02     below     below
## 4 2014-06-10 03:00:00     L1 16.72       03     below     below
## 5 2014-06-10 04:00:00     L1 16.74       04     below     below
## 6 2014-06-10 05:00:00     L1 16.81       05     below     below
head(meanTex)
##                  date turtle  mTex hour.day Tsel.C_Shell Tsel.D_Shell
## 1 2014-06-10 00:00:00     L1 16.63       00        below        below
## 2 2014-06-10 01:00:00     L1 16.14       01        below        below
## 3 2014-06-10 02:00:00     L1 15.95       02        below        below
## 4 2014-06-10 03:00:00     L1 15.90       03        below        below
## 5 2014-06-10 04:00:00     L1 16.11       04        below        below
## 6 2014-06-10 05:00:00     L1 16.26       05        below        below
Therm.Ex<-meanTb
Therm.Ex$mTex<-meanTex$mTex
Therm.Ex$Tsel.C_Shell<-meanTex$Tsel.C_Shell
Therm.Ex$Tsel.D_Shell<-meanTex$Tsel.D_Shell
Therm.Ex<-na.omit(Therm.Ex)
#head(Therm.Ex)
table(Therm.Ex$Tsel.C_Shell)
## 
##  above  below inside 
##    171   6684    567
count(Therm.Ex$hour.day[with(Therm.Ex, Tsel.C_Shell ==c("above"))])
##     x freq
## 1  08    1
## 2  09    2
## 3  10    4
## 4  11   13
## 5  12   29
## 6  13   36
## 7  14   40
## 8  15   26
## 9  16   15
## 10 17    5
count(Therm.Ex$hour.day[with(Therm.Ex, Tsel.C_Shell ==c("below"))])
##     x freq
## 1  00  317
## 2  01  317
## 3  02  317
## 4  03  317
## 5  04  317
## 6  05  317
## 7  06  308
## 8  07  308
## 9  08  307
## 10 09  301
## 11 10  289
## 12 11  263
## 13 12  219
## 14 13  188
## 15 14  176
## 16 15  184
## 17 16  207
## 18 17  245
## 19 18  273
## 20 19  291
## 21 20  302
## 22 21  307
## 23 22  307
## 24 23  307
count(Therm.Ex$hour.day[with(Therm.Ex, Tsel.C_Shell ==c("inside"))])
##     x freq
## 1  09    5
## 2  10   14
## 3  11   30
## 4  12   58
## 5  13   82
## 6  14   90
## 7  15   96
## 8  16   84
## 9  17   56
## 10 18   33
## 11 19   15
## 12 20    4
table(Therm.Ex$Tsel.D_Shell)
## 
##  above  below inside 
##    477   6665    280
count(Therm.Ex$hour.day[with(Therm.Ex, Tsel.D_Shell ==c("above"))])
##     x freq
## 1  08    3
## 2  09   18
## 3  10   28
## 4  11   48
## 5  12   92
## 6  13   98
## 7  14   90
## 8  15   60
## 9  16   27
## 10 17    9
## 11 18    4
count(Therm.Ex$hour.day[with(Therm.Ex, Tsel.D_Shell ==c("below"))])
##     x freq
## 1  00  317
## 2  01  317
## 3  02  317
## 4  03  317
## 5  04  317
## 6  05  317
## 7  06  308
## 8  07  308
## 9  08  291
## 10 09  256
## 11 10  239
## 12 11  208
## 13 12  172
## 14 13  185
## 15 14  190
## 16 15  223
## 17 16  265
## 18 17  286
## 19 18  300
## 20 19  305
## 21 20  306
## 22 21  307
## 23 22  307
## 24 23  307
count(Therm.Ex$hour.day[with(Therm.Ex, Tsel.D_Shell ==c("inside"))])
##     x freq
## 1  08   14
## 2  09   34
## 3  10   40
## 4  11   50
## 5  12   42
## 6  13   23
## 7  14   26
## 8  15   23
## 9  16   14
## 10 17   11
## 11 18    2
## 12 19    1

#(1) When Tshell is above Tsel and Tb is inside Tsel:

#Constant
Therm.Ex.AboveC<-Therm.Ex[with(Therm.Ex, Tsel.C_Shell == c("above") & Tsel.C_Tb == c("inside")),]

#x = hour.day, freq = count
count(Therm.Ex.AboveC$hour.day) 
##     x freq
## 1  08    1
## 2  09    1
## 3  10    2
## 4  11    7
## 5  12   22
## 6  13   11
## 7  14   12
## 8  15    4
## 9  16    2
## 10 17    2
table(Therm.Ex.AboveC$Tsel.C_Shell)
## 
## above 
##    64
#Dynamic
Therm.Ex.AboveD<-Therm.Ex[with(Therm.Ex, Tsel.D_Shell == c("above") & Tsel.D_Tb == c("inside")),]

#x = hour.day, freq = count
count(Therm.Ex.AboveD$hour.day) 
##     x freq
## 1  08    1
## 2  09    6
## 3  10    8
## 4  11   17
## 5  12   21
## 6  13   16
## 7  14   11
## 8  15    4
## 9  16    2
## 10 17    2
## 11 18    1
table(Therm.Ex.AboveD$Tsel.D_Shell)
## 
## above 
##    89

For constant calculation: There are a total of 171 Tshell above Tsel. Out of those 171, there are 64 occurences of Tb in Tsel when Tshell is above Tsel.

The largest counts are during the mid-day, between noon and 2 pm. There does not appear to be occurences before 8am or after 5pm.

For dynamic calculation: There are a total of 477 Tshell above Tsel. Out of those 477, there are 89 occurences of Tb in Tsel when Tshell is above Tsel

The largest counts are during the still mid-day, between 11am and 2 pm. There does not appear to be any occurences before 8am or after 6pm.

#(2) When Tshell is below Tsel and Tb is inside Tsel:

#Constant
Therm.Ex.BelowC<-Therm.Ex[with(Therm.Ex, Tsel.C_Shell == c("below") & Tsel.C_Tb == c("inside")),]

#x = hour.day, freq = count
count(Therm.Ex.BelowC$hour.day) 
##    x freq
## 1 14    5
## 2 15    7
## 3 16    8
## 4 17   17
## 5 18   11
## 6 19   11
## 7 20    4
## 8 21    2
table(Therm.Ex.BelowC$Tsel.C_Shell)
## 
## below 
##    65
#Dynamic
Therm.Ex.BelowD<-Therm.Ex[with(Therm.Ex, Tsel.D_Shell == c("below") & Tsel.D_Tb == c("inside")),]

#x = hour.day, freq = count
count(Therm.Ex.BelowD$hour.day) 
##     x freq
## 1  08    4
## 2  09    3
## 3  10    1
## 4  11    2
## 5  12    1
## 6  14    4
## 7  15   10
## 8  16    6
## 9  17    7
## 10 18    3
## 11 19    1
table(Therm.Ex.BelowD$Tsel.D_Shell)
## 
## below 
##    42

For constant calculation: There are a total of 6684 Tshell below Tsel. Out of those 6684, there are 65 occurences of Tb inside Tsel when Tshell is below Tsel.

The largest counts are during the evening, between 5-7pm. There does not appear to be any occurences before 2pm or after 9pm for constant calculation.

For dynamic calculation: There are a total of 6665 Tshell above Tsel. Out of those 6665, there are 42 occurences of Tb inside Tsel when Tshell is below Tsel

The largest counts are during the still mid-evening, between 3-5 pm. There does not appear to be any occurences before 8am or after 7pm for dynamic calculation.

#(3) When Tshell inside Tsel and Tb inside Tsel

#Constant
Therm.Ex.InsideC<-Therm.Ex[with(Therm.Ex, Tsel.C_Shell == c("inside") & Tsel.C_Tb == c("inside")),]

#x = hour.day, freq = count
count(Therm.Ex.InsideC$hour.day) 
##     x freq
## 1  09    2
## 2  10    9
## 3  11   15
## 4  12   35
## 5  13   69
## 6  14   81
## 7  15   88
## 8  16   78
## 9  17   53
## 10 18   28
## 11 19   15
## 12 20    4
table(Therm.Ex.InsideC$Tsel.C_Shell)
## 
## inside 
##    477
#Dynamic
Therm.Ex.InsideD<-Therm.Ex[with(Therm.Ex, Tsel.D_Shell == c("inside") & Tsel.D_Tb == c("inside")),]

#x = hour.day, freq = count
count(Therm.Ex.InsideD$hour.day) 
##     x freq
## 1  08   12
## 2  09   28
## 3  10   32
## 4  11   35
## 5  12   27
## 6  13   11
## 7  14   15
## 8  15   13
## 9  16    8
## 10 17    6
## 11 18    1
## 12 19    1
table(Therm.Ex.InsideD$Tsel.D_Shell)
## 
## inside 
##    189

For constant calculation: There are a total of 567 Tshell inside Tsel. Out of those 567, there are 477 occurences of Tb in Tsel when Tshell is inside Tsel.

The largest counts are from 11am until 7pm. No occurences before 9am or after 8pm.

For dynamic calculation: There are a total of 280 Tshell inside Tsel. Out of those 280, there are 189 occurences of Tb in Tsel when Tshell is inside Tsel

The largest counts are from 8am until 3pm. There does not appear to be any occurences before 8am or after 7pm.

#Continuing with operant temperatures:

#Operant Temperatures (Te)
meanTe<-aggregate(list(mTe = operant.data$Te),
                  list(date = cut(operant.data$date, "1 hour"),
                       year = operant.data$year,
                       season = operant.data$season),
                  data=operant.data,
                  mean)
meanTe$date<-as.POSIXct(as.character(meanTe$date,
                                     format="%Y-%m-%d %H:%M:%S"))
#View(meanTe)

meanTe$hour.day<-format(round(meanTe$date, "hours"), "%H") 

#Habitat Types
Open.Close.Te<-aggregate(list(mTe = operant.data$Te),
                  list(date = cut(operant.data$date, "1 hour"),
                       year = operant.data$year,
                       habitat = operant.data$habitat,
                       season = operant.data$season),
                  data=operant.data,
                  mean)
Open.Close.Te$date<-as.POSIXct(as.character(Open.Close.Te$date,
                                     format="%Y-%m-%d %H:%M:%S"))

Open.Close.Te$hour.day<-format(round(Open.Close.Te$date, "hours"), "%H") 

#Environmental Temperatures
#Constant (Tsel-C) Te
Tsel.min<-25.5 #bottom 25%
Tsel.mid<-29.3 #mid 50%
Tsel.max<-31.3 #top 75%

C.Te<-ifelse(meanTe$mTe > Tsel.max, "above", #ifelse(condition, yes, no)
           ifelse(meanTe$mTe < Tsel.min, "below",
                  "inside"))

#head(C.Te)
meanTe$Tsel.C<-C.Te
#head(meanTe)

#Dynamic (Tsel-D)

D.Te<-ifelse(meanTe$mTe[format(round(meanTe$date, "hours"), "%H")%in%meanTe$hour.day] < (ZPTselD$mTsel[as.numeric(meanTe$hour.day)+1] - ZPTselD$errorTsel[as.numeric(meanTe$hour.day)+1]), "below",
       ifelse(meanTe$mTe[format(round(meanTe$date, "hours"), "%H")%in%meanTe$hour.day] > (ZPTselD$mTsel[as.numeric(meanTe$hour.day)+1] + ZPTselD$errorTsel[as.numeric(meanTe$hour.day)+1]), "above",
       "inside"))

#head(D.Te)
meanTe$Tsel.D<-D.Te

#Open-Closed Habitat Comparison
#Constant (Tsel-C)
Tsel.min<-25.5 #bottom 25%
Tsel.mid<-29.3 #mid 50%
Tsel.max<-31.3 #top 75%

C.TeOC<-ifelse(Open.Close.Te$mTe > Tsel.max, "above", #ifelse(condition, yes, no)
           ifelse(Open.Close.Te$mTe < Tsel.min, "below",
                  "inside"))

#head(C.TeOC)
Open.Close.Te$Tsel.C<-C.TeOC
#head(Open.Close.Te)

#Dynamic (Tsel-D)

D.TeOC<-ifelse(Open.Close.Te$mTe[format(round(Open.Close.Te$date, "hours"), "%H")%in%Open.Close.Te$hour.day] < (ZPTselD$mTsel[as.numeric(Open.Close.Te$hour.day)+1] - ZPTselD$errorTsel[as.numeric(Open.Close.Te$hour.day)+1]), "below",
       ifelse(Open.Close.Te$mTe[format(round(Open.Close.Te$date, "hours"), "%H")%in%Open.Close.Te$hour.day] > (ZPTselD$mTsel[as.numeric(Open.Close.Te$hour.day)+1] + ZPTselD$errorTsel[as.numeric(Open.Close.Te$hour.day)+1]), "above",
       "inside"))

#head(D.TeOC)
Open.Close.Te$Tsel.D<-D.TeOC

Everything run above was just to get the data prepared. Before going through and calculating just the thermal exploitation index, I'll go through above, inside, and below the Tsel range. The focus will be body temperatures, then models

#Body Temperatures
table(meanTb$Tsel.C)
## 
##  above  below inside 
##    113   6713    606
#above Tsel-C
113/7432
## [1] 0.0152
#below Tsel-C
6713/7432
## [1] 0.9033
#inside Tsel-C
606/7432
## [1] 0.08154
table(meanTb$Tsel.D)
## 
##  above  below inside 
##    399   6713    320
#above Tsel-D
399/7432
## [1] 0.05369
#below Tsel-D
6713/7432
## [1] 0.9033
#inside Tsel-D
320/7432
## [1] 0.04306
#Warning - last time I tried to create a table format of above-below-inside for each season, each turtle, and each year, I apparently created a 2gig vector that crashed my computer. 

#Operative Temperatures
table(Open.Close.Te$Tsel.C)
## 
##  above  below inside 
##    257   6125    743
#above Tsel-C
257/7125
## [1] 0.03607
#below Tsel-C
6125/7125
## [1] 0.8596
#inside Tsel-C
743/7125
## [1] 0.1043
table(Open.Close.Te$Tsel.D)
## 
##  above  below inside 
##    579   6128    418
#above Tsel-D
579/7125
## [1] 0.08126
#below Tsel-D
6128/7125
## [1] 0.8601
#inside Tsel-D
418/7125
## [1] 0.05867
#Well, just for fun:
#Thermal Exploitation Tsel-C
((606/7432) / (743/7125))
## [1] 0.7819
#Thermal Exploitation Tsel-D
 ((320/7432)/ (418/7125))
## [1] 0.7339

The following code should just be a review.

#Aggregation of data for calculation?
#Tb in Tsel
#Tb.Ex.C<-aggregate(Tsel.C ~ cut(meanTb$date, "1 day") + meanTb$turtle, 
#                data=meanTb, 
#                function(x) sum(x==c("inside")))

#Tb.Ex.D<-aggregate(Tsel.D ~ cut(meanTb$date, "1 day") + meanTb$turtle, 
#                data=meanTb, 
#                function(x) sum(x==c("inside")))

#Tb.Ex.C<-rename(Tb.Ex.C, c('cut(meanTb$date, "1 day")' = "date"))
#Tb.Ex.D<-rename(Tb.Ex.D, c('cut(meanTb$date, "1 day")' = "date"))

#head(Tb.Ex.C)
#head(Tb.Ex.D)

#Te in Tsel
#Te.Ex.C<-aggregate(Tsel.C ~ cut(meanTe$date, "1 day") + meanTe$season + meanTe$year, 
#                data=meanTe, 
#                function(x) sum(x==c("inside")))

#Te.Ex.D<-aggregate(Tsel.D ~ cut(meanTe$date, "1 day") + meanTe$season + meanTe$year, 
#                data=meanTe, 
#                function(x) sum(x==c("inside")))

#Te.Ex.C<-rename(Te.Ex.C, c('cut(meanTe$date, "1 day")' = "date"))
#Te.Ex.D<-rename(Te.Ex.D, c('cut(meanTe$date, "1 day")' = "date"))

#head(Te.Ex.C)
#head(Te.Ex.D)


#Habitat Types in Tsel
Open.Close.Te.Ex.C<-aggregate(Tsel.C ~ cut(Open.Close.Te$date, "1 day") + Open.Close.Te$season + Open.Close.Te$year + Open.Close.Te$habitat, 
                data=Open.Close.Te, 
                function(x) sum(x==c("inside")))

Open.Close.Te.Ex.D<-aggregate(Tsel.D ~ cut(Open.Close.Te$date, "1 day") + Open.Close.Te$season + Open.Close.Te$year + Open.Close.Te$habitat, 
                data=Open.Close.Te, 
                function(x) sum(x==c("inside")))

Open.Close.Te.Ex.C<-rename(Open.Close.Te.Ex.C, c('cut(Open.Close.Te$date, "1 day")' = "date"))
Open.Close.Te.Ex.D<-rename(Open.Close.Te.Ex.D, c('cut(Open.Close.Te$date, "1 day")' = "date"))


#head(Open.Close.Te.Ex.C)
#head(Open.Close.Te.Ex.D)

#Percent Te in Tsel for Habitat Types
Open.Close.Te.Ex.C$Te.Prnct.C<-Open.Close.Te.Ex.C$Tsel.C/24
Open.Close.Te.Ex.D$Te.Prnct.D<-Open.Close.Te.Ex.D$Tsel.D/24

Open.Close.Te.Ex.C<-rename(Open.Close.Te.Ex.C,c('Open.Close.Te$year' = 'year', 
'Open.Close.Te$habitat' = 'habitat', 
'Open.Close.Te$season' = 'season'))
Open.Close.Te.Ex.D<-rename(Open.Close.Te.Ex.D,c('Open.Close.Te$year' = 'year', 
'Open.Close.Te$habitat' = 'habitat', 
'Open.Close.Te$season' = 'season'))


Habitat.Prcnt.C<-aggregate(list( Mean = Open.Close.Te.Ex.C$Te.Prnct.C),
                  list(habitat = Open.Close.Te.Ex.C$habitat,
                       year = Open.Close.Te.Ex.C$year,
                       season = Open.Close.Te.Ex.C$season),
                                        mean)
#head(Habitat.Prcnt.C)

Habitat.Prcnt.D<-aggregate(list( Mean = Open.Close.Te.Ex.D$Te.Prnct.D),
                  list(habitat = Open.Close.Te.Ex.D$habitat,
                       year = Open.Close.Te.Ex.D$year,
                       season = Open.Close.Te.Ex.D$season),
                                        mean)
#head(Habitat.Prcnt.D)

#Open-Closed Habitat Comparison

qqnorm(Open.Close.Te.Ex.C$Te.Prnct.C) #bimodal possibly?
qqline(Open.Close.Te.Ex.C$Te.Prnct.C)

plot of chunk unnamed-chunk-24

#Tsel-C Comparison of Te in Tsel
t.test(Te.Prnct.C ~ habitat, Open.Close.Te.Ex.C) 
## 
##  Welch Two Sample t-test
## 
## data:  Te.Prnct.C by habitat
## t = -13.64, df = 238.6, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1842 -0.1377
## sample estimates:
## mean in group closed   mean in group open 
##               0.0244               0.1854
qqnorm(Open.Close.Te.Ex.D$Te.Prnct.D) #More bimodal possibly?
qqline(Open.Close.Te.Ex.D$Te.Prnct.D)

plot of chunk unnamed-chunk-24

#Tsel-C Comparison of Te in Tsel
t.test(Te.Prnct.D ~ habitat, Open.Close.Te.Ex.D) 
## 
##  Welch Two Sample t-test
## 
## data:  Te.Prnct.D by habitat
## t = -6.514, df = 292.4, p-value = 3.189e-10
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.07151 -0.03833
## sample estimates:
## mean in group closed   mean in group open 
##              0.03125              0.08617

In general, closed habitats had less time percentage of Te in Tsel. Furthermore, there is a significant difference between time Te in Tsel for both calculation. So why do box turtles select forested habitat? Could it be due to the conditions at the northern limit of their distribution?

#Merging Data for Thermal Exploitation
#Data:
#Tb.Ex.C
#Tb.Ex.D
#Te.Ex.C
#Te.Ex.D

#Constant Calculation
#Tb.Ex.C<-rename(Tb.Ex.C, c('cut(meanTb$date, "1 day")' = "date"))
#Te.Ex.C<-rename(Te.Ex.C, c('cut(meanTe$date, "1 day")' = "date"))

#Ex.Constant<-merge(Tb.Ex.C, Te.Ex.C, by='date')
#head(Ex.Constant)

#Ex.Constant<-rename(Ex.Constant, c("Tsel.C.x" = "Tsel.Tb",
#'meanTe$season' = 'season',
#'meanTe$year' = 'year',
#'Tsel.C.y' = 'Tsel.Te'))

#Ex.Constant$Tb.Prcnt<-Ex.Constant$Tsel.Tb/24
#Ex.Constant$Te.Prcnt<-Ex.Constant$Tsel.Te/24

#Ex.Constant$Ex<-Ex.Constant$Tb.Prcnt/Ex.Constant$Te.Prcnt
#Ex.Constant

#Dynamic Calculation
#Tb.Ex.D<-rename(Tb.Ex.D, c('cut(meanTb$date, "1 day")' = "date"))
#Te.Ex.D<-rename(Te.Ex.D, c('cut(meanTe$date, "1 day")' = "date"))

#Ex.Dynamic<-merge(Tb.Ex.D, Te.Ex.D, by='date')
#head(Ex.Dynamic)

#Ex.Dynamic<-rename(Ex.Dynamic, c("Tsel.D.x" = "Tsel.Tb",
#'meanTe$season' = 'season',
#'meanTe$year' = 'year',
#'Tsel.D.y' = 'Tsel.Te'))

#Ex.Dynamic$Tb.Prcnt<-Ex.Dynamic$Tsel.Tb/24
#Ex.Dynamic$Te.Prcnt<-Ex.Dynamic$Tsel.Te/24

#Ex.Dynamic$Ex<-Ex.Dynamic$Tb.Prcnt/Ex.Dynamic$Te.Prcnt
#Ex.Dynamic
#View(Ex.Dynamic)

Here's where it gets a little tricky with the Thermal exploitation. Issues that we run into are: 1. Division by zero, where Te in Tsel is not observed in a 24-h period yet Tb in Tsel is (possibly finding thermal niches that the models weren't placed in? Don't know, we placed a lot of models) 2. Tb in Tsel being greater than Te in Tsel (values over 100%)

8-15: I think I'm going to omit the previous chunk of code.

In general, habitat selected doesn't reflect thermal preference because open habitats are significantly higher (%Te in Tsel) than closed habitats.

#Objective 3: Thermoregulatory strategy Box turtles would rely on thermoconformity given their limited capacity to seek out basking sites.

I. How accurate are box turtles at maintaining thermal preference (thermal accuracy (db))?

*Report the mean +/- SE of db to compare with other studies of turtles

II. How are box turtles thermoregulating? (db plotted against de, m = 1 is thermoconformity, m = 0 thermoregulation) (Effectiveness of Thermoregulation)

Statistics done by Hank, with some modifications

library(dplyr) # data manipulation
## 
## Attaching package: 'dplyr'
## 
## The following object is masked from 'package:adehabitatLT':
## 
##     id
## 
## The following object is masked from 'package:MASS':
## 
##     select
## 
## The following objects are masked from 'package:plyr':
## 
##     arrange, desc, failwith, id, mutate, summarise, summarize
## 
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## 
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lattice) # graphics
library(ggplot2) # graphics
library(lme4) # stats for mixed models
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following object is masked from 'package:reshape':
## 
##     expand
## 
## The following objects are masked from 'package:base':
## 
##     crossprod, tcrossprod
## 
## Loading required package: Rcpp
#library(R2jags) # Bayesian analysis - no idea how Hank does it, can't get package to work


df <- read.csv("2014 2015 Tb Tex db CORRECTED.csv",
               header=T,
               sep='\t')

## Assess only the complete data set 

df <- df[complete.cases(df), ]

# transform and relabel for convenience
df$y <- df$year - 2014 # this will be useful when I do the Bayesian model
df$yf <- as.factor(df$y)
df$season <- factor(df$season, levels=c("spring", "summer", "fall"))

# check structure
# with(df, table(season,turtle,  year))
# Note: there are different turtles in different seasons and years. We have to assume that turtles do not vary systematically across years and seasons.

# Convert time and date into variables that are more useful
t <- strptime(df$date, "%Y-%m-%d %H:%M:%S")
## samples is the relative time (H/M) within a d day
df$samples <- as.numeric(format(t, "%H")) + as.numeric(format(t, "%M"))/60 +
  as.numeric(format(t, "%S"))/(60*60)
## day is the date
df$day <- as.numeric(format(t, "%y")) + 
  as.numeric(format(t, "%m"))/12 + as.numeric(format(t, "%d")) / 365 

# xyplot( Tb ~ Tex|turtle, groups = day, data=df , type="smooth")

# log ratio 
# > 0 means body temp > shell
# < 0 mean body temp < shell
df$log.ratio <- log( df$Tb/df$Tex )
#A few graphs to get the picture of the times series.

# one day, one turtle
#xyplot(db.NEW.signs ~ samples, groups = turtle, data = df, type="l", 
#       subset = day == min(day) )

# one turtle
#r1l3 <- subset(df, turtle == "R1L3")
#xyplot(db.NEW.signs ~ samples, groups = day, data = r1l3, type="l")

# ggplot(data=r1l3, aes(samples, db.NEW.setp, 
#                       colour = as.factor(day), 
#                       group=as.factor(day))) +  geom_path()

Removing some of the misc. graphs to focus on the model assumptions/testing to see if I can get something to report for the manuscript in the results.

#Aggregate within turtles and days
#This will make summarize turtle teps for each day, 
#including average dbdb and standard deviation of body
#and shell temperatures.

a1 <- select(df, turtle, season, yf, samples, day, Tb, Tex, 
             db.NEW.signs, db.OLD.signs, log.ratio)
# It was suggested we not use fall data...
# a1s <- filter(a1, season != "fall")
a1s <- a1
a2 <- group_by(a1s, turtle, day, season, yf)
a3 <- summarise(a2,
                mn.b = mean(Tb),
                mn.x = mean(Tex),
                sd.b = sd(Tb),
                sd.x = sd( Tex ),
                sd.xb = sd.x - sd.b,
                mn.db.new = mean( abs( db.NEW.signs ) ),
                mn.db.old = mean( abs( db.OLD.signs ) ),
                sd.db.new = sd(db.NEW.signs ),
                sd.db.old = sd( db.OLD.signs )
)

#Does average daily body temp vary with season? Here we address this question using a graph and stats.

#Plot of mean Body Temperature for year and season
ggplot(a3, aes(season, mn.b, colour = yf) ) + geom_boxplot()

plot of chunk unnamed-chunk-28

#Stats.
Tbm0 <- lmer(mn.b ~ season * yf + (1|turtle), data = a3)
Tbm1<- update(Tbm0, . ~ . - season:yf)
anova(Tbm0, Tbm1)
## refitting model(s) with ML (instead of REML)
## Data: a3
## Models:
## Tbm1: mn.b ~ season + yf + (1 | turtle)
## Tbm0: mn.b ~ season * yf + (1 | turtle)
##      Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)    
## Tbm1  6 1465 1488   -727     1453                            
## Tbm0  8 1448 1479   -716     1432    21      2    2.8e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(Tbm0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mn.b ~ season * yf + (1 | turtle)
##    Data: a3
## 
## REML criterion at convergence: 1431
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.877 -0.573  0.140  0.646  2.475 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  turtle   (Intercept) 0.592    0.769   
##  Residual             5.109    2.260   
## Number of obs: 317, groups:  turtle, 26
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)        20.819      0.457    45.5
## seasonsummer       -0.368      0.672    -0.5
## seasonfall         -5.422      0.629    -8.6
## yf1                -0.929      0.687    -1.4
## seasonsummer:yf1    2.284      0.946     2.4
## seasonfall:yf1      5.353      1.081     5.0
## 
## Correlation of Fixed Effects:
##             (Intr) ssnsmm ssnfll yf1    ssns:1
## seasonsummr -0.680                            
## seasonfall  -0.727  0.495                     
## yf1         -0.666  0.453  0.484              
## ssnsmmr:yf1  0.483 -0.711 -0.352 -0.726       
## sesnfll:yf1  0.423 -0.288 -0.582 -0.636  0.461

Depends on the interaction of season and year: p < 0.05 Need to report estimate, std error, t-value, and p-value?

# Create a variable that includes each season x year combo as a different factor levels (3 seasons x 2 y).
sxy <- interaction(a3$season, a3$yf, drop=TRUE)

# Set fall of the first year as the control. This allows all the model coefficients to test whether the other levels differ from fall 2014
sxy <- relevel(sxy, "fall.0")

# test whether some year-seasons differ from fall 2014 [using bayesian analysis?]
Tbm0b <- lmer(mn.b ~ sxy + (1|turtle), data = a3)
Tb.bm0 <- confint(Tbm0b, method = "boot")
## Computing bootstrap confidence intervals ...
## Computing bootstrap confidence intervals ...
Tb.bm0
##                         2.5 % 97.5 %
## sd_(Intercept)|turtle  0.2944  1.174
## sigma                  2.0760  2.431
## (Intercept)           14.5890 16.225
## sxyspring.0            4.1997  6.584
## sxysummer.0            3.7802  6.254
## sxyspring.1            3.1720  5.737
## sxysummer.1            5.2289  7.583
## sxyfall.1              2.7035  6.267

All season:year combos are greater than fall 2014 (or sigma? I'm pretty sure). Random effect (.sig01, or sd_(Intercept)|turtle) is small.

Does daily shell temperature (Tshell) vary by season, year, or the interaction?

#Plot of mean Shell Temperature for year and season
ggplot(a3, aes(season, mn.x, colour = yf) ) + geom_boxplot()

plot of chunk unnamed-chunk-30

#Stats.
Texm0 <- lmer(mn.x ~ season * yf + (1|turtle), data = a3)
Texm1<- update(Texm0, . ~ . - season:yf)
anova(Texm0, Texm1)
## refitting model(s) with ML (instead of REML)
## Data: a3
## Models:
## Texm1: mn.x ~ season + yf + (1 | turtle)
## Texm0: mn.x ~ season * yf + (1 | turtle)
##       Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)    
## Texm1  6 1538 1561   -763     1526                            
## Texm0  8 1520 1550   -752     1504  22.3      2    1.4e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(Texm0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mn.x ~ season * yf + (1 | turtle)
##    Data: a3
## 
## REML criterion at convergence: 1503
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.411 -0.542  0.139  0.667  2.448 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  turtle   (Intercept) 0.377    0.614   
##  Residual             6.590    2.567   
## Number of obs: 317, groups:  turtle, 26
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)        20.263      0.438    46.2
## seasonsummer       -0.386      0.639    -0.6
## seasonfall         -5.814      0.596    -9.8
## yf1                -0.700      0.659    -1.1
## seasonsummer:yf1    2.246      0.905     2.5
## seasonfall:yf1      5.327      1.030     5.2
## 
## Correlation of Fixed Effects:
##             (Intr) ssnsmm ssnfll yf1    ssns:1
## seasonsummr -0.686                            
## seasonfall  -0.735  0.504                     
## yf1         -0.666  0.456  0.490              
## ssnsmmr:yf1  0.484 -0.706 -0.356 -0.727       
## sesnfll:yf1  0.426 -0.292 -0.579 -0.639  0.465

Depends on the interaction of season and year: p < 0.05. Similar to Tb. Interaction is denoted by the asterik (*) in the output.

# test whether some year-seasons differ from fall 2014 [using bayesian analysis?]
Texm0b <- lmer(mn.x ~ sxy + (1|turtle), data = a3)
Tex.bm0 <- confint(Texm0b, method = "boot")
## Computing bootstrap confidence intervals ...
## Computing bootstrap confidence intervals ...
Tex.bm0
##                        2.5 % 97.5 %
## sd_(Intercept)|turtle  0.000  1.001
## sigma                  2.362  2.795
## (Intercept)           13.609 15.203
## sxyspring.0            4.824  6.949
## sxysummer.0            4.303  6.701
## sxyspring.1            3.831  6.291
## sxysummer.1            5.825  8.110
## sxyfall.1              3.146  6.238

All season:year combos are greater than fall 2014 Random effect (.sig01, or sd_(Intercept)|turtle) is small.

Does daily db vary by season, year, or the interaction? Dynamic Tsel Calculations:

#Plot
ggplot(a3, aes(season, mn.db.new, colour = yf) ) + geom_boxplot()

plot of chunk unnamed-chunk-32

Dbm0d <- lmer(mn.db.new ~ season * yf + (1|turtle), data = a3)
qplot( fitted(Dbm0d), resid(Dbm0d)) + geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-32

qqnorm(resid(Dbm0d))
qqline(resid(Dbm0d))

plot of chunk unnamed-chunk-32

#Data conform reasonably well to model assumptions. Test season by year effect.

Dbm1d <- update(Dbm0d, . ~ . - season:yf)
anova(Dbm0d, Dbm1d)
## refitting model(s) with ML (instead of REML)
## Data: a3
## Models:
## Dbm1d: mn.db.new ~ season + yf + (1 | turtle)
## Dbm0d: mn.db.new ~ season * yf + (1 | turtle)
##       Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)    
## Dbm1d  6 1440 1462   -714     1428                            
## Dbm0d  8 1420 1450   -702     1404  23.9      2    6.6e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(Dbm0d)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mn.db.new ~ season * yf + (1 | turtle)
##    Data: a3
## 
## REML criterion at convergence: 1404
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.138 -0.663 -0.144  0.478  2.873 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  turtle   (Intercept) 0.371    0.609   
##  Residual             4.749    2.179   
## Number of obs: 317, groups:  turtle, 26
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)         7.420      0.398   18.64
## seasonsummer        0.340      0.583    0.58
## seasonfall          5.082      0.544    9.34
## yf1                 0.881      0.598    1.47
## seasonsummer:yf1   -2.265      0.823   -2.75
## seasonfall:yf1     -5.112      0.937   -5.45
## 
## Correlation of Fixed Effects:
##             (Intr) ssnsmm ssnfll yf1    ssns:1
## seasonsummr -0.683                            
## seasonfall  -0.732  0.500                     
## yf1         -0.666  0.455  0.487              
## ssnsmmr:yf1  0.484 -0.708 -0.354 -0.727       
## sesnfll:yf1  0.425 -0.290 -0.580 -0.638  0.463

Yes, it depends on the interaction - As per tradition.

# test whether some year-seasons differ from fall 2014
Db.m0d <- lmer(mn.db.new ~ sxy + (1|turtle), data = a3)
dm0 <- confint(Db.m0d, method = "boot")
## Computing bootstrap confidence intervals ...
## Computing bootstrap confidence intervals ...
dm0
##                        2.5 %  97.5 %
## sd_(Intercept)|turtle  0.000  0.9962
## sigma                  1.996  2.3643
## (Intercept)           11.714 13.1951
## sxyspring.0           -6.092 -4.0189
## sxysummer.0           -5.890 -3.5601
## sxyspring.1           -5.362 -2.9951
## sxysummer.1           -7.168 -5.1123
## sxyfall.1             -5.649 -2.7434

The db in all seasons differs from db of fall 2014. They're also all negative (other seasons, not sigma/fall 2014)

Constant Tsel Calculations

#Plot
ggplot(a3, aes(season, mn.db.old, colour = yf) ) + geom_boxplot()

plot of chunk unnamed-chunk-34

Dbm0c <- lmer(mn.db.old ~ season * yf + (1|turtle), data = a3)
qplot( fitted(Dbm0c), resid(Dbm0c)) + geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-34

qqnorm(resid(Dbm0c))
qqline(resid(Dbm0c))

plot of chunk unnamed-chunk-34

#Data conform reasonably well to model assumptions. Test season by year effect.

Dbm1c <- update(Dbm0c, . ~ . - season:yf)
anova(Dbm0c, Dbm1c)
## refitting model(s) with ML (instead of REML)
## Data: a3
## Models:
## Dbm1c: mn.db.old ~ season + yf + (1 | turtle)
## Dbm0c: mn.db.old ~ season * yf + (1 | turtle)
##       Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)    
## Dbm1c  6 1437 1459   -712     1425                            
## Dbm0c  8 1419 1449   -701     1403  21.8      2    1.9e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(Dbm0c)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mn.db.old ~ season * yf + (1 | turtle)
##    Data: a3
## 
## REML criterion at convergence: 1402
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.295 -0.681 -0.145  0.555  2.958 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  turtle   (Intercept) 0.489    0.699   
##  Residual             4.677    2.163   
## Number of obs: 317, groups:  turtle, 26
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)         8.747      0.425   20.56
## seasonsummer        0.287      0.625    0.46
## seasonfall          5.166      0.584    8.85
## yf1                 0.837      0.639    1.31
## seasonsummer:yf1   -2.205      0.880   -2.51
## seasonfall:yf1     -5.112      1.004   -5.09
## 
## Correlation of Fixed Effects:
##             (Intr) ssnsmm ssnfll yf1    ssns:1
## seasonsummr -0.681                            
## seasonfall  -0.728  0.496                     
## yf1         -0.666  0.454  0.485              
## ssnsmmr:yf1  0.484 -0.710 -0.352 -0.726       
## sesnfll:yf1  0.424 -0.289 -0.582 -0.636  0.462

Yes, it depends on the interaction - As per tradition.

# test whether some year-seasons differ from fall 2014
Db.m0c <- lmer(mn.db.old ~ sxy + (1|turtle), data = a3)
cm0 <- confint(Db.m0c, method = "boot")
## Computing bootstrap confidence intervals ...
## Computing bootstrap confidence intervals ...
cm0
##                            2.5 % 97.5 %
## sd_(Intercept)|turtle  1.254e-07  1.052
## sigma                  1.990e+00  2.350
## (Intercept)            1.317e+01 14.705
## sxyspring.0           -6.267e+00 -4.052
## sxysummer.0           -6.053e+00 -3.672
## sxyspring.1           -5.534e+00 -3.250
## sxysummer.1           -7.394e+00 -5.178
## sxyfall.1             -5.841e+00 -2.745

Same with the dynamic calculation, all seasons differ from fall 2014.

#Does environment temp vary by habitat, year, and season?

Environmental data management.

env <- read.table("2014 2015 Te de CORRECTED.csv", header=T, sep=",")
#str(env)

env$yf <- as.factor( env$year - 2014 )
env$season <- factor(env$season, levels=c("spring", "summer", "fall"))

t <- strptime(env$date, "%Y-%m-%d %H:%M:%S")
env$samples <- as.numeric(format(t, "%H")) +
  as.numeric(format(t, "%M"))/60 +
  as.numeric(format(t, "%S"))/60^2
env$day <- as.numeric(format(t, "%y")) + 
  as.numeric(format(t, "%m"))/12 + as.numeric(format(t, "%d")) / 365 
#summary(env)

#Take a peak at the two different ways of assessing environmental quality.
#env14 <- subset( env, year == 2014 & day == min(day)  )
#dim(env14)
#names(env14)
#xyplot(de.NEW.signs ~ samples, groups = probe, data = env14, type="l")
#xyplot(de.OLD.signs ~ samples, groups = probe, data = env14, type="l")
#xyplot(de.NEW.signs ~ de.OLD.signs, groups = probe, data = env14, type="p")

Going to use Hank's aggregation and try to include the open/closed, plus closed analyses.

#Aggregate within day and operant model ('probe').

e1 <- select(env, probe, season, yf, habitat, samples, day, 
             Te, de.NEW.signs, de.OLD.signs)
e2 <- group_by(e1, probe, day, season, yf)
e3 <- summarise(e2,
                habitat = unique(habitat),
                mn.e = mean(Te),
                mn.de.new = mean(abs(de.NEW.signs)),
                mn.de.old = mean(abs(de.OLD.signs)),
                sd.de.new = sd(de.NEW.signs),
                sd.de.old = sd(de.OLD.signs)
)
#ggplot(e3, aes(season, mn.e, colour = yf) ) + geom_boxplot()
#ggplot(e3, aes(season, mn.de.new, colour = yf) ) + geom_boxplot()
#ggplot(e3, aes(season, mn.de.old, colour = yf) ) + geom_boxplot()

#Does daily temperature quality vary by season, year, or the interaction? Going to test this with open and closed habitats, or try.

Dynamic Tsel Calculation

Dem0d <- lmer(log(mn.de.new) ~ season * yf + (1|probe), data = e3)
qplot(fitted(Dem0d), resid(Dem0d)) + geom_smooth()
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-38

qqnorm(resid(Dem0d)); qqline(resid(Dem0d))

plot of chunk unnamed-chunk-38

#Assumptions reasonably met.

Dem1d <- update(Dem0d, . ~ . - season:yf)
anova(Dem0d, Dem1d)
## refitting model(s) with ML (instead of REML)
## Data: e3
## Models:
## Dem1d: log(mn.de.new) ~ season + yf + (1 | probe)
## Dem0d: log(mn.de.new) ~ season * yf + (1 | probe)
##       Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)    
## Dem1d  6 1353 1393   -671     1341                            
## Dem0d  8 1089 1141   -536     1073   269      2     <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(Dem0d)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(mn.de.new) ~ season * yf + (1 | probe)
##    Data: e3
## 
## REML criterion at convergence: 1115
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.317 -0.650  0.078  0.698  3.201 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  probe    (Intercept) 0.00563  0.075   
##  Residual             0.06948  0.264   
## Number of obs: 5411, groups:  probe, 90
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)        2.0229     0.0133   152.3
## seasonsummer       0.0093     0.0133     0.7
## seasonfall         0.5549     0.0129    42.8
## yf1               -0.1130     0.0207    -5.4
## seasonsummer:yf1  -0.2203     0.0174   -12.6
## seasonfall:yf1    -0.2814     0.0182   -15.4
## 
## Correlation of Fixed Effects:
##             (Intr) ssnsmm ssnfll yf1    ssns:1
## seasonsummr -0.470                            
## seasonfall  -0.482  0.501                     
## yf1         -0.640  0.301  0.309              
## ssnsmmr:yf1  0.359 -0.763 -0.382 -0.410       
## sesnfll:yf1  0.343 -0.356 -0.711 -0.389  0.473

Depends on the interaction. Thermal quality de depends on season x year interaction of all models combined for the dynamic calculation.

Constant Tsel Calculation:

Dem0c <- lmer(log(mn.de.old) ~ season * yf + (1|probe), data = e3)
qplot(fitted(Dem0c), resid(Dem0c)) + geom_smooth()
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-39

qqnorm(resid(Dem0c)); qqline(resid(Dem0c))

plot of chunk unnamed-chunk-39

# assumptions reasonably met.

Dem1c <- update(Dem0c, . ~ . - season:yf)
anova(Dem0c, Dem1c)
## refitting model(s) with ML (instead of REML)
## Data: e3
## Models:
## Dem1c: log(mn.de.old) ~ season + yf + (1 | probe)
## Dem0c: log(mn.de.old) ~ season * yf + (1 | probe)
##       Df  AIC    BIC logLik deviance Chisq Chi Df Pr(>Chisq)    
## Dem1c  6   21   60.6   -4.5        9                            
## Dem0c  8 -188 -134.8  101.8     -204   213      2     <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(Dem0c)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(mn.de.old) ~ season * yf + (1 | probe)
##    Data: e3
## 
## REML criterion at convergence: -161.1
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -3.305 -0.629  0.096  0.696  3.461 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  probe    (Intercept) 0.00769  0.0877  
##  Residual             0.05446  0.2334  
## Number of obs: 5411, groups:  probe, 90
## 
## Fixed effects:
##                  Estimate Std. Error t value
## (Intercept)       2.19117    0.01390   157.7
## seasonsummer      0.00996    0.01182     0.8
## seasonfall        0.49997    0.01151    43.4
## yf1              -0.11971    0.02239    -5.3
## seasonsummer:yf1 -0.17232    0.01548   -11.1
## seasonfall:yf1   -0.22231    0.01617   -13.8
## 
## Correlation of Fixed Effects:
##             (Intr) ssnsmm ssnfll yf1    ssns:1
## seasonsummr -0.398                            
## seasonfall  -0.408  0.503                     
## yf1         -0.621  0.247  0.253              
## ssnsmmr:yf1  0.304 -0.764 -0.384 -0.337       
## sesnfll:yf1  0.290 -0.358 -0.712 -0.319  0.474

Again, depends on the interaction. Thermal quality de depends on season x year interaction of all models combined for the constant calculation.

But wait, there's more!

Dem0c.h <- lmer(log(mn.de.old) ~ season * yf * habitat + (1|probe), data = e3)

qplot(fitted(Dem0c.h), resid(Dem0c.h)) + geom_smooth()
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.

plot of chunk unnamed-chunk-40

qqnorm(resid(Dem0c.h)); qqline(resid(Dem0c.h))

plot of chunk unnamed-chunk-40

# assumptions reasonably met.

Dem1c.h <- update(Dem0c.h, . ~ . - season:yf:habitat)
anova(Dem0c.h, Dem1c.h)
## refitting model(s) with ML (instead of REML)
## Data: e3
## Models:
## Dem1c.h: log(mn.de.old) ~ season + yf + habitat + (1 | probe) + season:yf + 
## Dem1c.h:     season:habitat + yf:habitat
## Dem0c.h: log(mn.de.old) ~ season * yf * habitat + (1 | probe)
##         Df  AIC  BIC logLik deviance Chisq Chi Df Pr(>Chisq)   
## Dem1c.h 12 -405 -326    214     -429                           
## Dem0c.h 14 -414 -322    221     -442  12.9      2     0.0016 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

If I'm interpretting the results of including habitat type into the lmer(), then although there is a significant difference, it's with the removal of season:yf:habitat and just the additive (season + yf + habitat, not other).

#How does thermal accuracy (db) vary with calculation method? Wild attempt because, why not?

t.test(df$db.NEW.signs, df$db.OLD.signs, paired=T)
## 
##  Paired t-test
## 
## data:  df$db.NEW.signs and df$db.OLD.signs
## t = 178.4, df = 88987, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1.566 1.601
## sample estimates:
## mean of the differences 
##                   1.584
t.test(df$db.NEW, df$db.OLD, paired=T)
## 
##  Paired t-test
## 
## data:  df$db.NEW and df$db.OLD
## t = 150, df = 88987, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  1.378 1.415
## sample estimates:
## mean of the differences 
##                   1.397

No idea

#Does E differ from zero?

db=|Tsel-Tb| de=|Tsel-Te| E'=de-db

#Use only operants from closed habitat. We need to justify this.
#justification:
#Box turtles remain in closed habitats according to chi-square of LULC values.

e4 <- group_by(e1, day, season, habitat, yf)
e5 <- summarise(e4,
                mn.e = mean(Te),
                sd.e = sd(Te),
                mn.de.new = mean(abs(de.NEW.signs)),
                mn.de.old = mean(abs(de.OLD.signs)),
                sd.de.new = sd(de.NEW.signs),
                sd.de.old = sd(de.OLD.signs)
)

# DAILY MEAN TEMPS
#ggplot(e5, aes(season, mn.e, colour = yf) ) + 
#geom_boxplot() + 
#facet_grid(.~habitat)


# DAILY STANDARD DEV OF TEMPS
#ggplot(e5, aes(season, sd.e, colour = yf) ) + 
#geom_boxplot() + 
#facet_grid(.~habitat)

Q: How does this (or something else) justify not using operants from the open?

#Analysis using only probes from closed habitat.

e1ss <- filter(e1, habitat == "closed")
e2a <- group_by(e1ss, day, season, yf)
e3a <- summarise(e2a,
                 mn.e = mean(Te),
                 mn.de.new = mean(abs(de.NEW.signs)),
                 mn.de.old = mean(abs(de.OLD.signs)),
                 sd.de.new = sd(de.NEW.signs),
                 sd.de.old = sd(de.OLD.signs)

)

a4 <- select(a3, turtle, day, season, yf, mn.db.new, mn.db.old, sd.db.new, sd.db.old)
e4 <- subset(e3a, select=c(day, mn.de.new, mn.de.old, sd.de.new, sd.de.old))

ae4 <- left_join(a4, e4, by = "day")
#ae3 <- merge(a4, e4, by = "day", keep.x=TRUE)
dim(a4)
## [1] 317   8
dim(ae4)
## [1] 317  12
ae4$E.old <- (ae4$mn.de.old - ae4$mn.db.old)
ae4$V.old <- (ae4$sd.de.old - ae4$sd.db.old)
ae4$E.new <- (ae4$mn.de.new - ae4$mn.db.new)
ae4$V.new <- (ae4$sd.de.new - ae4$sd.db.new)

hist(ae4$E.new, main = "Efficiency based on closed habitat probes")

plot of chunk unnamed-chunk-43

hist(ae4$E.old, main = "Efficiency based on closed habitat probes")

plot of chunk unnamed-chunk-43

hist(ae4$V.new)

plot of chunk unnamed-chunk-43

hist(ae4$V.old)

plot of chunk unnamed-chunk-43

ggplot(data=ae4, aes(season, E.new, colour=yf)) + geom_boxplot()

plot of chunk unnamed-chunk-43

em1 <- lmer(E.new ~ 1 + (1|turtle), data = ae4)

# check assumptions.
plot(fitted(em1), resid(em1))
abline(h=0, lty=2, col=2)

plot of chunk unnamed-chunk-43

qqnorm(resid(em1))
qqline(resid(em1))

plot of chunk unnamed-chunk-43

# Assumptions met

# Test
summary(em1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: E.new ~ 1 + (1 | turtle)
##    Data: ae4
## 
## REML criterion at convergence: 806.9
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -4.188 -0.513  0.015  0.614  2.690 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  turtle   (Intercept) 0.174    0.417   
##  Residual             0.665    0.816   
## Number of obs: 316, groups:  turtle, 26
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   0.5701     0.0939    6.07
emV1 <- lmer(V.new ~ 1 + (1|turtle), data = ae4)

# check assumptions.
plot(fitted(emV1), resid(emV1))
abline(h=0, lty=2, col=2)

plot of chunk unnamed-chunk-43

qqnorm(resid(emV1))
qqline(resid(emV1))

plot of chunk unnamed-chunk-43

# Assumptions met

# Test
summary(emV1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: V.new ~ 1 + (1 | turtle)
##    Data: ae4
## 
## REML criterion at convergence: 971
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.767 -0.650  0.059  0.588  3.523 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  turtle   (Intercept) 0.778    0.882   
##  Residual             1.045    1.022   
## Number of obs: 316, groups:  turtle, 26
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   -0.465      0.183   -2.55

#Analysis using all probes.

#all.e1ss <- filter(e1, habitat == "closed")
all.e2a <- group_by(e1, day, season, yf)
all.e3a <- summarise(e2a,
                 mn.e = mean(Te),
                 mn.de.new = mean(abs(de.NEW.signs)),
                 mn.de.old = mean(abs(de.OLD.signs)),
                 sd.de.new = sd(de.NEW.signs),
                 sd.de.old = sd(de.OLD.signs)

)

all.a4 <- select(a3, turtle, day, season, yf, mn.db.new, mn.db.old, sd.db.new, sd.db.old)
all.e4 <- subset(all.e3a, select=c(day, mn.de.new, mn.de.old, sd.de.new, sd.de.old))

all.ae4 <- left_join(all.a4, all.e4, by = "day")
#ae3 <- merge(a4, e4, by = "day", keep.x=TRUE)
dim(all.a4)
## [1] 317   8
dim(all.ae4)
## [1] 317  12
all.ae4$E.old <- (ae4$mn.de.old - ae4$mn.db.old)
all.ae4$V.old <- (ae4$sd.de.old - ae4$sd.db.old)
all.ae4$E.new <- (ae4$mn.de.new - ae4$mn.db.new)
all.ae4$V.new <- (ae4$sd.de.new - ae4$sd.db.new)

hist(all.ae4$E.new, main = "Efficiency based on all habitat probes")

plot of chunk unnamed-chunk-44

hist(all.ae4$E.old, main = "Efficiency based on all habitat probes")

plot of chunk unnamed-chunk-44

hist(all.ae4$V.new)

plot of chunk unnamed-chunk-44

hist(all.ae4$V.old)

plot of chunk unnamed-chunk-44

ggplot(data=all.ae4, aes(season, E.new, colour=yf)) + geom_boxplot()

plot of chunk unnamed-chunk-44

all.em1 <- lmer(E.new ~ 1 + (1|turtle), data = all.ae4)

# check assumptions.
plot(fitted(all.em1), resid(all.em1))
abline(h=0, lty=2, col=2)

plot of chunk unnamed-chunk-44

qqnorm(resid(all.em1))
qqline(resid(all.em1))

plot of chunk unnamed-chunk-44

# Assumptions met

# Test
summary(all.em1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: E.new ~ 1 + (1 | turtle)
##    Data: all.ae4
## 
## REML criterion at convergence: 806.9
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -4.188 -0.513  0.015  0.614  2.690 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  turtle   (Intercept) 0.174    0.417   
##  Residual             0.665    0.816   
## Number of obs: 316, groups:  turtle, 26
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   0.5701     0.0939    6.07
all.emV1 <- lmer(V.new ~ 1 + (1|turtle), data = all.ae4)

# check assumptions.
plot(fitted(all.emV1), resid(all.emV1))
abline(h=0, lty=2, col=2)

plot of chunk unnamed-chunk-44

qqnorm(resid(all.emV1))
qqline(resid(all.emV1))

plot of chunk unnamed-chunk-44

# Assumptions met

# Test
summary(all.emV1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: V.new ~ 1 + (1 | turtle)
##    Data: all.ae4
## 
## REML criterion at convergence: 971
## 
## Scaled residuals: 
##    Min     1Q Median     3Q    Max 
## -2.767 -0.650  0.059  0.588  3.523 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  turtle   (Intercept) 0.778    0.882   
##  Residual             1.045    1.022   
## Number of obs: 316, groups:  turtle, 26
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   -0.465      0.183   -2.55

#Thermoregulation according to db plotted against de

CB <- read.csv("Cost-Benefit Data - Copy.csv", sep=",", header=T)

#head(CB) #26 turtles, 2 years, 3 seasons.
#str(CB)

#Year to factor
CB$yf <- factor(CB$year)

#Thermal Quality (TQ or de) is based on when the turtles were in the field, all
#other corresponding values have been omitted. Thermal accuracy is (TA or db). 

#str(CB)
#factor-integer-numeric-numeric-numeric-factor

Dynamic Calculation (done with Hank's Help)

qplot(de.NEW, db.NEW, data=CB, colour=yf)

plot of chunk unnamed-chunk-46

lm.CB<-lm(db.NEW~de.NEW + yf +de.NEW:yf, data=CB)

summary(lm.CB) 
## 
## Call:
## lm(formula = db.NEW ~ de.NEW + yf + de.NEW:yf, data = CB)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.846 -0.432  0.147  0.313  0.974 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.63169    0.47157    1.34     0.19    
## de.NEW         0.86942    0.05146   16.89  4.4e-14 ***
## yf2015         0.49616    0.77647    0.64     0.53    
## de.NEW:yf2015 -0.00278    0.10968   -0.03     0.98    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.505 on 22 degrees of freedom
## Multiple R-squared:  0.953,  Adjusted R-squared:  0.947 
## F-statistic:  148 on 3 and 22 DF,  p-value: 9.52e-15
summary.aov(lm.CB)
##             Df Sum Sq Mean Sq F value  Pr(>F)    
## de.NEW       1  112.4   112.4  441.05 4.8e-16 ***
## yf           1    1.1     1.1    4.27   0.051 .  
## de.NEW:yf    1    0.0     0.0    0.00   0.980    
## Residuals   22    5.6     0.3                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

No difference in slopes (2014 = 2015 for TA v. TQ slope)

#ANCOVA testing against a null slope of m=1
lm.CB2<-lm(db.NEW~de.NEW+year, data=CB)
lm.CB3<-lm(db.NEW~de.NEW, data=CB)

Testing if slope differs from 0 or 1. The offset is used as a coefficient applied to the x-value.

#Dynamic
lm.0<-lm(db.NEW~de.NEW +offset(0*de.NEW), data=CB)
summary(lm.0)
## 
## Call:
## lm(formula = db.NEW ~ de.NEW + offset(0 * de.NEW), data = CB)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1206 -0.3695  0.0543  0.3435  1.1314 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.2209     0.3270    3.73    0.001 ** 
## de.NEW        0.8209     0.0409   20.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.528 on 24 degrees of freedom
## Multiple R-squared:  0.944,  Adjusted R-squared:  0.941 
## F-statistic:  403 on 1 and 24 DF,  p-value: <2e-16
lm.1<-lm(db.NEW~de.NEW +offset(1*de.NEW), data=CB)
summary(lm.0)
## 
## Call:
## lm(formula = db.NEW ~ de.NEW + offset(0 * de.NEW), data = CB)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1206 -0.3695  0.0543  0.3435  1.1314 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.2209     0.3270    3.73    0.001 ** 
## de.NEW        0.8209     0.0409   20.07   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.528 on 24 degrees of freedom
## Multiple R-squared:  0.944,  Adjusted R-squared:  0.941 
## F-statistic:  403 on 1 and 24 DF,  p-value: <2e-16
#Constant
lm.0c<-lm(db.OLD~de.OLD +offset(0*de.OLD), data=CB)
summary(lm.0c)
## 
## Call:
## lm(formula = db.OLD ~ de.OLD + offset(0 * de.OLD), data = CB)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0161 -0.2939 -0.0195  0.2188  1.2025 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.9321     0.2848    3.27   0.0032 ** 
## de.OLD        0.8176     0.0421   19.41  3.5e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.549 on 24 degrees of freedom
## Multiple R-squared:  0.94,   Adjusted R-squared:  0.938 
## F-statistic:  377 on 1 and 24 DF,  p-value: 3.52e-16
lm.1c<-lm(db.OLD~de.OLD +offset(1*de.OLD), data=CB)
summary(lm.0c)
## 
## Call:
## lm(formula = db.OLD ~ de.OLD + offset(0 * de.OLD), data = CB)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.0161 -0.2939 -0.0195  0.2188  1.2025 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   0.9321     0.2848    3.27   0.0032 ** 
## de.OLD        0.8176     0.0421   19.41  3.5e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.549 on 24 degrees of freedom
## Multiple R-squared:  0.94,   Adjusted R-squared:  0.938 
## F-statistic:  377 on 1 and 24 DF,  p-value: 3.52e-16

No idea how to really interpret other than they differ from a slope of 0 and 1. Should we investigate at varying degrees of slope? such as 0.5, 0.25, 0.75?

# parameter estimates and their standard errors
coefs <- coef( summary(lm.CB) )
coefs
##                Estimate Std. Error  t value  Pr(>|t|)
## (Intercept)    0.631693    0.47157  1.33955 1.941e-01
## de.NEW         0.869422    0.05146 16.89494 4.384e-14
## yf2015         0.496164    0.77647  0.63900 5.294e-01
## de.NEW:yf2015 -0.002778    0.10968 -0.02533 9.800e-01
# note here that the slope is about 0.87 (std. error = 0.05)
# looks as though it differs from 1.0


B1 <- coefs["de.NEW", "Estimate"]
B1.se <- coefs["de.NEW", "Std. Error"]

# Use the definition of a t-test
t <- ( B1 - 1 )/B1.se 
t
## [1] -2.537
# degrees of freedom = n - p, where p = no. of parameters
df2 <- summary(lm.CB)$df[2] 

# Use the cumulative t-distribution
pt(t, df2)
## [1] 0.009382
#[1] 0.009382075

yes it does differ from one.

another look.

summary(lm.CB) 
## 
## Call:
## lm(formula = db.NEW ~ de.NEW + yf + de.NEW:yf, data = CB)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -0.846 -0.432  0.147  0.313  0.974 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    0.63169    0.47157    1.34     0.19    
## de.NEW         0.86942    0.05146   16.89  4.4e-14 ***
## yf2015         0.49616    0.77647    0.64     0.53    
## de.NEW:yf2015 -0.00278    0.10968   -0.03     0.98    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.505 on 22 degrees of freedom
## Multiple R-squared:  0.953,  Adjusted R-squared:  0.947 
## F-statistic:  148 on 3 and 22 DF,  p-value: 9.52e-15
summary.aov(lm.CB)
##             Df Sum Sq Mean Sq F value  Pr(>F)    
## de.NEW       1  112.4   112.4  441.05 4.8e-16 ***
## yf           1    1.1     1.1    4.27   0.051 .  
## de.NEW:yf    1    0.0     0.0    0.00   0.980    
## Residuals   22    5.6     0.3                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qplot(de.NEW, db.NEW, data=CB, colour=yf) + geom_smooth(method="lm") + 
  geom_abline(intercept = 0, slope = 1, lty=3)

plot of chunk unnamed-chunk-50

qplot(de.NEW, db.NEW, data=CB) + geom_smooth(method="lm") + 
  geom_abline(intercept = 0, slope = 1, lty=3) + 
  geom_point(aes(colour = yf))

plot of chunk unnamed-chunk-50

Now for the Constant Calculation

qplot(de.OLD, db.OLD, data=CB, colour=yf)

plot of chunk unnamed-chunk-51

lm.CBc<-lm(db.OLD~de.OLD + yf +de.OLD:yf, data=CB)

summary(lm.CBc) 
## 
## Call:
## lm(formula = db.OLD ~ de.OLD + yf + de.OLD:yf, data = CB)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8264 -0.3658  0.0945  0.3718  1.0301 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.3556     0.4266    0.83     0.41    
## de.OLD          0.8728     0.0538   16.23    1e-13 ***
## yf2015          0.6365     0.6595    0.97     0.34    
## de.OLD:yf2015  -0.0283     0.1129   -0.25     0.80    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.527 on 22 degrees of freedom
## Multiple R-squared:  0.949,  Adjusted R-squared:  0.942 
## F-statistic:  138 on 3 and 22 DF,  p-value: 2.1e-14
summary.aov(lm.CBc)
##             Df Sum Sq Mean Sq F value  Pr(>F)    
## de.OLD       1  113.4   113.4  408.72 1.1e-15 ***
## yf           1    1.1     1.1    3.97   0.059 .  
## de.OLD:yf    1    0.0     0.0    0.06   0.804    
## Residuals   22    6.1     0.3                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

No difference in slopes (2014 = 2015 for TA v. TQ slope)

#ANCOVA testing against a null slope of m=1
lm.CB2c<-lm(db.OLD~de.OLD+year, data=CB)
lm.CB3c<-lm(db.OLD~de.OLD, data=CB)

#Google "R test whether slope differs from a constant"

# parameter estimates and their standard errors
coefs <- coef( summary(lm.CBc) )
coefs
##               Estimate Std. Error t value  Pr(>|t|)
## (Intercept)    0.35556    0.42658  0.8335 4.135e-01
## de.OLD         0.87283    0.05378 16.2284 9.978e-14
## yf2015         0.63652    0.65951  0.9651 3.450e-01
## de.OLD:yf2015 -0.02829    0.11288 -0.2507 8.044e-01
# note here that the slope is about 0.87 (std. error = 0.05)
# looks as though it differs from 1.0


B1.c <- coefs["de.OLD", "Estimate"]
B1.se.c <- coefs["de.OLD", "Std. Error"]

# Use the definition of a t-test
t <- ( B1.c - 1 )/B1.se.c 
t
## [1] -2.364
# degrees of freedom = n - p, where p = no. of parameters
df2.c <- summary(lm.CBc)$df[2] 

# Use the cumulative t-distribution
pt(t, df2.c)
## [1] 0.01365

yes it does differ from one.

another look.

summary(lm.CBc) 
## 
## Call:
## lm(formula = db.OLD ~ de.OLD + yf + de.OLD:yf, data = CB)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8264 -0.3658  0.0945  0.3718  1.0301 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.3556     0.4266    0.83     0.41    
## de.OLD          0.8728     0.0538   16.23    1e-13 ***
## yf2015          0.6365     0.6595    0.97     0.34    
## de.OLD:yf2015  -0.0283     0.1129   -0.25     0.80    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.527 on 22 degrees of freedom
## Multiple R-squared:  0.949,  Adjusted R-squared:  0.942 
## F-statistic:  138 on 3 and 22 DF,  p-value: 2.1e-14
summary.aov(lm.CBc)
##             Df Sum Sq Mean Sq F value  Pr(>F)    
## de.OLD       1  113.4   113.4  408.72 1.1e-15 ***
## yf           1    1.1     1.1    3.97   0.059 .  
## de.OLD:yf    1    0.0     0.0    0.06   0.804    
## Residuals   22    6.1     0.3                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qplot(de.OLD, db.OLD, data=CB, colour=yf) + geom_smooth(method="lm") + 
  geom_abline(intercept = 0, slope = 1, lty=3)

plot of chunk unnamed-chunk-53

qplot(de.OLD, db.OLD, data=CB) + geom_smooth(method="lm") + 
  geom_abline(intercept = 0, slope = 1, lty=3) + 
  geom_point(aes(colour = yf))

plot of chunk unnamed-chunk-53