#Statistics for Box Turtle Manuscript:
#save.image(file = "Manuscript.RData") #Saved 8-5-2016
#load("Manuscript.RData")
#Packages
library(Rmisc)
## Warning: package 'Rmisc' was built under R version 3.0.3
## Loading required package: lattice
## Warning: package 'lattice' was built under R version 3.0.3
## Loading required package: plyr
## Warning: package 'plyr' was built under R version 3.0.3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.0.3
library(scales)
## Warning: package 'scales' was built under R version 3.0.3
library(gridExtra)
## Warning: package 'gridExtra' was built under R version 3.0.3
## Loading required package: grid
library(reshape)
## Warning: package 'reshape' was built under R version 3.0.3
##
## Attaching package: 'reshape'
##
## The following objects are masked from 'package:plyr':
##
## rename, round_any
#library(data.table)
#library(doBy)
library(plyr)
#library(splitstackshape)
library(ade4)
## Warning: package 'ade4' was built under R version 3.0.3
#library(adehabitatHR)
#library(adehabitatHS)
library(adehabitatLT)
## Warning: package 'adehabitatLT' was built under R version 3.0.3
## Loading required package: sp
## Warning: package 'sp' was built under R version 3.0.3
## Loading required package: adehabitatMA
## Warning: package 'adehabitatMA' was built under R version 3.0.3
##
## Attaching package: 'adehabitatMA'
##
## The following object is masked from 'package:plyr':
##
## join
##
## Loading required package: CircStats
## Warning: package 'CircStats' was built under R version 3.0.3
## Loading required package: MASS
## Loading required package: boot
##
## Attaching package: 'boot'
##
## The following object is masked from 'package:lattice':
##
## melanoma
##
##
## Attaching package: 'adehabitatLT'
##
## The following object is masked from 'package:plyr':
##
## id
#library(adehabitatMA)
#library(geosphere)
#Overall Goal: Investigate the effects of temperature on daily movement patterns and habitat occupancy, and determine how thermal variation influences thermoregulatory performance.
#Overall Objective: To test the relationship between habitat and thermoregulatory performance. Determine the extent to which thermal habitat influences daily movement and habitat use of box turtles.
getwd()
## [1] "G:/R - Research Files/Manuscript Files"
setwd("G:/R - Research Files/Manuscript Files")
options(warn=-1)
#Objective 1: Temperature-Movement Relationship (1) Movement would be positively related to body temperature
#Upload file
dist<-read.csv('Temp-Movement.csv', #1665 observations of 11 variables
sep=',',
header=T)
#str(dist)
#head(dist)
#Correcting for time
dist$datehour<-as.POSIXct(as.character(as.factor(dist$datehour)),format="%Y-%m-%d %H:%M:%S")
dist$time<-format(as.POSIXct(dist$datehour) ,format = "%H")
#head(dist)
dist$time<-as.factor(as.character(dist$time))
#mm2 <- mm[mm[,1]!=2,]
#Removing hour 19 because it's the distance between 7pm and 7am (time when GPS was off)
dist<-dist[dist[,"time"]!=19,] #removes data based on condition in column/row
dist.sum<-summarySE(dist,
measurevar="dist.m",
groupvar="time")
#View(dist.sum) viewing data set
#dist.sum<-dist.sum[-c(13),] #hour 19 removed from data set
#Linear Model for Body Temperature and Movement
lm1<-lm(dist.m~Tb, data=dist)
summary(lm1)
##
## Call:
## lm(formula = dist.m ~ Tb, data = dist)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.55 -10.44 -4.32 4.78 104.18
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.9459 1.9150 10.42 <2e-16 ***
## Tb -0.1075 0.0848 -1.27 0.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.9 on 1544 degrees of freedom
## Multiple R-squared: 0.00104, Adjusted R-squared: 0.000394
## F-statistic: 1.61 on 1 and 1544 DF, p-value: 0.205
#Body Temperature and Movement Graphing average hour body temperature and distance moved per hour to see how it looks
ggplot(dist, aes(x=Tb,
y=dist.m)) +
xlab(expression(paste("Body Temperature (°C)"))) +
ylab(expression(paste("Distance (m)"))) +
#labs(title=(expression(paste("Body Temperature-Movement")))) +
xlim(10,35)+
ylim(0,130)+
theme_classic() +
geom_point() +
#geom_point(aes(colour=turtle), size=1) +
stat_smooth(method=lm, level=0.95,
fill="grey", colour="black", size=0)
#I don't listen to ggplot warnings
#Distance and Time of Day graphing Graphing to determine whether time of day could influence movement
limits <- aes(ymax = dist.m + se, ymin=dist.m - se)
ggplot(dist.sum, aes(x = time, y = dist.m)) +
xlab(expression(paste("Time of Day (hour)"))) +
ylab(expression(paste("Distance (m)"))) +
#labs(title=(expression(paste("Time of Day-Movement")))) +
#ylim(0,40)+
theme_classic()+
geom_bar(stat="identity", position=position_dodge())+
geom_errorbar(limits, width=0.25) +
scale_y_continuous(expand = c(0, 0),
limits = c(0, 35),
breaks =c(0,5,10,15,20,25,30,35))
##ANOVA Testing
#head(dist)
qqnorm(dist$dist.m) #right skewed
qqline(dist$dist.m)
hist(dist$dist.m, breaks = 50, main = "Distance (m), breaks = 50")
#Normality
#Null-hypothesis of this test is that the population is normally distributed.
shapiro.test(dist$dist.m)
##
## Shapiro-Wilk normality test
##
## data: dist$dist.m
## W = 0.7834, p-value < 2.2e-16
#Reject null in favor of alternative. Histogram also shows skewness in data.
#Without correcting
lm2<-lm(dist$dist.m~dist$time)
A2<-anova(lm2)
A2 #Non-significant
## Analysis of Variance Table
##
## Response: dist$dist.m
## Df Sum Sq Mean Sq F value Pr(>F)
## dist$time 11 3653 332 1.31 0.21
## Residuals 1534 389138 254
Aov2<-aov(dist$dist.m~dist$time)
TukeyHSD(Aov2) #only works with aov() function, both are ANOVA functions.
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = dist$dist.m ~ dist$time)
##
## $`dist$time`
## diff lwr upr p adj
## 08-07 3.93570 -2.568 10.440 0.7068
## 09-07 1.47347 -5.043 7.990 0.9999
## 10-07 3.33453 -3.170 9.839 0.8782
## 11-07 1.70912 -4.795 8.213 0.9994
## 12-07 -0.68067 -7.197 5.836 1.0000
## 13-07 1.53101 -4.986 8.048 0.9998
## 14-07 1.08548 -5.419 7.590 1.0000
## 15-07 0.70270 -5.814 7.219 1.0000
## 16-07 0.54738 -5.957 7.052 1.0000
## 17-07 -1.09431 -7.662 5.474 1.0000
## 18-07 -1.19607 -7.725 5.333 1.0000
## 09-08 -2.46223 -8.941 4.016 0.9853
## 10-08 -0.60117 -7.067 5.865 1.0000
## 11-08 -2.22658 -8.693 4.240 0.9935
## 12-08 -4.61637 -11.095 1.862 0.4539
## 13-08 -2.40469 -8.883 4.074 0.9878
## 14-08 -2.85023 -9.316 3.616 0.9548
## 15-08 -3.23301 -9.712 3.246 0.8967
## 16-08 -3.38832 -9.854 3.078 0.8614
## 17-08 -5.03001 -11.560 1.500 0.3273
## 18-08 -5.13177 -11.623 1.360 0.2875
## 10-09 1.86106 -4.618 8.340 0.9987
## 11-09 0.23565 -6.243 6.714 1.0000
## 12-09 -2.15414 -8.645 4.337 0.9953
## 13-09 0.05753 -6.434 6.549 1.0000
## 14-09 -0.38800 -6.867 6.091 1.0000
## 15-09 -0.77078 -7.262 5.720 1.0000
## 16-09 -0.92609 -7.405 5.553 1.0000
## 17-09 -2.56778 -9.111 3.975 0.9810
## 18-09 -2.66954 -9.173 3.834 0.9732
## 11-10 -1.62541 -8.091 4.841 0.9996
## 12-10 -4.01520 -10.494 2.463 0.6738
## 13-10 -1.80352 -8.282 4.675 0.9990
## 14-10 -2.24906 -8.715 4.217 0.9929
## 15-10 -2.63184 -9.110 3.847 0.9753
## 16-10 -2.78715 -9.253 3.679 0.9616
## 17-10 -4.42884 -10.959 2.102 0.5354
## 18-10 -4.53060 -11.022 1.961 0.4882
## 12-11 -2.38979 -8.868 4.089 0.9884
## 13-11 -0.17812 -6.657 6.300 1.0000
## 14-11 -0.62365 -7.090 5.842 1.0000
## 15-11 -1.00643 -7.485 5.472 1.0000
## 16-11 -1.16174 -7.628 5.304 1.0000
## 17-11 -2.80343 -9.334 3.727 0.9627
## 18-11 -2.90519 -9.396 3.586 0.9497
## 13-12 2.21167 -4.279 8.703 0.9940
## 14-12 1.76614 -4.712 8.245 0.9992
## 15-12 1.38336 -5.108 7.874 0.9999
## 16-12 1.22805 -5.251 7.707 1.0000
## 17-12 -0.41364 -6.956 6.129 1.0000
## 18-12 -0.51540 -7.019 5.988 1.0000
## 14-13 -0.44553 -6.924 6.033 1.0000
## 15-13 -0.82831 -7.319 5.663 1.0000
## 16-13 -0.98362 -7.462 5.495 1.0000
## 17-13 -2.62531 -9.168 3.918 0.9775
## 18-13 -2.72708 -9.231 3.777 0.9686
## 15-14 -0.38278 -6.861 6.096 1.0000
## 16-14 -0.53809 -7.004 5.928 1.0000
## 17-14 -2.17978 -8.710 4.351 0.9950
## 18-14 -2.28154 -8.773 4.210 0.9923
## 16-15 -0.15531 -6.634 6.323 1.0000
## 17-15 -1.79700 -8.340 4.746 0.9991
## 18-15 -1.89876 -8.403 4.605 0.9985
## 17-16 -1.64169 -8.172 4.889 0.9996
## 18-16 -1.74345 -8.235 4.748 0.9993
## 18-17 -0.10176 -6.657 6.454 1.0000
#Non-parametric Approach
kruskal.test(dist$dist.m~dist$time, data=dist)
##
## Kruskal-Wallis rank sum test
##
## data: dist$dist.m by dist$time
## Kruskal-Wallis chi-squared = 21.75, df = 11, p-value = 0.02637
#Kruskal Wallis = Significant
#Correcting Data
dist$log.dist.m<-log(dist$dist.m) #correct for skewness in data
qqnorm(dist$log.dist.m) #corrected data
qqline(dist$log.dist.m)
lm1<-lm(dist$log.dist.m~dist$time)
A1<-anova(lm1)
A1 #significant
## Analysis of Variance Table
##
## Response: dist$log.dist.m
## Df Sum Sq Mean Sq F value Pr(>F)
## dist$time 11 16 1.483 1.87 0.039 *
## Residuals 1534 1214 0.791
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Aov1<-aov(dist$log.dist.m~dist$time)
TukeyHSD(Aov1)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = dist$log.dist.m ~ dist$time)
##
## $`dist$time`
## diff lwr upr p adj
## 08-07 0.206633 -0.1567 0.569929 0.7828
## 09-07 0.063802 -0.3002 0.427793 1.0000
## 10-07 0.161363 -0.2019 0.524659 0.9523
## 11-07 0.056257 -0.3070 0.419553 1.0000
## 12-07 -0.087876 -0.4519 0.276115 0.9997
## 13-07 0.009031 -0.3550 0.373022 1.0000
## 14-07 -0.049207 -0.4125 0.314089 1.0000
## 15-07 -0.069338 -0.4333 0.294653 1.0000
## 16-07 -0.030018 -0.3933 0.333278 1.0000
## 17-07 -0.091858 -0.4587 0.275011 0.9996
## 18-07 -0.161229 -0.5259 0.203466 0.9539
## 09-08 -0.142831 -0.5047 0.219038 0.9802
## 10-08 -0.045269 -0.4064 0.315900 1.0000
## 11-08 -0.150376 -0.5115 0.210793 0.9702
## 12-08 -0.294508 -0.6564 0.067360 0.2455
## 13-08 -0.197602 -0.5595 0.164267 0.8250
## 14-08 -0.255839 -0.6170 0.105330 0.4636
## 15-08 -0.275970 -0.6378 0.085898 0.3430
## 16-08 -0.236651 -0.5978 0.124519 0.5904
## 17-08 -0.298490 -0.6633 0.066273 0.2380
## 18-08 -0.367862 -0.7304 -0.005285 0.0431
## 10-09 0.097562 -0.2643 0.459430 0.9993
## 11-09 -0.007545 -0.3694 0.354324 1.0000
## 12-09 -0.151677 -0.5142 0.210889 0.9691
## 13-09 -0.054771 -0.4173 0.307796 1.0000
## 14-09 -0.113008 -0.4749 0.248861 0.9972
## 15-09 -0.133139 -0.4957 0.229427 0.9889
## 16-09 -0.093820 -0.4557 0.268049 0.9995
## 17-09 -0.155659 -0.5211 0.209796 0.9648
## 18-09 -0.225031 -0.5883 0.138243 0.6745
## 11-10 -0.105107 -0.4663 0.256063 0.9985
## 12-10 -0.249239 -0.6111 0.112629 0.5100
## 13-10 -0.152333 -0.5142 0.209536 0.9677
## 14-10 -0.210570 -0.5717 0.150600 0.7541
## 15-10 -0.230701 -0.5926 0.131167 0.6325
## 16-10 -0.191382 -0.5526 0.169788 0.8522
## 17-10 -0.253221 -0.6180 0.111542 0.4971
## 18-10 -0.322593 -0.6852 0.039985 0.1375
## 12-11 -0.144132 -0.5060 0.217736 0.9787
## 13-11 -0.047226 -0.4091 0.314643 1.0000
## 14-11 -0.105463 -0.4666 0.255706 0.9985
## 15-11 -0.125594 -0.4875 0.236274 0.9930
## 16-11 -0.086275 -0.4474 0.274895 0.9998
## 17-11 -0.148114 -0.5129 0.216649 0.9753
## 18-11 -0.217486 -0.5801 0.145091 0.7184
## 13-12 0.096906 -0.2657 0.459473 0.9993
## 14-12 0.038669 -0.3232 0.400538 1.0000
## 15-12 0.018538 -0.3440 0.381105 1.0000
## 16-12 0.057858 -0.3040 0.419726 1.0000
## 17-12 -0.003982 -0.3694 0.361474 1.0000
## 18-12 -0.073354 -0.4366 0.289920 1.0000
## 14-13 -0.058237 -0.4201 0.303631 1.0000
## 15-13 -0.078369 -0.4409 0.284198 0.9999
## 16-13 -0.039049 -0.4009 0.322820 1.0000
## 17-13 -0.100888 -0.4663 0.264567 0.9991
## 18-13 -0.170260 -0.5335 0.193014 0.9311
## 15-14 -0.020131 -0.3820 0.341737 1.0000
## 16-14 0.019188 -0.3420 0.380358 1.0000
## 17-14 -0.042651 -0.4074 0.322112 1.0000
## 18-14 -0.112023 -0.4746 0.250555 0.9975
## 16-15 0.039320 -0.3225 0.401188 1.0000
## 17-15 -0.022520 -0.3880 0.342936 1.0000
## 18-15 -0.091892 -0.4552 0.271382 0.9996
## 17-16 -0.061840 -0.4266 0.302924 1.0000
## 18-16 -0.131211 -0.4938 0.231366 0.9901
## 18-17 -0.069372 -0.4355 0.296786 1.0000
#18-08 p adj = 0.0431335.
#Pretty negligable if you ask me.
Only statistical difference is between 18:00 and 08:00 where at 8am turtles move more than at 6pm. P-value is 0.043, which is on the cusp of not being significant [in my opinion].
#Objective 2: Habitat Selection and Preferred Body Temperature (2) habitat selected would reflect preferred body temperatures
Thermal Preference Prep:
ZPTsel<-read.table("ZPTsel.dat",
sep="\t",
header=TRUE) #Pedro's data - 11 male box turtles
dateRef<-Sys.Date()
dateRefn<-Sys.Date() + 1
######################################
#Constant Thermal Preference (Tsel-C)#
######################################
#meanTselC<-mean(ZPTselDynamic$value, na.rm=TRUE)
#quantile(ZPTselDynamic$value, na.rm=TRUE)
#0% 25% 50% 75% 100%
#6.0 25.5 29.3 31.3 36.6
TselC.min<-25.5
TselC.max<-31.3
# New Tsel
date_time<-seq(as.POSIXct("2014-08-01 07:00:00"),
as.POSIXct("2014-08-03 07:00:00"),
length.out=289)
ZPTsel$date<-date_time[1:287]
ZPTselDynamic<-melt(ZPTsel, id.vars=c('date'))
meanTselD<-aggregate(ZPTselDynamic$value,
list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")),
mean, na.rm=TRUE)
colnames(meanTselD)<-c("hour","mTsel")
medianTselD<-aggregate(ZPTselDynamic$value,
list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")),
median, na.rm=TRUE)
colnames(medianTselD)<-c("hour","medianTsel")
maxTselD<-aggregate(ZPTselDynamic$value,
list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")),
max, na.rm=TRUE)
colnames(maxTselD)<-c("hour","maxTsel")
minTselD<-aggregate(ZPTselDynamic$value,
list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")),
min, na.rm=TRUE)
colnames(minTselD)<-c("hour","minTsel")
sdTselD<-aggregate(ZPTselDynamic$value,
list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")),
sd, na.rm=TRUE)
colnames(sdTselD)<-c("hour","sdTsel")
nTselD<-aggregate(ZPTselDynamic$value,
list(format(round(ZPTselDynamic$date, "hours"), "%H:%M:%S")),
length)
colnames(nTselD)<-c("hour","nTsel")
errorTselD<-1.96*(sdTselD$sdTsel/sqrt(nTselD$nTsel))
ZPTselD<-cbind(nTselD, meanTselD, medianTselD, minTselD, maxTselD, sdTselD, errorTselD)
ZPTselD<-ZPTselD[,-c(3,5,7,9,11)]
#View(ZPTselD)
ZPTselD$hour<-as.POSIXlt(ZPTselD$hour,format="%H:%M:%S")
ZPTselD$hour.day<-format(round(ZPTselD$hour, "hours"), "%H")
#Plotting Thermal Preference (Dynamic, Tsel-D; Constant, Tsel-C) Dynamic Calculation
ggplot(ZPTselD, aes(x=hour, y=mTsel))+
xlab("Hour of day") +
ylab(expression(paste("Thermal Preference (°C)"))) +
#labs(title=(expression(paste("Combined E"^"' "," (closed models de)")))) +
#xlim(0,23)+
ylim(15,35)+
theme_classic() + #Praise baby jebus
geom_ribbon(aes(ymin=c(mTsel-errorTselD), ymax=c(mTsel+errorTselD)), data=ZPTselD)+
scale_x_datetime(breaks = "2 hour", labels =date_format("%H"))
When given the opportunity, box turtles select warmer temperatures at night (when we incorporate time into the Tsel).
Constant Calculation
TselC.min<-25.5
TselC.max<-31.3
ggplot(ZPTselD, aes(x=hour, y=mTsel))+
xlab("Hour of day") +
ylab(expression(paste("Thermal Preference (°C)"))) +
#labs(title=(expression(paste("Combined E"^"' "," (closed models de)")))) +
#xlim(0,23)+
ylim(15,35)+
theme_classic() + #Praise baby jebus
geom_ribbon(aes(ymin=TselC.min, ymax=TselC.max))+
scale_x_datetime(breaks = "2 hour", labels =date_format("%H"))
Thermal preference for box turtles ranges from 25.5°C to 31.3°C.
#Habitat Preference
```r
#LULC.Habitat<-read.table(file = "clipboard",
# sep = "\t",
# header=TRUE)
#write.table(LULC.Habitat, file="LULC Habitat.csv", sep="\t")
LULC.Habitat<-read.csv("LULC Habitat.csv",
header=T,
sep="\t")
LULC.Habitat<-LULC.Habitat[,-c(6,7,8,9,10)]
Calculated slightly different than a regular chi-square analysis. For starters, the Z-crit is calculated as the following: z(1-??/2k) … NOT z(1-??/2)
In this analysis, k represents the number of simultaneous estimates being made. Should probably put that in the methods…
head(LULC.Habitat)
## LULC_Type Area_m2 Perc_Area Obs_Count_Turtle Expected
## 1 Developed Area 30212 0.1413 94 269
## 2 Open Water 4044 0.0189 0 36
## 3 Deciduous Forest 101648 0.4754 1034 906
## 4 Evergreen Forest 12816 0.0599 435 114
## 5 Mixed Forest 5604 0.0262 6 50
## 6 Shrub/Scrub 1608 0.0075 14 14
LULC Type is based the GIS layer land-use / land-cover.
Area_m2 is Area (m2) of LULC values that are within the 456m buffer radius from turtle start locations (ranges were dissolved so there's no duplicate LULC values for total potential habitat occupied) Perc_Area is the percentage of area for each LULC value Obs_Count_Turtle is observations (GPS coordinates) of turtles in each of the 10 LULC types Expected is based on the total 1906 (2014 and 2015) GPS coordinates and applied to the percent area. It is assumed that box turtles use each habitat type proportionally, relative to other habitat types.
#(1-??/2k), where k=10 simultaneous comparisons.
z<- abs(qnorm((1-0.95)/(2*(10))))
z
## [1] 2.807
LULC.Habitat$Perc_Turt_Hab<-(LULC.Habitat$Obs_Count_Turtle/1906) #obtaining percentage of Turtle observations in each habitat type
#Confidence interval is calculated as: z(1-??/2k)*???(pi(1-pi))/n
LULC.Habitat$Conf.Int<-(2.807034*(sqrt(LULC.Habitat$Perc_Turt_Hab*(1-LULC.Habitat$Perc_Turt_Hab)/1906)))
LULC.Habitat$Preference<-ifelse(LULC.Habitat$Perc_Turt_Hab-LULC.Habitat$Conf.Int > LULC.Habitat$Perc_Area, "Preferred",ifelse(LULC.Habitat$Perc_Turt_Hab+LULC.Habitat$Conf.Int < LULC.Habitat$Perc_Area, "Avoid","Expected"))
LULC.Habitat
## LULC_Type Area_m2 Perc_Area Obs_Count_Turtle
## 1 Developed Area 30211.7 0.1413 94
## 2 Open Water 4044.5 0.0189 0
## 3 Deciduous Forest 101647.8 0.4754 1034
## 4 Evergreen Forest 12815.6 0.0599 435
## 5 Mixed Forest 5603.8 0.0262 6
## 6 Shrub/Scrub 1608.0 0.0075 14
## 7 Grassland/Herbaceous 2387.7 0.0112 141
## 8 Pasture/Hay 29237.2 0.1367 178
## 9 Cultivated Crops 26118.5 0.1222 4
## 10 Emergent Herbaceous Wetlands 146.2 0.0007 0
## Expected Perc_Turt_Hab Conf.Int Preference
## 1 269 0.049318 0.013922 Avoid
## 2 36 0.000000 0.000000 Avoid
## 3 906 0.542497 0.032032 Preferred
## 4 114 0.228227 0.026984 Preferred
## 5 50 0.003148 0.003602 Avoid
## 6 14 0.007345 0.005490 Expected
## 7 21 0.073977 0.016828 Preferred
## 8 261 0.093389 0.018709 Avoid
## 9 233 0.002099 0.002942 Avoid
## 10 1 0.000000 0.000000 Avoid
Habitat Preference is based on Neu et al. 1972, using chi-square analysis. Based on the total area (456m buffer), the null hypothesis is that turtles would use each habitat proportionally. Proportionally higher indicates preference, proportionally lower indicidates avoidance.
Box turtles prefer deciduous and evergreen forest, as well as grassland and herbaceous. Shrub/scrub habitat is used as expected, and all other habitat types are used proportionally less (indicating avoidance).
Canopy Cover and Slope between Potential habitat (456m buffer) and occupied habitat (KDE estimate):
```r
Canopy<-read.csv("Canopy Cover.csv",
sep='\t',
header=T)
t.test(Canopy$buff_cc, Canopy$kde_cc, paired=TRUE)
##
## Paired t-test
##
## data: Canopy$buff_cc and Canopy$kde_cc
## t = -2.34, df = 18, p-value = 0.03101
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -23.912 -1.287
## sample estimates:
## mean of the differences
## -12.6
Slope<-read.csv("Slope.csv",
sep='\t',
header=T)
t.test(Slope$kde_slope, Slope$buff_slope, paired=TRUE)
##
## Paired t-test
##
## data: Slope$kde_slope and Slope$buff_slope
## t = 0.6699, df = 21, p-value = 0.5102
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.8971 1.7497
## sample estimates:
## mean of the differences
## 0.4263
#Upload file
box.turtle.data<-read.csv("2014 2015 Tb Tex db CORRECTED.csv",
header=TRUE,
sep="\t")
operant.data<-read.csv("2014 2015 Te de CORRECTED.csv",
header=TRUE,
sep=",")
#Time Correction for data.frames
box.turtle.data$date<-as.POSIXct(as.character(box.turtle.data$date,
format= "%Y-%m-%d %H:%M:%S"))
operant.data$date<-as.POSIXct(as.character(operant.data$date,
format= "%Y-%m-%d %H:%M:%S"))
#str(box.turtle.data)
#str(operant.data)
Thermal exploitation (Ex) is typically calculated as the following: (Time Tb in Tsel)/(Time Te in Tsel) This is represented in a 24-hour period calculate Ex for each day for each turtle
Body Temperature:
meanTb<-aggregate(list(mTb = box.turtle.data$Tb),
list(date = cut(box.turtle.data$date, "1 hour"),
turtle = box.turtle.data$turtle),
data=box.turtle.data,
mean)
head(meanTb)
## date turtle mTb
## 1 2014-06-10 00:00:00 L1 17.37
## 2 2014-06-10 01:00:00 L1 17.08
## 3 2014-06-10 02:00:00 L1 16.85
## 4 2014-06-10 03:00:00 L1 16.72
## 5 2014-06-10 04:00:00 L1 16.74
## 6 2014-06-10 05:00:00 L1 16.81
meanTb$date<-as.POSIXct(as.character(meanTb$date,
format="%Y-%m-%d %H:%M:%S"))
meanTb$hour.day<-format(round(meanTb$date, "hours"), "%H")
#Exploitation Index
#Constant (Tsel-C) Tb
Tsel.min<-25.5 #bottom 25%
Tsel.mid<-29.3 #mid 50%
Tsel.max<-31.3 #top 75%
C.Tb<-ifelse(meanTb$mTb > Tsel.max, "above", #ifelse(condition, yes, no)
ifelse(meanTb$mTb < Tsel.min, "below",
"inside"))
#head(C.Tb)
meanTb$Tsel.C_Tb<-C.Tb
#head(meanTb)
#Dynamic (Tsel-D) Tb
D.Tb<-ifelse(meanTb$mTb[format(round(meanTb$date, "hours"), "%H")%in%meanTb$hour.day] < (ZPTselD$mTsel[as.numeric(meanTb$hour.day)+1] - ZPTselD$errorTsel[as.numeric(meanTb$hour.day)+1]), "below",
ifelse(meanTb$mTb[format(round(meanTb$date, "hours"), "%H")%in%meanTb$hour.day] > (ZPTselD$mTsel[as.numeric(meanTb$hour.day)+1] + ZPTselD$errorTsel[as.numeric(meanTb$hour.day)+1]), "above",
"inside"))
#head(D.Tb)
meanTb$Tsel.D_Tb<-D.Tb
Shell Temperature
meanTex<-aggregate(list(mTex = box.turtle.data$Tex),
list(date = cut(box.turtle.data$date, "1 hour"),
turtle = box.turtle.data$turtle),
data=box.turtle.data,
mean)
#head(meanTex)
meanTex$date<-as.POSIXct(as.character(meanTex$date,
format="%Y-%m-%d %H:%M:%S"))
#View(meanTex)
meanTex$hour.day<-format(round(meanTex$date, "hours"), "%H")
Shell.C<-ifelse(meanTex$mTex > Tsel.max, "above", #ifelse(condition, yes, no)
ifelse(meanTex$mTex < Tsel.min, "below",
"inside"))
meanTex$Tsel.C_Shell<-Shell.C
#Dynamic (Tsel-D)
Shell.D<-ifelse(meanTex$mTex[format(round(meanTex$date, "hours"), "%H")%in%meanTex$hour.day] < (ZPTselD$mTsel[as.numeric(meanTex$hour.day)+1] - ZPTselD$errorTsel[as.numeric(meanTex$hour.day)+1]), "below",
ifelse(meanTex$mTex[format(round(meanTex$date, "hours"), "%H")%in%meanTex$hour.day] > (ZPTselD$mTsel[as.numeric(meanTex$hour.day)+1] + ZPTselD$errorTsel[as.numeric(meanTex$hour.day)+1]), "above",
"inside"))
meanTex$Tsel.D_Shell<-Shell.D
What we are interested in were the following conditions: #(1) When Tshell is above Tsel and Tb is inside Tsel, #(2) When Tshell is below Tsel and Tb is inside Tsel, #(3) When shell is inside Tsel and Tb is inside Tsel.
Merging data.frames (the long way):
head(meanTb)
## date turtle mTb hour.day Tsel.C_Tb Tsel.D_Tb
## 1 2014-06-10 00:00:00 L1 17.37 00 below below
## 2 2014-06-10 01:00:00 L1 17.08 01 below below
## 3 2014-06-10 02:00:00 L1 16.85 02 below below
## 4 2014-06-10 03:00:00 L1 16.72 03 below below
## 5 2014-06-10 04:00:00 L1 16.74 04 below below
## 6 2014-06-10 05:00:00 L1 16.81 05 below below
head(meanTex)
## date turtle mTex hour.day Tsel.C_Shell Tsel.D_Shell
## 1 2014-06-10 00:00:00 L1 16.63 00 below below
## 2 2014-06-10 01:00:00 L1 16.14 01 below below
## 3 2014-06-10 02:00:00 L1 15.95 02 below below
## 4 2014-06-10 03:00:00 L1 15.90 03 below below
## 5 2014-06-10 04:00:00 L1 16.11 04 below below
## 6 2014-06-10 05:00:00 L1 16.26 05 below below
Therm.Ex<-meanTb
Therm.Ex$mTex<-meanTex$mTex
Therm.Ex$Tsel.C_Shell<-meanTex$Tsel.C_Shell
Therm.Ex$Tsel.D_Shell<-meanTex$Tsel.D_Shell
Therm.Ex<-na.omit(Therm.Ex)
#head(Therm.Ex)
table(Therm.Ex$Tsel.C_Shell)
##
## above below inside
## 171 6684 567
count(Therm.Ex$hour.day[with(Therm.Ex, Tsel.C_Shell ==c("above"))])
## x freq
## 1 08 1
## 2 09 2
## 3 10 4
## 4 11 13
## 5 12 29
## 6 13 36
## 7 14 40
## 8 15 26
## 9 16 15
## 10 17 5
count(Therm.Ex$hour.day[with(Therm.Ex, Tsel.C_Shell ==c("below"))])
## x freq
## 1 00 317
## 2 01 317
## 3 02 317
## 4 03 317
## 5 04 317
## 6 05 317
## 7 06 308
## 8 07 308
## 9 08 307
## 10 09 301
## 11 10 289
## 12 11 263
## 13 12 219
## 14 13 188
## 15 14 176
## 16 15 184
## 17 16 207
## 18 17 245
## 19 18 273
## 20 19 291
## 21 20 302
## 22 21 307
## 23 22 307
## 24 23 307
count(Therm.Ex$hour.day[with(Therm.Ex, Tsel.C_Shell ==c("inside"))])
## x freq
## 1 09 5
## 2 10 14
## 3 11 30
## 4 12 58
## 5 13 82
## 6 14 90
## 7 15 96
## 8 16 84
## 9 17 56
## 10 18 33
## 11 19 15
## 12 20 4
table(Therm.Ex$Tsel.D_Shell)
##
## above below inside
## 477 6665 280
count(Therm.Ex$hour.day[with(Therm.Ex, Tsel.D_Shell ==c("above"))])
## x freq
## 1 08 3
## 2 09 18
## 3 10 28
## 4 11 48
## 5 12 92
## 6 13 98
## 7 14 90
## 8 15 60
## 9 16 27
## 10 17 9
## 11 18 4
count(Therm.Ex$hour.day[with(Therm.Ex, Tsel.D_Shell ==c("below"))])
## x freq
## 1 00 317
## 2 01 317
## 3 02 317
## 4 03 317
## 5 04 317
## 6 05 317
## 7 06 308
## 8 07 308
## 9 08 291
## 10 09 256
## 11 10 239
## 12 11 208
## 13 12 172
## 14 13 185
## 15 14 190
## 16 15 223
## 17 16 265
## 18 17 286
## 19 18 300
## 20 19 305
## 21 20 306
## 22 21 307
## 23 22 307
## 24 23 307
count(Therm.Ex$hour.day[with(Therm.Ex, Tsel.D_Shell ==c("inside"))])
## x freq
## 1 08 14
## 2 09 34
## 3 10 40
## 4 11 50
## 5 12 42
## 6 13 23
## 7 14 26
## 8 15 23
## 9 16 14
## 10 17 11
## 11 18 2
## 12 19 1
#(1) When Tshell is above Tsel and Tb is inside Tsel:
#Constant
Therm.Ex.AboveC<-Therm.Ex[with(Therm.Ex, Tsel.C_Shell == c("above") & Tsel.C_Tb == c("inside")),]
#x = hour.day, freq = count
count(Therm.Ex.AboveC$hour.day)
## x freq
## 1 08 1
## 2 09 1
## 3 10 2
## 4 11 7
## 5 12 22
## 6 13 11
## 7 14 12
## 8 15 4
## 9 16 2
## 10 17 2
table(Therm.Ex.AboveC$Tsel.C_Shell)
##
## above
## 64
#Dynamic
Therm.Ex.AboveD<-Therm.Ex[with(Therm.Ex, Tsel.D_Shell == c("above") & Tsel.D_Tb == c("inside")),]
#x = hour.day, freq = count
count(Therm.Ex.AboveD$hour.day)
## x freq
## 1 08 1
## 2 09 6
## 3 10 8
## 4 11 17
## 5 12 21
## 6 13 16
## 7 14 11
## 8 15 4
## 9 16 2
## 10 17 2
## 11 18 1
table(Therm.Ex.AboveD$Tsel.D_Shell)
##
## above
## 89
For constant calculation: There are a total of 171 Tshell above Tsel. Out of those 171, there are 64 occurences of Tb in Tsel when Tshell is above Tsel.
The largest counts are during the mid-day, between noon and 2 pm. There does not appear to be occurences before 8am or after 5pm.
For dynamic calculation: There are a total of 477 Tshell above Tsel. Out of those 477, there are 89 occurences of Tb in Tsel when Tshell is above Tsel
The largest counts are during the still mid-day, between 11am and 2 pm. There does not appear to be any occurences before 8am or after 6pm.
#(2) When Tshell is below Tsel and Tb is inside Tsel:
#Constant
Therm.Ex.BelowC<-Therm.Ex[with(Therm.Ex, Tsel.C_Shell == c("below") & Tsel.C_Tb == c("inside")),]
#x = hour.day, freq = count
count(Therm.Ex.BelowC$hour.day)
## x freq
## 1 14 5
## 2 15 7
## 3 16 8
## 4 17 17
## 5 18 11
## 6 19 11
## 7 20 4
## 8 21 2
table(Therm.Ex.BelowC$Tsel.C_Shell)
##
## below
## 65
#Dynamic
Therm.Ex.BelowD<-Therm.Ex[with(Therm.Ex, Tsel.D_Shell == c("below") & Tsel.D_Tb == c("inside")),]
#x = hour.day, freq = count
count(Therm.Ex.BelowD$hour.day)
## x freq
## 1 08 4
## 2 09 3
## 3 10 1
## 4 11 2
## 5 12 1
## 6 14 4
## 7 15 10
## 8 16 6
## 9 17 7
## 10 18 3
## 11 19 1
table(Therm.Ex.BelowD$Tsel.D_Shell)
##
## below
## 42
For constant calculation: There are a total of 6684 Tshell below Tsel. Out of those 6684, there are 65 occurences of Tb inside Tsel when Tshell is below Tsel.
The largest counts are during the evening, between 5-7pm. There does not appear to be any occurences before 2pm or after 9pm for constant calculation.
For dynamic calculation: There are a total of 6665 Tshell above Tsel. Out of those 6665, there are 42 occurences of Tb inside Tsel when Tshell is below Tsel
The largest counts are during the still mid-evening, between 3-5 pm. There does not appear to be any occurences before 8am or after 7pm for dynamic calculation.
#(3) When Tshell inside Tsel and Tb inside Tsel
#Constant
Therm.Ex.InsideC<-Therm.Ex[with(Therm.Ex, Tsel.C_Shell == c("inside") & Tsel.C_Tb == c("inside")),]
#x = hour.day, freq = count
count(Therm.Ex.InsideC$hour.day)
## x freq
## 1 09 2
## 2 10 9
## 3 11 15
## 4 12 35
## 5 13 69
## 6 14 81
## 7 15 88
## 8 16 78
## 9 17 53
## 10 18 28
## 11 19 15
## 12 20 4
table(Therm.Ex.InsideC$Tsel.C_Shell)
##
## inside
## 477
#Dynamic
Therm.Ex.InsideD<-Therm.Ex[with(Therm.Ex, Tsel.D_Shell == c("inside") & Tsel.D_Tb == c("inside")),]
#x = hour.day, freq = count
count(Therm.Ex.InsideD$hour.day)
## x freq
## 1 08 12
## 2 09 28
## 3 10 32
## 4 11 35
## 5 12 27
## 6 13 11
## 7 14 15
## 8 15 13
## 9 16 8
## 10 17 6
## 11 18 1
## 12 19 1
table(Therm.Ex.InsideD$Tsel.D_Shell)
##
## inside
## 189
For constant calculation: There are a total of 567 Tshell inside Tsel. Out of those 567, there are 477 occurences of Tb in Tsel when Tshell is inside Tsel.
The largest counts are from 11am until 7pm. No occurences before 9am or after 8pm.
For dynamic calculation: There are a total of 280 Tshell inside Tsel. Out of those 280, there are 189 occurences of Tb in Tsel when Tshell is inside Tsel
The largest counts are from 8am until 3pm. There does not appear to be any occurences before 8am or after 7pm.
#Continuing with operant temperatures:
#Operant Temperatures (Te)
meanTe<-aggregate(list(mTe = operant.data$Te),
list(date = cut(operant.data$date, "1 hour"),
year = operant.data$year,
season = operant.data$season),
data=operant.data,
mean)
meanTe$date<-as.POSIXct(as.character(meanTe$date,
format="%Y-%m-%d %H:%M:%S"))
#View(meanTe)
meanTe$hour.day<-format(round(meanTe$date, "hours"), "%H")
#Habitat Types
Open.Close.Te<-aggregate(list(mTe = operant.data$Te),
list(date = cut(operant.data$date, "1 hour"),
year = operant.data$year,
habitat = operant.data$habitat,
season = operant.data$season),
data=operant.data,
mean)
Open.Close.Te$date<-as.POSIXct(as.character(Open.Close.Te$date,
format="%Y-%m-%d %H:%M:%S"))
Open.Close.Te$hour.day<-format(round(Open.Close.Te$date, "hours"), "%H")
#Environmental Temperatures
#Constant (Tsel-C) Te
Tsel.min<-25.5 #bottom 25%
Tsel.mid<-29.3 #mid 50%
Tsel.max<-31.3 #top 75%
C.Te<-ifelse(meanTe$mTe > Tsel.max, "above", #ifelse(condition, yes, no)
ifelse(meanTe$mTe < Tsel.min, "below",
"inside"))
#head(C.Te)
meanTe$Tsel.C<-C.Te
#head(meanTe)
#Dynamic (Tsel-D)
D.Te<-ifelse(meanTe$mTe[format(round(meanTe$date, "hours"), "%H")%in%meanTe$hour.day] < (ZPTselD$mTsel[as.numeric(meanTe$hour.day)+1] - ZPTselD$errorTsel[as.numeric(meanTe$hour.day)+1]), "below",
ifelse(meanTe$mTe[format(round(meanTe$date, "hours"), "%H")%in%meanTe$hour.day] > (ZPTselD$mTsel[as.numeric(meanTe$hour.day)+1] + ZPTselD$errorTsel[as.numeric(meanTe$hour.day)+1]), "above",
"inside"))
#head(D.Te)
meanTe$Tsel.D<-D.Te
#Open-Closed Habitat Comparison
#Constant (Tsel-C)
Tsel.min<-25.5 #bottom 25%
Tsel.mid<-29.3 #mid 50%
Tsel.max<-31.3 #top 75%
C.TeOC<-ifelse(Open.Close.Te$mTe > Tsel.max, "above", #ifelse(condition, yes, no)
ifelse(Open.Close.Te$mTe < Tsel.min, "below",
"inside"))
#head(C.TeOC)
Open.Close.Te$Tsel.C<-C.TeOC
#head(Open.Close.Te)
#Dynamic (Tsel-D)
D.TeOC<-ifelse(Open.Close.Te$mTe[format(round(Open.Close.Te$date, "hours"), "%H")%in%Open.Close.Te$hour.day] < (ZPTselD$mTsel[as.numeric(Open.Close.Te$hour.day)+1] - ZPTselD$errorTsel[as.numeric(Open.Close.Te$hour.day)+1]), "below",
ifelse(Open.Close.Te$mTe[format(round(Open.Close.Te$date, "hours"), "%H")%in%Open.Close.Te$hour.day] > (ZPTselD$mTsel[as.numeric(Open.Close.Te$hour.day)+1] + ZPTselD$errorTsel[as.numeric(Open.Close.Te$hour.day)+1]), "above",
"inside"))
#head(D.TeOC)
Open.Close.Te$Tsel.D<-D.TeOC
Everything run above was just to get the data prepared. Before going through and calculating just the thermal exploitation index, I'll go through above, inside, and below the Tsel range. The focus will be body temperatures, then models
#Body Temperatures
table(meanTb$Tsel.C)
##
## above below inside
## 113 6713 606
#above Tsel-C
113/7432
## [1] 0.0152
#below Tsel-C
6713/7432
## [1] 0.9033
#inside Tsel-C
606/7432
## [1] 0.08154
table(meanTb$Tsel.D)
##
## above below inside
## 399 6713 320
#above Tsel-D
399/7432
## [1] 0.05369
#below Tsel-D
6713/7432
## [1] 0.9033
#inside Tsel-D
320/7432
## [1] 0.04306
#Warning - last time I tried to create a table format of above-below-inside for each season, each turtle, and each year, I apparently created a 2gig vector that crashed my computer.
#Operative Temperatures
table(Open.Close.Te$Tsel.C)
##
## above below inside
## 257 6125 743
#above Tsel-C
257/7125
## [1] 0.03607
#below Tsel-C
6125/7125
## [1] 0.8596
#inside Tsel-C
743/7125
## [1] 0.1043
table(Open.Close.Te$Tsel.D)
##
## above below inside
## 579 6128 418
#above Tsel-D
579/7125
## [1] 0.08126
#below Tsel-D
6128/7125
## [1] 0.8601
#inside Tsel-D
418/7125
## [1] 0.05867
#Well, just for fun:
#Thermal Exploitation Tsel-C
((606/7432) / (743/7125))
## [1] 0.7819
#Thermal Exploitation Tsel-D
((320/7432)/ (418/7125))
## [1] 0.7339
The following code should just be a review.
#Aggregation of data for calculation?
#Tb in Tsel
#Tb.Ex.C<-aggregate(Tsel.C ~ cut(meanTb$date, "1 day") + meanTb$turtle,
# data=meanTb,
# function(x) sum(x==c("inside")))
#Tb.Ex.D<-aggregate(Tsel.D ~ cut(meanTb$date, "1 day") + meanTb$turtle,
# data=meanTb,
# function(x) sum(x==c("inside")))
#Tb.Ex.C<-rename(Tb.Ex.C, c('cut(meanTb$date, "1 day")' = "date"))
#Tb.Ex.D<-rename(Tb.Ex.D, c('cut(meanTb$date, "1 day")' = "date"))
#head(Tb.Ex.C)
#head(Tb.Ex.D)
#Te in Tsel
#Te.Ex.C<-aggregate(Tsel.C ~ cut(meanTe$date, "1 day") + meanTe$season + meanTe$year,
# data=meanTe,
# function(x) sum(x==c("inside")))
#Te.Ex.D<-aggregate(Tsel.D ~ cut(meanTe$date, "1 day") + meanTe$season + meanTe$year,
# data=meanTe,
# function(x) sum(x==c("inside")))
#Te.Ex.C<-rename(Te.Ex.C, c('cut(meanTe$date, "1 day")' = "date"))
#Te.Ex.D<-rename(Te.Ex.D, c('cut(meanTe$date, "1 day")' = "date"))
#head(Te.Ex.C)
#head(Te.Ex.D)
#Habitat Types in Tsel
Open.Close.Te.Ex.C<-aggregate(Tsel.C ~ cut(Open.Close.Te$date, "1 day") + Open.Close.Te$season + Open.Close.Te$year + Open.Close.Te$habitat,
data=Open.Close.Te,
function(x) sum(x==c("inside")))
Open.Close.Te.Ex.D<-aggregate(Tsel.D ~ cut(Open.Close.Te$date, "1 day") + Open.Close.Te$season + Open.Close.Te$year + Open.Close.Te$habitat,
data=Open.Close.Te,
function(x) sum(x==c("inside")))
Open.Close.Te.Ex.C<-rename(Open.Close.Te.Ex.C, c('cut(Open.Close.Te$date, "1 day")' = "date"))
Open.Close.Te.Ex.D<-rename(Open.Close.Te.Ex.D, c('cut(Open.Close.Te$date, "1 day")' = "date"))
#head(Open.Close.Te.Ex.C)
#head(Open.Close.Te.Ex.D)
#Percent Te in Tsel for Habitat Types
Open.Close.Te.Ex.C$Te.Prnct.C<-Open.Close.Te.Ex.C$Tsel.C/24
Open.Close.Te.Ex.D$Te.Prnct.D<-Open.Close.Te.Ex.D$Tsel.D/24
Open.Close.Te.Ex.C<-rename(Open.Close.Te.Ex.C,c('Open.Close.Te$year' = 'year',
'Open.Close.Te$habitat' = 'habitat',
'Open.Close.Te$season' = 'season'))
Open.Close.Te.Ex.D<-rename(Open.Close.Te.Ex.D,c('Open.Close.Te$year' = 'year',
'Open.Close.Te$habitat' = 'habitat',
'Open.Close.Te$season' = 'season'))
Habitat.Prcnt.C<-aggregate(list( Mean = Open.Close.Te.Ex.C$Te.Prnct.C),
list(habitat = Open.Close.Te.Ex.C$habitat,
year = Open.Close.Te.Ex.C$year,
season = Open.Close.Te.Ex.C$season),
mean)
#head(Habitat.Prcnt.C)
Habitat.Prcnt.D<-aggregate(list( Mean = Open.Close.Te.Ex.D$Te.Prnct.D),
list(habitat = Open.Close.Te.Ex.D$habitat,
year = Open.Close.Te.Ex.D$year,
season = Open.Close.Te.Ex.D$season),
mean)
#head(Habitat.Prcnt.D)
#Open-Closed Habitat Comparison
qqnorm(Open.Close.Te.Ex.C$Te.Prnct.C) #bimodal possibly?
qqline(Open.Close.Te.Ex.C$Te.Prnct.C)
#Tsel-C Comparison of Te in Tsel
t.test(Te.Prnct.C ~ habitat, Open.Close.Te.Ex.C)
##
## Welch Two Sample t-test
##
## data: Te.Prnct.C by habitat
## t = -13.64, df = 238.6, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1842 -0.1377
## sample estimates:
## mean in group closed mean in group open
## 0.0244 0.1854
qqnorm(Open.Close.Te.Ex.D$Te.Prnct.D) #More bimodal possibly?
qqline(Open.Close.Te.Ex.D$Te.Prnct.D)
#Tsel-C Comparison of Te in Tsel
t.test(Te.Prnct.D ~ habitat, Open.Close.Te.Ex.D)
##
## Welch Two Sample t-test
##
## data: Te.Prnct.D by habitat
## t = -6.514, df = 292.4, p-value = 3.189e-10
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.07151 -0.03833
## sample estimates:
## mean in group closed mean in group open
## 0.03125 0.08617
In general, closed habitats had less time percentage of Te in Tsel. Furthermore, there is a significant difference between time Te in Tsel for both calculation. So why do box turtles select forested habitat? Could it be due to the conditions at the northern limit of their distribution?
#Merging Data for Thermal Exploitation
#Data:
#Tb.Ex.C
#Tb.Ex.D
#Te.Ex.C
#Te.Ex.D
#Constant Calculation
#Tb.Ex.C<-rename(Tb.Ex.C, c('cut(meanTb$date, "1 day")' = "date"))
#Te.Ex.C<-rename(Te.Ex.C, c('cut(meanTe$date, "1 day")' = "date"))
#Ex.Constant<-merge(Tb.Ex.C, Te.Ex.C, by='date')
#head(Ex.Constant)
#Ex.Constant<-rename(Ex.Constant, c("Tsel.C.x" = "Tsel.Tb",
#'meanTe$season' = 'season',
#'meanTe$year' = 'year',
#'Tsel.C.y' = 'Tsel.Te'))
#Ex.Constant$Tb.Prcnt<-Ex.Constant$Tsel.Tb/24
#Ex.Constant$Te.Prcnt<-Ex.Constant$Tsel.Te/24
#Ex.Constant$Ex<-Ex.Constant$Tb.Prcnt/Ex.Constant$Te.Prcnt
#Ex.Constant
#Dynamic Calculation
#Tb.Ex.D<-rename(Tb.Ex.D, c('cut(meanTb$date, "1 day")' = "date"))
#Te.Ex.D<-rename(Te.Ex.D, c('cut(meanTe$date, "1 day")' = "date"))
#Ex.Dynamic<-merge(Tb.Ex.D, Te.Ex.D, by='date')
#head(Ex.Dynamic)
#Ex.Dynamic<-rename(Ex.Dynamic, c("Tsel.D.x" = "Tsel.Tb",
#'meanTe$season' = 'season',
#'meanTe$year' = 'year',
#'Tsel.D.y' = 'Tsel.Te'))
#Ex.Dynamic$Tb.Prcnt<-Ex.Dynamic$Tsel.Tb/24
#Ex.Dynamic$Te.Prcnt<-Ex.Dynamic$Tsel.Te/24
#Ex.Dynamic$Ex<-Ex.Dynamic$Tb.Prcnt/Ex.Dynamic$Te.Prcnt
#Ex.Dynamic
#View(Ex.Dynamic)
Here's where it gets a little tricky with the Thermal exploitation. Issues that we run into are: 1. Division by zero, where Te in Tsel is not observed in a 24-h period yet Tb in Tsel is (possibly finding thermal niches that the models weren't placed in? Don't know, we placed a lot of models) 2. Tb in Tsel being greater than Te in Tsel (values over 100%)
8-15: I think I'm going to omit the previous chunk of code.
In general, habitat selected doesn't reflect thermal preference because open habitats are significantly higher (%Te in Tsel) than closed habitats.
#Objective 3: Thermoregulatory strategy Box turtles would rely on thermoconformity given their limited capacity to seek out basking sites.
I. How accurate are box turtles at maintaining thermal preference (thermal accuracy (db))?
*Report the mean +/- SE of db to compare with other studies of turtles
II. How are box turtles thermoregulating? (db plotted against de, m = 1 is thermoconformity, m = 0 thermoregulation) (Effectiveness of Thermoregulation)
Statistics done by Hank, with some modifications
library(dplyr) # data manipulation
##
## Attaching package: 'dplyr'
##
## The following object is masked from 'package:adehabitatLT':
##
## id
##
## The following object is masked from 'package:MASS':
##
## select
##
## The following objects are masked from 'package:plyr':
##
## arrange, desc, failwith, id, mutate, summarise, summarize
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lattice) # graphics
library(ggplot2) # graphics
library(lme4) # stats for mixed models
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following object is masked from 'package:reshape':
##
## expand
##
## The following objects are masked from 'package:base':
##
## crossprod, tcrossprod
##
## Loading required package: Rcpp
#library(R2jags) # Bayesian analysis - no idea how Hank does it, can't get package to work
df <- read.csv("2014 2015 Tb Tex db CORRECTED.csv",
header=T,
sep='\t')
## Assess only the complete data set
df <- df[complete.cases(df), ]
# transform and relabel for convenience
df$y <- df$year - 2014 # this will be useful when I do the Bayesian model
df$yf <- as.factor(df$y)
df$season <- factor(df$season, levels=c("spring", "summer", "fall"))
# check structure
# with(df, table(season,turtle, year))
# Note: there are different turtles in different seasons and years. We have to assume that turtles do not vary systematically across years and seasons.
# Convert time and date into variables that are more useful
t <- strptime(df$date, "%Y-%m-%d %H:%M:%S")
## samples is the relative time (H/M) within a d day
df$samples <- as.numeric(format(t, "%H")) + as.numeric(format(t, "%M"))/60 +
as.numeric(format(t, "%S"))/(60*60)
## day is the date
df$day <- as.numeric(format(t, "%y")) +
as.numeric(format(t, "%m"))/12 + as.numeric(format(t, "%d")) / 365
# xyplot( Tb ~ Tex|turtle, groups = day, data=df , type="smooth")
# log ratio
# > 0 means body temp > shell
# < 0 mean body temp < shell
df$log.ratio <- log( df$Tb/df$Tex )
#A few graphs to get the picture of the times series.
# one day, one turtle
#xyplot(db.NEW.signs ~ samples, groups = turtle, data = df, type="l",
# subset = day == min(day) )
# one turtle
#r1l3 <- subset(df, turtle == "R1L3")
#xyplot(db.NEW.signs ~ samples, groups = day, data = r1l3, type="l")
# ggplot(data=r1l3, aes(samples, db.NEW.setp,
# colour = as.factor(day),
# group=as.factor(day))) + geom_path()
Removing some of the misc. graphs to focus on the model assumptions/testing to see if I can get something to report for the manuscript in the results.
#Aggregate within turtles and days
#This will make summarize turtle teps for each day,
#including average dbdb and standard deviation of body
#and shell temperatures.
a1 <- select(df, turtle, season, yf, samples, day, Tb, Tex,
db.NEW.signs, db.OLD.signs, log.ratio)
# It was suggested we not use fall data...
# a1s <- filter(a1, season != "fall")
a1s <- a1
a2 <- group_by(a1s, turtle, day, season, yf)
a3 <- summarise(a2,
mn.b = mean(Tb),
mn.x = mean(Tex),
sd.b = sd(Tb),
sd.x = sd( Tex ),
sd.xb = sd.x - sd.b,
mn.db.new = mean( abs( db.NEW.signs ) ),
mn.db.old = mean( abs( db.OLD.signs ) ),
sd.db.new = sd(db.NEW.signs ),
sd.db.old = sd( db.OLD.signs )
)
#Does average daily body temp vary with season? Here we address this question using a graph and stats.
#Plot of mean Body Temperature for year and season
ggplot(a3, aes(season, mn.b, colour = yf) ) + geom_boxplot()
#Stats.
Tbm0 <- lmer(mn.b ~ season * yf + (1|turtle), data = a3)
Tbm1<- update(Tbm0, . ~ . - season:yf)
anova(Tbm0, Tbm1)
## refitting model(s) with ML (instead of REML)
## Data: a3
## Models:
## Tbm1: mn.b ~ season + yf + (1 | turtle)
## Tbm0: mn.b ~ season * yf + (1 | turtle)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## Tbm1 6 1465 1488 -727 1453
## Tbm0 8 1448 1479 -716 1432 21 2 2.8e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(Tbm0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mn.b ~ season * yf + (1 | turtle)
## Data: a3
##
## REML criterion at convergence: 1431
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.877 -0.573 0.140 0.646 2.475
##
## Random effects:
## Groups Name Variance Std.Dev.
## turtle (Intercept) 0.592 0.769
## Residual 5.109 2.260
## Number of obs: 317, groups: turtle, 26
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 20.819 0.457 45.5
## seasonsummer -0.368 0.672 -0.5
## seasonfall -5.422 0.629 -8.6
## yf1 -0.929 0.687 -1.4
## seasonsummer:yf1 2.284 0.946 2.4
## seasonfall:yf1 5.353 1.081 5.0
##
## Correlation of Fixed Effects:
## (Intr) ssnsmm ssnfll yf1 ssns:1
## seasonsummr -0.680
## seasonfall -0.727 0.495
## yf1 -0.666 0.453 0.484
## ssnsmmr:yf1 0.483 -0.711 -0.352 -0.726
## sesnfll:yf1 0.423 -0.288 -0.582 -0.636 0.461
Depends on the interaction of season and year: p < 0.05 Need to report estimate, std error, t-value, and p-value?
# Create a variable that includes each season x year combo as a different factor levels (3 seasons x 2 y).
sxy <- interaction(a3$season, a3$yf, drop=TRUE)
# Set fall of the first year as the control. This allows all the model coefficients to test whether the other levels differ from fall 2014
sxy <- relevel(sxy, "fall.0")
# test whether some year-seasons differ from fall 2014 [using bayesian analysis?]
Tbm0b <- lmer(mn.b ~ sxy + (1|turtle), data = a3)
Tb.bm0 <- confint(Tbm0b, method = "boot")
## Computing bootstrap confidence intervals ...
## Computing bootstrap confidence intervals ...
Tb.bm0
## 2.5 % 97.5 %
## sd_(Intercept)|turtle 0.2944 1.174
## sigma 2.0760 2.431
## (Intercept) 14.5890 16.225
## sxyspring.0 4.1997 6.584
## sxysummer.0 3.7802 6.254
## sxyspring.1 3.1720 5.737
## sxysummer.1 5.2289 7.583
## sxyfall.1 2.7035 6.267
All season:year combos are greater than fall 2014 (or sigma? I'm pretty sure). Random effect (.sig01, or sd_(Intercept)|turtle) is small.
Does daily shell temperature (Tshell) vary by season, year, or the interaction?
#Plot of mean Shell Temperature for year and season
ggplot(a3, aes(season, mn.x, colour = yf) ) + geom_boxplot()
#Stats.
Texm0 <- lmer(mn.x ~ season * yf + (1|turtle), data = a3)
Texm1<- update(Texm0, . ~ . - season:yf)
anova(Texm0, Texm1)
## refitting model(s) with ML (instead of REML)
## Data: a3
## Models:
## Texm1: mn.x ~ season + yf + (1 | turtle)
## Texm0: mn.x ~ season * yf + (1 | turtle)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## Texm1 6 1538 1561 -763 1526
## Texm0 8 1520 1550 -752 1504 22.3 2 1.4e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(Texm0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mn.x ~ season * yf + (1 | turtle)
## Data: a3
##
## REML criterion at convergence: 1503
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.411 -0.542 0.139 0.667 2.448
##
## Random effects:
## Groups Name Variance Std.Dev.
## turtle (Intercept) 0.377 0.614
## Residual 6.590 2.567
## Number of obs: 317, groups: turtle, 26
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 20.263 0.438 46.2
## seasonsummer -0.386 0.639 -0.6
## seasonfall -5.814 0.596 -9.8
## yf1 -0.700 0.659 -1.1
## seasonsummer:yf1 2.246 0.905 2.5
## seasonfall:yf1 5.327 1.030 5.2
##
## Correlation of Fixed Effects:
## (Intr) ssnsmm ssnfll yf1 ssns:1
## seasonsummr -0.686
## seasonfall -0.735 0.504
## yf1 -0.666 0.456 0.490
## ssnsmmr:yf1 0.484 -0.706 -0.356 -0.727
## sesnfll:yf1 0.426 -0.292 -0.579 -0.639 0.465
Depends on the interaction of season and year: p < 0.05. Similar to Tb. Interaction is denoted by the asterik (*) in the output.
# test whether some year-seasons differ from fall 2014 [using bayesian analysis?]
Texm0b <- lmer(mn.x ~ sxy + (1|turtle), data = a3)
Tex.bm0 <- confint(Texm0b, method = "boot")
## Computing bootstrap confidence intervals ...
## Computing bootstrap confidence intervals ...
Tex.bm0
## 2.5 % 97.5 %
## sd_(Intercept)|turtle 0.000 1.001
## sigma 2.362 2.795
## (Intercept) 13.609 15.203
## sxyspring.0 4.824 6.949
## sxysummer.0 4.303 6.701
## sxyspring.1 3.831 6.291
## sxysummer.1 5.825 8.110
## sxyfall.1 3.146 6.238
All season:year combos are greater than fall 2014 Random effect (.sig01, or sd_(Intercept)|turtle) is small.
Does daily db vary by season, year, or the interaction? Dynamic Tsel Calculations:
#Plot
ggplot(a3, aes(season, mn.db.new, colour = yf) ) + geom_boxplot()
Dbm0d <- lmer(mn.db.new ~ season * yf + (1|turtle), data = a3)
qplot( fitted(Dbm0d), resid(Dbm0d)) + geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
qqnorm(resid(Dbm0d))
qqline(resid(Dbm0d))
#Data conform reasonably well to model assumptions. Test season by year effect.
Dbm1d <- update(Dbm0d, . ~ . - season:yf)
anova(Dbm0d, Dbm1d)
## refitting model(s) with ML (instead of REML)
## Data: a3
## Models:
## Dbm1d: mn.db.new ~ season + yf + (1 | turtle)
## Dbm0d: mn.db.new ~ season * yf + (1 | turtle)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## Dbm1d 6 1440 1462 -714 1428
## Dbm0d 8 1420 1450 -702 1404 23.9 2 6.6e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(Dbm0d)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mn.db.new ~ season * yf + (1 | turtle)
## Data: a3
##
## REML criterion at convergence: 1404
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.138 -0.663 -0.144 0.478 2.873
##
## Random effects:
## Groups Name Variance Std.Dev.
## turtle (Intercept) 0.371 0.609
## Residual 4.749 2.179
## Number of obs: 317, groups: turtle, 26
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 7.420 0.398 18.64
## seasonsummer 0.340 0.583 0.58
## seasonfall 5.082 0.544 9.34
## yf1 0.881 0.598 1.47
## seasonsummer:yf1 -2.265 0.823 -2.75
## seasonfall:yf1 -5.112 0.937 -5.45
##
## Correlation of Fixed Effects:
## (Intr) ssnsmm ssnfll yf1 ssns:1
## seasonsummr -0.683
## seasonfall -0.732 0.500
## yf1 -0.666 0.455 0.487
## ssnsmmr:yf1 0.484 -0.708 -0.354 -0.727
## sesnfll:yf1 0.425 -0.290 -0.580 -0.638 0.463
Yes, it depends on the interaction - As per tradition.
# test whether some year-seasons differ from fall 2014
Db.m0d <- lmer(mn.db.new ~ sxy + (1|turtle), data = a3)
dm0 <- confint(Db.m0d, method = "boot")
## Computing bootstrap confidence intervals ...
## Computing bootstrap confidence intervals ...
dm0
## 2.5 % 97.5 %
## sd_(Intercept)|turtle 0.000 0.9962
## sigma 1.996 2.3643
## (Intercept) 11.714 13.1951
## sxyspring.0 -6.092 -4.0189
## sxysummer.0 -5.890 -3.5601
## sxyspring.1 -5.362 -2.9951
## sxysummer.1 -7.168 -5.1123
## sxyfall.1 -5.649 -2.7434
The db in all seasons differs from db of fall 2014. They're also all negative (other seasons, not sigma/fall 2014)
Constant Tsel Calculations
#Plot
ggplot(a3, aes(season, mn.db.old, colour = yf) ) + geom_boxplot()
Dbm0c <- lmer(mn.db.old ~ season * yf + (1|turtle), data = a3)
qplot( fitted(Dbm0c), resid(Dbm0c)) + geom_smooth()
## geom_smooth: method="auto" and size of largest group is <1000, so using loess. Use 'method = x' to change the smoothing method.
qqnorm(resid(Dbm0c))
qqline(resid(Dbm0c))
#Data conform reasonably well to model assumptions. Test season by year effect.
Dbm1c <- update(Dbm0c, . ~ . - season:yf)
anova(Dbm0c, Dbm1c)
## refitting model(s) with ML (instead of REML)
## Data: a3
## Models:
## Dbm1c: mn.db.old ~ season + yf + (1 | turtle)
## Dbm0c: mn.db.old ~ season * yf + (1 | turtle)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## Dbm1c 6 1437 1459 -712 1425
## Dbm0c 8 1419 1449 -701 1403 21.8 2 1.9e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(Dbm0c)
## Linear mixed model fit by REML ['lmerMod']
## Formula: mn.db.old ~ season * yf + (1 | turtle)
## Data: a3
##
## REML criterion at convergence: 1402
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.295 -0.681 -0.145 0.555 2.958
##
## Random effects:
## Groups Name Variance Std.Dev.
## turtle (Intercept) 0.489 0.699
## Residual 4.677 2.163
## Number of obs: 317, groups: turtle, 26
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 8.747 0.425 20.56
## seasonsummer 0.287 0.625 0.46
## seasonfall 5.166 0.584 8.85
## yf1 0.837 0.639 1.31
## seasonsummer:yf1 -2.205 0.880 -2.51
## seasonfall:yf1 -5.112 1.004 -5.09
##
## Correlation of Fixed Effects:
## (Intr) ssnsmm ssnfll yf1 ssns:1
## seasonsummr -0.681
## seasonfall -0.728 0.496
## yf1 -0.666 0.454 0.485
## ssnsmmr:yf1 0.484 -0.710 -0.352 -0.726
## sesnfll:yf1 0.424 -0.289 -0.582 -0.636 0.462
Yes, it depends on the interaction - As per tradition.
# test whether some year-seasons differ from fall 2014
Db.m0c <- lmer(mn.db.old ~ sxy + (1|turtle), data = a3)
cm0 <- confint(Db.m0c, method = "boot")
## Computing bootstrap confidence intervals ...
## Computing bootstrap confidence intervals ...
cm0
## 2.5 % 97.5 %
## sd_(Intercept)|turtle 1.254e-07 1.052
## sigma 1.990e+00 2.350
## (Intercept) 1.317e+01 14.705
## sxyspring.0 -6.267e+00 -4.052
## sxysummer.0 -6.053e+00 -3.672
## sxyspring.1 -5.534e+00 -3.250
## sxysummer.1 -7.394e+00 -5.178
## sxyfall.1 -5.841e+00 -2.745
Same with the dynamic calculation, all seasons differ from fall 2014.
#Does environment temp vary by habitat, year, and season?
Environmental data management.
env <- read.table("2014 2015 Te de CORRECTED.csv", header=T, sep=",")
#str(env)
env$yf <- as.factor( env$year - 2014 )
env$season <- factor(env$season, levels=c("spring", "summer", "fall"))
t <- strptime(env$date, "%Y-%m-%d %H:%M:%S")
env$samples <- as.numeric(format(t, "%H")) +
as.numeric(format(t, "%M"))/60 +
as.numeric(format(t, "%S"))/60^2
env$day <- as.numeric(format(t, "%y")) +
as.numeric(format(t, "%m"))/12 + as.numeric(format(t, "%d")) / 365
#summary(env)
#Take a peak at the two different ways of assessing environmental quality.
#env14 <- subset( env, year == 2014 & day == min(day) )
#dim(env14)
#names(env14)
#xyplot(de.NEW.signs ~ samples, groups = probe, data = env14, type="l")
#xyplot(de.OLD.signs ~ samples, groups = probe, data = env14, type="l")
#xyplot(de.NEW.signs ~ de.OLD.signs, groups = probe, data = env14, type="p")
Going to use Hank's aggregation and try to include the open/closed, plus closed analyses.
#Aggregate within day and operant model ('probe').
e1 <- select(env, probe, season, yf, habitat, samples, day,
Te, de.NEW.signs, de.OLD.signs)
e2 <- group_by(e1, probe, day, season, yf)
e3 <- summarise(e2,
habitat = unique(habitat),
mn.e = mean(Te),
mn.de.new = mean(abs(de.NEW.signs)),
mn.de.old = mean(abs(de.OLD.signs)),
sd.de.new = sd(de.NEW.signs),
sd.de.old = sd(de.OLD.signs)
)
#ggplot(e3, aes(season, mn.e, colour = yf) ) + geom_boxplot()
#ggplot(e3, aes(season, mn.de.new, colour = yf) ) + geom_boxplot()
#ggplot(e3, aes(season, mn.de.old, colour = yf) ) + geom_boxplot()
#Does daily temperature quality vary by season, year, or the interaction? Going to test this with open and closed habitats, or try.
Dynamic Tsel Calculation
Dem0d <- lmer(log(mn.de.new) ~ season * yf + (1|probe), data = e3)
qplot(fitted(Dem0d), resid(Dem0d)) + geom_smooth()
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
qqnorm(resid(Dem0d)); qqline(resid(Dem0d))
#Assumptions reasonably met.
Dem1d <- update(Dem0d, . ~ . - season:yf)
anova(Dem0d, Dem1d)
## refitting model(s) with ML (instead of REML)
## Data: e3
## Models:
## Dem1d: log(mn.de.new) ~ season + yf + (1 | probe)
## Dem0d: log(mn.de.new) ~ season * yf + (1 | probe)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## Dem1d 6 1353 1393 -671 1341
## Dem0d 8 1089 1141 -536 1073 269 2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(Dem0d)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(mn.de.new) ~ season * yf + (1 | probe)
## Data: e3
##
## REML criterion at convergence: 1115
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.317 -0.650 0.078 0.698 3.201
##
## Random effects:
## Groups Name Variance Std.Dev.
## probe (Intercept) 0.00563 0.075
## Residual 0.06948 0.264
## Number of obs: 5411, groups: probe, 90
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.0229 0.0133 152.3
## seasonsummer 0.0093 0.0133 0.7
## seasonfall 0.5549 0.0129 42.8
## yf1 -0.1130 0.0207 -5.4
## seasonsummer:yf1 -0.2203 0.0174 -12.6
## seasonfall:yf1 -0.2814 0.0182 -15.4
##
## Correlation of Fixed Effects:
## (Intr) ssnsmm ssnfll yf1 ssns:1
## seasonsummr -0.470
## seasonfall -0.482 0.501
## yf1 -0.640 0.301 0.309
## ssnsmmr:yf1 0.359 -0.763 -0.382 -0.410
## sesnfll:yf1 0.343 -0.356 -0.711 -0.389 0.473
Depends on the interaction. Thermal quality de depends on season x year interaction of all models combined for the dynamic calculation.
Constant Tsel Calculation:
Dem0c <- lmer(log(mn.de.old) ~ season * yf + (1|probe), data = e3)
qplot(fitted(Dem0c), resid(Dem0c)) + geom_smooth()
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
qqnorm(resid(Dem0c)); qqline(resid(Dem0c))
# assumptions reasonably met.
Dem1c <- update(Dem0c, . ~ . - season:yf)
anova(Dem0c, Dem1c)
## refitting model(s) with ML (instead of REML)
## Data: e3
## Models:
## Dem1c: log(mn.de.old) ~ season + yf + (1 | probe)
## Dem0c: log(mn.de.old) ~ season * yf + (1 | probe)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## Dem1c 6 21 60.6 -4.5 9
## Dem0c 8 -188 -134.8 101.8 -204 213 2 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(Dem0c)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log(mn.de.old) ~ season * yf + (1 | probe)
## Data: e3
##
## REML criterion at convergence: -161.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.305 -0.629 0.096 0.696 3.461
##
## Random effects:
## Groups Name Variance Std.Dev.
## probe (Intercept) 0.00769 0.0877
## Residual 0.05446 0.2334
## Number of obs: 5411, groups: probe, 90
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 2.19117 0.01390 157.7
## seasonsummer 0.00996 0.01182 0.8
## seasonfall 0.49997 0.01151 43.4
## yf1 -0.11971 0.02239 -5.3
## seasonsummer:yf1 -0.17232 0.01548 -11.1
## seasonfall:yf1 -0.22231 0.01617 -13.8
##
## Correlation of Fixed Effects:
## (Intr) ssnsmm ssnfll yf1 ssns:1
## seasonsummr -0.398
## seasonfall -0.408 0.503
## yf1 -0.621 0.247 0.253
## ssnsmmr:yf1 0.304 -0.764 -0.384 -0.337
## sesnfll:yf1 0.290 -0.358 -0.712 -0.319 0.474
Again, depends on the interaction. Thermal quality de depends on season x year interaction of all models combined for the constant calculation.
But wait, there's more!
Dem0c.h <- lmer(log(mn.de.old) ~ season * yf * habitat + (1|probe), data = e3)
qplot(fitted(Dem0c.h), resid(Dem0c.h)) + geom_smooth()
## geom_smooth: method="auto" and size of largest group is >=1000, so using gam with formula: y ~ s(x, bs = "cs"). Use 'method = x' to change the smoothing method.
qqnorm(resid(Dem0c.h)); qqline(resid(Dem0c.h))
# assumptions reasonably met.
Dem1c.h <- update(Dem0c.h, . ~ . - season:yf:habitat)
anova(Dem0c.h, Dem1c.h)
## refitting model(s) with ML (instead of REML)
## Data: e3
## Models:
## Dem1c.h: log(mn.de.old) ~ season + yf + habitat + (1 | probe) + season:yf +
## Dem1c.h: season:habitat + yf:habitat
## Dem0c.h: log(mn.de.old) ~ season * yf * habitat + (1 | probe)
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## Dem1c.h 12 -405 -326 214 -429
## Dem0c.h 14 -414 -322 221 -442 12.9 2 0.0016 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
If I'm interpretting the results of including habitat type into the lmer(), then although there is a significant difference, it's with the removal of season:yf:habitat and just the additive (season + yf + habitat, not other).
#How does thermal accuracy (db) vary with calculation method? Wild attempt because, why not?
t.test(df$db.NEW.signs, df$db.OLD.signs, paired=T)
##
## Paired t-test
##
## data: df$db.NEW.signs and df$db.OLD.signs
## t = 178.4, df = 88987, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.566 1.601
## sample estimates:
## mean of the differences
## 1.584
t.test(df$db.NEW, df$db.OLD, paired=T)
##
## Paired t-test
##
## data: df$db.NEW and df$db.OLD
## t = 150, df = 88987, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.378 1.415
## sample estimates:
## mean of the differences
## 1.397
No idea
#Does E differ from zero?
db=|Tsel-Tb| de=|Tsel-Te| E'=de-db
#Use only operants from closed habitat. We need to justify this.
#justification:
#Box turtles remain in closed habitats according to chi-square of LULC values.
e4 <- group_by(e1, day, season, habitat, yf)
e5 <- summarise(e4,
mn.e = mean(Te),
sd.e = sd(Te),
mn.de.new = mean(abs(de.NEW.signs)),
mn.de.old = mean(abs(de.OLD.signs)),
sd.de.new = sd(de.NEW.signs),
sd.de.old = sd(de.OLD.signs)
)
# DAILY MEAN TEMPS
#ggplot(e5, aes(season, mn.e, colour = yf) ) +
#geom_boxplot() +
#facet_grid(.~habitat)
# DAILY STANDARD DEV OF TEMPS
#ggplot(e5, aes(season, sd.e, colour = yf) ) +
#geom_boxplot() +
#facet_grid(.~habitat)
Q: How does this (or something else) justify not using operants from the open?
#Analysis using only probes from closed habitat.
e1ss <- filter(e1, habitat == "closed")
e2a <- group_by(e1ss, day, season, yf)
e3a <- summarise(e2a,
mn.e = mean(Te),
mn.de.new = mean(abs(de.NEW.signs)),
mn.de.old = mean(abs(de.OLD.signs)),
sd.de.new = sd(de.NEW.signs),
sd.de.old = sd(de.OLD.signs)
)
a4 <- select(a3, turtle, day, season, yf, mn.db.new, mn.db.old, sd.db.new, sd.db.old)
e4 <- subset(e3a, select=c(day, mn.de.new, mn.de.old, sd.de.new, sd.de.old))
ae4 <- left_join(a4, e4, by = "day")
#ae3 <- merge(a4, e4, by = "day", keep.x=TRUE)
dim(a4)
## [1] 317 8
dim(ae4)
## [1] 317 12
ae4$E.old <- (ae4$mn.de.old - ae4$mn.db.old)
ae4$V.old <- (ae4$sd.de.old - ae4$sd.db.old)
ae4$E.new <- (ae4$mn.de.new - ae4$mn.db.new)
ae4$V.new <- (ae4$sd.de.new - ae4$sd.db.new)
hist(ae4$E.new, main = "Efficiency based on closed habitat probes")
hist(ae4$E.old, main = "Efficiency based on closed habitat probes")
hist(ae4$V.new)
hist(ae4$V.old)
ggplot(data=ae4, aes(season, E.new, colour=yf)) + geom_boxplot()
em1 <- lmer(E.new ~ 1 + (1|turtle), data = ae4)
# check assumptions.
plot(fitted(em1), resid(em1))
abline(h=0, lty=2, col=2)
qqnorm(resid(em1))
qqline(resid(em1))
# Assumptions met
# Test
summary(em1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: E.new ~ 1 + (1 | turtle)
## Data: ae4
##
## REML criterion at convergence: 806.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.188 -0.513 0.015 0.614 2.690
##
## Random effects:
## Groups Name Variance Std.Dev.
## turtle (Intercept) 0.174 0.417
## Residual 0.665 0.816
## Number of obs: 316, groups: turtle, 26
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.5701 0.0939 6.07
emV1 <- lmer(V.new ~ 1 + (1|turtle), data = ae4)
# check assumptions.
plot(fitted(emV1), resid(emV1))
abline(h=0, lty=2, col=2)
qqnorm(resid(emV1))
qqline(resid(emV1))
# Assumptions met
# Test
summary(emV1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: V.new ~ 1 + (1 | turtle)
## Data: ae4
##
## REML criterion at convergence: 971
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.767 -0.650 0.059 0.588 3.523
##
## Random effects:
## Groups Name Variance Std.Dev.
## turtle (Intercept) 0.778 0.882
## Residual 1.045 1.022
## Number of obs: 316, groups: turtle, 26
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.465 0.183 -2.55
#Analysis using all probes.
#all.e1ss <- filter(e1, habitat == "closed")
all.e2a <- group_by(e1, day, season, yf)
all.e3a <- summarise(e2a,
mn.e = mean(Te),
mn.de.new = mean(abs(de.NEW.signs)),
mn.de.old = mean(abs(de.OLD.signs)),
sd.de.new = sd(de.NEW.signs),
sd.de.old = sd(de.OLD.signs)
)
all.a4 <- select(a3, turtle, day, season, yf, mn.db.new, mn.db.old, sd.db.new, sd.db.old)
all.e4 <- subset(all.e3a, select=c(day, mn.de.new, mn.de.old, sd.de.new, sd.de.old))
all.ae4 <- left_join(all.a4, all.e4, by = "day")
#ae3 <- merge(a4, e4, by = "day", keep.x=TRUE)
dim(all.a4)
## [1] 317 8
dim(all.ae4)
## [1] 317 12
all.ae4$E.old <- (ae4$mn.de.old - ae4$mn.db.old)
all.ae4$V.old <- (ae4$sd.de.old - ae4$sd.db.old)
all.ae4$E.new <- (ae4$mn.de.new - ae4$mn.db.new)
all.ae4$V.new <- (ae4$sd.de.new - ae4$sd.db.new)
hist(all.ae4$E.new, main = "Efficiency based on all habitat probes")
hist(all.ae4$E.old, main = "Efficiency based on all habitat probes")
hist(all.ae4$V.new)
hist(all.ae4$V.old)
ggplot(data=all.ae4, aes(season, E.new, colour=yf)) + geom_boxplot()
all.em1 <- lmer(E.new ~ 1 + (1|turtle), data = all.ae4)
# check assumptions.
plot(fitted(all.em1), resid(all.em1))
abline(h=0, lty=2, col=2)
qqnorm(resid(all.em1))
qqline(resid(all.em1))
# Assumptions met
# Test
summary(all.em1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: E.new ~ 1 + (1 | turtle)
## Data: all.ae4
##
## REML criterion at convergence: 806.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.188 -0.513 0.015 0.614 2.690
##
## Random effects:
## Groups Name Variance Std.Dev.
## turtle (Intercept) 0.174 0.417
## Residual 0.665 0.816
## Number of obs: 316, groups: turtle, 26
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 0.5701 0.0939 6.07
all.emV1 <- lmer(V.new ~ 1 + (1|turtle), data = all.ae4)
# check assumptions.
plot(fitted(all.emV1), resid(all.emV1))
abline(h=0, lty=2, col=2)
qqnorm(resid(all.emV1))
qqline(resid(all.emV1))
# Assumptions met
# Test
summary(all.emV1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: V.new ~ 1 + (1 | turtle)
## Data: all.ae4
##
## REML criterion at convergence: 971
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.767 -0.650 0.059 0.588 3.523
##
## Random effects:
## Groups Name Variance Std.Dev.
## turtle (Intercept) 0.778 0.882
## Residual 1.045 1.022
## Number of obs: 316, groups: turtle, 26
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.465 0.183 -2.55
#Thermoregulation according to db plotted against de
CB <- read.csv("Cost-Benefit Data - Copy.csv", sep=",", header=T)
#head(CB) #26 turtles, 2 years, 3 seasons.
#str(CB)
#Year to factor
CB$yf <- factor(CB$year)
#Thermal Quality (TQ or de) is based on when the turtles were in the field, all
#other corresponding values have been omitted. Thermal accuracy is (TA or db).
#str(CB)
#factor-integer-numeric-numeric-numeric-factor
Dynamic Calculation (done with Hank's Help)
qplot(de.NEW, db.NEW, data=CB, colour=yf)
lm.CB<-lm(db.NEW~de.NEW + yf +de.NEW:yf, data=CB)
summary(lm.CB)
##
## Call:
## lm(formula = db.NEW ~ de.NEW + yf + de.NEW:yf, data = CB)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.846 -0.432 0.147 0.313 0.974
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.63169 0.47157 1.34 0.19
## de.NEW 0.86942 0.05146 16.89 4.4e-14 ***
## yf2015 0.49616 0.77647 0.64 0.53
## de.NEW:yf2015 -0.00278 0.10968 -0.03 0.98
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.505 on 22 degrees of freedom
## Multiple R-squared: 0.953, Adjusted R-squared: 0.947
## F-statistic: 148 on 3 and 22 DF, p-value: 9.52e-15
summary.aov(lm.CB)
## Df Sum Sq Mean Sq F value Pr(>F)
## de.NEW 1 112.4 112.4 441.05 4.8e-16 ***
## yf 1 1.1 1.1 4.27 0.051 .
## de.NEW:yf 1 0.0 0.0 0.00 0.980
## Residuals 22 5.6 0.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
No difference in slopes (2014 = 2015 for TA v. TQ slope)
#ANCOVA testing against a null slope of m=1
lm.CB2<-lm(db.NEW~de.NEW+year, data=CB)
lm.CB3<-lm(db.NEW~de.NEW, data=CB)
Testing if slope differs from 0 or 1. The offset is used as a coefficient applied to the x-value.
#Dynamic
lm.0<-lm(db.NEW~de.NEW +offset(0*de.NEW), data=CB)
summary(lm.0)
##
## Call:
## lm(formula = db.NEW ~ de.NEW + offset(0 * de.NEW), data = CB)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1206 -0.3695 0.0543 0.3435 1.1314
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2209 0.3270 3.73 0.001 **
## de.NEW 0.8209 0.0409 20.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.528 on 24 degrees of freedom
## Multiple R-squared: 0.944, Adjusted R-squared: 0.941
## F-statistic: 403 on 1 and 24 DF, p-value: <2e-16
lm.1<-lm(db.NEW~de.NEW +offset(1*de.NEW), data=CB)
summary(lm.0)
##
## Call:
## lm(formula = db.NEW ~ de.NEW + offset(0 * de.NEW), data = CB)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1206 -0.3695 0.0543 0.3435 1.1314
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.2209 0.3270 3.73 0.001 **
## de.NEW 0.8209 0.0409 20.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.528 on 24 degrees of freedom
## Multiple R-squared: 0.944, Adjusted R-squared: 0.941
## F-statistic: 403 on 1 and 24 DF, p-value: <2e-16
#Constant
lm.0c<-lm(db.OLD~de.OLD +offset(0*de.OLD), data=CB)
summary(lm.0c)
##
## Call:
## lm(formula = db.OLD ~ de.OLD + offset(0 * de.OLD), data = CB)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0161 -0.2939 -0.0195 0.2188 1.2025
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9321 0.2848 3.27 0.0032 **
## de.OLD 0.8176 0.0421 19.41 3.5e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.549 on 24 degrees of freedom
## Multiple R-squared: 0.94, Adjusted R-squared: 0.938
## F-statistic: 377 on 1 and 24 DF, p-value: 3.52e-16
lm.1c<-lm(db.OLD~de.OLD +offset(1*de.OLD), data=CB)
summary(lm.0c)
##
## Call:
## lm(formula = db.OLD ~ de.OLD + offset(0 * de.OLD), data = CB)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.0161 -0.2939 -0.0195 0.2188 1.2025
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9321 0.2848 3.27 0.0032 **
## de.OLD 0.8176 0.0421 19.41 3.5e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.549 on 24 degrees of freedom
## Multiple R-squared: 0.94, Adjusted R-squared: 0.938
## F-statistic: 377 on 1 and 24 DF, p-value: 3.52e-16
No idea how to really interpret other than they differ from a slope of 0 and 1. Should we investigate at varying degrees of slope? such as 0.5, 0.25, 0.75?
# parameter estimates and their standard errors
coefs <- coef( summary(lm.CB) )
coefs
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.631693 0.47157 1.33955 1.941e-01
## de.NEW 0.869422 0.05146 16.89494 4.384e-14
## yf2015 0.496164 0.77647 0.63900 5.294e-01
## de.NEW:yf2015 -0.002778 0.10968 -0.02533 9.800e-01
# note here that the slope is about 0.87 (std. error = 0.05)
# looks as though it differs from 1.0
B1 <- coefs["de.NEW", "Estimate"]
B1.se <- coefs["de.NEW", "Std. Error"]
# Use the definition of a t-test
t <- ( B1 - 1 )/B1.se
t
## [1] -2.537
# degrees of freedom = n - p, where p = no. of parameters
df2 <- summary(lm.CB)$df[2]
# Use the cumulative t-distribution
pt(t, df2)
## [1] 0.009382
#[1] 0.009382075
yes it does differ from one.
another look.
summary(lm.CB)
##
## Call:
## lm(formula = db.NEW ~ de.NEW + yf + de.NEW:yf, data = CB)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.846 -0.432 0.147 0.313 0.974
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.63169 0.47157 1.34 0.19
## de.NEW 0.86942 0.05146 16.89 4.4e-14 ***
## yf2015 0.49616 0.77647 0.64 0.53
## de.NEW:yf2015 -0.00278 0.10968 -0.03 0.98
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.505 on 22 degrees of freedom
## Multiple R-squared: 0.953, Adjusted R-squared: 0.947
## F-statistic: 148 on 3 and 22 DF, p-value: 9.52e-15
summary.aov(lm.CB)
## Df Sum Sq Mean Sq F value Pr(>F)
## de.NEW 1 112.4 112.4 441.05 4.8e-16 ***
## yf 1 1.1 1.1 4.27 0.051 .
## de.NEW:yf 1 0.0 0.0 0.00 0.980
## Residuals 22 5.6 0.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qplot(de.NEW, db.NEW, data=CB, colour=yf) + geom_smooth(method="lm") +
geom_abline(intercept = 0, slope = 1, lty=3)
qplot(de.NEW, db.NEW, data=CB) + geom_smooth(method="lm") +
geom_abline(intercept = 0, slope = 1, lty=3) +
geom_point(aes(colour = yf))
Now for the Constant Calculation
qplot(de.OLD, db.OLD, data=CB, colour=yf)
lm.CBc<-lm(db.OLD~de.OLD + yf +de.OLD:yf, data=CB)
summary(lm.CBc)
##
## Call:
## lm(formula = db.OLD ~ de.OLD + yf + de.OLD:yf, data = CB)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8264 -0.3658 0.0945 0.3718 1.0301
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3556 0.4266 0.83 0.41
## de.OLD 0.8728 0.0538 16.23 1e-13 ***
## yf2015 0.6365 0.6595 0.97 0.34
## de.OLD:yf2015 -0.0283 0.1129 -0.25 0.80
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.527 on 22 degrees of freedom
## Multiple R-squared: 0.949, Adjusted R-squared: 0.942
## F-statistic: 138 on 3 and 22 DF, p-value: 2.1e-14
summary.aov(lm.CBc)
## Df Sum Sq Mean Sq F value Pr(>F)
## de.OLD 1 113.4 113.4 408.72 1.1e-15 ***
## yf 1 1.1 1.1 3.97 0.059 .
## de.OLD:yf 1 0.0 0.0 0.06 0.804
## Residuals 22 6.1 0.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
No difference in slopes (2014 = 2015 for TA v. TQ slope)
#ANCOVA testing against a null slope of m=1
lm.CB2c<-lm(db.OLD~de.OLD+year, data=CB)
lm.CB3c<-lm(db.OLD~de.OLD, data=CB)
#Google "R test whether slope differs from a constant"
# parameter estimates and their standard errors
coefs <- coef( summary(lm.CBc) )
coefs
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.35556 0.42658 0.8335 4.135e-01
## de.OLD 0.87283 0.05378 16.2284 9.978e-14
## yf2015 0.63652 0.65951 0.9651 3.450e-01
## de.OLD:yf2015 -0.02829 0.11288 -0.2507 8.044e-01
# note here that the slope is about 0.87 (std. error = 0.05)
# looks as though it differs from 1.0
B1.c <- coefs["de.OLD", "Estimate"]
B1.se.c <- coefs["de.OLD", "Std. Error"]
# Use the definition of a t-test
t <- ( B1.c - 1 )/B1.se.c
t
## [1] -2.364
# degrees of freedom = n - p, where p = no. of parameters
df2.c <- summary(lm.CBc)$df[2]
# Use the cumulative t-distribution
pt(t, df2.c)
## [1] 0.01365
yes it does differ from one.
another look.
summary(lm.CBc)
##
## Call:
## lm(formula = db.OLD ~ de.OLD + yf + de.OLD:yf, data = CB)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8264 -0.3658 0.0945 0.3718 1.0301
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3556 0.4266 0.83 0.41
## de.OLD 0.8728 0.0538 16.23 1e-13 ***
## yf2015 0.6365 0.6595 0.97 0.34
## de.OLD:yf2015 -0.0283 0.1129 -0.25 0.80
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.527 on 22 degrees of freedom
## Multiple R-squared: 0.949, Adjusted R-squared: 0.942
## F-statistic: 138 on 3 and 22 DF, p-value: 2.1e-14
summary.aov(lm.CBc)
## Df Sum Sq Mean Sq F value Pr(>F)
## de.OLD 1 113.4 113.4 408.72 1.1e-15 ***
## yf 1 1.1 1.1 3.97 0.059 .
## de.OLD:yf 1 0.0 0.0 0.06 0.804
## Residuals 22 6.1 0.3
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
qplot(de.OLD, db.OLD, data=CB, colour=yf) + geom_smooth(method="lm") +
geom_abline(intercept = 0, slope = 1, lty=3)
qplot(de.OLD, db.OLD, data=CB) + geom_smooth(method="lm") +
geom_abline(intercept = 0, slope = 1, lty=3) +
geom_point(aes(colour = yf))