Pulling in my data

wild.CTmin<-read.csv("~/Documents/Classes/FALL 17/BIO 570/WILD.SUMMER.CTMIN.csv")
wild.CTmax<-read.csv("~/Documents/Classes/FALL 17/BIO 570/WILD.SUMMER.CTMAX.csv")
lab.CTmin<-read.csv("~/Documents/Classes/FALL 17/BIO 570/LAB.PRECOLD.CTMIN.csv")
lab.CTmax<-read.csv("~/Documents/Classes/FALL 17/BIO 570/LAB.PRECOLD.CTMAX.csv")
lab.EWL<-read.csv("~/Documents/Classes/FALL 17/BIO 570/LAB.PRECOLD.EWL.csv")

Wild P. siculus during Summer 2017

Looking at CTmin

#Checking normality 
qqnorm(wild.CTmin$CTmin)
qqline(wild.CTmin$CTmin)

shapiro.test(wild.CTmin$CTmin)
## 
##  Shapiro-Wilk normality test
## 
## data:  wild.CTmin$CTmin
## W = 0.91984, p-value = 0.1916
#Checking Relationships
plot(wild.CTmin$CTmin~wild.CTmin$Mass*wild.CTmin$Class)

scatter.smooth(wild.CTmin$Mass,wild.CTmin$CTmin)

Did we start checking CTmin too late in trial 2?

as.factor(wild.CTmin$Trial)
##  [1] 1 1 1 1 1 2 2 2 2 2 3 3 3 3 3
## Levels: 1 2 3
plot(wild.CTmin$CTmin~wild.CTmin$Trial)

summary(aov(wild.CTmin$CTmin~wild.CTmin$Trial))
##                  Df Sum Sq Mean Sq F value Pr(>F)  
## wild.CTmin$Trial  1  24.15  24.149   4.981 0.0439 *
## Residuals        13  63.03   4.848                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

p=0.0439 for anova looking at effect of trial; consider removing trial 2

summer.CTmin<-wild.CTmin[-6:-10,1:5]
as.factor(summer.CTmin$Trial)
##  [1] 1 1 1 1 1 3 3 3 3 3
## Levels: 1 3

checking more assumptions

#Equal variance between males and females?
male.CTmin<-(summer.CTmin[c(1,3,5,6:7),5])
female.CTmin<-(summer.CTmin[c(2,4,8:10),5])
var.test(male.CTmin,female.CTmin)
## 
##  F test to compare two variances
## 
## data:  male.CTmin and female.CTmin
## F = 1.5659, num df = 4, denom df = 4, p-value = 0.6746
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##   0.1630348 15.0394666
## sample estimates:
## ratio of variances 
##           1.565872

GLM model with mass, class and trial

summer.CTmin.glm1<-glm(summer.CTmin$CTmin~summer.CTmin$Class+summer.CTmin$Trial+summer.CTmin$Mass+summer.CTmin$Class*summer.CTmin$Trial)
summary.glm(summer.CTmin.glm1)
## 
## Call:
## glm(formula = summer.CTmin$CTmin ~ summer.CTmin$Class + summer.CTmin$Trial + 
##     summer.CTmin$Mass + summer.CTmin$Class * summer.CTmin$Trial)
## 
## Deviance Residuals: 
##       1        2        3        4        5        6        7        8  
## -0.5919   0.2157  -0.3370  -0.2157   0.9290   1.3631  -1.3631   3.9614  
##       9       10  
## -2.9345  -1.0269  
## 
## Coefficients:
##                                        Estimate Std. Error t value
## (Intercept)                              3.9524     3.7864   1.044
## summer.CTmin$ClassM                     -1.9982     3.7158  -0.538
## summer.CTmin$Trial                       1.3354     1.1307   1.181
## summer.CTmin$Mass                        0.5392     0.4235   1.273
## summer.CTmin$ClassM:summer.CTmin$Trial   0.4596     1.6286   0.282
##                                        Pr(>|t|)
## (Intercept)                               0.344
## summer.CTmin$ClassM                       0.614
## summer.CTmin$Trial                        0.291
## summer.CTmin$Mass                         0.259
## summer.CTmin$ClassM:summer.CTmin$Trial    0.789
## 
## (Dispersion parameter for gaussian family taken to be 6.098956)
## 
##     Null deviance: 69.338  on 9  degrees of freedom
## Residual deviance: 30.495  on 5  degrees of freedom
## AIC: 51.528
## 
## Number of Fisher Scoring iterations: 2

“Trial” no longer significant. Consider removing “Trial” as a factor.

Proposed final GLM model for CTmin?

summer.CTmin.glm2<-glm(summer.CTmin$CTmin~summer.CTmin$Class+summer.CTmin$Mass, family=gaussian())
summary.glm(summer.CTmin.glm2)
## 
## Call:
## glm(formula = summer.CTmin$CTmin ~ summer.CTmin$Class + summer.CTmin$Mass, 
##     family = gaussian())
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.967  -1.690  -1.194   0.556   5.130  
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           6.3040     2.9566   2.132   0.0704 .
## summer.CTmin$ClassM  -2.1094     2.5522  -0.826   0.4358  
## summer.CTmin$Mass     0.6408     0.4648   1.379   0.2104  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 7.735783)
## 
##     Null deviance: 69.338  on 9  degrees of freedom
## Residual deviance: 54.150  on 7  degrees of freedom
## AIC: 53.271
## 
## Number of Fisher Scoring iterations: 2

Looking at CTmax

qqnorm(wild.CTmax$Ctmax)
qqline(wild.CTmax$Ctmax)

shapiro.test((wild.CTmax$Ctmax))
## 
##  Shapiro-Wilk normality test
## 
## data:  (wild.CTmax$Ctmax)
## W = 0.86223, p-value = 0.03269
##not normally distributed

scatter.smooth(wild.CTmax$Mass,wild.CTmax$Ctmax)

##size effect?

hist(wild.CTmax$Ctmax)

##left skewed. transform?

CTmax transformations

log.trans.summer.CTmax<-log(wild.CTmax$Ctmax)
hist(log.trans.summer.CTmax)

qqnorm(log.trans.summer.CTmax)
qqline(log.trans.summer.CTmax)

shapiro.test(log.trans.summer.CTmax)
## 
##  Shapiro-Wilk normality test
## 
## data:  log.trans.summer.CTmax
## W = 0.85186, p-value = 0.02354
##still not normally distributed

sqr.trans.wild.CTmax<-sqrt(wild.CTmax$Ctmax)
hist(sqr.trans.wild.CTmax)

qqnorm(sqr.trans.wild.CTmax)
qqline(sqr.trans.wild.CTmax)

shapiro.test(sqr.trans.wild.CTmax)
## 
##  Shapiro-Wilk normality test
## 
## data:  sqr.trans.wild.CTmax
## W = 0.8571, p-value = 0.02777
##nope

inverse.summer.CTmax<-1/(wild.CTmax$Ctmax)
hist(inverse.summer.CTmax)

qqnorm(inverse.summer.CTmax)
qqline(inverse.summer.CTmax)

shapiro.test(inverse.summer.CTmax)
## 
##  Shapiro-Wilk normality test
## 
## data:  inverse.summer.CTmax
## W = 0.84106, p-value = 0.01683
##nope

power.summer.CTmax<-(wild.CTmax$Ctmax)^15
hist(power.summer.CTmax)

qqnorm(power.summer.CTmax)
qqline(power.summer.CTmax)

shapiro.test(power.summer.CTmax)
## 
##  Shapiro-Wilk normality test
## 
## data:  power.summer.CTmax
## W = 0.9497, p-value = 0.5561
plot(power.summer.CTmax~wild.CTmax$Ctmax)

##this works but is not intuitive
##consider non-parametric? Though I don't know how to include mass as a covariate in a non-parametric GLM

kruskal.test(wild.CTmax$Ctmax~wild.CTmax$Class)
## 
##  Kruskal-Wallis rank sum test
## 
## data:  wild.CTmax$Ctmax by wild.CTmax$Class
## Kruskal-Wallis chi-squared = 0.11111, df = 1, p-value = 0.7389
boxplot(wild.CTmax$Ctmax~wild.CTmax$Class)

##probably un-equal variance

male.CTmax<-(wild.CTmax[c(1,3:7,12:14),5])
female.CTmax<-(wild.CTmax[c(2,8:11),5])
var.test(male.CTmax,female.CTmax)
## 
##  F test to compare two variances
## 
## data:  male.CTmax and female.CTmax
## F = 7.3538, num df = 8, denom df = 4, p-value = 0.07122
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##   0.8189437 37.1558984
## sample estimates:
## ratio of variances 
##           7.353771
##p=0.07122. moving forward with assumption of equal variance
##using transformed data (to 15th power)
##model looking at mass and class
summer.CTmax.glm1<-glm(power.summer.CTmax~wild.CTmax$Class+wild.CTmax$Mass)
summary(summer.CTmax.glm1)
## 
## Call:
## glm(formula = power.summer.CTmax ~ wild.CTmax$Class + wild.CTmax$Mass)
## 
## Deviance Residuals: 
##        Min          1Q      Median          3Q         Max  
## -1.936e+24  -6.299e+23  -1.394e+23   9.629e+23   1.796e+24  
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        7.097e+24  8.288e+23   8.563  3.4e-06 ***
## wild.CTmax$ClassM  9.030e+23  7.549e+23   1.196  0.25675    
## wild.CTmax$Mass   -3.676e+23  9.913e+22  -3.708  0.00345 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for gaussian family taken to be 1.4854e+48)
## 
##     Null deviance: 3.7076e+49  on 13  degrees of freedom
## Residual deviance: 1.6339e+49  on 11  degrees of freedom
## AIC: 1597.2
## 
## Number of Fisher Scoring iterations: 2

mass has an effect on CTmax (p=0.00345), class does not (p=.2568).

##Negative correlation between mass and CTmax
scatter.smooth(wild.CTmax$Ctmax~wild.CTmax$Mass)

##largest individuals are clearly males. Was there an outlier?
interaction.plot(wild.CTmax$Mass,wild.CTmax$Class,wild.CTmax$Ctmax,type="b")

## a 40C CTmax doesnt seem normal. Checking excel file...