Packages

library(tidyverse)
library(dplyr) 
library(ggpubr)
library(rcompanion)
library(car)
library(googlesheets4)
library(plotly)

DATA “species.txt”

gs4_deauth()
ss= "https://docs.google.com/spreadsheets/d/18J313ymQP0xj3Lf3P4WHnuqXzyUEGJGW2whdJgmUp0c/edit?usp=sharing"
hoja= "species"
rango = "B2:D92"
sp <- read_sheet(ss,
                    sheet= hoja,
                    range= rango,
                    col_names= TRUE
                    )
## ✔ Reading from "Deviance_Ancova_Reg_Glm".
## ✔ Range ''species'!B2:D92'.
names(sp)
## [1] "pH"      "Biomass" "Species"
sp$pH <- as.factor(sp$pH)
str(sp)
## tibble [90 × 3] (S3: tbl_df/tbl/data.frame)
##  $ pH     : Factor w/ 3 levels "a.low","b.mid",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ Biomass: num [1:90] 0.469 1.731 2.09 3.926 4.367 ...
##  $ Species: num [1:90] 30 39 44 35 25 29 23 18 19 12 ...
sp
## # A tibble: 90 × 3
##    pH     Biomass Species
##    <fct>    <dbl>   <dbl>
##  1 c.high   0.469      30
##  2 c.high   1.73       39
##  3 c.high   2.09       44
##  4 c.high   3.93       35
##  5 c.high   4.37       25
##  6 c.high   5.48       29
##  7 c.high   6.68       23
##  8 c.high   7.51       18
##  9 c.high   8.13       19
## 10 c.high   9.57       12
## # ℹ 80 more rows
Distribution

Ho: Normal distribution

ggdensity(sp$Species)

ggplot(data=sp, aes(x=Species)) + geom_histogram(aes(y=after_stat(density)), binwidth=.5,colour="black", fill="gray") + geom_density(alpha=.3, fill="#FF6666")

shapiro.test(sp$Species)        # Ho accepted
## 
##  Shapiro-Wilk normality test
## 
## data:  sp$Species
## W = 0.97805, p-value = 0.1317

Note: Response variable is counts.

Plot
p <- ggplot(aes(x = Biomass, y = Species, color = pH), data= sp)
p <- p + geom_point() + 
  geom_smooth(method = "lm")             # ŷ from lm()

ggplotly(p)
## `geom_smooth()` using formula = 'y ~ x'
ancova <- lm(sp$Species ~ sp$Biomass *
               sp$pH)
summary(ancova)
## 
## Call:
## lm(formula = sp$Species ~ sp$Biomass * sp$pH)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.290 -2.554 -0.124  2.208 15.677 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)            17.84740    1.22510  14.568  < 2e-16 ***
## sp$Biomass             -2.82778    0.45357  -6.235 1.74e-08 ***
## sp$pHb.mid             11.18359    1.76813   6.325 1.17e-08 ***
## sp$pHc.high            22.75667    1.83564  12.397  < 2e-16 ***
## sp$Biomass:sp$pHb.mid   0.26268    0.54557   0.481    0.631    
## sp$Biomass:sp$pHc.high  0.02733    0.51248   0.053    0.958    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.818 on 84 degrees of freedom
## Multiple R-squared:  0.8531, Adjusted R-squared:  0.8444 
## F-statistic: 97.58 on 5 and 84 DF,  p-value: < 2.2e-16

*** Intercepts significantly different, not slopes
No pH Biomass interaction*

PERO ESTOS SON CONTEOS !!!!!

los valores ajustados (de predicción) pueden ser “no realistas”.

Además, ¿Supuestos de regresión?

# RESIDUALS NOT NORMAL
hist(ancova$residuals)

shapiro.test(ancova$residuals)
## 
##  Shapiro-Wilk normality test
## 
## data:  ancova$residuals
## W = 0.95839, p-value = 0.005724

Mejor basarse en errores con una distribución que corresponda más a la variable (e.g., Poisson)

Eventualmente medir la Devianza, no la varianza.


Poisson errors

“decay.txt”
dec <- read.table("Crawley/decay.txt", header =TRUE)
dec$y <- round(dec$y, 0)
head(dec);tail(dec)
##   x   y
## 1 0 125
## 2 1 100
## 3 2  70
## 4 3  83
## 5 4 100
## 6 5  66
##     x  y
## 26 25 12
## 27 26 25
## 28 27  8
## 29 28 17
## 30 29 24
## 31 30 18
p <- ggplot(aes(x = x, y = y), data= dec)
p + geom_point() + 
  geom_vline(xintercept = mean(dec$x), col=2, linetype= "dashed") +
  geom_hline(yintercept = mean(dec$y), col=2, linetype= "dashed") +
  geom_segment(aes(x=x, y= mean(y), xend= x, yend=y)) +
  geom_segment(aes(x= x, y= lm(dec$y~dec$x)$fitted.values, xend= x, yend= y), col= "green") +
  geom_smooth(method ='lm', linetype= "dashed", se= FALSE)
## `geom_smooth()` using formula = 'y ~ x'

p + geom_point() +
  geom_smooth(method= "lm", se=FALSE) + 
  geom_segment(aes(x=x, y= lm(dec$y~dec$x)$fitted.values, xend= x, yend=y))
## `geom_smooth()` using formula = 'y ~ x'

Poisson error
p + geom_point() + 
  geom_smooth(method= "lm", se=FALSE, col="gray") +
  geom_segment(aes(x=x, y= glm(dec$y~dec$x, family=poisson)$fitted.values, xend= x, yend=y), col= "gray") +
  geom_smooth(method ='glm',method.args = list(family = 'poisson'))

Poisson errors for Species.txt

# Poisson errors plot "Species"
p <- ggplot(aes(x = Biomass, y = Species, color = pH), data= sp)
p + geom_point() + 
  geom_smooth(method ='glm',method.args = list(family = 'poisson'))

#### Ancova design analyzed with GLMs.

# GLM "Species" ANCOVA
ancova.glm <- glm(sp$Species ~ sp$Biomass *
               sp$pH, family= poisson)
summary(ancova.glm)
## 
## Call:
## glm(formula = sp$Species ~ sp$Biomass * sp$pH, family = poisson)
## 
## Coefficients:
##                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)             2.95255    0.08240  35.833  < 2e-16 ***
## sp$Biomass             -0.26216    0.03803  -6.893 5.47e-12 ***
## sp$pHb.mid              0.48411    0.10723   4.515 6.34e-06 ***
## sp$pHc.high             0.81557    0.10284   7.931 2.18e-15 ***
## sp$Biomass:sp$pHb.mid   0.12314    0.04270   2.884 0.003927 ** 
## sp$Biomass:sp$pHc.high  0.15503    0.04003   3.873 0.000108 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 452.346  on 89  degrees of freedom
## Residual deviance:  83.201  on 84  degrees of freedom
## AIC: 514.39
## 
## Number of Fisher Scoring iterations: 4

*** Biomass Vrs. pH interaction

No more R2, Deviance used instead