Ancova_conteos

Packages

library(tidyverse)

## Warning: package 'ggplot2' was built under R version 4.0.4

library(dplyr) 
library(ggpubr)
library(rcompanion)
library(car)

DATA “species.txt”

EL folder de “Crawley” se puede encontrar aquí: Google-Drive

sp <- read.table("Crawley/species.txt", header= TRUE)
names(sp)

## [1] "pH"      "Biomass" "Species"

head(sp)

##     pH   Biomass Species
## 1 high 0.4692972      30
## 2 high 1.7308704      39
## 3 high 2.0897785      44
## 4 high 3.9257871      35
## 5 high 4.3667927      25
## 6 high 5.4819747      29

tail(sp)

##     pH  Biomass Species
## 85 low 1.507954       9
## 86 low 2.325963       8
## 87 low 2.995706      12
## 88 low 3.538199      14
## 89 low 4.364541       7
## 90 low 4.870508       3

Distribution

Ho: Normal distribution

ggdensity(sp$Species)

ggplot(data=sp, aes(x=Species)) + geom_histogram(aes(y=..density..), binwidth=.5,colour="black", fill="gray") + geom_density(alpha=.3, fill="#FF6666")

shapiro.test(sp$Species)        # Ho accepted

## 
##  Shapiro-Wilk normality test
## 
## data:  sp$Species
## W = 0.97805, p-value = 0.1317

Note: Response variable is counts.

Plot

p <- ggplot(aes(x = Biomass, y = Species, color = pH), data= sp)
p + geom_point() + 
  geom_smooth(method = "lm")             # ŷ from lm()

## `geom_smooth()` using formula 'y ~ x'

ancova <- lm(sp$Species ~ sp$Biomass *
               sp$pH)
summary(ancova)

## 
## Call:
## lm(formula = sp$Species ~ sp$Biomass * sp$pH)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.290 -2.554 -0.124  2.208 15.677 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          40.60407    1.36701  29.703  < 2e-16 ***
## sp$Biomass           -2.80045    0.23856 -11.739  < 2e-16 ***
## sp$pHlow            -22.75667    1.83564 -12.397  < 2e-16 ***
## sp$pHmid            -11.57307    1.86926  -6.191  2.1e-08 ***
## sp$Biomass:sp$pHlow  -0.02733    0.51248  -0.053    0.958    
## sp$Biomass:sp$pHmid   0.23535    0.38579   0.610    0.543    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.818 on 84 degrees of freedom
## Multiple R-squared:  0.8531, Adjusted R-squared:  0.8444 
## F-statistic: 97.58 on 5 and 84 DF,  p-value: < 2.2e-16

*** Intercepts significantly different, not slopes
– No pH Biomass interaction*

PERO ESTOS SON CONTEOS !!!!!

los valores ajustados (de predicción) pueden ser “no realistas”.

Además, ¿Supuestos de regresión?

# RESIDUALS NOT NORMAL
hist(ancova$residuals)

shapiro.test(ancova$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  ancova$residuals
## W = 0.95839, p-value = 0.005724

Mejor basarse en errores con una distribución que corresponda más a la variable (e.g., Poisson)

Eventualmente medir la Devianza, no la varianza.

Poisson errors

“decay.txt”

dec <- read.table("Crawley/decay.txt", header =TRUE)
dec$y <- round(dec$y, 0)
head(dec);tail(dec)

##   x   y
## 1 0 125
## 2 1 100
## 3 2  70
## 4 3  83
## 5 4 100
## 6 5  66

##     x  y
## 26 25 12
## 27 26 25
## 28 27  8
## 29 28 17
## 30 29 24
## 31 30 18

p <- ggplot(aes(x = x, y = y), data= dec)
p + geom_point() + 
  geom_vline(xintercept = mean(dec$x), col=2, linetype= "dashed") +
  geom_hline(yintercept = mean(dec$y), col=2, linetype= "dashed") +
  geom_segment(aes(x=x, y= mean(y), xend= x, yend=y)) +
  geom_segment(aes(x= x, y= lm(dec$y~dec$x)$fitted.values, xend= x, yend= y), col= "green") +
  geom_smooth(method ='lm', linetype= "dashed", se= FALSE)

## `geom_smooth()` using formula 'y ~ x'

p + geom_point() +
  geom_smooth(method= "lm", se=FALSE) + 
  geom_segment(aes(x=x, y= lm(dec$y~dec$x)$fitted.values, xend= x, yend=y))

## `geom_smooth()` using formula 'y ~ x'

Poisson error

p + geom_point() + 
  geom_smooth(method= "lm", se=FALSE, col="gray") +
  geom_segment(aes(x=x, y= glm(dec$y~dec$x, family=poisson)$fitted.values, xend= x, yend=y), col= "gray") +
  geom_smooth(method ='glm',method.args = list(family = 'poisson'))

Poisson errors for Species.txt

# Poisson errors plot "Species"
p <- ggplot(aes(x = Biomass, y = Species, color = pH), data= sp)
p + geom_point() + 
  geom_smooth(method ='glm',method.args = list(family = 'poisson'))

#### Ancova design analyzed with GLMs.

# GLM "Species" ANCOVA
ancova.glm <- glm(sp$Species ~ sp$Biomass *
               sp$pH, family= poisson)
summary(ancova.glm)

## 
## Call:
## glm(formula = sp$Species ~ sp$Biomass * sp$pH, family = poisson)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4978  -0.7485  -0.0402   0.5575   3.2297  
## 
## Coefficients:
##                     Estimate Std. Error z value Pr(>|z|)    
## (Intercept)          3.76812    0.06153  61.240  < 2e-16 ***
## sp$Biomass          -0.10713    0.01249  -8.577  < 2e-16 ***
## sp$pHlow            -0.81557    0.10284  -7.931 2.18e-15 ***
## sp$pHmid            -0.33146    0.09217  -3.596 0.000323 ***
## sp$Biomass:sp$pHlow -0.15503    0.04003  -3.873 0.000108 ***
## sp$Biomass:sp$pHmid -0.03189    0.02308  -1.382 0.166954    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for poisson family taken to be 1)
## 
##     Null deviance: 452.346  on 89  degrees of freedom
## Residual deviance:  83.201  on 84  degrees of freedom
## AIC: 514.39
## 
## Number of Fisher Scoring iterations: 4

*** Biomass Vrs. pH interaction

No more R², Deviance used instead