Packages
library(googlesheets4); gs4_deauth()
library(tidyverse)
library(ggpubr)
# Growth ~ tannin
reg <- read.table("Crawley/regression.txt", header=TRUE)
names(reg)
## [1] "growth" "tannin"
summary(reg)
## growth tannin
## Min. : 2.000 Min. :0
## 1st Qu.: 3.000 1st Qu.:2
## Median : 7.000 Median :4
## Mean : 6.889 Mean :4
## 3rd Qu.:10.000 3rd Qu.:6
## Max. :12.000 Max. :8
p <- ggplot(aes(x=tannin, y=growth, col=), data= reg)
p + geom_point() +
geom_vline(xintercept = mean(reg$tannin), col=2, linetype= "dashed") +
geom_hline(yintercept = mean(reg$growth), col=2, linetype= "dashed") +
geom_segment(aes(x=tannin, y= mean(growth), xend= tannin, yend=growth))
En la figura arriba vemos las diferencias ỹ - y que elevadas al cuadrado y sumadas hacen sumatoria de cuadrados total - SST
lm()rl <- lm(growth ~ tannin , data=reg)
coef(rl)
## (Intercept) tannin
## 11.755556 -1.216667
ŷ - y (es diferente que ỹ - y).p + geom_point() +
geom_vline(xintercept = mean(reg$tannin), col=2, linetype= "dashed") +
geom_hline(yintercept = mean(reg$growth), col=2, linetype= "dashed") +
geom_segment(aes(x=tannin, y= mean(growth), xend= tannin, yend=growth)) +
geom_segment(aes(x=tannin, y= rl$fitted.values, xend= tannin, yend=growth), col= "green") +
geom_smooth(method= "lm", se=FALSE)
SSR = SST - SSE, (SSE en verde, SSR en negro). SSR representa la variación en Y que se debe a la varianza de X
p + geom_point() +
geom_smooth(method= "lm", se=FALSE) +
geom_segment(aes(x=tannin, y= rl$fitted.values, xend= tannin, yend=growth))
Y y X se asume lineal.rl$fitted.values
## 1 2 3 4 5 6 7 8
## 11.755556 10.538889 9.322222 8.105556 6.888889 5.672222 4.455556 3.238889
## 9
## 2.022222
reg$growth
## [1] 12 10 8 11 6 7 2 3 3
reg$growth - rl$fitted.values # residuals calculados "a mano"
## 1 2 3 4 5 6 7
## 0.2444444 -0.5388889 -1.3222222 2.8944444 -0.8888889 1.3277778 -2.4555556
## 8 9
## -0.2388889 0.9777778
rl$residuals
## 1 2 3 4 5 6 7
## 0.2444444 -0.5388889 -1.3222222 2.8944444 -0.8888889 1.3277778 -2.4555556
## 8 9
## -0.2388889 0.9777778
resid <- rl$residuals # residuals directos desde el modelo lineal
plot(resid ~ rl$fitted.values, pch=20, cex=2)
abline(h=0, col="gray", lty="dashed", lwd=2)
# option directa en R
plot(rl,1, pch=20, cex=2)
Arriba; la figura etiqueta a los “top tres” valores más extremos, del set de datos.
shapiro.test(reg$growth) # Normalidad de la variable respuesta
##
## Shapiro-Wilk normality test
##
## data: reg$growth
## W = 0.9291, p-value = 0.4728
rl$fitted.values
## 1 2 3 4 5 6 7 8
## 11.755556 10.538889 9.322222 8.105556 6.888889 5.672222 4.455556 3.238889
## 9
## 2.022222
reg$growth
## [1] 12 10 8 11 6 7 2 3 3
reg$growth - rl$fitted.values # residuals
## 1 2 3 4 5 6 7
## 0.2444444 -0.5388889 -1.3222222 2.8944444 -0.8888889 1.3277778 -2.4555556
## 8 9
## -0.2388889 0.9777778
rl$residuals
## 1 2 3 4 5 6 7
## 0.2444444 -0.5388889 -1.3222222 2.8944444 -0.8888889 1.3277778 -2.4555556
## 8 9
## -0.2388889 0.9777778
resid <- rl$residuals # residuals
hist(resid)
El supuesto más importante de la regresión lineal: Normalidad de los residuos
shapiro.test( rl$residuals)
##
## Shapiro-Wilk normality test
##
## data: rl$residuals
## W = 0.98794, p-value = 0.9926
plot(rl, 3) # Buscamos siempre lo más horizontal
summary(rl)
##
## Call:
## lm(formula = growth ~ tannin, data = reg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4556 -0.8889 -0.2389 0.9778 2.8944
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.7556 1.0408 11.295 9.54e-06 ***
## tannin -1.2167 0.2186 -5.565 0.000846 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.693 on 7 degrees of freedom
## Multiple R-squared: 0.8157, Adjusted R-squared: 0.7893
## F-statistic: 30.97 on 1 and 7 DF, p-value: 0.0008461
aov()SST <- var(reg$growth) * 8; SST # SST
## [1] 108.8889
SSE <- sum( (reg$growth -
rl$fitted.values)^2 ) ; SSE # SSE
## [1] 20.07222
SSR <- SST - SSE ; SSR # SSR
## [1] 88.81667
aov <- aov(growth ~ tannin , data=reg)
summary(aov)
## Df Sum Sq Mean Sq F value Pr(>F)
## tannin 1 88.82 88.82 30.97 0.000846 ***
## Residuals 7 20.07 2.87
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
aov()SSR/SST # R-squared
## [1] 0.8156633