set.seed(123)
data = expand.grid(x=1:10, y=1:10)
data$rto = rnorm(100, 3, 0.3)
data$rto = sort(data$rto) + runif(100, 0, 0.1)
data$mo = rnorm(100, 2.5, 0.1)
data$mo = sort(data$mo) + runif(100, 0, 0.1)
library(ggplot2)
g1 = ggplot(data)+
aes(x,y, fill=rto)+
geom_tile()
g2 = ggplot(data)+
aes(x,y, fill=mo)+
geom_tile()
gridExtra::grid.arrange(g1, g2, nrow=1)
plot(x = data$mo,
y = data$rto,
pch=16)
\[\hat{y} = \beta_0 + \beta_1x + \epsilon\]
mod1 = lm(rto ~ mo, data)
summary(mod1)
##
## Call:
## lm(formula = rto ~ mo, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.34934 -0.05963 -0.00802 0.06565 0.21694
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.47622 0.24748 -14.05 <2e-16 ***
## mo 2.56595 0.09685 26.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09624 on 98 degrees of freedom
## Multiple R-squared: 0.8775, Adjusted R-squared: 0.8762
## F-statistic: 702 on 1 and 98 DF, p-value: < 2.2e-16
plot(data$mo, data$rto, pch=16)
abline(mod1, col = "red", lwd=3)
\[\widehat{RTO} = 3.48 + 2.57MO\] # INTERPRETACIÓN
RTO: -3.48 + 2.57
Intercepto no tiene sentido práctico
El valor de la MO (pendiente) (valor 2.57) quiere decir que por cada unidad que aumente la MO, el valor de rendimiento crece en 2.57
El p.value de la pendiente es menor al 5%. Se rechaza la hipotesis nula de que la pendiente es igual a 0
R^2 ajustado: 87,62%, cuando este R cuadrado supera el 70% nos indica que es un buen modelo
# Normalidad de residuos
## Se espera que los residuos tengan comportamiento normal
shapiro.test(mod1$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod1$residuals
## W = 0.98357, p-value = 0.2495
# Homosedasticidad
plot(mod1$residuals, pch = 16)
Es una combinación entre regresión y analisis de varianza. Cuando se tienen covariables se hace un ANCOVA: ANÁLISIS DE COVARIANZA
# FSCA = FACTORIAL SIMPLE COMPLETAMENTE ALEATORIZADO
set.seed(123)
data = expand.grid(x=1:10, y=1:10)
data$rto = rnorm(100, 3, 0.3)
data$rto = sort(data$rto) + runif(100, 0, 0.1)
data$mo = rnorm(100, 2.5, 0.1)
data$mo = sort(data$mo) + runif(100, 0, 0.1)
data$trt = gl(4, 25, 100,
c("s0", "sf", "si", "sfi"))
data
## x y rto mo trt
## 1 1 1 2.331122 2.399377 s0
## 2 2 1 2.506251 2.404943 s0
## 3 3 1 2.554129 2.397630 s0
## 4 4 1 2.586877 2.379526 s0
## 5 5 1 2.660638 2.410530 s0
## 6 6 1 2.708506 2.392674 s0
## 7 7 1 2.670194 2.429978 s0
## 8 8 1 2.687383 2.431847 s0
## 9 9 1 2.680132 2.480773 s0
## 10 10 1 2.695680 2.424533 s0
## 11 1 2 2.727857 2.440139 s0
## 12 2 2 2.717370 2.403335 s0
## 13 3 2 2.713824 2.484451 s0
## 14 4 2 2.761865 2.438728 s0
## 15 5 2 2.786099 2.474339 s0
## 16 6 2 2.857325 2.427638 s0
## 17 7 2 2.826777 2.458925 s0
## 18 8 2 2.834492 2.498158 s0
## 19 9 2 2.876039 2.427405 s0
## 20 10 2 2.903514 2.503355 s0
## 21 1 3 2.840741 2.455981 s0
## 22 2 3 2.916033 2.462614 s0
## 23 3 3 2.904697 2.458905 s0
## 24 4 3 2.901885 2.432015 s0
## 25 5 3 2.854587 2.481421 s0
## 26 6 3 2.892213 2.501424 sf
## 27 7 3 2.905947 2.529207 sf
## 28 8 3 2.916029 2.443091 sf
## 29 9 3 2.936128 2.541469 sf
## 30 10 3 2.970703 2.472891 sf
## 31 1 4 2.947694 2.542307 sf
## 32 2 4 2.931644 2.518541 sf
## 33 3 4 2.954246 2.494774 sf
## 34 4 4 2.908068 2.533270 sf
## 35 5 4 2.934297 2.456325 sf
## 36 6 4 2.951194 2.528067 sf
## 37 7 4 2.934343 2.490756 sf
## 38 8 4 3.012483 2.541239 sf
## 39 9 4 2.946235 2.512120 sf
## 40 10 4 3.012611 2.489996 sf
## 41 1 5 2.988537 2.542851 sf
## 42 2 5 3.000839 2.472799 sf
## 43 3 5 2.954795 2.548246 sf
## 44 4 5 3.021638 2.495217 sf
## 45 5 5 3.006176 2.547271 sf
## 46 6 5 3.053882 2.569901 sf
## 47 7 5 3.027033 2.484519 sf
## 48 8 5 3.088372 2.563898 sf
## 49 9 5 3.098469 2.560506 sf
## 50 10 5 3.088572 2.520225 sf
## 51 1 6 3.046874 2.514855 si
## 52 2 6 3.055384 2.508549 si
## 53 3 6 3.096461 2.546841 si
## 54 4 6 3.065538 2.579774 si
## 55 5 6 3.099119 2.547569 si
## 56 6 6 3.132920 2.555289 si
## 57 7 6 3.081589 2.570352 si
## 58 8 6 3.112059 2.558964 si
## 59 9 6 3.123153 2.557514 si
## 60 10 6 3.177869 2.510090 si
## 61 1 7 3.192105 2.514229 si
## 62 2 7 3.196142 2.604860 si
## 63 3 7 3.181311 2.598373 si
## 64 4 7 3.210601 2.541562 si
## 65 5 7 3.171876 2.589395 si
## 66 6 7 3.185591 2.595199 si
## 67 7 7 3.164188 2.570573 si
## 68 8 7 3.169195 2.613661 si
## 69 9 7 3.140277 2.587923 si
## 70 10 7 3.199636 2.625410 si
## 71 1 8 3.251623 2.597230 si
## 72 2 8 3.166805 2.634257 si
## 73 3 8 3.182590 2.607292 si
## 74 4 8 3.209734 2.594033 si
## 75 5 8 3.283625 2.649278 si
## 76 6 8 3.283925 2.627203 sfi
## 77 7 8 3.331177 2.601401 sfi
## 78 8 8 3.293122 2.563635 sfi
## 79 9 8 3.258775 2.620330 sfi
## 80 10 8 3.328322 2.619765 sfi
## 81 1 9 3.344397 2.663273 sfi
## 82 2 9 3.290391 2.658189 sfi
## 83 3 9 3.337710 2.664184 sfi
## 84 4 9 3.324220 2.625255 sfi
## 85 5 9 3.313467 2.685092 sfi
## 86 6 9 3.368641 2.620405 sfi
## 87 7 9 3.351135 2.639166 sfi
## 88 8 9 3.384977 2.677663 sfi
## 89 9 9 3.372687 2.720986 sfi
## 90 10 9 3.443173 2.684592 sfi
## 91 1 10 3.437970 2.683900 sfi
## 92 2 10 3.420653 2.655684 sfi
## 93 3 10 3.462132 2.761995 sfi
## 94 4 10 3.547827 2.679593 sfi
## 95 5 10 3.543037 2.699073 sfi
## 96 6 10 3.596180 2.783606 sfi
## 97 7 10 3.634288 2.750558 sfi
## 98 8 10 3.625385 2.746595 sfi
## 99 9 10 3.660591 2.809314 sfi
## 100 10 10 3.736083 2.827301 sfi
\[H_0: \theta = 0 \]
\[H_0: \mu_{s0} = \mu_{sf} = \mu_{si} = \mu_{sfi} \]
\[y_{ij} = \mu + \tau_i + \theta(x_{ij}-\bar{x}) + \epsilon_{ij}\]
mod2 = aov(rto ~ mo + trt, data)
summary(mod2)
## Df Sum Sq Mean Sq F value Pr(>F)
## mo 1 6.502 6.502 989.51 < 2e-16 ***
## trt 3 0.283 0.094 14.38 8.45e-08 ***
## Residuals 95 0.624 0.007
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
boxplot(rto ~ trt, data)
** P value de los tratamientos fue de 8.45e-08 < 0.05, por tanto rechazo que los tratamientos son iguales.
# Normalidad
shapiro.test(mod2$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod2$residuals
## W = 0.96113, p-value = 0.004843
# Homocedasticidad
hist(mod2$residuals)
boxplot(mod2$residuals, pch = 16)
# Revisando posible dato atipico
boxplot(rto ~ trt, data, pch = 16)
which.min(data$rto)
## [1] 1
data[which.min(data$rto), ]
## x y rto mo trt
## 1 1 1 2.331122 2.399377 s0
library(outliers)
grubbs.test(mod2$residuals)
##
## Grubbs test for one outlier
##
## data: mod2$residuals
## G.1 = 4.36012, U = 0.80603, p-value = 0.0002265
## alternative hypothesis: lowest value -0.346216964518422 is an outlier
# Medidas remediables para el atipico
# 1. Imputar
# 2. Eliminar
# 3. Repetir experimento para esa parcela
# Media por tratamiento
med_trt = tapply(data$rto,
data$trt,
mean)
med_trt
## s0 sf si sfi
## 2.740161 2.979286 3.155851 3.427611
data2 = data
data2$rto[1] = med_trt["0"]
head(data2)
## x y rto mo trt
## 1 1 1 NA 2.399377 s0
## 2 2 1 2.506251 2.404943 s0
## 3 3 1 2.554129 2.397630 s0
## 4 4 1 2.586877 2.379526 s0
## 5 5 1 2.660638 2.410530 s0
## 6 6 1 2.708506 2.392674 s0
# De nuevo ANCOVA en el dato atipico imoutado
boxplot(rto ~ trt, data2)
mod3 = aov(rto ~ mo + trt, data2)
summary(mod3)
## Df Sum Sq Mean Sq F value Pr(>F)
## mo 1 6.068 6.068 1144.7 < 2e-16 ***
## trt 3 0.283 0.094 17.8 3.13e-09 ***
## Residuals 94 0.498 0.005
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 1 observation deleted due to missingness
# Normalidad residuales para el modelo con dato imputado
shapiro.test(mod3$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod3$residuals
## W = 0.98876, p-value = 0.5729