set.seed(123)
data = expand.grid(x=1:10, y=1:10)
data$rto = rnorm(100, 3, 0.3)
data$rto = sort(data$rto) + runif(100, 0, 0.1)
data$mo = rnorm(100, 2.5, 0.1)
data$mo = sort(data$mo) + runif(100, 0, 0.1)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.2
g1 = ggplot(data)+
aes(x,y, fill=rto)+
geom_tile()
g2 = ggplot(data)+
aes(x,y, fill=mo)+
geom_tile()
gridExtra::grid.arrange(g1, g2, nrow=1)
plot(x = data$mo, y = data$rto, pch=16)
\[\hat{y} = \beta_o + \beta_1x + \epsilon\]
Técnica que me permite hayar los interceptos y la pendiente de la recta para poder determinar el error.
mod1 = lm(rto ~ mo, data)
summary(mod1)
##
## Call:
## lm(formula = rto ~ mo, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.34934 -0.05963 -0.00802 0.06565 0.21694
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.47622 0.24748 -14.05 <2e-16 ***
## mo 2.56595 0.09685 26.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09624 on 98 degrees of freedom
## Multiple R-squared: 0.8775, Adjusted R-squared: 0.8762
## F-statistic: 702 on 1 and 98 DF, p-value: < 2.2e-16
plot(data$mo, data$rto, pch=16)
abline(mod1, col='red', lwd=3)
\[\widehat{RTO} = -3.48 + 2.57MO\] \[\hat{\beta} = 2.57 \text{ (Significa cuanto mejora el rendimiento)}\]
\[R^2 - ajustado = 87.62 \% \text{ (Indica la tendencia de los datos.)}\\ R^2 > 70\% \text{ (Es bueno, indica una tendencia positiva.)}\]
# Normalidad de residuos.
# Se espera que los residuos tengan comportamiento normal.
shapiro.test(mod1$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod1$residuals
## W = 0.98357, p-value = 0.2495
# Homosedastiicidad.
plot(mod1$residuals, pch = 16)
# Mezcla de regresión y análisis de varianza.
# Factorial simple con arreglo completamente al azar.
set.seed(123)
data = expand.grid(x=1:10, y=1:10)
data$rto = rnorm(100, 3, 0.3)
data$rto = sort(data$rto) + runif(100, 0, 0.1)
data$mo = rnorm(100, 2.5, 0.1)
data$mo = sort(data$mo) + runif(100, 0, 0.1)
data$trt = gl(4, 25, 100,
c('s0', 'sf', 'si', 'sfi'))
data
\[y_{ij} = \mu + \tau_i + \theta(x_{ij} - \bar{x}) + \epsilon_{ij}\]
\[H_0: \theta = 0\]
\[H_0: \mu_{s0} = \mu_{sf} = \mu_{si} =\mu_{sfi}\]
mod2 = aov(rto ~ mo + trt, data)
summary(mod1)
##
## Call:
## lm(formula = rto ~ mo, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.34934 -0.05963 -0.00802 0.06565 0.21694
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.47622 0.24748 -14.05 <2e-16 ***
## mo 2.56595 0.09685 26.49 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09624 on 98 degrees of freedom
## Multiple R-squared: 0.8775, Adjusted R-squared: 0.8762
## F-statistic: 702 on 1 and 98 DF, p-value: < 2.2e-16
boxplot(rto ~ trt, data)
# Normalidad.
shapiro.test(mod2$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod2$residuals
## W = 0.96113, p-value = 0.004843
hist(mod2$residuals)
boxplot(mod2$residuals, pch=16)
# Revisando posible dato atípico.
boxplot(rto ~ trt, data, pch = 16)
which.min(data$rto)
## [1] 1
data[which.min(data$rto), ]
#libreria que sirve para detectar a típicos.
library(outliers)
grubbs.test(mod2$residuals)
##
## Grubbs test for one outlier
##
## data: mod2$residuals
## G.1 = 4.36012, U = 0.80603, p-value = 0.0002265
## alternative hypothesis: lowest value -0.346216964518421 is an outlier
# 2.33 es una medición extraña.
# Camino remediales para el atípico.
# 1. Imputar.
# 2. Eliminar.
# 3. Repetir experimento para esa parcela.
#Media de cada tratamiento.
med_trt = tapply(data$rto,
data$trt,
mean)
med_trt
## s0 sf si sfi
## 2.740161 2.979286 3.155851 3.427611
data2 = data
data2$rto[1] = med_trt['s0']
head(data2)
# Corriendo nuevamente ANCOVA en el dato atípico imputado.
boxplot(rto ~ trt, data2)
mod3 = aov(rto ~ mo + trt, data2)
summary(mod3)
## Df Sum Sq Mean Sq F value Pr(>F)
## mo 1 6.182 6.182 1174.12 < 2e-16 ***
## trt 3 0.283 0.094 17.93 2.65e-09 ***
## Residuals 95 0.500 0.005
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Normalidad residuales para el modelo con dato imputado.
shapiro.test(mod3$residuals)
##
## Shapiro-Wilk normality test
##
## data: mod3$residuals
## W = 0.98906, p-value = 0.5891