Asignación
. Poner como faltantes NA a los 5 datos mas extremos 1:11:58 . Con la libreria mice rellenar los datos faltantes . volver a ajustar el modelo lineal y comparar el \(R^2\)
#Análisis de covarianza
set.seed(123)
id_parcela = 1:36
xy = expand.grid(x=1:6, y=1:6)
# Fertilizante Foliar
ff = gl(4, 9, 36, c('D0','D5','D10','D15'))
ff = sample(ff, 36, F)
plot(xy, pch = 15, asp=1, col = ff)
set.seed(123)
# Covariable
MO = sort.int(rnorm(36, 3, 0.6), 6)
# Respuesta
prot = sort.int(rnorm(36, 6, 0.8), 6)
prot
## [1] 4.152665 4.760998 5.101513 4.987683 5.142567 5.185140 5.695623 5.444234
## [9] 5.733434 5.755230 5.677692 5.626676 5.598141 5.607175 5.819383 5.977163
## [17] 5.965704 7.094882 6.202655 7.213176 5.933305 6.467691 6.099083 6.172753
## [25] 6.303712 6.623972 6.966370 7.735165 5.833666 6.242823 6.358568 6.042403
## [33] 6.737814 7.640068 5.950471 6.443134
library(readxl)
Actividad4_74 <- read_excel("C:/Users/juanc/OneDrive/Escritorio/2021-1/Suelos/disenio/Actividad4.74.xlsx")
Actividad4_74
## # A tibble: 36 x 1
## Proteina
## <dbl>
## 1 NA
## 2 4.76
## 3 5.10
## 4 4.99
## 5 5.14
## 6 5.19
## 7 5.70
## 8 5.44
## 9 5.73
## 10 5.76
## # ... with 26 more rows
plot(xy, pch = 15, asp=1, col = ff, cex = MO, ylim = c(0.7, 6.3))
points(xy, pch = 16, cex = prot/3, col = 'white')
Análisis exploratorio \[[y_i = \beta_0 + \beta_1x_i+\epsilon_i]\] \[[P_i = \beta_0 + \beta_1 MO_i+\epsilon_i]\] \[[E[P_i|MO_i] = \beta_0 + \beta_1 MO_i]\] \[[\widehat{P_i} = \beta_0 + \beta_1 MO_i]\] Ajustando el modelo de recta con: Minimos Cuadrados Oridnarios
mod1 = lm(prot~MO)
summary(mod1)
##
## Call:
## lm(formula = prot ~ MO)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2232 -0.3775 -0.0395 0.2418 1.4580
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.5876 0.5783 6.204 4.68e-07 ***
## MO 0.7980 0.1875 4.255 0.000155 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.625 on 34 degrees of freedom
## Multiple R-squared: 0.3475, Adjusted R-squared: 0.3283
## F-statistic: 18.11 on 1 and 34 DF, p-value: 0.0001548
coef = round(mod1$coefficients, 2)
plot(MO, prot, pch = 16, cex = 0.1, xlim=c(0,4.2), ylim = c(3,8))
abline(mod1, col ='blue')
points(0, coef[1], pch = 16, col = 'red')
points(MO[which.min(prot)], min(prot), col = 'blue')
points(MO[which.max(prot)], max(prot), col = 'blue')
points(3.5, 3.59 + 0.8 * 3.5, col = 'blue', pch = 16)
abline(v = c(2, 3), col = 'red')
segments(2,5.19, 3,5.19, col = 'red')
text(3,5.5, expression(Delta * y), pos = 4)
text(2.5,5, expression(Delta * x))
atan(coef[2])*180/pi
## MO
## 38.65981
# Atípicos multivariantes
library(mvoutlier)
## Warning: package 'mvoutlier' was built under R version 4.0.5
## Loading required package: sgeostat
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
## sROC 0.1-2 loaded
corr.plot(MO, prot)
## $cor.cla
## [1] 0.5895007
##
## $cor.rob
## [1] 0.7768588
chisq.plot(cbind(MO, prot))
## Remove outliers with left-click, stop with right-click on plotting device
## $outliers
## NULL
color.plot(cbind(MO, prot))
## $outliers
## [1] TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE TRUE FALSE TRUE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE TRUE FALSE FALSE FALSE FALSE FALSE TRUE FALSE FALSE
##
## $md
## [1] 2.8003443 1.6226401 1.7049265 1.2771635 1.0727430 0.9986864 0.6868258
## [8] 0.6331154 0.5958550 0.3915004 0.6452692 1.0491439 1.1632570 0.7793705
## [15] 0.6205014 0.1649309 0.1252440 3.0416731 0.9590475 3.5119837 0.8253402
## [22] 1.5391063 0.4675643 0.4513191 0.6171409 1.1040143 1.7066404 3.3701653
## [29] 1.8142731 1.2480420 0.7016250 1.9215895 1.2470030 3.1138448 2.3856867
## [36] 1.6308059
##
## $euclidean
## [1] 1.0266015 0.9848566 1.2752560 1.6517757 1.7420698 1.8640326 2.5780633
## [8] 2.3592633 2.6861066 2.8326097 3.2105531 3.3672144 3.3819109 3.1380249
## [15] 2.8199948 3.4874196 3.4132617 4.3736091 3.4290535 4.4278291 2.9343058
## [22] 3.7152824 3.8664018 3.9018748 3.9943804 4.4991595 4.9277078 5.7119084
## [29] 4.2876812 4.6121044 4.2454007 4.7343716 4.7500567 5.6448866 4.8837781
## [36] 5.1639279
prot2 = prot[order(prot)]
MO2 = MO[order(prot)]
prot2[33:36] = coef[1] + coef[2] * MO[33:36]
mod2 = lm(prot2 ~ MO)
summary(mod2)
##
## Call:
## lm(formula = prot2 ~ MO)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.97961 -0.10780 -0.00047 0.12864 0.49014
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.90598 0.25156 11.55 2.56e-13 ***
## MO 0.99345 0.08158 12.18 5.98e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2719 on 34 degrees of freedom
## Multiple R-squared: 0.8135, Adjusted R-squared: 0.808
## F-statistic: 148.3 on 1 and 34 DF, p-value: 5.976e-14
library(mice)
## Warning: package 'mice' was built under R version 4.0.5
##
## Attaching package: 'mice'
## The following object is masked from 'package:stats':
##
## filter
## The following objects are masked from 'package:base':
##
## cbind, rbind
input_data= Actividad4_74
input_data$Proteina=as.factor(input_data$Proteina)
summary(input_data)
## Proteina
## 4.760998: 1
## 4.987683: 1
## 5.101513: 1
## 5.142567: 1
## 5.18514 : 1
## (Other) :26
## NA's : 5
input_data$Proteina<-sapply(input_data$Proteina, function(x) as.numeric(gsub(",", ".", x)))
input_data$Proteina[which(is.na(input_data$Proteina))]= mean(input_data$Proteina, na.rm = T)
Data_normalidad=shapiro.test(input_data$Proteina)
Data_normalidad
##
## Shapiro-Wilk normality test
##
## data: input_data$Proteina
## W = 0.97892, p-value = 0.7089
ifelse(Data_normalidad$p.value<0.05,"Datos no normal","Normalidad")
## [1] "Normalidad"
#Pruebas de varianza
Prueba_varianzas= bartlett.test(input_data$Proteina~ff)
Prueba_varianzas
##
## Bartlett test of homogeneity of variances
##
## data: input_data$Proteina by ff
## Bartlett's K-squared = 5.2644, df = 3, p-value = 0.1534
ifelse(Prueba_varianzas$p.value<0.05,"Heterocedasticidad","Homocedasticidad")
## [1] "Homocedasticidad"
mod1 = lm(input_data$Proteina~MO)
summary(mod1)
##
## Call:
## lm(formula = input_data$Proteina ~ MO)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.53841 -0.30422 0.05078 0.20115 0.80816
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.1243 0.3337 12.361 3.95e-14 ***
## MO 0.5807 0.1082 5.367 5.74e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3606 on 34 degrees of freedom
## Multiple R-squared: 0.4586, Adjusted R-squared: 0.4427
## F-statistic: 28.8 on 1 and 34 DF, p-value: 5.739e-06
coef = round(mod1$coefficients, 2)
plot(MO, input_data$Proteina, pch = 16, cex = 0.1, xlim=c(0,4.2), ylim = c(3,8))
abline(mod1, col ='blue')
points(0, coef[1], pch = 16, col = 'red')
points(MO[which.min(input_data$Proteina)], min(input_data$Proteina), col = 'blue')
points(MO[which.max(input_data$Proteina)], max(input_data$Proteina), col = 'blue')
points(3.5, 3.59 + 0.8 * 3.5, col = 'blue', pch = 16)
abline(v = c(2, 3), col = 'red')
segments(2,5.19, 3,5.19, col = 'red')
text(3,5.5, expression(Delta * y), pos = 4)
text(2.5,5, expression(Delta * x))
El \(R^2\) aumenta algunas unidades despues de autocompletar los datos por la libreria mice
Los órdenes de las variables son importantes al calcular ANCOVA. Primero desea eliminar el efecto de la covariable, es decir, desea controlarla, antes de ingresar su variable principal o interés.
¡La covariable va primero (y no hay interacción)!. Si no hace esto en orden, obtendrá resultados diferentes
En otras palabras van primero las covariables y despues el factorvan primero las covariables y despues el factor
Bibliografia