Asignación

. Poner como faltantes NA a los 5 datos mas extremos 1:11:58 . Con la libreria mice rellenar los datos faltantes . volver a ajustar el modelo lineal y comparar el \(R^2\)

#Análisis de covarianza
set.seed(123)

id_parcela = 1:36

xy = expand.grid(x=1:6, y=1:6)

# Fertilizante Foliar
ff = gl(4, 9, 36, c('D0','D5','D10','D15'))
ff = sample(ff, 36, F)

plot(xy, pch = 15, asp=1, col = ff)

set.seed(123)

# Covariable
MO = sort.int(rnorm(36, 3, 0.6), 6)
# Respuesta
prot = sort.int(rnorm(36, 6, 0.8), 6)
prot

##  [1] 4.152665 4.760998 5.101513 4.987683 5.142567 5.185140 5.695623 5.444234
##  [9] 5.733434 5.755230 5.677692 5.626676 5.598141 5.607175 5.819383 5.977163
## [17] 5.965704 7.094882 6.202655 7.213176 5.933305 6.467691 6.099083 6.172753
## [25] 6.303712 6.623972 6.966370 7.735165 5.833666 6.242823 6.358568 6.042403
## [33] 6.737814 7.640068 5.950471 6.443134

library(readxl)
Actividad4_74 <- read_excel("C:/Users/juanc/OneDrive/Escritorio/2021-1/Suelos/disenio/Actividad4.74.xlsx")

Actividad4_74

## # A tibble: 36 x 1
##    Proteina
##       <dbl>
##  1    NA   
##  2     4.76
##  3     5.10
##  4     4.99
##  5     5.14
##  6     5.19
##  7     5.70
##  8     5.44
##  9     5.73
## 10     5.76
## # ... with 26 more rows

plot(xy, pch = 15, asp=1, col = ff, cex = MO, ylim = c(0.7, 6.3))
points(xy, pch = 16, cex = prot/3, col = 'white')

Análisis exploratorio \[[y_i = \beta_0 + \beta_1x_i+\epsilon_i]\] \[[P_i = \beta_0 + \beta_1 MO_i+\epsilon_i]\] \[[E[P_i|MO_i] = \beta_0 + \beta_1 MO_i]\] \[[\widehat{P_i} = \beta_0 + \beta_1 MO_i]\] Ajustando el modelo de recta con: Minimos Cuadrados Oridnarios

mod1 = lm(prot~MO)
summary(mod1)

## 
## Call:
## lm(formula = prot ~ MO)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2232 -0.3775 -0.0395  0.2418  1.4580 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3.5876     0.5783   6.204 4.68e-07 ***
## MO            0.7980     0.1875   4.255 0.000155 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.625 on 34 degrees of freedom
## Multiple R-squared:  0.3475, Adjusted R-squared:  0.3283 
## F-statistic: 18.11 on 1 and 34 DF,  p-value: 0.0001548

coef = round(mod1$coefficients, 2)

plot(MO, prot, pch = 16, cex = 0.1, xlim=c(0,4.2), ylim = c(3,8))
abline(mod1, col ='blue')
points(0, coef[1], pch = 16, col = 'red')
points(MO[which.min(prot)], min(prot), col = 'blue')
points(MO[which.max(prot)], max(prot), col = 'blue')

points(3.5, 3.59 + 0.8 * 3.5, col = 'blue', pch = 16)

abline(v = c(2, 3), col = 'red')
segments(2,5.19, 3,5.19, col = 'red')
text(3,5.5, expression(Delta * y), pos = 4)
text(2.5,5, expression(Delta * x))

atan(coef[2])*180/pi

##       MO 
## 38.65981

# Atípicos multivariantes
library(mvoutlier)

## Warning: package 'mvoutlier' was built under R version 4.0.5

## Loading required package: sgeostat

## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2

## sROC 0.1-2 loaded

corr.plot(MO, prot)

## $cor.cla
## [1] 0.5895007
## 
## $cor.rob
## [1] 0.7768588

chisq.plot(cbind(MO, prot))

## Remove outliers with left-click, stop with right-click on plotting device

## $outliers
## NULL

color.plot(cbind(MO, prot))

## $outliers
##  [1]  TRUE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE
## [25] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE
## 
## $md
##  [1] 2.8003443 1.6226401 1.7049265 1.2771635 1.0727430 0.9986864 0.6868258
##  [8] 0.6331154 0.5958550 0.3915004 0.6452692 1.0491439 1.1632570 0.7793705
## [15] 0.6205014 0.1649309 0.1252440 3.0416731 0.9590475 3.5119837 0.8253402
## [22] 1.5391063 0.4675643 0.4513191 0.6171409 1.1040143 1.7066404 3.3701653
## [29] 1.8142731 1.2480420 0.7016250 1.9215895 1.2470030 3.1138448 2.3856867
## [36] 1.6308059
## 
## $euclidean
##  [1] 1.0266015 0.9848566 1.2752560 1.6517757 1.7420698 1.8640326 2.5780633
##  [8] 2.3592633 2.6861066 2.8326097 3.2105531 3.3672144 3.3819109 3.1380249
## [15] 2.8199948 3.4874196 3.4132617 4.3736091 3.4290535 4.4278291 2.9343058
## [22] 3.7152824 3.8664018 3.9018748 3.9943804 4.4991595 4.9277078 5.7119084
## [29] 4.2876812 4.6121044 4.2454007 4.7343716 4.7500567 5.6448866 4.8837781
## [36] 5.1639279

prot2 = prot[order(prot)]
MO2 = MO[order(prot)]

prot2[33:36] = coef[1] + coef[2] * MO[33:36]


mod2 = lm(prot2 ~ MO)
summary(mod2)

## 
## Call:
## lm(formula = prot2 ~ MO)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.97961 -0.10780 -0.00047  0.12864  0.49014 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  2.90598    0.25156   11.55 2.56e-13 ***
## MO           0.99345    0.08158   12.18 5.98e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2719 on 34 degrees of freedom
## Multiple R-squared:  0.8135, Adjusted R-squared:  0.808 
## F-statistic: 148.3 on 1 and 34 DF,  p-value: 5.976e-14

Codigo para rellenar los datos faltantes

library(mice)

## Warning: package 'mice' was built under R version 4.0.5

## 
## Attaching package: 'mice'

## The following object is masked from 'package:stats':
## 
##     filter

## The following objects are masked from 'package:base':
## 
##     cbind, rbind

input_data= Actividad4_74

input_data$Proteina=as.factor(input_data$Proteina)

summary(input_data)

##      Proteina 
##  4.760998: 1  
##  4.987683: 1  
##  5.101513: 1  
##  5.142567: 1  
##  5.18514 : 1  
##  (Other) :26  
##  NA's    : 5

input_data$Proteina<-sapply(input_data$Proteina, function(x) as.numeric(gsub(",", ".", x)))


input_data$Proteina[which(is.na(input_data$Proteina))]= mean(input_data$Proteina, na.rm = T)

Test de normalidad

Data_normalidad=shapiro.test(input_data$Proteina)
Data_normalidad

## 
##  Shapiro-Wilk normality test
## 
## data:  input_data$Proteina
## W = 0.97892, p-value = 0.7089

ifelse(Data_normalidad$p.value<0.05,"Datos no normal","Normalidad")

## [1] "Normalidad"

#Pruebas de varianza

Prueba_varianzas= bartlett.test(input_data$Proteina~ff)
Prueba_varianzas

## 
##  Bartlett test of homogeneity of variances
## 
## data:  input_data$Proteina by ff
## Bartlett's K-squared = 5.2644, df = 3, p-value = 0.1534

ifelse(Prueba_varianzas$p.value<0.05,"Heterocedasticidad","Homocedasticidad")

## [1] "Homocedasticidad"

mod1 = lm(input_data$Proteina~MO)
summary(mod1)

## 
## Call:
## lm(formula = input_data$Proteina ~ MO)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.53841 -0.30422  0.05078  0.20115  0.80816 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4.1243     0.3337  12.361 3.95e-14 ***
## MO            0.5807     0.1082   5.367 5.74e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3606 on 34 degrees of freedom
## Multiple R-squared:  0.4586, Adjusted R-squared:  0.4427 
## F-statistic:  28.8 on 1 and 34 DF,  p-value: 5.739e-06

coef = round(mod1$coefficients, 2)

plot(MO, input_data$Proteina, pch = 16, cex = 0.1, xlim=c(0,4.2), ylim = c(3,8))
abline(mod1, col ='blue')
points(0, coef[1], pch = 16, col = 'red')
points(MO[which.min(input_data$Proteina)], min(input_data$Proteina), col = 'blue')
points(MO[which.max(input_data$Proteina)], max(input_data$Proteina), col = 'blue')

points(3.5, 3.59 + 0.8 * 3.5, col = 'blue', pch = 16)

abline(v = c(2, 3), col = 'red')
segments(2,5.19, 3,5.19, col = 'red')
text(3,5.5, expression(Delta * y), pos = 4)
text(2.5,5, expression(Delta * x))

El \(R^2\) aumenta algunas unidades despues de autocompletar los datos por la libreria mice

Cual es la forma apropiada de introducir las variables en el modelo aov()

Los órdenes de las variables son importantes al calcular ANCOVA. Primero desea eliminar el efecto de la covariable, es decir, desea controlarla, antes de ingresar su variable principal o interés.

¡La covariable va primero (y no hay interacción)!. Si no hace esto en orden, obtendrá resultados diferentes

En otras palabras van primero las covariables y despues el factorvan primero las covariables y despues el factor

Bibliografia

https://www.datanovia.com/en/lessons/ancova-in-r/

Actividad44

Sebastian Castro

16/7/2021

Codigo para rellenar los datos faltantes

Test de normalidad

Cual es la forma apropiada de introducir las variables en el modelo aov()