library(readxl)
# Import the "datos atipicos.xlsx" workbook, reading both of its columns as
# numeric values.
datos_atipicos <- read_excel(
  path = "C:\\Users\\57321\\Documents\\R\\Excel rstudio\\datos atipicos.xlsx",
  col_types = rep("numeric", 2)
)

# Convert the imported table to a base data.frame and print a per-column
# summary (quartiles, mean and NA counts).
df <- data.frame(datos_atipicos)
summary(df)
## prot MO
## Min. :4.761 Min. :1.820
## 1st Qu.:5.617 1st Qu.:2.654
## Median :5.933 Median :3.072
## Mean :5.886 Mean :3.033
## 3rd Qu.:6.223 3rd Qu.:3.439
## Max. :6.966 Max. :4.072
## NA's :5
library(mice)
# Mean imputation: fill the missing values of `prot` with the mean of its
# observed values so that every row takes part in the regression.
# NOTE(review): `mice` is loaded above but none of its functions are called
# here; this is a plain mean fill, not a multiple imputation.
df$prot[is.na(df$prot)] <- mean(df$prot, na.rm = TRUE)

# Simple linear regression of prot on MO. The columns are resolved through
# `data = df` rather than written as `df$prot ~ df$MO`, which gives clean
# coefficient names and lets predict() accept `newdata`. The fitted values
# are the same as with the `df$` form.
mod_1 <- lm(prot ~ MO, data = df)
summary(mod_1)
##
## Call:
## lm(formula = prot ~ MO, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.53841 -0.30422 0.05078 0.20115 0.80816
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.1244 0.3337 12.361 3.95e-14 ***
## MO 0.5807 0.1082 5.367 5.74e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3606 on 34 degrees of freedom
## Multiple R-squared: 0.4586, Adjusted R-squared: 0.4427
## F-statistic: 28.8 on 1 and 34 DF, p-value: 5.739e-06
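# After adjusting the data with library(mice) (removing extreme values and
# imputing the missing ones), R-squared went from 34% to 51.58%, bringing it
# closer to the expected value of 70%.
# NOTE(review): the summary printed above reports Multiple R-squared = 0.4586
# (45.86%), not 51.58% -- confirm which figure is correct.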