1. Datos
operarios <- c(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50)
tiempo <- c(45, 47, 48, 50, 52, 53, 55, 57, 58, 60, 62, 63, 65, 67, 68, 70, 72, 73,
75, 77, 78, 80, 82, 83, 85, 87, 88, 90, 92, 93, 95, 97, 98, 100, 102,
103, 105, 107, 108, 110, 112, 113, 115, 117, 118, 120, 122, 123, 125)
Datos <- data.frame(operarios, tiempo)
2. Gráfico de dispersión
ggplot(Datos, aes(x = operarios, y = tiempo)) +
geom_point(size = 3) +
geom_smooth(method = "lm", se = TRUE) +
labs(title = "Horas de capacitación vs Productividad",
x = "Número de operarios",
y = "Tiempo de producción (Horas)")
## `geom_smooth()` using formula = 'y ~ x'

3. Ajuste del modelo
modelo <- lm(tiempo ~ operarios, data = Datos)
summary(modelo)
##
## Call:
## lm(formula = tiempo ~ operarios, data = Datos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.3453 -0.3257 0.0000 0.3257 0.3453
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.680816 0.082199 507.1 <2e-16 ***
## operarios 1.666122 0.002777 599.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2749 on 47 degrees of freedom
## Multiple R-squared: 0.9999, Adjusted R-squared: 0.9999
## F-statistic: 3.599e+05 on 1 and 47 DF, p-value: < 2.2e-16
4. Intervalos de confianza
confint(modelo, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 41.515453 41.84618
## operarios 1.660535 1.67171
5. ANOVA
anova(modelo)
## Analysis of Variance Table
##
## Response: tiempo
## Df Sum Sq Mean Sq F value Pr(>F)
## operarios 1 27204.4 27204.4 359903 < 2.2e-16 ***
## Residuals 47 3.6 0.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
6. SST, SSR, SSE y R²
SST <- sum((Datos$tiempo - mean(Datos$tiempo))^2)
SSE <- sum(residuals(modelo)^2)
SSR <- SST - SSE
R2 <- SSR / SST
c(SST = SST, SSR = SSR, SSE = SSE, R2 = R2)
## SST SSR SSE R2
## 2.720800e+04 2.720445e+04 3.552653e+00 9.998694e-01
7. Gráficos de diagnóstico
par(mfrow = c(2, 2))
plot(modelo)

par(mfrow = c(1, 1))
8. Normalidad de residuos
shapiro.test(residuals(modelo))
##
## Shapiro-Wilk normality test
##
## data: residuals(modelo)
## W = 0.81712, p-value = 2.685e-06
ad.test(residuals(modelo))
##
## Anderson-Darling normality test
##
## data: residuals(modelo)
## A = 3.3961, p-value = 1.262e-08
9. Homoscedasticidad
bptest(modelo)
##
## studentized Breusch-Pagan test
##
## data: modelo
## BP = 3.9235e-29, df = 1, p-value = 1
bptest(modelo, ~ fitted(modelo) + I(fitted(modelo)^2))
##
## studentized Breusch-Pagan test
##
## data: modelo
## BP = 0.23216, df = 2, p-value = 0.8904
10. Independencia / autocorrelación
dwtest(modelo)
##
## Durbin-Watson test
##
## data: modelo
## DW = 3.0025, p-value = 0.9999
## alternative hypothesis: true autocorrelation is greater than 0
durbinWatsonTest(modelo)
## lag Autocorrelation D-W Statistic p-value
## 1 -0.5012755 3.002455 0
## Alternative hypothesis: rho != 0
11. Linealidad
plot(fitted(modelo), residuals(modelo),
xlab = "Valores ajustados", ylab = "Residuos",
main = "Residuos vs Ajustados")
abline(h = 0, col = "red")

resettest(modelo)
##
## RESET test
##
## data: modelo
## RESET = 0.034376, df1 = 2, df2 = 45, p-value = 0.9662
12. Outliers e influencia
cooksd <- cooks.distance(modelo)
which(cooksd > (4 / nrow(Datos)))
## named integer(0)
hatvals <- hatvalues(modelo)
which(hatvals > (2 * length(coef(modelo)) / nrow(Datos)))
## named integer(0)
13. Errores estándar robustos (HC1)
coeftest(modelo, vcov. = vcovHC(modelo, type = "HC1"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.6808163 0.0807612 516.10 < 2.2e-16 ***
## operarios 1.6661224 0.0027141 613.87 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
14. Bootstrap (2000 repeticiones)
boot_fn <- function(d, i) {
ds <- d[i, ]
fit <- lm(tiempo ~ operarios, data = ds)
return(coef(fit))
}
set.seed(123)
boot_res <- boot(data = Datos, statistic = boot_fn, R = 2000)
boot.ci(boot_res, type = "perc", index = 1)
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 2000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_res, type = "perc", index = 1)
##
## Intervals :
## Level Percentile
## 95% (41.51, 41.84 )
## Calculations and Intervals on Original Scale
boot.ci(boot_res, type = "perc", index = 2)
## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 2000 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = boot_res, type = "perc", index = 2)
##
## Intervals :
## Level Percentile
## 95% ( 1.661, 1.672 )
## Calculations and Intervals on Original Scale
15. WLS (pesos inversos)
w <- 1 / (fitted(modelo)^2)
modelo_wls <- lm(tiempo ~ operarios, data = Datos, weights = w)
summary(modelo_wls)
##
## Call:
## lm(formula = tiempo ~ operarios, data = Datos, weights = w)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -0.0072096 -0.0029805 0.0000057 0.0032167 0.0067997
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.684737 0.065530 636.1 <2e-16 ***
## operarios 1.665953 0.002965 561.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.003662 on 47 degrees of freedom
## Multiple R-squared: 0.9999, Adjusted R-squared: 0.9998
## F-statistic: 3.157e+05 on 1 and 47 DF, p-value: < 2.2e-16
16. Modelo sin primera observación
modelo_sin1 <- lm(tiempo ~ operarios, data = Datos[-1, ])
summary(modelo_sin1)
##
## Call:
## lm(formula = tiempo ~ operarios, data = Datos[-1, ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.34636 -0.32595 0.00058 0.32537 0.34578
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 41.682009 0.086580 481.4 <2e-16 ***
## operarios 1.666088 0.002895 575.4 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2779 on 46 degrees of freedom
## Multiple R-squared: 0.9999, Adjusted R-squared: 0.9999
## F-statistic: 3.311e+05 on 1 and 46 DF, p-value: < 2.2e-16
17. Regresión robusta
rlm_model <- rlm(tiempo ~ operarios, data = Datos)
summary(rlm_model)
##
## Call: rlm(formula = tiempo ~ operarios, data = Datos)
## Residuals:
## Min 1Q Median 3Q Max
## -0.3453 -0.3257 0.0000 0.3257 0.3453
##
## Coefficients:
## Value Std. Error t value
## (Intercept) 41.6808 0.0822 507.0721
## operarios 1.6661 0.0028 599.9188
##
## Residual standard error: 0.4829 on 47 degrees of freedom