library(car)
library(tidyverse)
colnames(SLID) # Nombre de las columnas.
## [1] "wages" "education" "age" "sex" "language"
data(SLID) # Cargamos los datos.
head(SLID) # Observamos los primeros datos.
## wages education age sex language
## 1 10.56 15.0 40 Male English
## 2 11.00 13.2 19 Male English
## 3 NA 16.0 49 Male Other
## 4 17.76 14.0 46 Male Other
## 5 NA 8.0 71 Male English
## 6 14.00 16.0 50 Female English
SLID = SLID %>%
na.omit(SLID) %>%
select(wages, education, age) # Omitimos datos faltantes y omitimos las ultimas variables.
plot(SLID)

hist(SLID$wages, main = "Wages", breaks = 10, freq = FALSE)

hist(log(SLID$wages), main = "Wages", breaks = 10, freq = FALSE)

modelo = lm(wages~education+age, SLID)
par(mfrow=c(2, 2))
plot(modelo)

SLID$wages = log(SLID$wages) # Corregimos la normaldad aplicando logaritmo.
modelo = lm(wages~education+age, SLID)
par(mfrow=c(2, 2))
plot(modelo)

par(mfrow=c(2, 2))
plot(modelo)

plot(SLID$education, SLID$wages)

pairs(SLID, panel = function(x, y){points(x, y);lines(lowess(x, y), col = "red")}) # Revisar

modelo2 = lm(wages~education+I((education-mean(education))^2) + age + I((age-mean(age))^2), data = SLID) # Revisar.
summary(modelo2) # Revisar.
##
## Call:
## lm(formula = wages ~ education + I((education - mean(education))^2) +
## age + I((age - mean(age))^2), data = SLID)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.99067 -0.25518 0.01857 0.27939 1.89438
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.374e+00 3.801e-02 36.148 < 2e-16 ***
## education 4.623e-02 2.197e-03 21.042 < 2e-16 ***
## I((education - mean(education))^2) 2.116e-03 4.187e-04 5.055 4.5e-07 ***
## age 1.984e-02 5.589e-04 35.488 < 2e-16 ***
## I((age - mean(age))^2) -8.600e-04 4.132e-05 -20.812 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4106 on 3982 degrees of freedom
## Multiple R-squared: 0.3355, Adjusted R-squared: 0.3348
## F-statistic: 502.6 on 4 and 3982 DF, p-value: < 2.2e-16
confint(modelo2) # Intervalos de confianza.
## 2.5 % 97.5 %
## (Intercept) 1.299508749 1.4485536388
## education 0.041926233 0.0505417606
## I((education - mean(education))^2) 0.001295557 0.0029372051
## age 0.018739488 0.0209311270
## I((age - mean(age))^2) -0.000941048 -0.0007790124
modelo3 = lm(wages~education+I((education-mean(education))^2) + age + I((age-mean(age))^2) + I((education-mean(education))^3) + age + I((age-mean(age))^3), data = SLID)
summary(modelo3) # Revisar.
##
## Call:
## lm(formula = wages ~ education + I((education - mean(education))^2) +
## age + I((age - mean(age))^2) + I((education - mean(education))^3) +
## age + I((age - mean(age))^3), data = SLID)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.00412 -0.25483 0.02035 0.27864 1.91655
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.490e+00 6.551e-02 22.745 < 2e-16 ***
## education 4.479e-02 3.288e-03 13.622 < 2e-16 ***
## I((education - mean(education))^2) 2.232e-03 5.529e-04 4.037 5.51e-05 ***
## age 1.734e-02 1.121e-03 15.464 < 2e-16 ***
## I((age - mean(age))^2) -9.206e-04 4.758e-05 -19.349 < 2e-16 ***
## I((education - mean(education))^3) 2.286e-05 6.986e-05 0.327 0.7436
## I((age - mean(age))^3) 7.984e-06 3.120e-06 2.559 0.0105 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4103 on 3980 degrees of freedom
## Multiple R-squared: 0.3366, Adjusted R-squared: 0.3356
## F-statistic: 336.6 on 6 and 3980 DF, p-value: < 2.2e-16
confint(modelo3) # Intervalos de confianza.
## 2.5 % 97.5 %
## (Intercept) 1.361578e+00 1.618446e+00
## education 3.834583e-02 5.124003e-02
## I((education - mean(education))^2) 1.148198e-03 3.316165e-03
## age 1.514143e-02 1.953812e-02
## I((age - mean(age))^2) -1.013918e-03 -8.273455e-04
## I((education - mean(education))^3) -1.141030e-04 1.598132e-04
## I((age - mean(age))^3) 1.868033e-06 1.410063e-05
anova(modelo2, modelo3) # Estadistico F parcial.
## Analysis of Variance Table
##
## Model 1: wages ~ education + I((education - mean(education))^2) + age +
## I((age - mean(age))^2)
## Model 2: wages ~ education + I((education - mean(education))^2) + age +
## I((age - mean(age))^2) + I((education - mean(education))^3) +
## age + I((age - mean(age))^3)
## Res.Df RSS Df Sum of Sq F Pr(>F)
## 1 3982 671.18
## 2 3980 670.07 2 1.1096 3.2952 0.03716 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
predict(modelo3, newdata = data.frame(age = 50, education = 8), interval = "confidence")
## fit lwr upr
## 1 2.715344 2.671482 2.759206
predict(modelo3, newdata = data.frame(age = 50, education = 8), interval = "prediction")
## fit lwr upr
## 1 2.715344 1.9097 3.520989