library(car)
library(tidyverse)
colnames(SLID)  # Nombre de las columnas.
## [1] "wages"     "education" "age"       "sex"       "language"
data(SLID)  # Cargamos los datos.
head(SLID)  # Observamos los primeros datos.
##   wages education age    sex language
## 1 10.56      15.0  40   Male  English
## 2 11.00      13.2  19   Male  English
## 3    NA      16.0  49   Male    Other
## 4 17.76      14.0  46   Male    Other
## 5    NA       8.0  71   Male  English
## 6 14.00      16.0  50 Female  English
SLID = SLID %>% 
  na.omit(SLID) %>% 
  select(wages, education, age) # Omitimos datos faltantes y omitimos las ultimas variables.
plot(SLID)

hist(SLID$wages, main = "Wages", breaks = 10, freq = FALSE)

hist(log(SLID$wages), main = "Wages", breaks = 10, freq = FALSE)

modelo = lm(wages~education+age, SLID)
par(mfrow=c(2, 2))

plot(modelo)

SLID$wages = log(SLID$wages) #   Corregimos la normaldad aplicando logaritmo.
modelo = lm(wages~education+age, SLID)
par(mfrow=c(2, 2))

plot(modelo)

par(mfrow=c(2, 2))

plot(modelo)

plot(SLID$education, SLID$wages)

pairs(SLID, panel = function(x, y){points(x, y);lines(lowess(x, y), col = "red")})  # Revisar

modelo2 = lm(wages~education+I((education-mean(education))^2) + age + I((age-mean(age))^2), data = SLID)  # Revisar.

summary(modelo2)  # Revisar.
## 
## Call:
## lm(formula = wages ~ education + I((education - mean(education))^2) + 
##     age + I((age - mean(age))^2), data = SLID)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.99067 -0.25518  0.01857  0.27939  1.89438 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         1.374e+00  3.801e-02  36.148  < 2e-16 ***
## education                           4.623e-02  2.197e-03  21.042  < 2e-16 ***
## I((education - mean(education))^2)  2.116e-03  4.187e-04   5.055  4.5e-07 ***
## age                                 1.984e-02  5.589e-04  35.488  < 2e-16 ***
## I((age - mean(age))^2)             -8.600e-04  4.132e-05 -20.812  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4106 on 3982 degrees of freedom
## Multiple R-squared:  0.3355, Adjusted R-squared:  0.3348 
## F-statistic: 502.6 on 4 and 3982 DF,  p-value: < 2.2e-16
confint(modelo2)  # Intervalos de confianza.
##                                           2.5 %        97.5 %
## (Intercept)                         1.299508749  1.4485536388
## education                           0.041926233  0.0505417606
## I((education - mean(education))^2)  0.001295557  0.0029372051
## age                                 0.018739488  0.0209311270
## I((age - mean(age))^2)             -0.000941048 -0.0007790124
modelo3 = lm(wages~education+I((education-mean(education))^2) + age + I((age-mean(age))^2) + I((education-mean(education))^3) + age + I((age-mean(age))^3), data = SLID) 

summary(modelo3)  # Revisar.
## 
## Call:
## lm(formula = wages ~ education + I((education - mean(education))^2) + 
##     age + I((age - mean(age))^2) + I((education - mean(education))^3) + 
##     age + I((age - mean(age))^3), data = SLID)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.00412 -0.25483  0.02035  0.27864  1.91655 
## 
## Coefficients:
##                                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                         1.490e+00  6.551e-02  22.745  < 2e-16 ***
## education                           4.479e-02  3.288e-03  13.622  < 2e-16 ***
## I((education - mean(education))^2)  2.232e-03  5.529e-04   4.037 5.51e-05 ***
## age                                 1.734e-02  1.121e-03  15.464  < 2e-16 ***
## I((age - mean(age))^2)             -9.206e-04  4.758e-05 -19.349  < 2e-16 ***
## I((education - mean(education))^3)  2.286e-05  6.986e-05   0.327   0.7436    
## I((age - mean(age))^3)              7.984e-06  3.120e-06   2.559   0.0105 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4103 on 3980 degrees of freedom
## Multiple R-squared:  0.3366, Adjusted R-squared:  0.3356 
## F-statistic: 336.6 on 6 and 3980 DF,  p-value: < 2.2e-16
confint(modelo3)  # Intervalos de confianza.
##                                            2.5 %        97.5 %
## (Intercept)                         1.361578e+00  1.618446e+00
## education                           3.834583e-02  5.124003e-02
## I((education - mean(education))^2)  1.148198e-03  3.316165e-03
## age                                 1.514143e-02  1.953812e-02
## I((age - mean(age))^2)             -1.013918e-03 -8.273455e-04
## I((education - mean(education))^3) -1.141030e-04  1.598132e-04
## I((age - mean(age))^3)              1.868033e-06  1.410063e-05
anova(modelo2, modelo3)  # Estadistico F parcial.
## Analysis of Variance Table
## 
## Model 1: wages ~ education + I((education - mean(education))^2) + age + 
##     I((age - mean(age))^2)
## Model 2: wages ~ education + I((education - mean(education))^2) + age + 
##     I((age - mean(age))^2) + I((education - mean(education))^3) + 
##     age + I((age - mean(age))^3)
##   Res.Df    RSS Df Sum of Sq      F  Pr(>F)  
## 1   3982 671.18                              
## 2   3980 670.07  2    1.1096 3.2952 0.03716 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
predict(modelo3, newdata = data.frame(age = 50, education = 8), interval = "confidence")
##        fit      lwr      upr
## 1 2.715344 2.671482 2.759206
predict(modelo3, newdata = data.frame(age = 50, education = 8), interval = "prediction")
##        fit    lwr      upr
## 1 2.715344 1.9097 3.520989