#Librería para excel

library(readxl)
library(ggplot2)

1.Cargamos los datos:

mis_datos <- read_xlsx("ch8_cps.xlsx")
set.seed(3)
muestra <- sample(1:nrow(mis_datos), 1000)
mis_datos_muestra <- mis_datos[muestra, ]
head(mis_datos_muestra)
  1. Observamos los datos:
# Primero, asegúrate de correr este bloque para activar las herramientas
library(ggplot2)
Warning: package ‘ggplot2’ was built under R version 4.4.3
# Ahora, el código corregido para la gráfica
ggplot(mis_datos_muestra, aes(x = age, y = ahe)) +
  geom_point(alpha = 0.2, color = "darkblue") +
  labs(x = "Edad", 
       y = "Salario por hora", 
       title = "Relación entre Edad y Salario") +
  theme_minimal()

  1. Modelo Lineal:
# Estimación del modelo lineal
m_lineal <- lm(ahe ~ age, data = mis_datos_muestra)

# Mostrar los coeficientes y estadísticos
summary(m_lineal)

Call:
lm(formula = ahe ~ age, data = mis_datos_muestra)

Residuals:
    Min      1Q  Median      3Q     Max 
-23.561 -10.744  -4.415   5.931 120.591 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept) 17.70465    1.83653   9.640  < 2e-16
age          0.15619    0.04118   3.793 0.000158
               
(Intercept) ***
age         ***
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 16.51 on 998 degrees of freedom
Multiple R-squared:  0.01421,   Adjusted R-squared:  0.01322 
F-statistic: 14.39 on 1 and 998 DF,  p-value: 0.0001579

Observamos el modelo lineal:

ggplot(mis_datos_muestra, aes(x = age, y = ahe)) +
  geom_point(alpha = 0.1) +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE, color = "red") +
  labs(x = "Edad", y = "Salario por hora", title = "Ajuste Lineal")

  1. Modelo No Lineal Cuadrático:
# Estimamos el modelo agregando el término de edad al cuadrado
m_cuadratico <- lm(ahe ~ age + I(age^2), data = mis_datos_muestra)

# Mostramos los resultados para ver la significancia de los coeficientes
summary(m_cuadratico)

Call:
lm(formula = ahe ~ age + I(age^2), data = mis_datos_muestra)

Residuals:
    Min      1Q  Median      3Q     Max 
-24.906 -10.494  -4.339   5.832 118.897 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)
(Intercept) -7.564761   5.289163  -1.430    0.153
age          1.394651   0.246874   5.649 2.10e-08
I(age^2)    -0.013919   0.002737  -5.086 4.37e-07
               
(Intercept)    
age         ***
I(age^2)    ***
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 16.31 on 997 degrees of freedom
Multiple R-squared:  0.03914,   Adjusted R-squared:  0.03721 
F-statistic: 20.31 on 2 and 997 DF,  p-value: 2.271e-09

graficamos el modelo cuadrático:

# Graficamos los puntos y añadimos la curva cuadrática
ggplot(mis_datos_muestra, aes(x = age, y = ahe)) +
  geom_point(alpha = 0.2, color = "gray") +
  geom_smooth(method = "lm", formula = y ~ x + I(x^2), se = FALSE, color = "blue") +
  labs(x = "Edad", 
       y = "Salario por hora", 
       title = "Ajuste del Modelo Cuadrático") +
  theme_minimal()

  1. Modelo No Lineal Polinomial de Grado n:

Comencemos con grado 4:

# Estimamos el polinomio de grado 4
m_poli4 <- lm(ahe ~ poly(age, 4, raw = TRUE), data = mis_datos_muestra)

# Revisamos los coeficientes
summary(m_poli4)

Call:
lm(formula = ahe ~ poly(age, 4, raw = TRUE), data = mis_datos_muestra)

Residuals:
    Min      1Q  Median      3Q     Max 
-24.598 -10.709  -3.870   6.064 117.777 

Coefficients:
                            Estimate Std. Error
(Intercept)               -3.831e+01  3.507e+01
poly(age, 4, raw = TRUE)1  3.538e+00  3.371e+00
poly(age, 4, raw = TRUE)2 -5.655e-02  1.154e-01
poly(age, 4, raw = TRUE)3  1.620e-04  1.672e-03
poly(age, 4, raw = TRUE)4  1.482e-06  8.699e-06
                          t value Pr(>|t|)
(Intercept)                -1.093    0.275
poly(age, 4, raw = TRUE)1   1.050    0.294
poly(age, 4, raw = TRUE)2  -0.490    0.624
poly(age, 4, raw = TRUE)3   0.097    0.923
poly(age, 4, raw = TRUE)4   0.170    0.865

Residual standard error: 16.26 on 995 degrees of freedom
Multiple R-squared:  0.04699,   Adjusted R-squared:  0.04316 
F-statistic: 12.27 on 4 and 995 DF,  p-value: 9.711e-10

Lo graficamos:

# Graficamos el ajuste polinomial de grado 4
ggplot(mis_datos_muestra, aes(x = age, y = ahe)) +
  geom_point(alpha = 0.2, color = "gray") +
  geom_smooth(method = "lm", formula = y ~ poly(x, 4, raw = TRUE), se = FALSE, color = "darkgreen") +
  labs(x = "Edad", 
       y = "Salario por hora", 
       title = "Ajuste Polinomial de Grado 4") +
  theme_minimal()

  1. Modelo logaritmico:

Modelo lin-log:

# Estimamos el modelo: Salario (ahe) contra el logaritmo de la Edad (age)
m_linlog <- lm(ahe ~ log(age), data = mis_datos_muestra)

# Vemos los resultados
summary(m_linlog)

Call:
lm(formula = ahe ~ log(age), data = mis_datos_muestra)

Residuals:
    Min      1Q  Median      3Q     Max 
-23.571 -10.537  -4.299   5.979 120.414 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)   -4.953      6.238  -0.794    0.427
log(age)       7.909      1.676   4.719 2.71e-06
               
(Intercept)    
log(age)    ***
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 16.45 on 998 degrees of freedom
Multiple R-squared:  0.02183,   Adjusted R-squared:  0.02085 
F-statistic: 22.27 on 1 and 998 DF,  p-value: 2.708e-06
# Si quieres ver cómo se ve la curva logarítmica:
ggplot(mis_datos_muestra, aes(x = age, y = ahe)) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", formula = y ~ log(x), se = FALSE, color = "purple") +
  labs(x = "Edad (escala original)", y = "Salario por hora", title = "Modelo Lin-Log")

  1. Modelo con variable binaria:
# Estimamos el modelo con una variable continua (age) y una binaria (female)
m_binario <- lm(ahe ~ age + female, data = mis_datos_muestra)

# Mostramos el resumen de los resultados
summary(m_binario)

Call:
lm(formula = ahe ~ age + female, data = mis_datos_muestra)

Residuals:
    Min      1Q  Median      3Q     Max 
-26.610 -10.764  -3.801   5.727 117.850 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)
(Intercept)  20.0796     1.8466  10.874  < 2e-16
age           0.1658     0.0405   4.095 4.57e-05
female       -6.2612     1.0332  -6.060 1.93e-09
               
(Intercept) ***
age         ***
female      ***
---
Signif. codes:  
0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 16.22 on 997 degrees of freedom
Multiple R-squared:  0.04923,   Adjusted R-squared:  0.04732 
F-statistic: 25.81 on 2 and 997 DF,  p-value: 1.177e-11
  1. Modelo con interacción de variable binaria:

Veamos si afecta que sea mujer de noreste:

# El asterisco crea: female + northeast + (female x northeast)
m_interaccion <- lm(ahe ~ age + female * northeast, data = mis_datos_muestra)

# Resultados finales
summary(m_interaccion)

Call:
lm(formula = ahe ~ age + female * northeast, data = mis_datos_muestra)

Residuals:
    Min      1Q  Median      3Q     Max 
-29.270 -10.758  -4.043   5.890 114.260 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)      19.82797    1.84107  10.770  < 2e-16 ***
age               0.15781    0.04015   3.931 9.06e-05 ***
female           -6.92271    1.10293  -6.277 5.16e-10 ***
northeast         4.14581    1.95355   2.122   0.0341 *  
female:northeast  5.04327    2.95203   1.708   0.0879 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 16.07 on 995 degrees of freedom
Multiple R-squared:  0.06951,   Adjusted R-squared:  0.06577 
F-statistic: 18.58 on 4 and 995 DF,  p-value: 9.653e-15
LS0tCnRpdGxlOiAiTW9kZWxvcyBObyBMaW5lYWxlcyIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKYGBge3J9CiNMaWJyZXLDrWEgcGFyYSBleGNlbAoKbGlicmFyeShyZWFkeGwpCmxpYnJhcnkoZ2dwbG90MikKYGBgCgoKCjEuQ2FyZ2Ftb3MgbG9zIGRhdG9zOgpgYGB7cn0KbWlzX2RhdG9zIDwtIHJlYWRfeGxzeCgiY2g4X2Nwcy54bHN4IikKc2V0LnNlZWQoMykKbXVlc3RyYSA8LSBzYW1wbGUoMTpucm93KG1pc19kYXRvcyksIDEwMDApCm1pc19kYXRvc19tdWVzdHJhIDwtIG1pc19kYXRvc1ttdWVzdHJhLCBdCmhlYWQobWlzX2RhdG9zX211ZXN0cmEpCmBgYAoKMi4gT2JzZXJ2YW1vcyBsb3MgZGF0b3M6CgpgYGB7cn0KIyBQcmltZXJvLCBhc2Vnw7pyYXRlIGRlIGNvcnJlciBlc3RlIGJsb3F1ZSBwYXJhIGFjdGl2YXIgbGFzIGhlcnJhbWllbnRhcwpsaWJyYXJ5KGdncGxvdDIpCgojIEFob3JhLCBlbCBjw7NkaWdvIGNvcnJlZ2lkbyBwYXJhIGxhIGdyw6FmaWNhCmdncGxvdChtaXNfZGF0b3NfbXVlc3RyYSwgYWVzKHggPSBhZ2UsIHkgPSBhaGUpKSArCiAgZ2VvbV9wb2ludChhbHBoYSA9IDAuMiwgY29sb3IgPSAiZGFya2JsdWUiKSArCiAgbGFicyh4ID0gIkVkYWQiLCAKICAgICAgIHkgPSAiU2FsYXJpbyBwb3IgaG9yYSIsIAogICAgICAgdGl0bGUgPSAiUmVsYWNpw7NuIGVudHJlIEVkYWQgeSBTYWxhcmlvIikgKwogIHRoZW1lX21pbmltYWwoKQoKYGBgCgozLiBNb2RlbG8gTGluZWFsOgoKYGBge3J9CiMgRXN0aW1hY2nDs24gZGVsIG1vZGVsbyBsaW5lYWwKbV9saW5lYWwgPC0gbG0oYWhlIH4gYWdlLCBkYXRhID0gbWlzX2RhdG9zX211ZXN0cmEpCgojIE1vc3RyYXIgbG9zIGNvZWZpY2llbnRlcyB5IGVzdGFkw61zdGljb3MKc3VtbWFyeShtX2xpbmVhbCkKYGBgCk9ic2VydmFtb3MgZWwgbW9kZWxvIGxpbmVhbDoKCmBgYHtyfQpnZ3Bsb3QobWlzX2RhdG9zX211ZXN0cmEsIGFlcyh4ID0gYWdlLCB5ID0gYWhlKSkgKwogIGdlb21fcG9pbnQoYWxwaGEgPSAwLjEpICsKICBnZW9tX3Ntb290aChtZXRob2QgPSAibG0iLCBmb3JtdWxhID0geSB+IHgsIHNlID0gRkFMU0UsIGNvbG9yID0gInJlZCIpICsKICBsYWJzKHggPSAiRWRhZCIsIHkgPSAiU2FsYXJpbyBwb3IgaG9yYSIsIHRpdGxlID0gIkFqdXN0ZSBMaW5lYWwiKQpgYGAKCgoKNC4gTW9kZWxvIE5vIExpbmVhbCBDdWFkcsOhdGljbzoKCmBgYHtyfQojIEVzdGltYW1vcyBlbCBtb2RlbG8gYWdyZWdhbmRvIGVsIHTDqXJtaW5vIGRlIGVkYWQgYWwgY3VhZHJhZG8KbV9jdWFkcmF0aWNvIDwtIGxtKGFoZSB+IGFnZSArIEkoYWdlXjIpLCBkYXRhID0gbWlzX2RhdG9zX211ZXN0cmEpCgojIE1vc3RyYW1vcyBsb3MgcmVzdWx0YWRvcyBwYXJhIHZlciBsYSBzaWduaWZpY2FuY2lhIGRlIGxvcyBjb2VmaWNpZW50ZXMKc3VtbWFyeShtX2N1YWRyYXRpY28pCgpgYGAKZ3JhZmljYW1vcyBlbCBtb2RlbG8gY3VhZHLDoXRpY286CmBgYHtyfQojIEdyYWZpY2Ftb3MgbG9zIHB1bnRvcyB5IGHDsWFkaW1vcyBsYSBjdXJ2YSBjdWFkcsOhdGljYQpnZ3Bsb3QobWlzX2RhdG9zX211ZXN0cmEsIGFlcyh4ID0gYWdlLCB5ID0gYWhlKSkgKwogIGdlb21fcG9pbnQoYWxwaGEgPSAwLjIsIGNvbG9yID0gImdyYXkiKSArCiAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIiwgZm9ybXVsYSA9IHkgfiB4ICsgSSh4XjIpLCBzZSA9IEZBTFNFLCBjb2xvciA9ICJibHVlIikgKwogIGxhYnMoeCA9ICJFZGFkIiwgCiAgICAgICB5ID0gIlNhbGFyaW8gcG9yIGhvcmEiLCAKICAgICAgIHRpdGxlID0gIkFqdXN0ZSBkZWwgTW9kZWxvIEN1YWRyw6F0aWNvIikgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCjUuIE1vZGVsbyBObyBMaW5lYWwgUG9saW5vbWlhbCBkZSBHcmFkbyAqbio6CgpDb21lbmNlbW9zIGNvbiBncmFkbyA0OgoKYGBge3J9CiMgRXN0aW1hbW9zIGVsIHBvbGlub21pbyBkZSBncmFkbyA0Cm1fcG9saTQgPC0gbG0oYWhlIH4gcG9seShhZ2UsIDQsIHJhdyA9IFRSVUUpLCBkYXRhID0gbWlzX2RhdG9zX211ZXN0cmEpCgojIFJldmlzYW1vcyBsb3MgY29lZmljaWVudGVzCnN1bW1hcnkobV9wb2xpNCkKYGBgCgpMbyBncmFmaWNhbW9zOgoKYGBge3J9CiMgR3JhZmljYW1vcyBlbCBhanVzdGUgcG9saW5vbWlhbCBkZSBncmFkbyA0CmdncGxvdChtaXNfZGF0b3NfbXVlc3RyYSwgYWVzKHggPSBhZ2UsIHkgPSBhaGUpKSArCiAgZ2VvbV9wb2ludChhbHBoYSA9IDAuMiwgY29sb3IgPSAiZ3JheSIpICsKICBnZW9tX3Ntb290aChtZXRob2QgPSAibG0iLCBmb3JtdWxhID0geSB+IHBvbHkoeCwgNCwgcmF3ID0gVFJVRSksIHNlID0gRkFMU0UsIGNvbG9yID0gImRhcmtncmVlbiIpICsKICBsYWJzKHggPSAiRWRhZCIsIAogICAgICAgeSA9ICJTYWxhcmlvIHBvciBob3JhIiwgCiAgICAgICB0aXRsZSA9ICJBanVzdGUgUG9saW5vbWlhbCBkZSBHcmFkbyA0IikgKwogIHRoZW1lX21pbmltYWwoKQpgYGAKCgo2LiBNb2RlbG8gbG9nYXJpdG1pY286CgpNb2RlbG8gbGluLWxvZzoKCmBgYHtyfQojIEVzdGltYW1vcyBlbCBtb2RlbG86IFNhbGFyaW8gKGFoZSkgY29udHJhIGVsIGxvZ2FyaXRtbyBkZSBsYSBFZGFkIChhZ2UpCm1fbGlubG9nIDwtIGxtKGFoZSB+IGxvZyhhZ2UpLCBkYXRhID0gbWlzX2RhdG9zX211ZXN0cmEpCgojIFZlbW9zIGxvcyByZXN1bHRhZG9zCnN1bW1hcnkobV9saW5sb2cpCgojIFNpIHF1aWVyZXMgdmVyIGPDs21vIHNlIHZlIGxhIGN1cnZhIGxvZ2Fyw610bWljYToKZ2dwbG90KG1pc19kYXRvc19tdWVzdHJhLCBhZXMoeCA9IGFnZSwgeSA9IGFoZSkpICsKICBnZW9tX3BvaW50KGFscGhhID0gMC4yKSArCiAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIiwgZm9ybXVsYSA9IHkgfiBsb2coeCksIHNlID0gRkFMU0UsIGNvbG9yID0gInB1cnBsZSIpICsKICBsYWJzKHggPSAiRWRhZCAoZXNjYWxhIG9yaWdpbmFsKSIsIHkgPSAiU2FsYXJpbyBwb3IgaG9yYSIsIHRpdGxlID0gIk1vZGVsbyBMaW4tTG9nIikKYGBgCjcuIE1vZGVsbyBjb24gdmFyaWFibGUgYmluYXJpYToKCmBgYHtyfQojIEVzdGltYW1vcyBlbCBtb2RlbG8gY29uIHVuYSB2YXJpYWJsZSBjb250aW51YSAoYWdlKSB5IHVuYSBiaW5hcmlhIChmZW1hbGUpCm1fYmluYXJpbyA8LSBsbShhaGUgfiBhZ2UgKyBmZW1hbGUsIGRhdGEgPSBtaXNfZGF0b3NfbXVlc3RyYSkKCiMgTW9zdHJhbW9zIGVsIHJlc3VtZW4gZGUgbG9zIHJlc3VsdGFkb3MKc3VtbWFyeShtX2JpbmFyaW8pCmBgYAoKOC4gTW9kZWxvIGNvbiBpbnRlcmFjY2nDs24gZGUgdmFyaWFibGUgYmluYXJpYToKClZlYW1vcyBzaSBhZmVjdGEgcXVlIHNlYSBtdWplciBkZSBub3Jlc3RlOgpgYGB7cn0KIyBFbCBhc3RlcmlzY28gY3JlYTogZmVtYWxlICsgbm9ydGhlYXN0ICsgKGZlbWFsZSB4IG5vcnRoZWFzdCkKbV9pbnRlcmFjY2lvbiA8LSBsbShhaGUgfiBhZ2UgKyBmZW1hbGUgKiBub3J0aGVhc3QsIGRhdGEgPSBtaXNfZGF0b3NfbXVlc3RyYSkKCiMgUmVzdWx0YWRvcyBmaW5hbGVzCnN1bW1hcnkobV9pbnRlcmFjY2lvbikKYGBgCgo=