Modelo de Regresión Lineal
# Load the kidiq data. fread() already returns a data.table, so wrapping it
# in data.table() is unnecessary.
base <- fread("kidiq.csv", encoding = "Latin-1")

# Model 1: simple linear regression of kid_score on mom_iq.
# The formula uses bare column names so lm() resolves them through
# `data = base`; writing base$col inside the formula bypasses `data` and
# makes predict(..., newdata = ) unusable.
f01 <- kid_score ~ mom_iq
reg1 <- lm(f01, data = base)
summary(reg1)
##
## Call:
## lm(formula = f01, data = base)
##
## Residuals:
## Min 1Q Median 3Q Max
## -56.753 -12.074 2.217 11.710 47.691
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.79978 5.91741 4.36 1.63e-05 ***
## base$mom_iq 0.60997 0.05852 10.42 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.27 on 432 degrees of freedom
## Multiple R-squared: 0.201, Adjusted R-squared: 0.1991
## F-statistic: 108.6 on 1 and 432 DF, p-value: < 2.2e-16
Generación de Variable con valores predichos
# Store the in-sample predictions of reg1 in column kid_score1. The column is
# assigned by reference, so `base` itself is modified.
base[, `:=`(kid_score1 = predict(reg1))]
Gráfico Modelo 1 (kid_score ~ mom_iq)
# Plot for model 1: observed points, the fitted line, and one vertical segment
# per observation joining the observed kid_score to its prediction.
# Columns are mapped by bare name against the plot data instead of base$col.
graph1 <- ggplot(base, aes(x = mom_iq)) +
  geom_segment(aes(xend = mom_iq, y = kid_score, yend = kid_score1)) +
  geom_point(aes(y = kid_score, color = "Valores reales")) +
  # The smooth is fitted on the predicted values; fullrange = TRUE extends the
  # line across the whole x axis.
  geom_smooth(
    aes(y = kid_score1, color = "Valores predichos"),
    method = "lm", se = FALSE, fullrange = TRUE
  ) +
  theme_minimal() +
  # x carries mom_iq and y carries kid_score; the labels were swapped before.
  labs(x = "IQ de la madre", y = "Puntaje de los niños", color = NULL) +
  theme(legend.position = "bottom") +
  xlim(0, 150) +
  # The limits live inside the labelled scale: a separate ylim() would replace
  # this scale and drop the number_format() labels.
  scale_y_continuous(labels = number_format(scale = 1), limits = c(0, 150))
graph1
# Model 2: kid_score regressed on mom_iq and mom_hs.
# NOTE: f01 and reg1 are reused here, so the model-1 objects are overwritten.
# Bare column names let lm() resolve the variables through `data = base`
# (base$col inside a formula bypasses `data`).
f01 <- kid_score ~ mom_iq + mom_hs
reg1 <- lm(f01, data = base)
summary(reg1)
##
## Call:
## lm(formula = f01, data = base)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.873 -12.663 2.404 11.356 49.545
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.73154 5.87521 4.380 1.49e-05 ***
## base$mom_iq 0.56391 0.06057 9.309 < 2e-16 ***
## base$mom_hs 5.95012 2.21181 2.690 0.00742 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 18.14 on 431 degrees of freedom
## Multiple R-squared: 0.2141, Adjusted R-squared: 0.2105
## F-statistic: 58.72 on 2 and 431 DF, p-value: < 2.2e-16
Gráfico Modelo 2 (kid_score ~ mom_iq + mom_hs)
# Refresh kid_score1 with the predictions of the model currently held in reg1.
base[, kid_score1 := predict(reg1)]

# Plot for model 2: observed points, residual segments, and one fitted line
# per mom_hs group (green for mom_hs == 0, cyan for mom_hs == 1).
graph1 <- ggplot(base, aes(x = mom_iq)) +
  geom_segment(aes(xend = mom_iq, y = kid_score, yend = kid_score1)) +
  geom_point(aes(y = kid_score, color = "Valores Reales")) +
  geom_smooth(
    data = base[mom_hs == 0], aes(y = kid_score1),
    color = "green", method = "lm", se = FALSE, fullrange = TRUE
  ) +
  geom_smooth(
    data = base[mom_hs == 1], aes(y = kid_score1),
    color = "cyan", method = "lm", se = FALSE, fullrange = TRUE
  ) +
  theme_minimal() +
  # x carries mom_iq and y carries kid_score; the labels were swapped before.
  labs(x = "IQ de la madre", y = "Puntaje de los niños", color = NULL) +
  theme(legend.position = "bottom") +
  xlim(0, 150) +
  # The limits live inside the labelled scale: a separate ylim() would replace
  # this scale and drop the number_format() labels.
  scale_y_continuous(labels = number_format(scale = 1), limits = c(0, 150)) +
  # annotate() draws each label exactly once, independent of the plot data.
  annotate("text", x = 10, y = 55, label = "Si mom_hs=1", color = "cyan") +
  annotate("text", x = 10, y = 17, label = "Si mom_hs=0", color = "green")
graph1
Si la madre NO terminó la escuela
# Keep only the rows with mom_hs == 0.
# NOTE: this overwrites `base`, so the remaining rows are no longer available
# afterwards unless the data is loaded again.
base <- base[mom_hs == 0]

# Simple regression of kid_score on mom_iq within this subset. Bare column
# names let lm() resolve the variables through `data = base`.
f02 <- kid_score ~ mom_iq
reg2 <- lm(f02, data = base)

# Store the in-sample predictions as kid_score2 (assigned by reference).
base[, kid_score2 := predict(reg2)]
summary(reg2)
##
## Call:
## lm(formula = f02, data = base)
##
## Residuals:
## Min 1Q Median 3Q Max
## -49.452 -9.868 0.661 13.289 43.149
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -11.4820 14.6011 -0.786 0.434
## base$mom_iq 0.9689 0.1574 6.154 2e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 19.07 on 91 degrees of freedom
## Multiple R-squared: 0.2939, Adjusted R-squared: 0.2861
## F-statistic: 37.87 on 1 and 91 DF, p-value: 1.997e-08
Gráfico Modelo 3 (mom_hs == 0)
# Plot for the regression stored in kid_score2: observed points, the fitted
# line, and one vertical segment per observation joining the observed
# kid_score to its prediction.
graph1 <- ggplot(base, aes(x = mom_iq)) +
  geom_segment(aes(xend = mom_iq, y = kid_score, yend = kid_score2)) +
  geom_point(aes(y = kid_score, color = "Valores reales")) +
  geom_smooth(
    aes(y = kid_score2, color = "Valores predichos"),
    method = "lm", se = FALSE, fullrange = TRUE
  ) +
  theme_minimal() +
  # x carries mom_iq and y carries kid_score; the labels were swapped before.
  labs(x = "IQ de la madre", y = "Puntaje de los niños", color = NULL) +
  theme(legend.position = "bottom") +
  xlim(0, 150) +
  # The lower limit is -12 rather than 0, as in the original ylim() call.
  # The limits live inside the labelled scale: a separate ylim() would replace
  # this scale and drop the number_format() labels.
  scale_y_continuous(labels = number_format(scale = 1), limits = c(-12, 150))
graph1
Si la madre SÍ terminó la escuela
# Reload the full data set first: `base` was reduced to the mom_hs == 0 rows
# earlier in the script, so filtering it for mom_hs == 1 would leave zero
# rows and lm() would fail.
base <- fread("kidiq.csv", encoding = "Latin-1")
base <- base[mom_hs == 1]

# Simple regression of kid_score on mom_iq within this subset. Bare column
# names let lm() resolve the variables through `data = base`.
f02 <- kid_score ~ mom_iq
reg2 <- lm(f02, data = base)

# Store the in-sample predictions as kid_score2 (assigned by reference).
base[, kid_score2 := predict(reg2)]
summary(reg2)
##
## Call:
## lm(formula = f02, data = base)
##
## Residuals:
## Min 1Q Median 3Q Max
## -52.092 -11.904 2.331 10.457 43.880
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 39.78620 6.66341 5.971 5.96e-09 ***
## base$mom_iq 0.48461 0.06452 7.511 5.24e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.66 on 339 degrees of freedom
## Multiple R-squared: 0.1427, Adjusted R-squared: 0.1402
## F-statistic: 56.42 on 1 and 339 DF, p-value: 5.237e-13
Gráfico Modelo 3 (mom_hs == 1)
# Plot for the regression stored in kid_score2: observed points, the fitted
# line, and one vertical segment per observation joining the observed
# kid_score to its prediction.
graph1 <- ggplot(base, aes(x = mom_iq)) +
  geom_segment(aes(xend = mom_iq, y = kid_score, yend = kid_score2)) +
  geom_point(aes(y = kid_score, color = "Valores reales")) +
  geom_smooth(
    aes(y = kid_score2, color = "Valores predichos"),
    method = "lm", se = FALSE, fullrange = TRUE
  ) +
  theme_minimal() +
  # x carries mom_iq and y carries kid_score; the labels were swapped before.
  labs(x = "IQ de la madre", y = "Puntaje de los niños", color = NULL) +
  theme(legend.position = "bottom") +
  xlim(0, 150) +
  # The limits live inside the labelled scale: a separate ylim() would replace
  # this scale and drop the number_format() labels.
  scale_y_continuous(labels = number_format(scale = 1), limits = c(0, 150))
graph1