Survey Data Source: National Household Education Surveys (NHES) Program 2019: Parent and Family Involvement in Education (PFI)
Outcome Variable (Binary) : enjoy_school; Item 50: SEENJOY “How much do you agree or disagree with ‘This child enjoys school’?”
Variable 1: overall_grades; Item 51: SEGRADES “Overall, across all subjects, what grades does this child get?”
Variable 2 (Continuous) : times_participated; Item 61: FSFREQ “During this school year, how many times has any adult in the household gone to meetings or participated in activities at this child’s school?”
Variable 3 (Continuous) : hours_hw_perweek; Item 66: FHWKHRS “In an average week, how many hours does this child spend on homework outside of school?”
Variable 4: parent_educ; PARGRADEX “Parent/guardian highest education”
gamlinear <- gam(enjoy_school ~ overall_grades + times_participated + hours_hw_perweek + parent_educ,
data = hw9data,
family = quasibinomial)
summary(gamlinear)
##
## Family: quasibinomial
## Link function: logit
##
## Formula:
## enjoy_school ~ overall_grades + times_participated + hours_hw_perweek +
## parent_educ
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.861028 0.102529 27.905 < 2e-16 ***
## overall_grades2 Mostly Bs -0.879141 0.068308 -12.870 < 2e-16 ***
## overall_grades3 Mostly Cs -2.061665 0.082371 -25.029 < 2e-16 ***
## overall_grades4 Mostly Ds or Lower -3.199761 0.156242 -20.480 < 2e-16 ***
## times_participated 0.013042 0.003828 3.407 0.000659 ***
## hours_hw_perweek -0.003279 0.005338 -0.614 0.539006
## parent_educ0Less than HS 0.439178 0.165720 2.650 0.008056 **
## parent_educ2Some College -0.063182 0.097532 -0.648 0.517122
## parent_educ3College Grad -0.305212 0.098393 -3.102 0.001926 **
## parent_educ4Grad School -0.286957 0.103544 -2.771 0.005590 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
##
## R-sq.(adj) = 0.0927 Deviance explained = 9.86%
## GCV = 0.64371 Scale est. = 1.0008 n = 13045
# hist(hw9data$times_participated)
# hist(hw9data$hours_hw_perweek)
gamsmoothed <- gam(enjoy_school ~ overall_grades + s(times_participated) + s(hours_hw_perweek) + parent_educ,
data = hw9data,
family = quasibinomial)
summary(gamsmoothed)
##
## Family: quasibinomial
## Link function: logit
##
## Formula:
## enjoy_school ~ overall_grades + s(times_participated) + s(hours_hw_perweek) +
## parent_educ
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.97590 0.09708 30.655 < 2e-16 ***
## overall_grades2 Mostly Bs -0.88564 0.06867 -12.898 < 2e-16 ***
## overall_grades3 Mostly Cs -2.04698 0.08289 -24.696 < 2e-16 ***
## overall_grades4 Mostly Ds or Lower -3.16610 0.15685 -20.185 < 2e-16 ***
## parent_educ0Less than HS 0.46371 0.16625 2.789 0.005291 **
## parent_educ2Some College -0.08657 0.09796 -0.884 0.376833
## parent_educ3College Grad -0.34452 0.09921 -3.473 0.000517 ***
## parent_educ4Grad School -0.33610 0.10471 -3.210 0.001332 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(times_participated) 7.008 7.912 4.389 3.1e-05 ***
## s(hours_hw_perweek) 4.309 5.228 3.376 0.00459 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0958 Deviance explained = 10.3%
## GCV = 0.64115 Scale est. = 1.0015 n = 13045
anova(gamlinear,
gamsmoothed,
test = "Chisq")
Yes! The GAM is fitting the data better than the purely linear model.
plot(gamsmoothed)
options(survey.lonely.psu = "adjust")
hw9design <- svydesign(ids = ~PPSU,
strata = ~PSTRATUM,
weights = ~FPWT,
data = hw9data,
nest = TRUE)
svygamlinear <- svyglm (enjoy_school ~ overall_grades + times_participated + hours_hw_perweek + parent_educ,
hw9design,
family = quasibinomial(link="logit"))
summary(svygamlinear)
##
## Call:
## svyglm(formula = enjoy_school ~ overall_grades + times_participated +
## hours_hw_perweek + parent_educ, design = hw9design, family = quasibinomial(link = "logit"))
##
## Survey design:
## svydesign(ids = ~PPSU, strata = ~PSTRATUM, weights = ~FPWT, data = hw9data,
## nest = TRUE)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.018266 0.145204 20.786 < 2e-16 ***
## overall_grades2 Mostly Bs -0.847372 0.097779 -8.666 < 2e-16 ***
## overall_grades3 Mostly Cs -2.092195 0.125927 -16.614 < 2e-16 ***
## overall_grades4 Mostly Ds or Lower -3.143865 0.229992 -13.669 < 2e-16 ***
## times_participated 0.015042 0.005952 2.527 0.01150 *
## hours_hw_perweek -0.010695 0.008728 -1.225 0.22044
## parent_educ0Less than HS 0.456628 0.258491 1.767 0.07733 .
## parent_educ2Some College -0.053186 0.137343 -0.387 0.69858
## parent_educ3College Grad -0.292910 0.138771 -2.111 0.03481 *
## parent_educ4Grad School -0.397584 0.147161 -2.702 0.00691 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 1.005668)
##
## Number of Fisher Scoring iterations: 5
svygamsmoothed <- svyglm (enjoy_school ~ overall_grades + bs(times_participated) + bs(hours_hw_perweek) + parent_educ,
hw9design,
family = quasibinomial(link="logit"))
summary(svygamsmoothed)
##
## Call:
## svyglm(formula = enjoy_school ~ overall_grades + bs(times_participated) +
## bs(hours_hw_perweek) + parent_educ, design = hw9design, family = quasibinomial(link = "logit"))
##
## Survey design:
## svydesign(ids = ~PPSU, strata = ~PSTRATUM, weights = ~FPWT, data = hw9data,
## nest = TRUE)
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.71811 0.16850 16.132 < 2e-16 ***
## overall_grades2 Mostly Bs -0.86310 0.09833 -8.778 < 2e-16 ***
## overall_grades3 Mostly Cs -2.10704 0.12689 -16.605 < 2e-16 ***
## overall_grades4 Mostly Ds or Lower -3.13875 0.22976 -13.661 < 2e-16 ***
## bs(times_participated)1 1.59987 0.58154 2.751 0.00595 **
## bs(times_participated)2 -1.00503 1.09182 -0.921 0.35732
## bs(times_participated)3 1.33867 0.75361 1.776 0.07570 .
## bs(hours_hw_perweek)1 2.20094 0.78610 2.800 0.00512 **
## bs(hours_hw_perweek)2 -10.52913 3.32799 -3.164 0.00156 **
## bs(hours_hw_perweek)3 18.70686 9.42576 1.985 0.04720 *
## parent_educ0Less than HS 0.44736 0.25609 1.747 0.08068 .
## parent_educ2Some College -0.08225 0.13762 -0.598 0.55009
## parent_educ3College Grad -0.33650 0.13962 -2.410 0.01596 *
## parent_educ4Grad School -0.44497 0.14854 -2.996 0.00274 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 1.003865)
##
## Number of Fisher Scoring iterations: 7
anova(svygamlinear,
svygamsmoothed,
test = "Chisq")
## Working (Rao-Scott+F) LRT for bs(times_participated) bs(hours_hw_perweek) - times_participated hours_hw_perweek
## in svyglm(formula = enjoy_school ~ overall_grades + bs(times_participated) +
## bs(hours_hw_perweek) + parent_educ, design = hw9design, family = quasibinomial(link = "logit"))
## Working 2logLR = 26.19968 p= 0.00011236
## (scale factors: 1.4 1.2 1 0.38 ); denominator df= 13029
library(emmeans)
rg <- ref_grid(svygamsmoothed, at = list(hours_hw_perweek = seq(0, 75)))
marg_logit <- emmeans(object = rg,
~hours_hw_perweek+parent_educ,
type="response" )
marg_logit %>%
as.data.frame() %>%
ggplot()+
geom_line(aes(x=hours_hw_perweek, y=prob, color=parent_educ, group=parent_educ))
plot(svygamsmoothed)