Empat Kandidat Model GAM
## Model 1: 'year' and 'age' both enter linearly
## Baseline candidate: wage regressed on year, age and education with no
## smooth terms. summary() prints the deviance, AIC and the ANOVA table for
## the parametric effects.
gam_1 <- gam(wage ~ year + age + education, data = Wage)
summary(gam_1)
##
## Call: gam(formula = wage ~ year + age + education, data = Wage)
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -113.323 -19.521 -3.964 14.438 219.172
##
## (Dispersion Parameter for gaussian family taken to be 1287.767)
##
## Null Deviance: 5222086 on 2999 degrees of freedom
## Residual Deviance: 3854286 on 2993 degrees of freedom
## AIC: 30004.62
##
## Number of Local Scoring Iterations: 2
##
## Anova for Parametric Effects
## Df Sum Sq Mean Sq F value Pr(>F)
## year 1 22434 22434 17.421 3.08e-05 ***
## age 1 195045 195045 151.460 < 2.2e-16 ***
## education 4 1150320 287580 223.317 < 2.2e-16 ***
## Residuals 2993 3854286 1288
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Model 2: 'year' linear, 'age' non-linear
## Same predictors as the linear model, but age enters through the smooth
## term s(age, 5). The smooth is written positionally on purpose so the term
## label shown in the summary stays "s(age, 5)".
gam_2 <- gam(wage ~ year + s(age, 5) + education, data = Wage)
summary(gam_2)
##
## Call: gam(formula = wage ~ year + s(age, 5) + education, data = Wage)
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -119.959 -19.647 -3.199 13.969 213.562
##
## (Dispersion Parameter for gaussian family taken to be 1235.812)
##
## Null Deviance: 5222086 on 2999 degrees of freedom
## Residual Deviance: 3693842 on 2989 degrees of freedom
## AIC: 29885.06
##
## Number of Local Scoring Iterations: NA
##
## Anova for Parametric Effects
## Df Sum Sq Mean Sq F value Pr(>F)
## year 1 27154 27154 21.973 2.89e-06 ***
## s(age, 5) 1 194535 194535 157.415 < 2.2e-16 ***
## education 4 1069081 267270 216.271 < 2.2e-16 ***
## Residuals 2989 3693842 1236
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Anova for Nonparametric Effects
## Npar Df Npar F Pr(F)
## (Intercept)
## year
## s(age, 5) 4 32.46 < 2.2e-16 ***
## education
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Model 3: 'year' non-linear, 'age' linear
## year enters through the smooth term s(year, 4) while age stays linear.
## summary() additionally reports the ANOVA for the nonparametric part of
## the smooth term.
gam_3 <- gam(wage ~ s(year, 4) + age + education, data = Wage)
summary(gam_3)
##
## Call: gam(formula = wage ~ s(year, 4) + age + education, data = Wage)
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -112.602 -19.551 -3.711 14.353 219.022
##
## (Dispersion Parameter for gaussian family taken to be 1287.453)
##
## Null Deviance: 5222086 on 2999 degrees of freedom
## Residual Deviance: 3849484 on 2990 degrees of freedom
## AIC: 30006.88
##
## Number of Local Scoring Iterations: NA
##
## Anova for Parametric Effects
## Df Sum Sq Mean Sq F value Pr(>F)
## s(year, 4) 1 22434 22434 17.425 3.073e-05 ***
## age 1 195943 195943 152.194 < 2.2e-16 ***
## education 4 1150553 287638 223.417 < 2.2e-16 ***
## Residuals 2990 3849484 1287
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Anova for Nonparametric Effects
## Npar Df Npar F Pr(F)
## (Intercept)
## s(year, 4) 3 1.2433 0.2923
## age
## education
## Model 4: 'year' and 'age' both non-linear
## Largest candidate: smooth terms for both year (s(year, 4)) and age
## (s(age, 5)), plus education as a parametric term.
gam_4 <- gam(wage ~ s(year, 4) + s(age, 5) + education, data = Wage)
summary(gam_4)
##
## Call: gam(formula = wage ~ s(year, 4) + s(age, 5) + education, data = Wage)
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -119.43 -19.70 -3.33 14.17 213.48
##
## (Dispersion Parameter for gaussian family taken to be 1235.69)
##
## Null Deviance: 5222086 on 2999 degrees of freedom
## Residual Deviance: 3689770 on 2986 degrees of freedom
## AIC: 29887.75
##
## Number of Local Scoring Iterations: NA
##
## Anova for Parametric Effects
## Df Sum Sq Mean Sq F value Pr(>F)
## s(year, 4) 1 27162 27162 21.981 2.877e-06 ***
## s(age, 5) 1 195338 195338 158.081 < 2.2e-16 ***
## education 4 1069726 267432 216.423 < 2.2e-16 ***
## Residuals 2986 3689770 1236
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Anova for Nonparametric Effects
## Npar Df Npar F Pr(F)
## (Intercept)
## s(year, 4) 3 1.086 0.3537
## s(age, 5) 4 32.380 <2e-16 ***
## education
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Plot the fitted term effects of every candidate model with standard-error
## curves (se = TRUE). The device is laid out as 1 row x 3 columns; each
## model has three terms, so each call should fill one row.
## NOTE(review): the colour names carry stray blanks (likely a copy/paste
## artifact). They are kept verbatim here -- confirm and trim if unintended.
par(mfrow = c(1, 3))
plot.Gam(gam_1, se = TRUE, col = " blue ")
plot.Gam(gam_2, se = TRUE, col = " red ")
plot.Gam(gam_3, se = TRUE, col = " green ")
plot.Gam(gam_4, se = TRUE, col = " purple ")

Perbandingan Model Melalui ANOVA
## Sequential analysis-of-deviance table (F tests) across the four candidates;
## each row is compared with the model listed immediately before it.
## NOTE(review): model 2 (smooth age, linear year) and model 3 (smooth year,
## linear age) are not nested, so the model-3 row is not a valid nested F
## test. Consider comparing the nested chains gam_1 -> gam_2 -> gam_4 and
## gam_1 -> gam_3 -> gam_4 separately instead.
anova(
  gam_1,
  gam_2,
  gam_3,
  gam_4,
  test = "F"
)
## Analysis of Deviance Table
##
## Model 1: wage ~ year + age + education
## Model 2: wage ~ year + s(age, 5) + education
## Model 3: wage ~ s(year, 4) + age + education
## Model 4: wage ~ s(year, 4) + s(age, 5) + education
## Resid. Df Resid. Dev Df Deviance F Pr(>F)
## 1 2993 3854286
## 2 2989 3693842 3.9997 160445 32.463 < 2.2e-16 ***
## 3 2990 3849484 -0.9997 -155642 125.994 < 2.2e-16 ***
## 4 2986 3689770 3.9997 159714 32.315 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1