library (ISLR)
library (splines)
library(gam)
attach (Wage )

# Empat Kandidat Model GAM (Four Candidate GAM Models) ----

# Model 1: both 'year' and 'age' enter the model as plain linear terms,
# alongside 'education'.
gam_1 <- gam(
  formula = wage ~ year + age + education,
  data = Wage
)
summary(gam_1)
## 
## Call: gam(formula = wage ~ year + age + education, data = Wage)
## Deviance Residuals:
##      Min       1Q   Median       3Q      Max 
## -113.323  -19.521   -3.964   14.438  219.172 
## 
## (Dispersion Parameter for gaussian family taken to be 1287.767)
## 
##     Null Deviance: 5222086 on 2999 degrees of freedom
## Residual Deviance: 3854286 on 2993 degrees of freedom
## AIC: 30004.62 
## 
## Number of Local Scoring Iterations: 2 
## 
## Anova for Parametric Effects
##             Df  Sum Sq Mean Sq F value    Pr(>F)    
## year         1   22434   22434  17.421  3.08e-05 ***
## age          1  195045  195045 151.460 < 2.2e-16 ***
## education    4 1150320  287580 223.317 < 2.2e-16 ***
## Residuals 2993 3854286    1288                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model 2: 'year' stays linear while 'age' is wrapped in s(age, 5), i.e. a
# smooth term requested with 5 degrees of freedom.
gam_2 <- gam(
  formula = wage ~ year + s(age, 5) + education,
  data = Wage
)
summary(gam_2)
## 
## Call: gam(formula = wage ~ year + s(age, 5) + education, data = Wage)
## Deviance Residuals:
##      Min       1Q   Median       3Q      Max 
## -119.959  -19.647   -3.199   13.969  213.562 
## 
## (Dispersion Parameter for gaussian family taken to be 1235.812)
## 
##     Null Deviance: 5222086 on 2999 degrees of freedom
## Residual Deviance: 3693842 on 2989 degrees of freedom
## AIC: 29885.06 
## 
## Number of Local Scoring Iterations: NA 
## 
## Anova for Parametric Effects
##             Df  Sum Sq Mean Sq F value    Pr(>F)    
## year         1   27154   27154  21.973  2.89e-06 ***
## s(age, 5)    1  194535  194535 157.415 < 2.2e-16 ***
## education    4 1069081  267270 216.271 < 2.2e-16 ***
## Residuals 2989 3693842    1236                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Anova for Nonparametric Effects
##             Npar Df Npar F     Pr(F)    
## (Intercept)                             
## year                                    
## s(age, 5)         4  32.46 < 2.2e-16 ***
## education                               
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model 3: 'year' is wrapped in s(year, 4), i.e. a smooth term requested with
# 4 degrees of freedom, while 'age' stays linear.
gam_3 <- gam(
  formula = wage ~ s(year, 4) + age + education,
  data = Wage
)
summary(gam_3)
## 
## Call: gam(formula = wage ~ s(year, 4) + age + education, data = Wage)
## Deviance Residuals:
##      Min       1Q   Median       3Q      Max 
## -112.602  -19.551   -3.711   14.353  219.022 
## 
## (Dispersion Parameter for gaussian family taken to be 1287.453)
## 
##     Null Deviance: 5222086 on 2999 degrees of freedom
## Residual Deviance: 3849484 on 2990 degrees of freedom
## AIC: 30006.88 
## 
## Number of Local Scoring Iterations: NA 
## 
## Anova for Parametric Effects
##              Df  Sum Sq Mean Sq F value    Pr(>F)    
## s(year, 4)    1   22434   22434  17.425 3.073e-05 ***
## age           1  195943  195943 152.194 < 2.2e-16 ***
## education     4 1150553  287638 223.417 < 2.2e-16 ***
## Residuals  2990 3849484    1287                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Anova for Nonparametric Effects
##             Npar Df Npar F  Pr(F)
## (Intercept)                      
## s(year, 4)        3 1.2433 0.2923
## age                              
## education
# Model 4: both 'year' and 'age' are smooth terms, s(year, 4) and s(age, 5).
gam_4 <- gam(
  formula = wage ~ s(year, 4) + s(age, 5) + education,
  data = Wage
)
summary(gam_4)
## 
## Call: gam(formula = wage ~ s(year, 4) + s(age, 5) + education, data = Wage)
## Deviance Residuals:
##     Min      1Q  Median      3Q     Max 
## -119.43  -19.70   -3.33   14.17  213.48 
## 
## (Dispersion Parameter for gaussian family taken to be 1235.69)
## 
##     Null Deviance: 5222086 on 2999 degrees of freedom
## Residual Deviance: 3689770 on 2986 degrees of freedom
## AIC: 29887.75 
## 
## Number of Local Scoring Iterations: NA 
## 
## Anova for Parametric Effects
##              Df  Sum Sq Mean Sq F value    Pr(>F)    
## s(year, 4)    1   27162   27162  21.981 2.877e-06 ***
## s(age, 5)     1  195338  195338 158.081 < 2.2e-16 ***
## education     4 1069726  267432 216.423 < 2.2e-16 ***
## Residuals  2986 3689770    1236                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Anova for Nonparametric Effects
##             Npar Df Npar F  Pr(F)    
## (Intercept)                          
## s(year, 4)        3  1.086 0.3537    
## s(age, 5)         4 32.380 <2e-16 ***
## education                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Plot the fitted term effects of every candidate model with standard-error
# bands (se = TRUE). Each model has three terms (year, age, education), so a
# 1 x 3 layout shows one model's terms side by side on a single row.
par(mfrow = c(1, 3))

# Colour names are matched literally: a padded name such as " blue " is not a
# known colour and makes the plot call fail, so the names carry no blanks.
plot.Gam(gam_1, se = TRUE, col = "blue")

plot.Gam(gam_2, se = TRUE, col = "red")

plot.Gam(gam_3, se = TRUE, col = "green")

plot.Gam(gam_4, se = TRUE, col = "purple")

# Perbandingan Model Melalui ANOVA (Model Comparison via ANOVA) ----

# A sequential F-test is only meaningful when each model is nested in the
# next one. gam_2 (smooth age, linear year) and gam_3 (smooth year, linear
# age) are not nested, so the models are first compared along the two valid
# nested chains:
#   gam_1 -> gam_2 -> gam_4  (add smooth 'age', then smooth 'year')
#   gam_1 -> gam_3 -> gam_4  (add smooth 'year', then smooth 'age')
anova(gam_1, gam_2, gam_4, test = "F")
anova(gam_1, gam_3, gam_4, test = "F")

# Original four-model table, kept for reference. Its "Model 3" row compares
# the non-nested pair gam_2 vs gam_3 (negative Df and negative deviance
# change), so that row's F statistic should not be interpreted.
anova(gam_1, gam_2, gam_3, gam_4, test = "F")
## Analysis of Deviance Table
## 
## Model 1: wage ~ year + age + education
## Model 2: wage ~ year + s(age, 5) + education
## Model 3: wage ~ s(year, 4) + age + education
## Model 4: wage ~ s(year, 4) + s(age, 5) + education
##   Resid. Df Resid. Dev      Df Deviance       F    Pr(>F)    
## 1      2993    3854286                                       
## 2      2989    3693842  3.9997   160445  32.463 < 2.2e-16 ***
## 3      2990    3849484 -0.9997  -155642 125.994 < 2.2e-16 ***
## 4      2986    3689770  3.9997   159714  32.315 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1