######################################################
# Stat 6801 - Assignment 3
#
# Sarah Rathwell 
#
# Objective - Investigate GLM vs GAM on Fat data.
######################################################
rm(list=ls())

library(ggplot2)
library(GGally)
library(scales)
library(mgcv)
## Loading required package: nlme
## This is mgcv 1.8-12. For overview type 'help("mgcv-package")'.
library(gridExtra)
library(cowplot)
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggplot2':
## 
##     ggsave
library(faraway)
## 
## Attaching package: 'faraway'
## The following object is masked from 'package:GGally':
## 
##     happy
library(plyr)
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:faraway':
## 
##     ozone
library(car)
## 
## Attaching package: 'car'
## The following objects are masked from 'package:faraway':
## 
##     logit, vif
######################################################
cat(" Investigate GLM vs GAM on Fat data ",
    "Last modification:",date(),'\n')
##  Investigate GLM vs GAM on Fat data  Last modification: Sun Nov  6 17:55:52 2016
######################################################

# get data 

data(fat, package='faraway')
str(fat)
## 'data.frame':    252 obs. of  18 variables:
##  $ brozek : num  12.6 6.9 24.6 10.9 27.8 20.6 19 12.8 5.1 12 ...
##  $ siri   : num  12.3 6.1 25.3 10.4 28.7 20.9 19.2 12.4 4.1 11.7 ...
##  $ density: num  1.07 1.09 1.04 1.08 1.03 ...
##  $ age    : int  23 22 22 26 24 24 26 25 25 23 ...
##  $ weight : num  154 173 154 185 184 ...
##  $ height : num  67.8 72.2 66.2 72.2 71.2 ...
##  $ adipos : num  23.7 23.4 24.7 24.9 25.6 26.5 26.2 23.6 24.6 25.8 ...
##  $ free   : num  135 161 116 165 133 ...
##  $ neck   : num  36.2 38.5 34 37.4 34.4 39 36.4 37.8 38.1 42.1 ...
##  $ chest  : num  93.1 93.6 95.8 101.8 97.3 ...
##  $ abdom  : num  85.2 83 87.9 86.4 100 94.4 90.7 88.5 82.5 88.6 ...
##  $ hip    : num  94.5 98.7 99.2 101.2 101.9 ...
##  $ thigh  : num  59 58.7 59.6 60.1 63.2 66 58.4 60 62.9 63.1 ...
##  $ knee   : num  37.3 37.3 38.9 37.3 42.2 42 38.3 39.4 38.3 41.7 ...
##  $ ankle  : num  21.9 23.4 24 22.8 24 25.6 22.9 23.2 23.8 25 ...
##  $ biceps : num  32 30.5 28.8 32.4 32.2 35.7 31.9 30.5 35.9 35.6 ...
##  $ forearm: num  27.4 28.9 25.2 29.4 27.7 30.6 27.8 29 31.1 30 ...
##  $ wrist  : num  17.1 18.2 16.6 18.2 17.7 18.8 17.7 18.8 18.2 19.2 ...
head(fat)
##   brozek siri density age weight height adipos  free neck chest abdom
## 1   12.6 12.3  1.0708  23 154.25  67.75   23.7 134.9 36.2  93.1  85.2
## 2    6.9  6.1  1.0853  22 173.25  72.25   23.4 161.3 38.5  93.6  83.0
## 3   24.6 25.3  1.0414  22 154.00  66.25   24.7 116.0 34.0  95.8  87.9
## 4   10.9 10.4  1.0751  26 184.75  72.25   24.9 164.7 37.4 101.8  86.4
## 5   27.8 28.7  1.0340  24 184.25  71.25   25.6 133.1 34.4  97.3 100.0
## 6   20.6 20.9  1.0502  24 210.25  74.75   26.5 167.0 39.0 104.5  94.4
##     hip thigh knee ankle biceps forearm wrist
## 1  94.5  59.0 37.3  21.9   32.0    27.4  17.1
## 2  98.7  58.7 37.3  23.4   30.5    28.9  18.2
## 3  99.2  59.6 38.9  24.0   28.8    25.2  16.6
## 4 101.2  60.1 37.3  22.8   32.4    29.4  18.2
## 5 101.9  63.2 42.2  24.0   32.2    27.7  17.7
## 6 107.8  66.0 42.0  25.6   35.7    30.6  18.8
fat.test <- fat[ -c(1, 3, 8)]
head(fat.test)
##   siri age weight height adipos neck chest abdom   hip thigh knee ankle
## 1 12.3  23 154.25  67.75   23.7 36.2  93.1  85.2  94.5  59.0 37.3  21.9
## 2  6.1  22 173.25  72.25   23.4 38.5  93.6  83.0  98.7  58.7 37.3  23.4
## 3 25.3  22 154.00  66.25   24.7 34.0  95.8  87.9  99.2  59.6 38.9  24.0
## 4 10.4  26 184.75  72.25   24.9 37.4 101.8  86.4 101.2  60.1 37.3  22.8
## 5 28.7  24 184.25  71.25   25.6 34.4  97.3 100.0 101.9  63.2 42.2  24.0
## 6 20.9  24 210.25  74.75   26.5 39.0 104.5  94.4 107.8  66.0 42.0  25.6
##   biceps forearm wrist
## 1   32.0    27.4  17.1
## 2   30.5    28.9  18.2
## 3   28.8    25.2  16.6
## 4   32.4    29.4  18.2
## 5   32.2    27.7  17.7
## 6   35.7    30.6  18.8
######################################################
# 1a
######################################################

# pairs plots
# note higher correlation with adipose,abdomen,chest and very low with height and smaller measures 
# wrist etc.
# high correlation between some measures (e.g. adipose/weight)

ggpairs(cbind(fat.test[2:15],fat.test[1]), axisLabels = "none", 
        title = "Pairs Plots of Fat Data")

######################################################
# 1b
######################################################

# fit linear models

fat.lm <- lm(siri ~ age+weight+height+adipos+neck+chest+abdom+hip+thigh+knee+ankle+biceps+
               forearm+wrist, data = fat)
summary(fat.lm)
## 
## Call:
## lm(formula = siri ~ age + weight + height + adipos + neck + chest + 
##     abdom + hip + thigh + knee + ankle + biceps + forearm + wrist, 
##     data = fat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.1613  -2.8834  -0.0934   3.2166   9.9463 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -18.06781   17.39032  -1.039  0.29988    
## age           0.06219    0.03242   1.918  0.05626 .  
## weight       -0.08961    0.05385  -1.664  0.09743 .  
## height       -0.05600    0.11167  -0.502  0.61647    
## adipos        0.07189    0.30007   0.240  0.81087    
## neck         -0.47935    0.23577  -2.033  0.04316 *  
## chest        -0.03242    0.10557  -0.307  0.75902    
## abdom         0.94717    0.09225  10.267  < 2e-16 ***
## hip          -0.21302    0.14798  -1.440  0.15132    
## thigh         0.23078    0.14634   1.577  0.11614    
## knee          0.02787    0.24809   0.112  0.91065    
## ankle         0.16654    0.22408   0.743  0.45809    
## biceps        0.17655    0.17276   1.022  0.30785    
## forearm       0.45106    0.19957   2.260  0.02472 *  
## wrist        -1.62384    0.53618  -3.029  0.00273 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.314 on 237 degrees of freedom
## Multiple R-squared:  0.7491, Adjusted R-squared:  0.7343 
## F-statistic: 50.55 on 14 and 237 DF,  p-value: < 2.2e-16
fat.lmcooks <- cooks.distance(fat.lm)
fat.cooksdf <- mutate(as.data.frame(fat.lmcooks), outlier = 
                        ifelse(fat.lmcooks>=0.05,rownames(as.data.frame(fat.lmcooks)[]), 
                               NA))


ggplot(fat.cooksdf, aes(1:length(fat.lmcooks), fat.lmcooks))+ 
  geom_point() +
  labs(x = "Observation", y = "Cooks Distance") +
  ggtitle("Cooks Distance for Fat LM - iteration 1") +
  geom_text(aes(label = outlier), na.rm = T, hjust = -0.3, vjust = -0.5)

fat2 <- fat[-c(39, 42, 86),]  

fat.lm2 <- lm(siri ~ age+weight+height+adipos+neck+chest+abdom+hip+thigh+knee+ankle+biceps+
               forearm+wrist, data = fat2)
summary(fat.lm2)
## 
## Call:
## lm(formula = siri ~ age + weight + height + adipos + neck + chest + 
##     abdom + hip + thigh + knee + ankle + biceps + forearm + wrist, 
##     data = fat2)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.394  -3.045  -0.253   2.930   9.742 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -28.845679  40.081420  -0.720  0.47244    
## age           0.063689   0.032775   1.943  0.05319 .  
## weight       -0.100794   0.111730  -0.902  0.36792    
## height        0.251036   0.549963   0.456  0.64848    
## adipos        0.725440   0.781725   0.928  0.35436    
## neck         -0.386519   0.234307  -1.650  0.10036    
## chest        -0.133262   0.110332  -1.208  0.22833    
## abdom         0.888759   0.093109   9.545  < 2e-16 ***
## hip          -0.190680   0.148039  -1.288  0.19900    
## thigh         0.189429   0.146792   1.290  0.19816    
## knee          0.007168   0.246949   0.029  0.97687    
## ankle        -0.079703   0.270812  -0.294  0.76878    
## biceps        0.150630   0.170839   0.882  0.37884    
## forearm       0.262843   0.207555   1.266  0.20664    
## wrist        -1.650352   0.541380  -3.048  0.00256 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.245 on 234 degrees of freedom
## Multiple R-squared:  0.7529, Adjusted R-squared:  0.7382 
## F-statistic: 50.94 on 14 and 234 DF,  p-value: < 2.2e-16
fat.lmcooks2 <- cooks.distance(fat.lm2)
fat.cooksdf2 <- mutate(as.data.frame(fat.lmcooks2), outlier = 
                         ifelse(fat.lmcooks2>=0.05,rownames(as.data.frame(fat.lmcooks2)[]), 
                                NA))


ggplot(fat.cooksdf2, aes(1:length(fat.lmcooks2), fat.lmcooks2))+ 
  geom_point() +
  labs(x = "Observation", y = "Cooks Distance") +
  ggtitle("Cooks Distance for Fat LM - iteration 2") +
  geom_text(aes(label = outlier), na.rm = T, hjust = -0.3, vjust = -0.5)

fat3 <- fat2[-218,]

fat.lm3 <- lm(siri ~ age+weight+height+adipos+neck+chest+abdom+hip+thigh+knee+ankle+biceps+
                forearm+wrist, data = fat3)
summary(fat.lm3)
## 
## Call:
## lm(formula = siri ~ age + weight + height + adipos + neck + chest + 
##     abdom + hip + thigh + knee + ankle + biceps + forearm + wrist, 
##     data = fat3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2544  -3.0252  -0.1335   2.8534   9.5811 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -85.74952   46.77974  -1.833  0.06807 .  
## age           0.06222    0.03248   1.915  0.05667 .  
## weight       -0.26219    0.13104  -2.001  0.04657 *  
## height        0.96341    0.62668   1.537  0.12557    
## adipos        1.61581    0.86580   1.866  0.06326 .  
## neck         -0.33543    0.23324  -1.438  0.15174    
## chest        -0.11448    0.10964  -1.044  0.29746    
## abdom         0.90862    0.09267   9.805  < 2e-16 ***
## hip          -0.19145    0.14670  -1.305  0.19315    
## thigh         0.23140    0.14660   1.578  0.11582    
## knee          0.02471    0.24483   0.101  0.91969    
## ankle        -0.04727    0.26873  -0.176  0.86052    
## biceps        0.15675    0.16931   0.926  0.35552    
## forearm       0.29691    0.20621   1.440  0.15124    
## wrist        -1.51537    0.53967  -2.808  0.00541 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.207 on 233 degrees of freedom
## Multiple R-squared:  0.7578, Adjusted R-squared:  0.7433 
## F-statistic: 52.08 on 14 and 233 DF,  p-value: < 2.2e-16
fat.lmcooks3 <- cooks.distance(fat.lm3)
fat.cooksdf3 <- mutate(as.data.frame(fat.lmcooks3), outlier = 
                         ifelse(fat.lmcooks3>=0.05,rownames(as.data.frame(fat.lmcooks3)[]), 
                                NA))

ggplot(fat.cooksdf3, aes(1:length(fat.lmcooks3), fat.lmcooks3))+ 
  geom_point() +
  labs(x = "Observation", y = "Cooks Distance") +
  ggtitle("Cooks Distance for Fat LM - iteration 3") 

gg1 <- ggplot(fat.cooksdf, aes(1:length(fat.lmcooks), fat.lmcooks))+ 
          geom_point() +
          labs(x = "Observation", y = "Cooks Distance") +
          ggtitle("Iteration 1") +
          geom_text(aes(label = outlier), na.rm = T, hjust = -0.3, vjust = -0.5)
gg2 <- ggplot(fat.cooksdf2, aes(1:length(fat.lmcooks2), fat.lmcooks2))+ 
         geom_point() +
         labs(x = "Observation", y = "Cooks Distance") +
         ggtitle("Iteration 2") +
         geom_text(aes(label = outlier), na.rm = T, hjust = -0.3, vjust = -0.5)
gg3 <- ggplot(fat.cooksdf3, aes(1:length(fat.lmcooks3), fat.lmcooks3))+ 
         geom_point() +
         labs(x = "Observation", y = "Cooks Distance") +
         ggtitle("Iteration 3") 

p <- plot_grid(gg1,gg2,gg3, ncol = 3, nrow = 1)
title2 <- ggdraw() + draw_label("Cooks Distance for Linear Fat Models", fontface='bold')
plot_grid(title2, p, ncol=1, rel_heights=c(0.1, 1))

#influential points:
# 39 -- very high weight (and related measures adipose/neck/chest/abdomen/hip/thigh/biceps)
# 42 -- very low height with higher bodyfat
# 86 -- possible high age? 
# 218 -- very low bodyfat measure

fat.test[c(39,42,86,218),]
##     siri age weight height adipos neck chest abdom   hip thigh knee ankle
## 39  35.2  46 363.15  72.25   48.9 51.2 136.2 148.1 147.7  87.3 49.1  29.6
## 42  32.9  44 205.00  29.50   29.9 36.6 106.0 104.3 115.5  70.6 42.5  23.7
## 86  26.6  67 167.00  67.50   26.0 36.5  98.9  89.7  96.2  54.7 37.8  33.7
## 218  7.5  51 154.50  70.00   22.2 36.9  93.3  81.5  94.4  54.7 39.0  22.6
##     biceps forearm wrist
## 39    45.0    29.0  21.4
## 42    33.6    28.7  17.4
## 86    32.4    27.7  18.2
## 218   27.5    25.9  18.6
# showing significant predictors abdomen (1), wrist (2), and weight (3)
# R^2 0.7578
Anova(fat.lm3, type = 3)
## Anova Table (Type III tests)
## 
## Response: siri
##             Sum Sq  Df F value    Pr(>F)    
## (Intercept)   59.5   1  3.3601  0.068071 .  
## age           64.9   1  3.6685  0.056673 .  
## weight        70.9   1  4.0032  0.046575 *  
## height        41.8   1  2.3633  0.125572    
## adipos        61.6   1  3.4829  0.063261 .  
## neck          36.6   1  2.0682  0.151744    
## chest         19.3   1  1.0904  0.297464    
## abdom       1701.6   1 96.1414 < 2.2e-16 ***
## hip           30.1   1  1.7033  0.193147    
## thigh         44.1   1  2.4915  0.115817    
## knee           0.2   1  0.0102  0.919685    
## ankle          0.5   1  0.0309  0.860525    
## biceps        15.2   1  0.8571  0.355517    
## forearm       36.7   1  2.0733  0.151239    
## wrist        139.5   1  7.8847  0.005408 ** 
## Residuals   4123.8 233                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(fat.lm3)
## 
## Call:
## lm(formula = siri ~ age + weight + height + adipos + neck + chest + 
##     abdom + hip + thigh + knee + ankle + biceps + forearm + wrist, 
##     data = fat3)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2544  -3.0252  -0.1335   2.8534   9.5811 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -85.74952   46.77974  -1.833  0.06807 .  
## age           0.06222    0.03248   1.915  0.05667 .  
## weight       -0.26219    0.13104  -2.001  0.04657 *  
## height        0.96341    0.62668   1.537  0.12557    
## adipos        1.61581    0.86580   1.866  0.06326 .  
## neck         -0.33543    0.23324  -1.438  0.15174    
## chest        -0.11448    0.10964  -1.044  0.29746    
## abdom         0.90862    0.09267   9.805  < 2e-16 ***
## hip          -0.19145    0.14670  -1.305  0.19315    
## thigh         0.23140    0.14660   1.578  0.11582    
## knee          0.02471    0.24483   0.101  0.91969    
## ankle        -0.04727    0.26873  -0.176  0.86052    
## biceps        0.15675    0.16931   0.926  0.35552    
## forearm       0.29691    0.20621   1.440  0.15124    
## wrist        -1.51537    0.53967  -2.808  0.00541 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.207 on 233 degrees of freedom
## Multiple R-squared:  0.7578, Adjusted R-squared:  0.7433 
## F-statistic: 52.08 on 14 and 233 DF,  p-value: < 2.2e-16
######################################################
# 1c
######################################################

# fitting smoothed GAMs

fat.gam <- gam(siri ~ s(age)+s(weight)+s(height)+s(adipos)+s(neck)+s(chest)+s(abdom)+s(hip)+
                s(thigh)+s(knee)+s(ankle)+s(biceps)+s(forearm)+s(wrist), data = fat)
summary(fat.gam)
## 
## Family: gaussian 
## Link function: identity 
## 
## Formula:
## siri ~ s(age) + s(weight) + s(height) + s(adipos) + s(neck) + 
##     s(chest) + s(abdom) + s(hip) + s(thigh) + s(knee) + s(ankle) + 
##     s(biceps) + s(forearm) + s(wrist)
## 
## Parametric coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  19.1508     0.2431   78.77   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##              edf Ref.df      F  p-value    
## s(age)     5.471  6.620  2.168 0.041759 *  
## s(weight)  1.000  1.000  0.214 0.644386    
## s(height)  1.000  1.000  0.129 0.719505    
## s(adipos)  2.454  3.182  0.907 0.491477    
## s(neck)    1.683  2.122  2.509 0.103554    
## s(chest)   1.000  1.000  0.498 0.481020    
## s(abdom)   6.573  7.439 15.317  < 2e-16 ***
## s(hip)     6.913  7.701  3.106 0.003829 ** 
## s(thigh)   1.000  1.000  0.945 0.332109    
## s(knee)    1.460  1.793  0.363 0.577900    
## s(ankle)   2.850  3.513  0.855 0.603879    
## s(biceps)  5.011  6.081  2.174 0.047331 *  
## s(forearm) 1.000  1.000  1.893 0.170292    
## s(wrist)   1.776  2.228  7.704 0.000362 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Rank: 125/127
## R-sq.(adj) =  0.787   Deviance explained = 82.1%
## GCV =  17.72  Scale est. = 14.894    n = 252
# predictors - abdomen, wrist, hip, age, biceps

fat.gamcooks <- cooks.distance(fat.gam)
fat.gcooksdf <- mutate(as.data.frame(fat.gamcooks), outlier = ifelse(fat.gamcooks>=0.05,rownames(as.data.frame(fat.gamcooks)[]), NA))


ggplot(fat.gcooksdf, aes(1:length(fat.gamcooks), fat.gamcooks))+ 
  geom_point() +
  labs(x = "Observation", y = "Cooks Distance") +
  ggtitle("Cooks Distance for Additive Fat Model") +
  geom_text(aes(label = outlier), na.rm = T, hjust = -0.3, vjust = -0.5)

fat.test[c(39,42,182,192,216),]
##     siri age weight height adipos neck chest abdom   hip thigh knee ankle
## 39  35.2  46 363.15  72.25   48.9 51.2 136.2 148.1 147.7  87.3 49.1  29.6
## 42  32.9  44 205.00  29.50   29.9 36.6 106.0 104.3 115.5  70.6 42.5  23.7
## 182  0.0  40 118.50  68.00   18.1 33.8  79.3  69.4  85.0  47.2 33.5  20.2
## 192 38.1  42 244.25  76.00   29.8 41.8 115.2 113.7 112.4  68.5 45.0  25.5
## 216 47.5  51 219.00  64.00   37.6 41.2 119.8 122.1 112.8  62.5 36.9  23.6
##     biceps forearm wrist
## 39    45.0    29.0  21.4
## 42    33.6    28.7  17.4
## 182   27.7    24.6  16.5
## 192   37.1    31.2  19.9
## 216   34.7    29.1  18.4
# 39 and 42 were identified previously
# 182 seems to have a missing BF measurement(0) and is low weight and low measures
# 192 has large arm measurements 
# 216 has a very high BF measurement (nearly 50%)

######################################################
# 1d
######################################################

par(mfrow=c(1,5),oma = c(0, 0, 2, 0))

plot(fat.gam, residuals = T, select = 7)
plot(fat.gam, residuals = T, select = 14)
plot(fat.gam, residuals = T, select = 8)
plot(fat.gam, residuals = T, select = 12)
plot(fat.gam, residuals = T, select = 1)

mtext('Predictor Transformations from Additive Fat Model', outer = T, cex = 1.5)

######################################################
# 1e
######################################################

summary(fat.gam)$dev.expl
## [1] 0.8205407
# R^2 - 0.821

fat.gam2 <- gam(siri ~ s(abdom), data = fat)
fat.gam3 <- gam(siri ~ abdom, data = fat)
 
anova(fat.gam3, fat.gam2, test="F")
## Analysis of Deviance Table
## 
## Model 1: siri ~ abdom
## Model 2: siri ~ s(abdom)
##   Resid. Df Resid. Dev     Df Deviance      F    Pr(>F)    
## 1    250.00     5947.5                                     
## 2    245.87     5414.0 4.1348   533.48 5.8593 0.0001326 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# there seems to be a significant difference in the models implying a change in trend

fat.gam3 <- gam(siri ~ s(wrist), data = fat)
fat.gam4 <- gam(siri ~ wrist, data = fat)

anova(fat.gam4, fat.gam3, test="F")
## Analysis of Deviance Table
## 
## Model 1: siri ~ wrist
## Model 2: siri ~ s(wrist)
##   Resid. Df Resid. Dev         Df   Deviance      F    Pr(>F)    
## 1       250      15468                                           
## 2       250      15468 1.0302e-09 1.0397e-07 1.6313 1.047e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
fat.gam5 <- gam(siri ~ s(hip), data = fat)
fat.gam6 <- gam(siri ~ hip, data = fat)

anova(fat.gam6, fat.gam5, test="F")
## Analysis of Deviance Table
## 
## Model 1: siri ~ hip
## Model 2: siri ~ s(hip)
##   Resid. Df Resid. Dev     Df Deviance      F   Pr(>F)   
## 1    250.00      10708                                   
## 2    248.53      10236 1.4715   471.95 7.7875 0.001819 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
######################################################
# 1f
######################################################

fat.pred <- predict.gam(fat.gam, type = "terms")
abdom.pred <- fat.pred[,7]

slope <- (max(abdom.pred) - min(abdom.pred)) / (max(fat$abdom) - min(fat$abdom))
inter <- abdom.pred[(252/2)] - slope * fat$abdom[(252/2)]

par(oma=c(0,0,2,0))
layout(matrix(c(1,2), 1, 2, byrow = TRUE), 
       widths=c(2,1), heights=c(1,1))

plot(fat.gam, residuals = F, select = 7,col = 'blue', sub = "Full Range")
abline(inter, slope, lwd = 2, lty = 3, col = 'blue')
legend("topleft", legend = c("Transformed", "Linear"), lty = c(1,3), lwd = c(1,2), bty = "n")


plot(fat.gam, residuals = F, select = 7,col = 'blue', sub = "Partial Range",
     ylim = c(-20,0), xlim = c(80, 90))
## Warning in rug(as.numeric(P$raw), ...): some values will be clipped
abline(inter, slope, lwd = 2, lty = 3, col = 'blue')
legend("topleft", legend = c("Transformed", "Linear"), lty = c(1,3), lwd = c(1,2), bty = "n")

mtext('Abdom Transformations from Additive Fat Model: Full Range vs. Partial', 
      outer = T, cex = 1.5)

par(mfrow=c(1,2),oma=c(0,0,2,0))

plot(residuals(fat.gam)~predict.gam(fat.gam), xlab = "Predicted", ylab = "Residuals")
abline(h=0)
qqnorm(residuals(fat.gam), main="")
qqline(residuals(fat.gam))

mtext("Plotted Residuals for Additive Fat Model", outer = T, cex = 1.5)