Using Adjustment in Multivariate Analysis

Adjustment is the process of adding addition of a third variable to a regression of a pair of variables to understand the impact on the models. If the first two are highly correlated, another variable may reduce the impact of that correlation by a change in the coefficients.

For this exercise we use the Student Performance dataset from the UCI Machine Learning Dataset Repository (https://archive.ics.uci.edu/ml/datasets/Student+Performance)

glimpse(s)

Observations: 395
Variables: 33
$ school     <chr> "GP", "GP", "GP", "GP", "GP", "GP", "GP", "GP", "GP", "GP", "GP...
$ sex        <chr> "F", "F", "F", "F", "F", "M", "M", "F", "M", "M", "F", "F", "M"...
$ age        <dbl> 18, 17, 15, 15, 16, 16, 16, 17, 15, 15, 15, 15, 15, 15, 15, 16,...
$ address    <chr> "U", "U", "U", "U", "U", "U", "U", "U", "U", "U", "U", "U", "U"...
$ famsize    <chr> "GT3", "GT3", "LE3", "GT3", "GT3", "LE3", "LE3", "GT3", "LE3", ...
$ Pstatus    <chr> "A", "T", "T", "T", "T", "T", "T", "A", "A", "T", "T", "T", "T"...
$ Medu       <dbl> 4, 1, 1, 4, 3, 4, 2, 4, 3, 3, 4, 2, 4, 4, 2, 4, 4, 3, 3, 4, 4, ...
$ Fedu       <dbl> 4, 1, 1, 2, 3, 3, 2, 4, 2, 4, 4, 1, 4, 3, 2, 4, 4, 3, 2, 3, 3, ...
$ Mjob       <chr> "at_home", "at_home", "at_home", "health", "other", "services",...
$ Fjob       <chr> "teacher", "other", "other", "services", "other", "other", "oth...
$ reason     <chr> "course", "course", "other", "home", "home", "reputation", "hom...
$ guardian   <chr> "mother", "father", "mother", "mother", "father", "mother", "mo...
$ traveltime <dbl> 2, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 3, 1, 2, 1, 1, 1, 3, 1, 1, 1, ...
$ studytime  <dbl> 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 3, 1, 2, 3, 1, 3, 2, 1, 1, 2, ...
$ failures   <dbl> 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 0, 0, ...
$ schoolsup  <chr> "yes", "no", "yes", "no", "no", "no", "no", "yes", "no", "no", ...
$ famsup     <chr> "no", "yes", "no", "yes", "yes", "yes", "no", "yes", "yes", "ye...
$ paid       <chr> "no", "no", "yes", "yes", "yes", "yes", "no", "no", "yes", "yes...
$ activities <chr> "no", "no", "no", "yes", "no", "yes", "no", "no", "no", "yes", ...
$ nursery    <chr> "yes", "no", "yes", "yes", "yes", "yes", "yes", "yes", "yes", "...
$ higher     <chr> "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", "yes", ...
$ internet   <chr> "no", "yes", "yes", "yes", "no", "yes", "yes", "no", "yes", "ye...
$ romantic   <chr> "no", "no", "no", "yes", "no", "no", "no", "no", "no", "no", "n...
$ famrel     <dbl> 4, 5, 4, 3, 4, 5, 4, 4, 4, 5, 3, 5, 4, 5, 4, 4, 3, 5, 5, 3, 4, ...
$ freetime   <dbl> 3, 3, 3, 2, 3, 4, 4, 1, 2, 5, 3, 2, 3, 4, 5, 4, 2, 3, 5, 1, 4, ...
$ goout      <dbl> 4, 3, 2, 2, 2, 2, 4, 4, 2, 1, 3, 2, 3, 3, 2, 4, 3, 2, 5, 3, 1, ...
$ Dalc       <dbl> 1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, ...
$ Walc       <dbl> 1, 1, 3, 1, 2, 2, 1, 1, 1, 1, 2, 1, 3, 2, 1, 2, 2, 1, 4, 3, 1, ...
$ health     <dbl> 3, 3, 3, 5, 5, 5, 3, 1, 1, 5, 2, 4, 5, 3, 3, 2, 2, 4, 5, 5, 1, ...
$ absences   <dbl> 6, 4, 10, 2, 4, 10, 0, 6, 0, 0, 0, 4, 2, 2, 0, 4, 6, 4, 16, 4, ...
$ G1         <dbl> 5, 5, 7, 15, 6, 15, 12, 6, 16, 14, 10, 10, 14, 10, 14, 14, 13, ...
$ G2         <dbl> 6, 5, 8, 14, 10, 15, 12, 5, 18, 15, 8, 12, 14, 10, 16, 14, 14, ...
$ G3         <dbl> 6, 6, 10, 15, 10, 15, 11, 6, 19, 15, 9, 12, 14, 11, 16, 14, 14,...

s <- select(s, -c("G2","G1")) # remove other grades
fit <- lm(G3~. ,data = s) # fit the model
summary(fit)


Call:
lm(formula = G3 ~ ., data = s)

Residuals:
     Min       1Q   Median       3Q      Max 
-13.0442  -1.9028   0.4289   2.7570   8.8874 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)      14.07769    4.48089   3.142  0.00182 ** 
schoolMS          0.72555    0.79157   0.917  0.35997    
sexM              1.26236    0.50003   2.525  0.01202 *  
age              -0.37516    0.21721  -1.727  0.08501 .  
addressU          0.55135    0.58412   0.944  0.34586    
famsizeLE3        0.70281    0.48824   1.439  0.15090    
PstatusT         -0.32010    0.72390  -0.442  0.65862    
Medu              0.45687    0.32317   1.414  0.15833    
Fedu             -0.10458    0.27762  -0.377  0.70663    
Mjobhealth        0.99808    1.11819   0.893  0.37268    
Mjobother        -0.35900    0.71316  -0.503  0.61500    
Mjobservices      0.65832    0.79784   0.825  0.40985    
Mjobteacher      -1.24149    1.03821  -1.196  0.23257    
Fjobhealth        0.34767    1.43796   0.242  0.80909    
Fjobother        -0.61967    1.02304  -0.606  0.54509    
Fjobservices     -0.46577    1.05697  -0.441  0.65972    
Fjobteacher       1.32619    1.29654   1.023  0.30707    
reasonhome        0.07851    0.55380   0.142  0.88735    
reasonother       0.77707    0.81757   0.950  0.34252    
reasonreputation  0.61304    0.57657   1.063  0.28839    
guardianmother    0.06978    0.54560   0.128  0.89830    
guardianother     0.75010    0.99946   0.751  0.45345    
traveltime       -0.24027    0.33897  -0.709  0.47889    
studytime         0.54952    0.28765   1.910  0.05690 .  
failures         -1.72398    0.33291  -5.179 3.75e-07 ***
schoolsupyes     -1.35058    0.66693  -2.025  0.04361 *  
famsupyes        -0.86182    0.47869  -1.800  0.07265 .  
paidyes           0.33975    0.47775   0.711  0.47746    
activitiesyes    -0.32953    0.44494  -0.741  0.45942    
nurseryyes       -0.17730    0.54931  -0.323  0.74706    
higheryes         1.37045    1.07780   1.272  0.20437    
internetyes       0.49813    0.61956   0.804  0.42192    
romanticyes      -1.09449    0.46925  -2.332  0.02024 *  
famrel            0.23155    0.24593   0.942  0.34706    
freetime          0.30242    0.23735   1.274  0.20345    
goout            -0.59367    0.22451  -2.644  0.00855 ** 
Dalc             -0.27223    0.33087  -0.823  0.41120    
Walc              0.26339    0.24801   1.062  0.28896    
health           -0.17678    0.16101  -1.098  0.27297    
absences          0.05629    0.02897   1.943  0.05277 .  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 4.108 on 355 degrees of freedom
Multiple R-squared:  0.2756,    Adjusted R-squared:  0.196 
F-statistic: 3.463 on 39 and 355 DF,  p-value: 3.317e-10

x <- summary(fit)

cor(s$failures, as.numeric(as.factor(s$romantic))) # romantic relationships have some but less

[1] 0.09313704

What is the Relationsh of Grade and Period?

Set up a model that looks at the relationship between Period 2 grade and Period 1 grade. These are correlated.

# now fit the model for romantic relationships 
cor(s$G2, s$G1)

[1] 0.8521181

summary(lm(G2 ~ G1 ,data=s))


Call:
lm(formula = G2 ~ G1, data = s)

Residuals:
     Min       1Q   Median       3Q      Max 
-11.7676  -0.8363   0.1637   1.1637   4.1981 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.17957    0.34110   0.526    0.599    
G1           0.96567    0.02992  32.278   <2e-16 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.971 on 393 degrees of freedom
Multiple R-squared:  0.7261,    Adjusted R-squared:  0.7254 
F-statistic:  1042 on 1 and 393 DF,  p-value: < 2.2e-16

Adjustment Effect

It would appear from the summary above, that they indeed have a significant relationship. However, if we add another variable to adjust this first one, we can see that actually, failures in the subject already, have a much more significant relationship and adjust the co-efficient of the relationships variable from 0.96 to 0.94. This lessens the impact of this variable in the model.

# now fit the model including no of failures in the subject
summary(lm(G2 ~ G1 + failures, data = s))


Call:
lm(formula = G2 ~ G1 + failures, data = s)

Residuals:
     Min       1Q   Median       3Q      Max 
-11.2237  -0.8444   0.0966   1.0966   4.3633 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.55233    0.37999   1.454   0.1469    
G1           0.94101    0.03185  29.549   <2e-16 ***
failures    -0.31034    0.14214  -2.183   0.0296 *  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 1.962 on 392 degrees of freedom
Multiple R-squared:  0.7294,    Adjusted R-squared:  0.728 
F-statistic: 528.3 on 2 and 392 DF,  p-value: < 2.2e-16

LS0tDQp0aXRsZTogIkFkanVzdG1lbnQiDQpvdXRwdXQ6DQogIGh0bWxfbm90ZWJvb2s6IGRlZmF1bHQNCi0tLQ0KIyBVc2luZyBBZGp1c3RtZW50IGluIE11bHRpdmFyaWF0ZSBBbmFseXNpcw0KDQpBZGp1c3RtZW50IGlzIHRoZSBwcm9jZXNzIG9mIGFkZGluZyBhZGRpdGlvbiBvZiBhIHRoaXJkIHZhcmlhYmxlIHRvIGEgcmVncmVzc2lvbiBvZiBhIHBhaXIgb2YgdmFyaWFibGVzIHRvIHVuZGVyc3RhbmQgdGhlIGltcGFjdCBvbiB0aGUgbW9kZWxzLiBJZiB0aGUgZmlyc3QgdHdvIGFyZSBoaWdobHkgY29ycmVsYXRlZCwgYW5vdGhlciB2YXJpYWJsZSBtYXkgcmVkdWNlIHRoZSBpbXBhY3Qgb2YgdGhhdCBjb3JyZWxhdGlvbiBieSBhIGNoYW5nZSBpbiB0aGUgY29lZmZpY2llbnRzLg0KDQpGb3IgdGhpcyBleGVyY2lzZSB3ZSB1c2UgdGhlIFN0dWRlbnQgUGVyZm9ybWFuY2UgZGF0YXNldCBmcm9tIHRoZSBVQ0kgTWFjaGluZSBMZWFybmluZyBEYXRhc2V0IFJlcG9zaXRvcnkgKGh0dHBzOi8vYXJjaGl2ZS5pY3MudWNpLmVkdS9tbC9kYXRhc2V0cy9TdHVkZW50K1BlcmZvcm1hbmNlKQ0KDQpgYGB7cn0NCmxpYnJhcnkoZHBseXIpOyBsaWJyYXJ5KGNhcmV0KTsgbGlicmFyeShyZWFkcik7IGxpYnJhcnkocmVhZHhsKQ0KIyBEYXRhIFNldCBmcm9tIHRoZSBVQ0kgTWFjaGluZSBMZWFybmluZyBEYXRhc2V0IFJlcG9zaXRvcnkNCiMgaHR0cHM6Ly9hcmNoaXZlLmljcy51Y2kuZWR1L21sL2RhdGFzZXRzL1N0dWRlbnQrUGVyZm9ybWFuY2UNCnMgPC0gcmVhZF9leGNlbCgic3R1ZGVudC1tYXQueGxzeCIpDQpnbGltcHNlKHMpDQojIEczIGlzIHRoZSBmaW5hbCBncmFkZSB0aGF0IHdpbGwgYmUgdXNlZA0KIA0KYGBgDQpgYGB7cn0NCiMgcyA8LSBzZWxlY3QocywgLWMoIkcyIiwiRzEiKSkgIyByZW1vdmUgb3RoZXIgZ3JhZGVzDQpmaXQgPC0gbG0oRzN+LiAsZGF0YSA9IHMpICMgZml0IHRoZSBtb2RlbA0Kc3VtbWFyeShmaXQpDQp4IDwtIHN1bW1hcnkoZml0KQ0KYGBgDQoNCg0KYGBge3J9DQojIEdldCB0aGUgY29lZmZpY2llbnRzIGFuZCB2YXJpYWJsZSBpbXBvcnRhbmNlDQpjb2VmIDwtIGFzLmRhdGEuZnJhbWUoZml0JGNvZWZmaWNpZW50cykNCmNvZWYkdmFyIDwtIHJvd25hbWVzKGNvZWYpDQpuYW1lcyhjb2VmKSA8LSBjKCJjb2VmIiwidmFyIikNCmNvZWYgPC0gYXJyYW5nZShjb2VmLCBkZXNjKGNvZWYpKQ0KdmFycyA8LSBhcy5kYXRhLmZyYW1lKHZhckltcChmaXQpKQ0KdmFycyR2YXIgPC0gcm93bmFtZXModmFycykgDQpgYGANCiMjIyBXaGF0IGlzIHRoZSBSZWxhdGlvbnNoIG9mIEdyYWRlIGFuZCBQZXJpb2Q/DQpTZXQgdXAgYSBtb2RlbCB0aGF0IGxvb2tzIGF0IHRoZSByZWxhdGlvbnNoaXAgYmV0d2VlbiBQZXJpb2QgMiBncmFkZSBhbmQgUGVyaW9kIDEgZ3JhZGUuIFRoZXNlIGFyZSBjb3JyZWxhdGVkLg0KDQpgYGB7cn0NCiMgbm93IGZpdCB0aGUgbW9kZWwgZm9yIHJvbWFudGljIHJlbGF0aW9uc2hpcHMgDQojIGNvcihzJEcyLCBzJEcxKQ0KIyAwLjg1MjExODENCnN1bW1hcnkobG0oRzIgfiBHMSAsIGRhdGE9cykpDQpgYGANCg0KIyMjIEFkanVzdG1lbnQgRWZmZWN0DQoNCkl0IHdvdWxkIGFwcGVhciBmcm9tIHRoZSBzdW1tYXJ5IGFib3ZlLCB0aGF0IHRoZXkgaW5kZWVkIGhhdmUgYSBzaWduaWZpY2FudCByZWxhdGlvbnNoaXAuIEhvd2V2ZXIsIGlmIHdlIGFkZCBhbm90aGVyIHZhcmlhYmxlIHRvIGFkanVzdCB0aGlzIGZpcnN0IG9uZSwgd2UgY2FuIHNlZSB0aGF0IGFjdHVhbGx5LCBmYWlsdXJlcyBpbiB0aGUgc3ViamVjdCBhbHJlYWR5LCBoYXZlIGEgbXVjaCBtb3JlIHNpZ25pZmljYW50IHJlbGF0aW9uc2hpcCBhbmQgYWRqdXN0IHRoZSBjby1lZmZpY2llbnQgb2YgdGhlIHJlbGF0aW9uc2hpcHMgdmFyaWFibGUgZnJvbSAwLjk2IHRvIDAuOTQuIFRoaXMgbGVzc2VucyB0aGUgaW1wYWN0IG9mIHRoaXMgdmFyaWFibGUgaW4gdGhlIG1vZGVsLiAgIA0KDQpgYGB7cn0NCiMgbm93IGZpdCB0aGUgbW9kZWwgaW5jbHVkaW5nIG5vIG9mIGZhaWx1cmVzIGluIHRoZSBzdWJqZWN0DQoNCnN1bW1hcnkobG0oRzIgfiBHMSArIGZhaWx1cmVzLCBkYXRhID0gcykpIA0KYGBgDQoNCg==