library(dplyr)

Section-1 Creating Our First Model

§ 1.1 What is the R2 of the model?

# Fit the full linear model of Temp on all eight predictors using the
# training data, then display the fit summary.
model1 <- lm(
  Temp ~ MEI + CO2 + CH4 + N2O + CFC11 + CFC12 + TSI + Aerosols,
  data = trainingset
)
summary(model1)

Call:
lm(formula = Temp ~ MEI + CO2 + CH4 + N2O + CFC11 + CFC12 + TSI + 
    Aerosols, data = trainingset)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.25888 -0.05913 -0.00082  0.05649  0.32433 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.246e+02  1.989e+01  -6.265 1.43e-09 ***
MEI          6.421e-02  6.470e-03   9.923  < 2e-16 ***
CO2          6.457e-03  2.285e-03   2.826  0.00505 ** 
CH4          1.240e-04  5.158e-04   0.240  0.81015    
N2O         -1.653e-02  8.565e-03  -1.930  0.05467 .  
CFC11       -6.631e-03  1.626e-03  -4.078 5.96e-05 ***
CFC12        3.808e-03  1.014e-03   3.757  0.00021 ***
TSI          9.314e-02  1.475e-02   6.313 1.10e-09 ***
Aerosols    -1.538e+00  2.133e-01  -7.210 5.41e-12 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.09171 on 275 degrees of freedom
Multiple R-squared:  0.7509,    Adjusted R-squared:  0.7436 
F-statistic: 103.6 on 8 and 275 DF,  p-value: < 2.2e-16
# Report the full model's multiple R-squared directly from the fit instead of
# transcribing it by hand; signif(., 4) matches the four significant digits
# that summary() displays.
print(signif(summary(model1)$r.squared, 4))
[1] 0.7509

§ 1.2 Which variables are significant in the model?

# Show the full-model summary again so the significance codes in the
# coefficient table can be read off for this question.
summary(model1)

Call:
lm(formula = Temp ~ MEI + CO2 + CH4 + N2O + CFC11 + CFC12 + TSI + 
    Aerosols, data = trainingset)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.25888 -0.05913 -0.00082  0.05649  0.32433 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.246e+02  1.989e+01  -6.265 1.43e-09 ***
MEI          6.421e-02  6.470e-03   9.923  < 2e-16 ***
CO2          6.457e-03  2.285e-03   2.826  0.00505 ** 
CH4          1.240e-04  5.158e-04   0.240  0.81015    
N2O         -1.653e-02  8.565e-03  -1.930  0.05467 .  
CFC11       -6.631e-03  1.626e-03  -4.078 5.96e-05 ***
CFC12        3.808e-03  1.014e-03   3.757  0.00021 ***
TSI          9.314e-02  1.475e-02   6.313 1.10e-09 ***
Aerosols    -1.538e+00  2.133e-01  -7.210 5.41e-12 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.09171 on 275 degrees of freedom
Multiple R-squared:  0.7509,    Adjusted R-squared:  0.7436 
F-statistic: 103.6 on 8 and 275 DF,  p-value: < 2.2e-16
# Predictors significant at the 5% level, taken from the coefficient table of
# the fitted model rather than typed in by hand. The intercept row is dropped
# because only the independent variables are being asked about.
coef_table <- summary(model1)$coefficients
significant <- rownames(coef_table)[coef_table[, "Pr(>|t|)"] < 0.05]
print(setdiff(significant, "(Intercept)"))
[1] "MEI"      "CO2"      "CFC11"    "CFC12"    "TSI"      "Aerosols"

Section-2 Understanding the Model

§ 2.1 Which of the following is the simplest correct explanation for this contradiction?

# Answer to § 2.1; the sentence is assembled from two pieces only to keep the
# source lines short, the printed text is unchanged.
print(paste0(
  "All of the gas concentration variables reflect human development - ",
  "N2O and CFC.11 are correlated with other variables in the data set."
))
[1] "All of the gas concentration variables reflect human development - N2O and CFC.11 are correlated with other variables in the data set."

§ 2.2 Compute the correlations between all the variables in the training set. Which of the following independent variables is N2O highly correlated with (absolute correlation greater than 0.7)? Select all that apply.

# Pairwise correlations between every column of the training set.
# NOTE(review): the printed matrix lists both CFC-11/CFC-12 and CFC11/CFC12
# with identical values, so the data appears to carry duplicate copies of
# those two columns — confirm whether that is intended.
cor(trainingset)
                Year         Month           MEI         CO2         CH4         N2O
Year      1.00000000 -0.0279419602 -0.0369876842  0.98274939  0.91565945  0.99384523
Month    -0.02794196  1.0000000000  0.0008846905 -0.10673246  0.01856866  0.01363153
MEI      -0.03698768  0.0008846905  1.0000000000 -0.04114717 -0.03341930 -0.05081978
CO2       0.98274939 -0.1067324607 -0.0411471651  1.00000000  0.87727963  0.97671982
CH4       0.91565945  0.0185686624 -0.0334193014  0.87727963  1.00000000  0.89983864
N2O       0.99384523  0.0136315303 -0.0508197755  0.97671982  0.89983864  1.00000000
CFC-11    0.56910643 -0.0131112236  0.0690004387  0.51405975  0.77990402  0.52247732
CFC-12    0.89701166  0.0006751102  0.0082855443  0.85268963  0.96361625  0.86793078
TSI       0.17030201 -0.0346061935 -0.1544919227  0.17742893  0.24552844  0.19975668
Aerosols -0.34524670  0.0148895406  0.3402377871 -0.35615480 -0.26780919 -0.33705457
Temp      0.78679714 -0.0998567411  0.1724707512  0.78852921  0.70325502  0.77863893
CFC11     0.56910643 -0.0131112236  0.0690004387  0.51405975  0.77990402  0.52247732
CFC12     0.89701166  0.0006751102  0.0082855443  0.85268963  0.96361625  0.86793078
              CFC-11        CFC-12         TSI    Aerosols        Temp       CFC11
Year      0.56910643  0.8970116635  0.17030201 -0.34524670  0.78679714  0.56910643
Month    -0.01311122  0.0006751102 -0.03460619  0.01488954 -0.09985674 -0.01311122
MEI       0.06900044  0.0082855443 -0.15449192  0.34023779  0.17247075  0.06900044
CO2       0.51405975  0.8526896272  0.17742893 -0.35615480  0.78852921  0.51405975
CH4       0.77990402  0.9636162478  0.24552844 -0.26780919  0.70325502  0.77990402
N2O       0.52247732  0.8679307757  0.19975668 -0.33705457  0.77863893  0.52247732
CFC-11    1.00000000  0.8689851828  0.27204596 -0.04392120  0.40771029  1.00000000
CFC-12    0.86898518  1.0000000000  0.25530281 -0.22513124  0.68755755  0.86898518
TSI       0.27204596  0.2553028138  1.00000000  0.05211651  0.24338269  0.27204596
Aerosols -0.04392120 -0.2251312440  0.05211651  1.00000000 -0.38491375 -0.04392120
Temp      0.40771029  0.6875575483  0.24338269 -0.38491375  1.00000000  0.40771029
CFC11     1.00000000  0.8689851828  0.27204596 -0.04392120  0.40771029  1.00000000
CFC12     0.86898518  1.0000000000  0.25530281 -0.22513124  0.68755755  0.86898518
                 CFC12
Year      0.8970116635
Month     0.0006751102
MEI       0.0082855443
CO2       0.8526896272
CH4       0.9636162478
N2O       0.8679307757
CFC-11    0.8689851828
CFC-12    1.0000000000
TSI       0.2553028138
Aerosols -0.2251312440
Temp      0.6875575483
CFC11     0.8689851828
CFC12     1.0000000000
# Independent variables whose absolute correlation with N2O exceeds 0.7,
# computed from the correlation matrix instead of being read off by eye.
# Only the other model predictors are candidates (Year, Month and Temp are
# not independent variables of the model).
n2o_candidates <- c("MEI", "CO2", "CH4", "CFC-11", "CFC-12", "TSI", "Aerosols")
n2o_cor <- cor(trainingset)["N2O", n2o_candidates]
print(names(n2o_cor)[abs(n2o_cor) > 0.7])
[1] "CO2"    "CH4"    "CFC-12"

Which of the following independent variables is CFC.11 highly correlated with (absolute correlation greater than 0.7)? Select all that apply.

# Correlation matrix of the training set, printed again so the CFC-11 row can
# be inspected for this question.
cor(trainingset)
                Year         Month           MEI         CO2         CH4         N2O
Year      1.00000000 -0.0279419602 -0.0369876842  0.98274939  0.91565945  0.99384523
Month    -0.02794196  1.0000000000  0.0008846905 -0.10673246  0.01856866  0.01363153
MEI      -0.03698768  0.0008846905  1.0000000000 -0.04114717 -0.03341930 -0.05081978
CO2       0.98274939 -0.1067324607 -0.0411471651  1.00000000  0.87727963  0.97671982
CH4       0.91565945  0.0185686624 -0.0334193014  0.87727963  1.00000000  0.89983864
N2O       0.99384523  0.0136315303 -0.0508197755  0.97671982  0.89983864  1.00000000
CFC-11    0.56910643 -0.0131112236  0.0690004387  0.51405975  0.77990402  0.52247732
CFC-12    0.89701166  0.0006751102  0.0082855443  0.85268963  0.96361625  0.86793078
TSI       0.17030201 -0.0346061935 -0.1544919227  0.17742893  0.24552844  0.19975668
Aerosols -0.34524670  0.0148895406  0.3402377871 -0.35615480 -0.26780919 -0.33705457
Temp      0.78679714 -0.0998567411  0.1724707512  0.78852921  0.70325502  0.77863893
CFC11     0.56910643 -0.0131112236  0.0690004387  0.51405975  0.77990402  0.52247732
CFC12     0.89701166  0.0006751102  0.0082855443  0.85268963  0.96361625  0.86793078
              CFC-11        CFC-12         TSI    Aerosols        Temp       CFC11
Year      0.56910643  0.8970116635  0.17030201 -0.34524670  0.78679714  0.56910643
Month    -0.01311122  0.0006751102 -0.03460619  0.01488954 -0.09985674 -0.01311122
MEI       0.06900044  0.0082855443 -0.15449192  0.34023779  0.17247075  0.06900044
CO2       0.51405975  0.8526896272  0.17742893 -0.35615480  0.78852921  0.51405975
CH4       0.77990402  0.9636162478  0.24552844 -0.26780919  0.70325502  0.77990402
N2O       0.52247732  0.8679307757  0.19975668 -0.33705457  0.77863893  0.52247732
CFC-11    1.00000000  0.8689851828  0.27204596 -0.04392120  0.40771029  1.00000000
CFC-12    0.86898518  1.0000000000  0.25530281 -0.22513124  0.68755755  0.86898518
TSI       0.27204596  0.2553028138  1.00000000  0.05211651  0.24338269  0.27204596
Aerosols -0.04392120 -0.2251312440  0.05211651  1.00000000 -0.38491375 -0.04392120
Temp      0.40771029  0.6875575483  0.24338269 -0.38491375  1.00000000  0.40771029
CFC11     1.00000000  0.8689851828  0.27204596 -0.04392120  0.40771029  1.00000000
CFC12     0.86898518  1.0000000000  0.25530281 -0.22513124  0.68755755  0.86898518
                 CFC12
Year      0.8970116635
Month     0.0006751102
MEI       0.0082855443
CO2       0.8526896272
CH4       0.9636162478
N2O       0.8679307757
CFC-11    0.8689851828
CFC-12    1.0000000000
TSI       0.2553028138
Aerosols -0.2251312440
Temp      0.6875575483
CFC11     0.8689851828
CFC12     1.0000000000
# Independent variables whose absolute correlation with CFC-11 exceeds 0.7,
# computed from the correlation matrix instead of being read off by eye.
# Only the other model predictors are candidates.
cfc11_candidates <- c("MEI", "CO2", "CH4", "N2O", "CFC-12", "TSI", "Aerosols")
cfc11_cor <- cor(trainingset)["CFC-11", cfc11_candidates]
print(names(cfc11_cor)[abs(cfc11_cor) > 0.7])
[1] "CH4"    "CFC-12"

Section-3 Simplifying the Model

§ 3.1 Build a model with only MEI, TSI, Aerosols and N2O as independent variables. Remember to use the training set to build the model.

Enter the coefficient of N2O in this reduced model, and the model R2:

# Reduced model: Temp explained by MEI, N2O, TSI and Aerosols only, fitted on
# the training data; the summary is displayed right after the fit.
model2 <- lm(
  Temp ~ MEI + N2O + TSI + Aerosols,
  data = trainingset
)
summary(model2)

Call:
lm(formula = Temp ~ MEI + N2O + TSI + Aerosols, data = trainingset)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.27916 -0.05975 -0.00595  0.05672  0.34195 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.162e+02  2.022e+01  -5.747 2.37e-08 ***
MEI          6.419e-02  6.652e-03   9.649  < 2e-16 ***
N2O          2.532e-02  1.311e-03  19.307  < 2e-16 ***
TSI          7.949e-02  1.487e-02   5.344 1.89e-07 ***
Aerosols    -1.702e+00  2.180e-01  -7.806 1.19e-13 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.09547 on 279 degrees of freedom
Multiple R-squared:  0.7261,    Adjusted R-squared:  0.7222 
F-statistic: 184.9 on 4 and 279 DF,  p-value: < 2.2e-16
# N2O coefficient of the reduced model, read from the fit rather than copied
# from the summary; four significant digits match what summary() shows.
print(paste("coefficient =", signif(coef(model2)[["N2O"]], 4)))
[1] "coefficient = 0.02532"
# Multiple R-squared of the reduced model, read from the fit rather than
# copied from the summary. The leading space in the label is kept so the
# printed answer stays exactly as before.
print(paste0(" R2 = ", signif(summary(model2)$r.squared, 4)))
[1] " R2 = 0.7261"

Section-4 Automatically Building the Model

§ 4.1 Enter the R2 value of the model produced by the step function

# Run step() starting from the full model and keep the selected fit as
# lmodel; the AIC trace of the search is printed as it runs.
lmodel <- step(model1)
Start:  AIC=-1348.16
Temp ~ MEI + CO2 + CH4 + N2O + CFC11 + CFC12 + TSI + Aerosols

           Df Sum of Sq    RSS     AIC
- CH4       1   0.00049 2.3135 -1350.1
<none>                  2.3130 -1348.2
- N2O       1   0.03132 2.3443 -1346.3
- CO2       1   0.06719 2.3802 -1342.0
- CFC12     1   0.11874 2.4318 -1335.9
- CFC11     1   0.13986 2.4529 -1333.5
- TSI       1   0.33516 2.6482 -1311.7
- Aerosols  1   0.43727 2.7503 -1301.0
- MEI       1   0.82823 3.1412 -1263.2

Step:  AIC=-1350.1
Temp ~ MEI + CO2 + N2O + CFC11 + CFC12 + TSI + Aerosols

           Df Sum of Sq    RSS     AIC
<none>                  2.3135 -1350.1
- N2O       1   0.03133 2.3448 -1348.3
- CO2       1   0.06672 2.3802 -1344.0
- CFC12     1   0.13023 2.4437 -1336.5
- CFC11     1   0.13938 2.4529 -1335.5
- TSI       1   0.33500 2.6485 -1313.7
- Aerosols  1   0.43987 2.7534 -1302.7
- MEI       1   0.83118 3.1447 -1264.9
# Display the coefficients and fit statistics of the model chosen by step().
summary(lmodel)

Call:
lm(formula = Temp ~ MEI + CO2 + N2O + CFC11 + CFC12 + TSI + Aerosols, 
    data = trainingset)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.25770 -0.05994 -0.00104  0.05588  0.32203 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept) -1.245e+02  1.985e+01  -6.273 1.37e-09 ***
MEI          6.407e-02  6.434e-03   9.958  < 2e-16 ***
CO2          6.402e-03  2.269e-03   2.821 0.005129 ** 
N2O         -1.602e-02  8.287e-03  -1.933 0.054234 .  
CFC11       -6.609e-03  1.621e-03  -4.078 5.95e-05 ***
CFC12        3.868e-03  9.812e-04   3.942 0.000103 ***
TSI          9.312e-02  1.473e-02   6.322 1.04e-09 ***
Aerosols    -1.540e+00  2.126e-01  -7.244 4.36e-12 ***
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

Residual standard error: 0.09155 on 276 degrees of freedom
Multiple R-squared:  0.7508,    Adjusted R-squared:  0.7445 
F-statistic: 118.8 on 7 and 276 DF,  p-value: < 2.2e-16
# Multiple R-squared of the stepwise model, read from the fit rather than
# copied from the summary; four significant digits match what summary() shows.
print(paste("R2 =", signif(summary(lmodel)$r.squared, 4)))
[1] "R2 = 0.7508"
# Presumably the answer to which variable step() eliminated: the step trace
# drops CH4 and the final formula no longer contains it.
print("ch4")
[1] "ch4"

Section-5 Testing on Unseen Data

Using the model produced from the step function, calculate temperature predictions for the testing data set, using the predict function.

Enter the testing set R2:

# Out-of-sample R-squared of the stepwise model: 1 - SSE/SST, where SSE is the
# squared prediction error on the test set and SST is the squared error of
# the baseline that always predicts the training-set mean of Temp.
# NOTE(review): testingset is not created anywhere in the visible code; it
# must already exist in the session — confirm how it is built.
predictmodel <- predict(lmodel, newdata = testingset)
SSE <- sum((testingset$Temp - predictmodel)^2)
SST <- sum((testingset$Temp - mean(trainingset$Temp))^2)
R2 <- 1 - SSE / SST
print(R2)
[1] 0.6286051
LS0tDQp0aXRsZTogIkFTMi0xIENsaW1hdGUgQ2hhbmdlIg0KYXV0aG9yOiAi6Zmz5q2j6KyAIGxvdWlzYW4xMjMgMjAxOC8wNy8wOSINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCmBgYHtyfQ0KbGlicmFyeShkcGx5cikNCmBgYA0KDQojIyMjICBTZWN0aW9uLTEgQ3JlYXRpbmcgT3VyIEZpcnN0IE1vZGVsDQoNCl9fwqcgMS4xX18gd2hhdCdzIHRoZSBtb2RlbCBSMj8NCmBgYHtyfQ0KdHJhaW5pbmdzZXQgPSBzdWJzZXQoY2xpbWF0ZV9jaGFuZ2UsIFllYXIgPD0gMjAwNikNCm1vZGVsMSA9IGxtKFRlbXAgfiBNRUkgKyBDTzIgKyBDSDQgKyBOMk8rIENGQzExICsgQ0ZDMTIgKyBUU0kgKyBBZXJvc29scywgZGF0YSA9IHRyYWluaW5nc2V0KQ0Kc3VtbWFyeShtb2RlbDEpDQpwcmludCgwLjc1MDkpDQpgYGANCg0KX1/CpyAxLjJfXyBXaGljaCB2YXJpYWJsZXMgYXJlIHNpZ25pZmljYW50IGluIHRoZSBtb2RlbD8gDQpgYGB7cn0NCnN1bW1hcnkobW9kZWwxKQ0KcHJpbnQoYygiTUVJIiwgIkNPMiIsICJDRkMxMSIsICJDRkMxMiIsICJUU0kiLCAiQWVyb3NvbHMiKSkNCmBgYA0KDQoNCiMjIyMgIFNlY3Rpb24tMiBVbmRlcnN0YW5kaW5nIHRoZSBNb2RlbA0KDQpfX8KnIDIuMV9fIFdoaWNoIG9mIHRoZSBmb2xsb3dpbmcgaXMgdGhlIHNpbXBsZXN0IGNvcnJlY3QgZXhwbGFuYXRpb24gZm9yIHRoaXMgY29udHJhZGljdGlvbj8NCmBgYHtyfQ0KcHJpbnQoIkFsbCBvZiB0aGUgZ2FzIGNvbmNlbnRyYXRpb24gdmFyaWFibGVzIHJlZmxlY3QgaHVtYW4gZGV2ZWxvcG1lbnQgLSBOMk8gYW5kIENGQy4xMSBhcmUgY29ycmVsYXRlZCB3aXRoIG90aGVyIHZhcmlhYmxlcyBpbiB0aGUgZGF0YSBzZXQuIikNCmBgYA0KDQpfX8KnIDIuMl9fIENvbXB1dGUgdGhlIGNvcnJlbGF0aW9ucyBiZXR3ZWVuIGFsbCB0aGUgdmFyaWFibGVzIGluIHRoZSB0cmFpbmluZyBzZXQuIFdoaWNoIG9mIHRoZSBmb2xsb3dpbmcgaW5kZXBlbmRlbnQgdmFyaWFibGVzIGlzIE4yTyBoaWdobHkgY29ycmVsYXRlZCB3aXRoIChhYnNvbHV0ZSBjb3JyZWxhdGlvbiBncmVhdGVyIHRoYW4gMC43KT8gU2VsZWN0IGFsbCB0aGF0IGFwcGx5Lg0KYGBge3J9DQpjb3IodHJhaW5pbmdzZXQpDQpwcmludChjKCJDTzIiLCAiQ0g0IiwgIkNGQy0xMiIpKQ0KYGBgDQoNCldoaWNoIG9mIHRoZSBmb2xsb3dpbmcgaW5kZXBlbmRlbnQgdmFyaWFibGVzIGlzIENGQy4xMSBoaWdobHkgY29ycmVsYXRlZCB3aXRoPyBTZWxlY3QgYWxsIHRoYXQgYXBwbHk/DQpgYGB7cn0NCmNvcih0cmFpbmluZ3NldCkNCnByaW50KGMoIkNINCIsICJDRkMtMTIiKSkNCmBgYA0KDQoNCiMjIyMgIFNlY3Rpb24tMyBTaW1wbGlmeWluZyB0aGUgTW9kZWwNCg0KX1/CpyAzLjFfXyBidWlsZCBhIG1vZGVsIHdpdGggb25seSBNRUksIFRTSSwgQWVyb3NvbHMgYW5kIE4yTyBhcyBpbmRlcGVuZGVudCB2YXJpYWJsZXMuIFJlbWVtYmVyIHRvIHVzZSB0aGUgdHJhaW5pbmcgc2V0IHRvIGJ1aWxkIHRoZSBtb2RlbC4N
Cg0KRW50ZXIgdGhlIGNvZWZmaWNpZW50IG9mIE4yTyBpbiB0aGlzIHJlZHVjZWQgbW9kZWwNCmBgYHtyfQ0KbW9kZWwyID0gbG0oVGVtcCB+IE1FSSArIE4yTyArIFRTSSArIEFlcm9zb2xzLCBkYXRhID0gdHJhaW5pbmdzZXQpDQpzdW1tYXJ5KG1vZGVsMikNCnByaW50KCJjb2VmZmljaWVudCA9IDAuMDI1MzIiKQ0KcHJpbnQoIiBSMiA9IDAuNzI2MSIpDQpgYGANCg0KIyMjIyBTZWN0aW9uNCAgQXV0b21hdGljYWxseSBCdWlsZGluZyB0aGUgTW9kZWwNCg0KX1/CpyA0LjFfXyBFbnRlciB0aGUgUjIgdmFsdWUgb2YgdGhlIG1vZGVsIHByb2R1Y2VkIGJ5IHRoZSBzdGVwIGZ1bmN0aW9uDQpgYGB7cn0NCmxtb2RlbCA9IHN0ZXAobW9kZWwxKQ0Kc3VtbWFyeShsbW9kZWwpDQpwcmludCgiUjIgPSAwLjc1MDgiKQ0KcHJpbnQoImNoNCIpDQpgYGANCg0KIyMjIyAgU2VjdGlvbi01IFRlc3Rpbmcgb24gVW5zZWVuIERhdGENCg0KVXNpbmcgdGhlIG1vZGVsIHByb2R1Y2VkIGZyb20gdGhlIHN0ZXAgZnVuY3Rpb24sIGNhbGN1bGF0ZSB0ZW1wZXJhdHVyZSBwcmVkaWN0aW9ucyBmb3IgdGhlIHRlc3RpbmcgZGF0YSBzZXQsIHVzaW5nIHRoZSBwcmVkaWN0IGZ1bmN0aW9uLg0KDQpFbnRlciB0aGUgdGVzdGluZyBzZXQgUjI6DQpgYGB7cn0NCnByZWRpY3Rtb2RlbCA9IHByZWRpY3QobG1vZGVsLCBuZXdkYXRhID0gdGVzdGluZ3NldCkNClNTRSA9IHN1bSgodGVzdGluZ3NldCRUZW1wIC0gcHJlZGljdG1vZGVsKV4yKQ0KU1NUID0gc3VtKCh0ZXN0aW5nc2V0JFRlbXAgLSBtZWFuKHRyYWluaW5nc2V0JFRlbXApKV4yKQ0KUjIgPSAxIC0gU1NFL1NTVA0KcHJpbnQoUjIpDQpgYGANCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQo=