library("tidyverse")
## ── Attaching packages ─────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1     ✔ purrr   0.2.4
## ✔ tibble  1.3.4     ✔ dplyr   0.7.4
## ✔ tidyr   0.7.2     ✔ stringr 1.2.0
## ✔ readr   1.1.1     ✔ forcats 0.2.0
## ── Conflicts ────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library("dplyr", warn.conflicts = FALSE)

Load car for the VIF functionality

if (!require(car)) {
    install.packages("car")
    require(car)
}
if (!require(ggplot2)) {
    install.packages("ggplot2")
    require(ggplot2)
}

Load this for visualizing correlations

if (!require(GGally)) {
    install.packages("GGally")
    require(GGally)
}

Load MASS for stepwise regression

if (!require(MASS)) {
    install.packages("MASS")
    require(MASS)
}

Load olss for stepwise regression

if (!require(olsrr)) {
    install.packages("olsrr")
    require(olsrr)
}

Part A (35 marks)

Read data for regression

mydf <- read_csv("buildingenergy.csv")
## Parsed with column specification:
## cols(
##   RC = col_double(),
##   SA = col_double(),
##   WA = col_double(),
##   RA = col_double(),
##   OH = col_double(),
##   OR = col_integer(),
##   GA = col_double(),
##   GD = col_integer(),
##   HL = col_double(),
##   CL = col_double()
## )
head(mydf)
## # A tibble: 6 x 10
##      RC    SA    WA     RA    OH    OR    GA    GD    HL    CL
##   <dbl> <dbl> <dbl>  <dbl> <dbl> <int> <dbl> <int> <dbl> <dbl>
## 1  0.98 514.5 294.0 110.25     7     2     0     0 15.55 21.33
## 2  0.98 514.5 294.0 110.25     7     3     0     0 15.55 21.33
## 3  0.98 514.5 294.0 110.25     7     4     0     0 15.55 21.33
## 4  0.98 514.5 294.0 110.25     7     5     0     0 15.55 21.33
## 5  0.90 563.5 318.5 122.50     7     2     0     0 20.84 28.28
## 6  0.90 563.5 318.5 122.50     7     3     0     0 21.46 25.38

Linear regression basics

We start by setting up the model and estimating it. This assumes that the data are clean and that there are no other issues with the data.

regModel1 <- lm(HL ~ RC+SA+WA+RA+OH+OR+GA+GD,data = mydf)
regModel2 <- lm(CL ~ RC+SA+WA+RA+OH+OR+GA+GD,data = mydf)

Summary of regression model

  • What are residuals
  • Make sure you understand the section on coefficients and how to interpret them
  • How do we interpret the stars?
  • What is residual standard error?
  • What is the difference between R2 and Adjusted-R2?
  • What does the p-value at the end tell us?
summary(regModel1)
## 
## Call:
## lm(formula = HL ~ RC + SA + WA + RA + OH + OR + GA + GD, data = mydf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.8965 -1.3196 -0.0252  1.3532  7.7052 
## 
## Coefficients: (1 not defined because of singularities)
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  84.014521  19.033607   4.414 1.16e-05 ***
## RC          -64.773991  10.289445  -6.295 5.19e-10 ***
## SA           -0.087290   0.017075  -5.112 4.04e-07 ***
## WA            0.060813   0.006648   9.148  < 2e-16 ***
## RA                  NA         NA      NA       NA    
## OH            4.169939   0.337990  12.337  < 2e-16 ***
## OR           -0.023328   0.094705  -0.246  0.80550    
## GA           19.932680   0.813986  24.488  < 2e-16 ***
## GD            0.203772   0.069918   2.914  0.00367 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.934 on 760 degrees of freedom
## Multiple R-squared:  0.9162, Adjusted R-squared:  0.9154 
## F-statistic:  1187 on 7 and 760 DF,  p-value: < 2.2e-16
summary(regModel2)
## 
## Call:
## lm(formula = CL ~ RC + SA + WA + RA + OH + OR + GA + GD, data = mydf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.6940 -1.5606 -0.2668  1.3968 11.1775 
## 
## Coefficients: (1 not defined because of singularities)
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  97.245749  20.764711   4.683 3.34e-06 ***
## RC          -70.787707  11.225269  -6.306 4.85e-10 ***
## SA           -0.088245   0.018628  -4.737 2.59e-06 ***
## WA            0.044682   0.007253   6.161 1.17e-09 ***
## RA                  NA         NA      NA       NA    
## OH            4.283843   0.368730  11.618  < 2e-16 ***
## OR            0.121510   0.103318   1.176    0.240    
## GA           14.717068   0.888018  16.573  < 2e-16 ***
## GD            0.040697   0.076277   0.534    0.594    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.201 on 760 degrees of freedom
## Multiple R-squared:  0.8878, Adjusted R-squared:  0.8868 
## F-statistic: 859.1 on 7 and 760 DF,  p-value: < 2.2e-16

Regression diagnostics

plot(regModel1)

plot(regModel2)

Remove the variable RA

regModel1 <- lm(HL ~ RC+SA+WA+OH+OR+GA+GD,data = mydf)
regModel2 <- lm(CL ~ RC+SA+WA+OH+OR+GA+GD,data = mydf)
summary(regModel1)
## 
## Call:
## lm(formula = HL ~ RC + SA + WA + OH + OR + GA + GD, data = mydf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.8965 -1.3196 -0.0252  1.3532  7.7052 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  84.014521  19.033607   4.414 1.16e-05 ***
## RC          -64.773991  10.289445  -6.295 5.19e-10 ***
## SA           -0.087290   0.017075  -5.112 4.04e-07 ***
## WA            0.060813   0.006648   9.148  < 2e-16 ***
## OH            4.169939   0.337990  12.337  < 2e-16 ***
## OR           -0.023328   0.094705  -0.246  0.80550    
## GA           19.932680   0.813986  24.488  < 2e-16 ***
## GD            0.203772   0.069918   2.914  0.00367 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.934 on 760 degrees of freedom
## Multiple R-squared:  0.9162, Adjusted R-squared:  0.9154 
## F-statistic:  1187 on 7 and 760 DF,  p-value: < 2.2e-16
summary(regModel2)
## 
## Call:
## lm(formula = CL ~ RC + SA + WA + OH + OR + GA + GD, data = mydf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.6940 -1.5606 -0.2668  1.3968 11.1775 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  97.245749  20.764711   4.683 3.34e-06 ***
## RC          -70.787707  11.225269  -6.306 4.85e-10 ***
## SA           -0.088245   0.018628  -4.737 2.59e-06 ***
## WA            0.044682   0.007253   6.161 1.17e-09 ***
## OH            4.283843   0.368730  11.618  < 2e-16 ***
## OR            0.121510   0.103318   1.176    0.240    
## GA           14.717068   0.888018  16.573  < 2e-16 ***
## GD            0.040697   0.076277   0.534    0.594    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.201 on 760 degrees of freedom
## Multiple R-squared:  0.8878, Adjusted R-squared:  0.8868 
## F-statistic: 859.1 on 7 and 760 DF,  p-value: < 2.2e-16

Regression diagnostics after romoving the variable RA

plot(regModel1)

plot(regModel2)

By the way, how is HL and CL distributed?

hist(mydf$HL)

hist(mydf$CL)

It makes sense to use the log transform as the dependent variable

Heating Load

regModel1 <- lm(log(HL) ~ RC+SA+WA+OH+OR+GA+GD,data = mydf)
summary(regModel1)
## 
## Call:
## lm(formula = log(HL) ~ RC + SA + WA + OH + OR + GA + GD, data = mydf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35472 -0.05718  0.00176  0.06585  0.31204 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.5787874  0.7806251  -0.741   0.4587    
## RC           0.5623864  0.4220009   1.333   0.1830    
## SA           0.0014251  0.0007003   2.035   0.0422 *  
## WA           0.0015991  0.0002727   5.865 6.70e-09 ***
## OH           0.2665273  0.0138620  19.227  < 2e-16 ***
## OR          -0.0008389  0.0038841  -0.216   0.8290    
## GA           1.0117131  0.0333840  30.305  < 2e-16 ***
## GD           0.0159934  0.0028675   5.577 3.39e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1203 on 760 degrees of freedom
## Multiple R-squared:  0.9368, Adjusted R-squared:  0.9362 
## F-statistic:  1610 on 7 and 760 DF,  p-value: < 2.2e-16
plot(regModel1)

Cooling Load

regModel2 <- lm(log(CL) ~ RC+SA+WA+OH+OR+GA+GD,data = mydf)
summary(regModel2)
## 
## Call:
## lm(formula = log(CL) ~ RC + SA + WA + OH + OR + GA + GD, data = mydf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28014 -0.06793 -0.00703  0.04112  0.32881 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.0597218  0.7272638   4.207 2.89e-05 ***
## RC          -1.1278100  0.3931541  -2.869  0.00424 ** 
## SA          -0.0011130  0.0006524  -1.706  0.08842 .  
## WA           0.0013456  0.0002540   5.298 1.54e-07 ***
## OH           0.2046199  0.0129144  15.844  < 2e-16 ***
## OR           0.0045359  0.0036186   1.254  0.21041    
## GA           0.6341835  0.0311020  20.390  < 2e-16 ***
## GD           0.0030144  0.0026715   1.128  0.25953    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1121 on 760 degrees of freedom
## Multiple R-squared:  0.9192, Adjusted R-squared:  0.9185 
## F-statistic:  1236 on 7 and 760 DF,  p-value: < 2.2e-16
plot(regModel2)

Variance inflation factor

  • The variance inflation factor (VIF) provides a measure of how much the variance is inflated.
  • Variance of what?
  • We know that the standard errors (and, therefore the variances) of the estimated regression coefficients are inflated when multicollinearity exists.
  • So, the VIF for the estimated coefficient bk (denoted VIFk) is the factor by which the variance is inflated.
vif(regModel1)
##         RC         SA         WA         OH         OR         GA 
## 105.524054 201.531134   7.492984  31.205474   1.000000   1.047508 
##         GD 
##   1.047508

Visualize the correlations

library(GGally)
X <- subset(mydf,select=-c(HL, CL))
ggpairs(X)

We will use the VIF scores to delete the variable with the highest VIF — SA

Heating Load

HL1 <- lm(log(HL) ~ RC+WA+OH+OR+GA+GD,data = mydf)
summary(HL1)
## 
## Call:
## lm(formula = log(HL) ~ RC + WA + OH + OR + GA + GD, data = mydf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35568 -0.05824  0.00023  0.05892  0.31448 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.9940620  0.1095921   9.071  < 2e-16 ***
## RC          -0.2578535  0.1252015  -2.060   0.0398 *  
## WA           0.0020209  0.0001775  11.387  < 2e-16 ***
## OH           0.2430700  0.0077149  31.507  < 2e-16 ***
## OR          -0.0008389  0.0038921  -0.216   0.8294    
## GA           1.0117131  0.0334528  30.243  < 2e-16 ***
## GD           0.0159934  0.0028734   5.566 3.61e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1206 on 761 degrees of freedom
## Multiple R-squared:  0.9365, Adjusted R-squared:  0.936 
## F-statistic:  1870 on 6 and 761 DF,  p-value: < 2.2e-16
plot(HL1)

Cooling Load

CL1 <- lm(log(CL) ~ RC+WA+OH+OR+GA+GD,data = mydf)
summary(CL1)
## 
## Call:
## lm(formula = log(CL) ~ RC + WA + OH + OR + GA + GD, data = mydf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28207 -0.06701 -0.00228  0.04210  0.32691 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.8312778  0.1020184  17.950  < 2e-16 ***
## RC          -0.4871773  0.1165491  -4.180 3.25e-05 ***
## WA           0.0010162  0.0001652   6.151 1.25e-09 ***
## OH           0.2229408  0.0071817  31.043  < 2e-16 ***
## OR           0.0045359  0.0036232   1.252    0.211    
## GA           0.6341835  0.0311410  20.365  < 2e-16 ***
## GD           0.0030144  0.0026749   1.127    0.260    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1123 on 761 degrees of freedom
## Multiple R-squared:  0.9189, Adjusted R-squared:  0.9183 
## F-statistic:  1438 on 6 and 761 DF,  p-value: < 2.2e-16
plot(CL1)

Check out the VIFs

vif(HL1)
##       RC       WA       OH       OR       GA       GD 
## 9.250283 3.161934 9.626103 1.000000 1.047508 1.047508
vif(CL1)
##       RC       WA       OH       OR       GA       GD 
## 9.250283 3.161934 9.626103 1.000000 1.047508 1.047508

Alternate approach (stepwise regression)

library(MASS)
fitstepHL <- lm(log(HL) ~ RC+SA+WA+RA+OH+OR+GA+GD,data = mydf)
step <- stepAIC(fitstepHL, direction="both")
## Start:  AIC=-3244.36
## log(HL) ~ RC + SA + WA + RA + OH + OR + GA + GD
## 
## 
## Step:  AIC=-3244.36
## log(HL) ~ RC + SA + WA + OH + OR + GA + GD
## 
##        Df Sum of Sq    RSS     AIC
## - OR    1    0.0007 11.008 -3246.3
## - RC    1    0.0257 11.033 -3244.6
## <none>              11.007 -3244.4
## - SA    1    0.0600 11.067 -3242.2
## - GD    1    0.4505 11.458 -3215.5
## - WA    1    0.4982 11.505 -3212.4
## - OH    1    5.3541 16.361 -2941.9
## - GA    1   13.3013 24.308 -2637.9
## 
## Step:  AIC=-3246.31
## log(HL) ~ RC + SA + WA + OH + GA + GD
## 
##        Df Sum of Sq    RSS     AIC
## - RC    1    0.0257 11.033 -3246.5
## <none>              11.008 -3246.3
## + OR    1    0.0007 11.007 -3244.4
## - SA    1    0.0600 11.068 -3244.1
## - GD    1    0.4505 11.458 -3217.5
## - WA    1    0.4982 11.506 -3214.3
## - OH    1    5.3541 16.362 -2943.9
## - GA    1   13.3013 24.309 -2639.9
## 
## Step:  AIC=-3246.52
## log(HL) ~ SA + WA + OH + GA + GD
## 
##        Df Sum of Sq    RSS     AIC
## <none>              11.033 -3246.5
## + RC    1    0.0257 11.008 -3246.3
## + OR    1    0.0007 11.033 -3244.6
## - SA    1    0.0959 11.129 -3241.9
## - GD    1    0.4505 11.484 -3217.8
## - WA    1    0.9421 11.976 -3185.6
## - OH    1    8.2720 19.305 -2818.9
## - GA    1   13.3013 24.335 -2641.0
step$anova # display results
## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## log(HL) ~ RC + SA + WA + RA + OH + OR + GA + GD
## 
## Final Model:
## log(HL) ~ SA + WA + OH + GA + GD
## 
## 
##   Step Df     Deviance Resid. Df Resid. Dev       AIC
## 1                            760   11.00704 -3244.356
## 2 - RA  0 0.0000000000       760   11.00704 -3244.356
## 3 - OR  1 0.0006756823       761   11.00772 -3246.308
## 4 - RC  1 0.0257217145       762   11.03344 -3246.516
fitstepCL <- lm(log(CL) ~ RC+SA+WA+RA+OH+OR+GA+GD,data = mydf)
step <- stepAIC(fitstepCL, direction="both")
## Start:  AIC=-3353.11
## log(CL) ~ RC + SA + WA + RA + OH + OR + GA + GD
## 
## 
## Step:  AIC=-3353.11
## log(CL) ~ RC + SA + WA + OH + OR + GA + GD
## 
##        Df Sum of Sq     RSS     AIC
## - GD    1    0.0160  9.5697 -3353.8
## - OR    1    0.0198  9.5734 -3353.5
## <none>               9.5537 -3353.1
## - SA    1    0.0366  9.5902 -3352.2
## - RC    1    0.1034  9.6571 -3346.8
## - WA    1    0.3528  9.9064 -3327.3
## - OH    1    3.1557 12.7094 -3135.9
## - GA    1    5.2265 14.7801 -3020.0
## 
## Step:  AIC=-3353.83
## log(CL) ~ RC + SA + WA + OH + OR + GA
## 
##        Df Sum of Sq     RSS     AIC
## - OR    1    0.0198  9.5894 -3354.2
## <none>               9.5697 -3353.8
## + GD    1    0.0160  9.5537 -3353.1
## - SA    1    0.0366  9.6062 -3352.9
## - RC    1    0.1034  9.6731 -3347.6
## - WA    1    0.3528  9.9224 -3328.0
## - OH    1    3.1557 12.7254 -3136.9
## - GA    1    5.6046 15.1743 -3001.8
## 
## Step:  AIC=-3354.24
## log(CL) ~ RC + SA + WA + OH + GA
## 
##        Df Sum of Sq     RSS     AIC
## <none>               9.5894 -3354.2
## + OR    1    0.0198  9.5697 -3353.8
## + GD    1    0.0160  9.5734 -3353.5
## - SA    1    0.0366  9.6260 -3353.3
## - RC    1    0.1034  9.6929 -3348.0
## - WA    1    0.3528  9.9422 -3328.5
## - OH    1    3.1557 12.7452 -3137.8
## - GA    1    5.6046 15.1940 -3002.8
step$anova # display results
## Stepwise Model Path 
## Analysis of Deviance Table
## 
## Initial Model:
## log(CL) ~ RC + SA + WA + RA + OH + OR + GA + GD
## 
## Final Model:
## log(CL) ~ RC + SA + WA + OH + GA
## 
## 
##   Step Df   Deviance Resid. Df Resid. Dev       AIC
## 1                          760   9.553656 -3353.113
## 2 - RA  0 0.00000000       760   9.553656 -3353.113
## 3 - GD  1 0.01600475       761   9.569660 -3353.827
## 4 - OR  1 0.01975180       762   9.589412 -3354.244

Lets estimate what stepwise suggested

HL2 <- lm(log(HL) ~ SA + WA + OH + GA + GD, data = mydf)
summary(HL2)
## 
## Call:
## lm(formula = log(HL) ~ SA + WA + OH + GA + GD, data = mydf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35668 -0.06061  0.00220  0.06112  0.31164 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 0.4428329  0.1346474   3.289  0.00105 ** 
## SA          0.0005337  0.0002073   2.574  0.01024 *  
## WA          0.0018063  0.0002239   8.066 2.81e-15 ***
## OH          0.2547154  0.0106568  23.902  < 2e-16 ***
## GA          1.0117131  0.0333801  30.309  < 2e-16 ***
## GD          0.0159934  0.0028672   5.578 3.38e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1203 on 762 degrees of freedom
## Multiple R-squared:  0.9367, Adjusted R-squared:  0.9363 
## F-statistic:  2254 on 5 and 762 DF,  p-value: < 2.2e-16
CL2 <- lm(log(CL) ~ RC + SA + WA + OH + GA, data = mydf)
summary(CL2)
## 
## Call:
## lm(formula = log(CL) ~ RC + SA + WA + OH + GA, data = mydf)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.28460 -0.06733 -0.00694  0.04061  0.32844 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.0823240  0.7275319   4.237 2.55e-05 ***
## RC          -1.1278100  0.3933719  -2.867  0.00426 ** 
## SA          -0.0011130  0.0006528  -1.705  0.08860 .  
## WA           0.0013456  0.0002542   5.295 1.56e-07 ***
## OH           0.2046199  0.0129216  15.836  < 2e-16 ***
## GA           0.6416573  0.0304053  21.103  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1122 on 762 degrees of freedom
## Multiple R-squared:  0.9189, Adjusted R-squared:  0.9184 
## F-statistic:  1728 on 5 and 762 DF,  p-value: < 2.2e-16

Variable selection

Best Subset Regression

library("olsrr")
myModelHL <- lm(log(HL) ~ RC+SA+WA+RA+OH+OR+GA+GD, data = mydf)
HL3 <- ols_best_subset(myModelHL)
## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length
HL3
##        Best Subsets Regression        
## --------------------------------------
## Model Index    Predictors
## --------------------------------------
##      1         OH                      
##      2         OH GA                   
##      3         WA OH GA                
##      4         WA OH GA GD             
##      5         SA WA OH GA GD          
##      6         RC WA RA OH GA GD       
##      7         RC WA RA OH OR GA GD    
##      8         RC SA WA RA OH OR GA GD 
## --------------------------------------
## 
##                                                    Subsets Regression Summary                                                    
## ---------------------------------------------------------------------------------------------------------------------------------
##                        Adj.        Pred                                                                                           
## Model    R-Square    R-Square    R-Square      C(p)          AIC        SBIC       SBC         MSEP      FPE       HSP      APC  
## ---------------------------------------------------------------------------------------------------------------------------------
##   1        0.8058      0.8055      0.8047    1572.9415     -212.1870      NA     -198.2556    0.0443    0.0443    1e-04    0.1953 
##   2        0.8921      0.8918      0.8911     536.0051     -661.7821      NA     -643.2070    0.0247    0.0247    0.0000    0.1087 
##   3        0.9335      0.9333      0.9328      39.5542    -1031.9008      NA    -1008.6819    0.0152    0.0152    0.0000    0.0672 
##   4        0.9361      0.9358      0.9352      10.4467    -1060.3774      NA    -1032.5147    0.0147    0.0147    0.0000    0.0647 
##   5        0.9367      0.9363      0.9356       5.8227    -1065.0263      NA    -1032.5198    0.0146    0.0146    0.0000    0.0643 
##   6        0.9368      0.9363      0.9356       6.0467    -1064.8188      NA    -1027.6685    0.0146    0.0146    0.0000    0.0643 
##   7        0.9368      0.9362      0.9355       8.0000    -1062.8659      NA    -1021.0718    0.0146    0.0146    0.0000    0.0645 
##   8        0.9368      0.9362      0.9355       8.0000    -1060.8659      NA    -1014.4280    0.0146    0.0146    0.0000    0.0645 
## ---------------------------------------------------------------------------------------------------------------------------------
## AIC: Akaike Information Criteria 
##  SBIC: Sawa's Bayesian Information Criteria 
##  SBC: Schwarz Bayesian Criteria 
##  MSEP: Estimated error of prediction, assuming multivariate normality 
##  FPE: Final Prediction Error 
##  HSP: Hocking's Sp 
##  APC: Amemiya Prediction Criteria
myModelCL <- lm(log(CL) ~ RC+SA+WA+RA+OH+OR+GA+GD, data = mydf)
CL3 <- ols_best_subset(myModelCL)
## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length

## Warning in b * sx: longer object length is not a multiple of shorter object
## length
CL3
##        Best Subsets Regression        
## --------------------------------------
## Model Index    Predictors
## --------------------------------------
##      1         OH                      
##      2         OH GA                   
##      3         WA OH GA                
##      4         RC RA OH GA             
##      5         RC RA OH OR GA          
##      6         RC RA OH OR GA GD       
##      7         RC SA WA OH OR GA GD    
##      8         RC SA WA RA OH OR GA GD 
## --------------------------------------
## 
##                                                    Subsets Regression Summary                                                    
## ---------------------------------------------------------------------------------------------------------------------------------
##                        Adj.        Pred                                                                                           
## Model    R-Square    R-Square    R-Square      C(p)         AIC        SBIC       SBC         MSEP      FPE       HSP       APC  
## ---------------------------------------------------------------------------------------------------------------------------------
##   1        0.8412      0.8410      0.8403    730.7225     -664.1655      NA     -650.2342    0.0246    0.0246    0.0000    0.1597 
##   2        0.8886      0.8883      0.8876    286.8731     -934.2097      NA     -915.6346    0.0173    0.0173    0.0000    0.1123 
##   3        0.9168      0.9164       0.916     23.2711    -1156.4601      NA    -1133.2412    0.0130    0.0130    0.0000    0.0841 
##   4        0.9189      0.9185      0.9179      5.0716    -1174.5257      NA    -1146.6630    0.0127    0.0127    0.0000    0.0821 
##   5        0.9191      0.9186      0.9179      5.5003    -1174.1088      NA    -1141.6023    0.0127    0.0127    0.0000    0.0822 
##   6        0.9192      0.9186      0.9178      6.2271    -1173.3939      NA    -1136.2436    0.0127    0.0127    0.0000    0.0823 
##   7        0.9192      0.9185      0.9176      8.0000    -1171.6234      NA    -1129.8293    0.0127    0.0127    0.0000    0.0825 
##   8        0.9192      0.9185      0.9176      8.0000    -1169.6234      NA    -1123.1855    0.0127    0.0127    0.0000    0.0825 
## ---------------------------------------------------------------------------------------------------------------------------------
## AIC: Akaike Information Criteria 
##  SBIC: Sawa's Bayesian Information Criteria 
##  SBC: Schwarz Bayesian Criteria 
##  MSEP: Estimated error of prediction, assuming multivariate normality 
##  FPE: Final Prediction Error 
##  HSP: Hocking's Sp 
##  APC: Amemiya Prediction Criteria

Plot best subsets

plot(HL3)
## Warning: Removed 8 rows containing missing values (geom_point).

plot(CL3)
## Warning: Removed 8 rows containing missing values (geom_point).

comparison between models

HL1: log(HL)~ RC+WA+OH+OR+GA+GD HL2: log(HL)~ SA+WA+OH+GA+GD

CL1:log(CL)~ RC+WA+OH+OR+GA+GD CL2:log(CL)~RC+SA+WA+OH+GA

As of the third models, ols_best_subset provided 8 models, I selected the one with lowest AIC HL3: log(HL)~ SA+WA+OH+GA+GD with an AIC of -1065.0263 CL3: log(CL)~ RC+RA+OH+GA with an AIC of -1174.5257

Comparing three models I’ve got for HL, the best model for HL is the one with lowest AIC, which is Best HL: log(HL)~ SA+WA+OH+GA+GD While the model from CL3 has the lowest AIC, I noticed that when I did the basic regression, RA is a variable that has singularity. So I would prefer the CH2 as my best model for CL Best CL:log(CL)~RC+SA+WA+OH+GA

AIC(HL1)
## [1] -1060.693
AIC(HL2)
## [1] -1065.026
AIC(CL1)
## [1] -1170.688
AIC(CL2)
## [1] -1172.754

Part B (35 marks)

Read the data

df <- read.csv("kicksfootball.csv")
head(df)
##   yards goal practiceormatch
## 1    18    Y               P
## 2    19    Y               P
## 3    19    Y               P
## 4    19    Y               P
## 5    19    Y               P
## 6    19    Y               P

Make a bar graph for the goal variable

# geom_bar is designed to make it easy to 
# create bar charts that show
# counts (or sums of weights)
g <- ggplot(df, aes(goal))
# Number of Ys and Ns in variable goal
g + geom_bar()

Change goal and practiceormatch to upper case.

library(dplyr)
df$goal <- toupper(df$goal)
df$practiceormatch <- toupper(df$practiceormatch)

Verify, graphically

g <- ggplot(df, aes(goal))
# Number of Ys and Ns in variable goal
g + geom_bar()

Convert all Ys to 1, and the rest to 0

df$goal <- ifelse(df$goal=="Y", 1,0)

Create two subsets based on the variable practiceormatch using the functionality available in dplyr and using piping.

subset1 <- df %>% filter(practiceormatch=="P") 
subset2 <- df %>% filter(practiceormatch=="M")
subset1
##     yards goal practiceormatch
## 1      18    1               P
## 2      19    1               P
## 3      19    1               P
## 4      19    1               P
## 5      19    1               P
## 6      19    1               P
## 7      20    0               P
## 8      20    1               P
## 9      20    1               P
## 10     20    1               P
## 11     20    1               P
## 12     20    1               P
## 13     20    1               P
## 14     20    1               P
## 15     20    1               P
## 16     20    1               P
## 17     20    1               P
## 18     20    1               P
## 19     20    1               P
## 20     20    1               P
## 21     20    1               P
## 22     20    1               P
## 23     20    1               P
## 24     20    1               P
## 25     20    1               P
## 26     21    0               P
## 27     21    1               P
## 28     21    1               P
## 29     21    1               P
## 30     21    1               P
## 31     21    1               P
## 32     21    1               P
## 33     21    1               P
## 34     21    1               P
## 35     21    1               P
## 36     21    1               P
## 37     21    1               P
## 38     21    1               P
## 39     21    1               P
## 40     21    1               P
## 41     21    1               P
## 42     21    1               P
## 43     21    1               P
## 44     22    1               P
## 45     22    1               P
## 46     22    1               P
## 47     22    1               P
## 48     22    1               P
## 49     22    1               P
## 50     22    1               P
## 51     22    1               P
## 52     22    1               P
## 53     22    1               P
## 54     22    1               P
## 55     22    1               P
## 56     22    1               P
## 57     22    1               P
## 58     22    1               P
## 59     22    1               P
## 60     22    1               P
## 61     22    1               P
## 62     22    1               P
## 63     22    1               P
## 64     22    1               P
## 65     22    1               P
## 66     22    1               P
## 67     22    1               P
## 68     22    1               P
## 69     22    1               P
## 70     22    1               P
## 71     22    1               P
## 72     22    1               P
## 73     22    1               P
## 74     22    1               P
## 75     22    1               P
## 76     22    1               P
## 77     22    1               P
## 78     22    1               P
## 79     23    1               P
## 80     23    1               P
## 81     23    1               P
## 82     23    1               P
## 83     23    1               P
## 84     23    1               P
## 85     23    1               P
## 86     23    1               P
## 87     23    1               P
## 88     23    1               P
## 89     23    1               P
## 90     23    1               P
## 91     23    1               P
## 92     23    1               P
## 93     23    1               P
## 94     23    1               P
## 95     23    1               P
## 96     23    1               P
## 97     23    1               P
## 98     23    1               P
## 99     23    1               P
## 100    23    1               P
## 101    23    1               P
## 102    23    1               P
## 103    23    1               P
## 104    23    1               P
## 105    23    1               P
## 106    23    1               P
## 107    23    1               P
## 108    24    0               P
## 109    24    1               P
## 110    24    1               P
## 111    24    1               P
## 112    24    1               P
## 113    24    1               P
## 114    24    1               P
## 115    24    1               P
## 116    24    1               P
## 117    24    1               P
## 118    24    1               P
## 119    24    1               P
## 120    24    1               P
## 121    24    1               P
## 122    24    1               P
## 123    24    1               P
## 124    24    1               P
## 125    24    1               P
## 126    24    1               P
## 127    24    1               P
## 128    24    1               P
## 129    24    1               P
## 130    24    1               P
## 131    24    1               P
## 132    24    1               P
## 133    25    1               P
## 134    25    1               P
## 135    25    1               P
## 136    25    1               P
## 137    25    1               P
## 138    25    1               P
## 139    25    1               P
## 140    25    1               P
## 141    25    1               P
## 142    25    1               P
## 143    25    1               P
## 144    25    1               P
## 145    25    1               P
## 146    25    1               P
## 147    25    1               P
## 148    26    0               P
## 149    26    1               P
## 150    26    1               P
## 151    26    1               P
## 152    26    1               P
## 153    26    1               P
## 154    26    1               P
## 155    26    1               P
## 156    26    1               P
## 157    26    1               P
## 158    26    1               P
## 159    26    1               P
## 160    26    1               P
## 161    26    1               P
## 162    26    1               P
## 163    26    1               P
## 164    26    1               P
## 165    26    1               P
## 166    26    1               P
## 167    26    1               P
## 168    26    1               P
## 169    26    1               P
## 170    26    1               P
## 171    26    1               P
## 172    26    1               P
## 173    26    1               P
## 174    27    1               P
## 175    27    1               P
## 176    27    1               P
## 177    27    1               P
## 178    27    1               P
## 179    27    1               P
## 180    27    1               P
## 181    27    1               P
## 182    27    1               P
## 183    27    1               P
## 184    27    1               P
## 185    27    1               P
## 186    27    1               P
## 187    27    1               P
## 188    27    1               P
## 189    27    1               P
## 190    27    1               P
## 191    27    1               P
## 192    27    1               P
## 193    27    1               P
## 194    27    1               P
## 195    27    1               P
## 196    27    1               P
## 197    27    1               P
## 198    27    1               P
## 199    27    1               P
## 200    27    1               P
## 201    27    1               P
## 202    27    1               P
## 203    27    1               P
## 204    27    1               P
## 205    28    1               P
## 206    28    1               P
## 207    28    1               P
## 208    28    1               P
## 209    28    1               P
## 210    28    1               P
## 211    28    1               P
## 212    28    1               P
## 213    28    1               P
## 214    28    1               P
## 215    28    1               P
## 216    28    1               P
## 217    28    1               P
## 218    28    1               P
## 219    28    1               P
## 220    28    1               P
## 221    28    1               P
## 222    28    1               P
## 223    28    1               P
## 224    28    1               P
## 225    28    1               P
## 226    28    1               P
## 227    28    1               P
## 228    28    1               P
## 229    28    1               P
## 230    29    0               P
## 231    29    0               P
## 232    29    0               P
## 233    29    0               P
## 234    29    1               P
## 235    29    1               P
## 236    29    1               P
## 237    29    1               P
## 238    29    1               P
## 239    29    1               P
## 240    29    1               P
## 241    29    1               P
## 242    29    1               P
## 243    29    1               P
## 244    29    1               P
## 245    29    1               P
## 246    29    1               P
## 247    29    1               P
## 248    29    1               P
## 249    29    1               P
## 250    29    1               P
## 251    29    1               P
## 252    29    1               P
## 253    29    1               P
## 254    29    1               P
## 255    29    1               P
## 256    29    1               P
## 257    29    1               P
## 258    29    1               P
## 259    29    1               P
## 260    29    1               P
## 261    29    1               P
## 262    29    1               P
## 263    29    1               P
## 264    30    0               P
## 265    30    0               P
## 266    30    0               P
## 267    30    1               P
## 268    30    1               P
## 269    30    1               P
## 270    30    1               P
## 271    30    1               P
## 272    30    1               P
## 273    30    1               P
## 274    30    1               P
## 275    30    1               P
## 276    30    1               P
## 277    30    1               P
## 278    30    1               P
## 279    30    1               P
## 280    30    1               P
## 281    30    1               P
## 282    30    1               P
## 283    30    1               P
## 284    30    1               P
## 285    31    0               P
## 286    31    0               P
## 287    31    0               P
## 288    31    1               P
## 289    31    1               P
## 290    31    1               P
## 291    31    1               P
## 292    31    1               P
## 293    31    1               P
## 294    31    1               P
## 295    31    1               P
## 296    31    1               P
## 297    31    1               P
## 298    31    1               P
## 299    31    1               P
## 300    31    1               P
## 301    31    1               P
## 302    31    1               P
## 303    31    1               P
## 304    31    1               P
## 305    31    1               P
## 306    31    1               P
## 307    31    1               P
## 308    31    1               P
## 309    31    1               P
## 310    32    0               P
## 311    32    0               P
## 312    32    1               P
## 313    32    1               P
## 314    32    1               P
## 315    32    1               P
## 316    32    1               P
## 317    32    1               P
## 318    32    1               P
## 319    32    1               P
## 320    32    1               P
## 321    32    1               P
## 322    32    1               P
## 323    32    1               P
## 324    32    1               P
## 325    32    1               P
## 326    32    1               P
## 327    32    1               P
## 328    33    0               P
## 329    33    0               P
## 330    33    0               P
## 331    33    1               P
## 332    33    1               P
## 333    33    1               P
## 334    33    1               P
## 335    33    1               P
## 336    33    1               P
## 337    33    1               P
## 338    33    1               P
## 339    33    1               P
## 340    33    1               P
## 341    33    1               P
## 342    33    1               P
## 343    33    1               P
## 344    33    1               P
## 345    33    1               P
## 346    33    1               P
## 347    33    1               P
## 348    33    1               P
## 349    33    1               P
## 350    33    1               P
## 351    33    1               P
## 352    33    1               P
## 353    33    1               P
## 354    33    1               P
## 355    33    1               P
## 356    33    1               P
## 357    33    1               P
## 358    33    1               P
## 359    34    0               P
## 360    34    0               P
## 361    34    0               P
## 362    34    0               P
## 363    34    0               P
## 364    34    1               P
## 365    34    1               P
## 366    34    1               P
## 367    34    1               P
## 368    34    1               P
## 369    34    1               P
## 370    34    1               P
## 371    34    1               P
## 372    34    1               P
## 373    34    1               P
## 374    34    1               P
## 375    34    1               P
## 376    34    1               P
## 377    34    1               P
## 378    34    1               P
## 379    35    0               P
## 380    35    0               P
## 381    35    0               P
## 382    35    0               P
## 383    35    0               P
## 384    35    0               P
## 385    35    0               P
## 386    35    1               P
## 387    35    1               P
## 388    35    1               P
## 389    35    1               P
## 390    35    1               P
## 391    35    1               P
## 392    35    1               P
## 393    35    1               P
## 394    35    1               P
## 395    35    1               P
## 396    35    1               P
## 397    35    1               P
## 398    35    1               P
## 399    35    1               P
## 400    35    1               P
## 401    35    1               P
## 402    35    1               P
## 403    35    1               P
## 404    35    1               P
## 405    35    1               P
## 406    36    0               P
## 407    36    0               P
## 408    36    0               P
## 409    36    0               P
## 410    36    1               P
## 411    36    1               P
## 412    36    1               P
## 413    36    1               P
## 414    36    1               P
## 415    36    1               P
## 416    36    1               P
## 417    36    1               P
## 418    36    1               P
## 419    36    1               P
## 420    36    1               P
## 421    36    1               P
## 422    36    1               P
## 423    36    1               P
## 424    36    1               P
## 425    36    1               P
## 426    36    1               P
## 427    36    1               P
## 428    36    1               P
## 429    36    1               P
## 430    36    1               P
## 431    37    0               P
## 432    37    0               P
## 433    37    0               P
## 434    37    0               P
## 435    37    0               P
## 436    37    0               P
## 437    37    1               P
## 438    37    1               P
## 439    37    1               P
## 440    37    1               P
## 441    37    1               P
## 442    37    1               P
## 443    37    1               P
## 444    37    1               P
## 445    37    1               P
## 446    37    1               P
## 447    37    1               P
## 448    37    1               P
## 449    37    1               P
## 450    37    1               P
## 451    37    1               P
## 452    37    1               P
## 453    37    1               P
## 454    37    1               P
## 455    37    1               P
## 456    37    1               P
## 457    37    1               P
## 458    37    1               P
## 459    37    1               P
## 460    38    0               P
## 461    38    0               P
## 462    38    0               P
## 463    38    0               P
## 464    38    0               P
## 465    38    0               P
## 466    38    1               P
## 467    38    1               P
## 468    38    1               P
## 469    38    1               P
## 470    38    1               P
## 471    38    1               P
## 472    38    1               P
## 473    38    1               P
## 474    38    1               P
## 475    38    1               P
## 476    38    1               P
## 477    38    1               P
## 478    38    1               P
## 479    38    1               P
## 480    38    1               P
## 481    38    1               P
## 482    38    1               P
## 483    38    1               P
## 484    38    1               P
## 485    38    1               P
## 486    38    1               P
## 487    38    1               P
## 488    38    1               P
## 489    38    1               P
## 490    39    0               P
## 491    39    0               P
## 492    39    0               P
## 493    39    0               P
## 494    39    0               P
## 495    39    0               P
## 496    39    0               P
## 497    39    1               P
## 498    39    1               P
## 499    39    1               P
## 500    39    1               P
## 501    39    1               P
## 502    39    1               P
## 503    39    1               P
## 504    39    1               P
## 505    39    1               P
## 506    39    1               P
## 507    39    1               P
## 508    39    1               P
## 509    39    1               P
## 510    39    1               P
## 511    39    1               P
## 512    39    1               P
## 513    39    1               P
## 514    39    1               P
## 515    39    1               P
## 516    39    1               P
## 517    39    1               P
## 518    39    1               P
## 519    39    1               P
## 520    40    0               P
## 521    40    0               P
## 522    40    0               P
## 523    40    0               P
## 524    40    0               P
## 525    40    1               P
## 526    40    1               P
## 527    40    1               P
## 528    40    1               P
## 529    40    1               P
## 530    40    1               P
## 531    40    1               P
## 532    40    1               P
## 533    40    1               P
## 534    40    1               P
## 535    40    1               P
## 536    40    1               P
## 537    40    1               P
## 538    40    1               P
## 539    40    1               P
## 540    40    1               P
## 541    40    1               P
## 542    40    1               P
## 543    40    1               P
## 544    40    1               P
## 545    40    1               P
## 546    41    0               P
## 547    41    0               P
## 548    41    0               P
## 549    41    0               P
## 550    41    0               P
## 551    41    0               P
## 552    41    0               P
## 553    41    0               P
## 554    41    0               P
## 555    41    0               P
## 556    41    1               P
## 557    41    1               P
## 558    41    1               P
## 559    41    1               P
## 560    41    1               P
## 561    41    1               P
## 562    41    1               P
## 563    41    1               P
## 564    41    1               P
## 565    41    1               P
## 566    41    1               P
## 567    41    1               P
## 568    41    1               P
## 569    41    1               P
## 570    41    1               P
## 571    41    1               P
## 572    41    1               P
## 573    42    0               P
## 574    42    0               P
## 575    42    1               P
## 576    42    1               P
## 577    42    1               P
## 578    42    1               P
## 579    42    1               P
## 580    42    1               P
## 581    42    1               P
## 582    42    1               P
## 583    42    1               P
## 584    42    1               P
## 585    42    1               P
## 586    42    1               P
## 587    42    1               P
## 588    42    1               P
## 589    42    1               P
## 590    42    1               P
## 591    42    1               P
## 592    42    1               P
## 593    42    1               P
## 594    42    1               P
## 595    42    1               P
## 596    42    1               P
## 597    42    1               P
## 598    42    1               P
## 599    42    1               P
## 600    43    0               P
## 601    43    0               P
## 602    43    0               P
## 603    43    0               P
## 604    43    0               P
## 605    43    0               P
## 606    43    1               P
## 607    43    1               P
## 608    43    1               P
## 609    43    1               P
## 610    43    1               P
## 611    43    1               P
## 612    43    1               P
## 613    43    1               P
## 614    43    1               P
## 615    43    1               P
## 616    43    1               P
## 617    43    1               P
## 618    43    1               P
## 619    43    1               P
## 620    43    1               P
## 621    43    1               P
## 622    43    1               P
## 623    43    1               P
## 624    43    1               P
## 625    43    1               P
## 626    44    0               P
## 627    44    0               P
## 628    44    0               P
## 629    44    0               P
## 630    44    0               P
## 631    44    0               P
## 632    44    0               P
## 633    44    0               P
## 634    44    0               P
## 635    44    0               P
## 636    44    0               P
## 637    44    1               P
## 638    44    1               P
## 639    44    1               P
## 640    44    1               P
## 641    44    1               P
## 642    44    1               P
## 643    44    1               P
## 644    44    1               P
## 645    44    1               P
## 646    44    1               P
## 647    44    1               P
## 648    44    1               P
## 649    44    1               P
## 650    44    1               P
## 651    44    1               P
## 652    44    1               P
## 653    44    1               P
## 654    44    1               P
## 655    45    0               P
## 656    45    0               P
## 657    45    0               P
## 658    45    0               P
## 659    45    0               P
## 660    45    0               P
## 661    45    0               P
## 662    45    0               P
## 663    45    1               P
## 664    45    1               P
## 665    45    1               P
## 666    45    1               P
## 667    45    1               P
## 668    45    1               P
## 669    45    1               P
## 670    45    1               P
## 671    45    1               P
## 672    45    1               P
## 673    45    1               P
## 674    45    1               P
## 675    46    0               P
## 676    46    0               P
## 677    46    0               P
## 678    46    0               P
## 679    46    0               P
## 680    46    0               P
## 681    46    0               P
## 682    46    0               P
## 683    46    1               P
## 684    46    1               P
## 685    46    1               P
## 686    46    1               P
## 687    46    1               P
## 688    46    1               P
## 689    46    1               P
## 690    46    1               P
## 691    46    1               P
## 692    46    1               P
## 693    46    1               P
## 694    46    1               P
## 695    46    1               P
## 696    46    1               P
## 697    46    1               P
## 698    46    1               P
## 699    46    1               P
## 700    46    1               P
## 701    46    1               P
## 702    46    1               P
## 703    46    1               P
## 704    47    0               P
## 705    47    0               P
## 706    47    0               P
## 707    47    0               P
## 708    47    0               P
## 709    47    0               P
## 710    47    1               P
## 711    47    1               P
## 712    47    1               P
## 713    47    1               P
## 714    47    1               P
## 715    47    1               P
## 716    47    1               P
## 717    47    1               P
## 718    47    1               P
## 719    47    1               P
## 720    47    1               P
## 721    47    1               P
## 722    47    1               P
## 723    47    1               P
## 724    47    1               P
## 725    47    1               P
## 726    47    1               P
## 727    48    0               P
## 728    48    0               P
## 729    48    0               P
## 730    48    0               P
## 731    48    0               P
## 732    48    0               P
## 733    48    0               P
## 734    48    0               P
## 735    48    0               P
## 736    48    0               P
## 737    48    0               P
## 738    48    0               P
## 739    48    0               P
## 740    48    1               P
## 741    48    1               P
## 742    48    1               P
## 743    48    1               P
## 744    48    1               P
## 745    48    1               P
## 746    48    1               P
## 747    48    1               P
## 748    48    1               P
## 749    48    1               P
## 750    48    1               P
## 751    48    1               P
## 752    48    1               P
## 753    48    1               P
## 754    48    1               P
## 755    48    1               P
## 756    49    0               P
## 757    49    0               P
## 758    49    0               P
## 759    49    0               P
## 760    49    0               P
## 761    49    0               P
## 762    49    0               P
## 763    49    0               P
## 764    49    0               P
## 765    49    0               P
## 766    49    0               P
## 767    49    1               P
## 768    49    1               P
## 769    49    1               P
## 770    49    1               P
## 771    49    1               P
## 772    49    1               P
## 773    49    1               P
## 774    49    1               P
## 775    49    1               P
## 776    49    1               P
## 777    49    1               P
## 778    49    1               P
## 779    49    1               P
## 780    50    0               P
## 781    50    0               P
## 782    50    0               P
## 783    50    0               P
## 784    50    0               P
## 785    50    0               P
## 786    50    0               P
## 787    50    0               P
## 788    50    0               P
## 789    50    1               P
## 790    50    1               P
## 791    50    1               P
## 792    50    1               P
## 793    50    1               P
## 794    50    1               P
## 795    50    1               P
## 796    50    1               P
## 797    50    1               P
## 798    50    1               P
## 799    50    1               P
## 800    50    1               P
## 801    51    0               P
## 802    51    0               P
## 803    51    0               P
## 804    51    0               P
## 805    51    0               P
## 806    51    0               P
## 807    51    1               P
## 808    51    1               P
## 809    51    1               P
## 810    51    1               P
## 811    51    1               P
## 812    51    1               P
## 813    51    1               P
## 814    51    1               P
## 815    51    1               P
## 816    51    1               P
## 817    51    1               P
## 818    52    0               P
## 819    52    0               P
## 820    52    0               P
## 821    52    0               P
## 822    52    0               P
## 823    52    0               P
## 824    52    0               P
## 825    52    0               P
## 826    52    0               P
## 827    52    1               P
## 828    52    1               P
## 829    52    1               P
## 830    52    1               P
## 831    52    1               P
## 832    53    0               P
## 833    53    0               P
## 834    53    0               P
## 835    53    0               P
## 836    53    0               P
## 837    53    0               P
## 838    53    0               P
## 839    53    1               P
## 840    53    1               P
## 841    53    1               P
## 842    53    1               P
## 843    53    1               P
## 844    53    1               P
## 845    53    1               P
## 846    54    0               P
## 847    54    0               P
## 848    54    0               P
## 849    54    0               P
## 850    54    1               P
## 851    54    1               P
## 852    55    0               P
## 853    55    0               P
## 854    55    0               P
## 855    55    0               P
## 856    55    1               P
## 857    56    1               P
## 858    57    0               P
## 859    57    1               P
## 860    60    0               P
## 861    60    0               P
## 862    62    0               P
subset2
##    yards goal practiceormatch
## 1     22    1               M
## 2     23    1               M
## 3     23    1               M
## 4     23    1               M
## 5     23    1               M
## 6     24    0               M
## 7     24    1               M
## 8     24    1               M
## 9     24    1               M
## 10    24    1               M
## 11    25    1               M
## 12    25    1               M
## 13    26    1               M
## 14    27    1               M
## 15    28    0               M
## 16    28    1               M
## 17    28    1               M
## 18    28    1               M
## 19    28    1               M
## 20    28    1               M
## 21    28    1               M
## 22    29    1               M
## 23    29    1               M
## 24    29    1               M
## 25    30    1               M
## 26    31    1               M
## 27    33    1               M
## 28    33    1               M
## 29    33    1               M
## 30    34    0               M
## 31    34    1               M
## 32    34    1               M
## 33    34    1               M
## 34    34    1               M
## 35    37    0               M
## 36    37    1               M
## 37    37    1               M
## 38    37    1               M
## 39    37    1               M
## 40    37    1               M
## 41    38    0               M
## 42    38    1               M
## 43    38    1               M
## 44    39    1               M
## 45    39    1               M
## 46    39    1               M
## 47    40    1               M
## 48    40    1               M
## 49    40    1               M
## 50    41    1               M
## 51    41    1               M
## 52    41    1               M
## 53    41    1               M
## 54    41    1               M
## 55    42    1               M
## 56    43    0               M
## 57    43    1               M
## 58    44    1               M
## 59    44    1               M
## 60    45    1               M
## 61    46    0               M
## 62    46    1               M
## 63    46    1               M
## 64    46    1               M
## 65    46    1               M
## 66    47    0               M
## 67    47    1               M
## 68    47    1               M
## 69    47    1               M
## 70    48    0               M
## 71    48    0               M
## 72    48    1               M
## 73    48    1               M
## 74    48    1               M
## 75    48    1               M
## 76    48    1               M
## 77    49    0               M
## 78    49    0               M
## 79    49    1               M
## 80    49    1               M
## 81    49    1               M
## 82    50    0               M
## 83    50    0               M
## 84    50    1               M
## 85    50    1               M
## 86    51    1               M
## 87    54    0               M
## 88    54    1               M
## 89    55    0               M
## 90    58    1               M
plot(goal~yards,data=subset1,
     xlab="Yards",
     ylab="Goal")

Estimate two separate logistic regressions for goal as a function of yards for the two subsets.

subset1.log = glm(goal ~ yards, 
             data=subset1,
             family=binomial)
subset2.log = glm(goal ~ yards, 
             data=subset2,
             family=binomial)

See the results and compare two models

summary(subset1.log)
## 
## Call:
## glm(formula = goal ~ yards, family = binomial, data = subset1)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.6690   0.2538   0.3950   0.7057   1.4702  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  5.80284    0.47356   12.25   <2e-16 ***
## yards       -0.11349    0.01115  -10.18   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 875.30  on 861  degrees of freedom
## Residual deviance: 741.28  on 860  degrees of freedom
## AIC: 745.28
## 
## Number of Fisher Scoring iterations: 5
summary(subset2.log)
## 
## Call:
## glm(formula = goal ~ yards, family = binomial, data = subset2)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4254   0.3293   0.4865   0.6999   1.1253  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)  4.83766    1.50308   3.218  0.00129 **
## yards       -0.08127    0.03455  -2.353  0.01864 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 84.241  on 89  degrees of freedom
## Residual deviance: 77.735  on 88  degrees of freedom
## AIC: 81.735
## 
## Number of Fisher Scoring iterations: 5

Criteria

AIC is a number that is helpful for comparing models as it includes measures of both how well the model fits the data and how complex the model is. Lower is better.

compared the AIC, the second model is better.

If it is a 40-yard kick, what is the probability that the goal will be scored (a) in a practice game, and (b) in a match?

a <- predict(subset1.log, data.frame(yards=40), type="resp")
b <- predict(subset2.log, data.frame(yards=40), type="resp")
a
##         1 
## 0.7795695
b
##         1 
## 0.8301675

Part C (30 marks) 1. For the dataset in Part A, using ggplot, make three plots that a) plot HL against the most important predictor from your model in Part A, Accroding to my best model Best HL: log(HL)~ SA+WA+OH+GA+GD, the most important predictor is GA b) faceted by GD and OR c) and for each plot separately fit Accroding to my best model Best HL: log(HL)~ SA+WA+OH+GA+GD, the most important predictor is GA ## 1. linear model,

mydf1 <- subset(mydf, select = -CL)
p <- ggplot(mydf1) + facet_grid(GD~ OR) + ylab("GD")+xlab("OR")
lm <- p + stat_smooth(aes(x=GA, y= HL),method = "lm",
                formula = y ~ x,
                size = 1)
print(lm)

2. a second-order polynomial, and

poly <- p + stat_smooth(aes(x=GA, y= HL),method = "lm",
                formula = y ~ poly(x, 2),
                size = 1)
print(poly)

3. LOESS

loess <- p + stat_smooth(aes(x=GA, y= HL),method = "loess",
                formula = y ~ x,
                size = 1)
print(loess)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902

  1. For the dataset in Part B, using ggplot, make one plot that
  1. plots the probabilities of scoring a goal versus yards for both the subsets that you created
y1 <- subset(subset1, select = "yards") 
y2 <- subset(subset2, select = "yards")
prob1 <- matrix(predict(subset1.log,yards= y1, type="resp"))
prob2 <- matrix(predict(subset2.log,yards= y2, type="resp"))
subset1$prob <- prob1
subset2$prob <- prob2
ggplot()+ geom_smooth(colour="red",data= subset1,aes(x=yards, y=prob),method = "glm", method.args = list(family = "quasibinomial"), se = FALSE) + geom_smooth(data = subset2,aes(x=yards, y=prob),method = "glm", method.args = list(family = "quasibinomial"), se = FALSE)

p <- ggplot() + # blue plot geom_point(data=visual1, aes(x=ISSUE_DATE, y=COUNTED)) + geom_smooth(data=visual1, aes(x=ISSUE_DATE, y=COUNTED), fill=“blue”, colour=“darkblue”, size=1) + # red plot geom_point(data=visual2, aes(x=ISSUE_DATE, y=COUNTED)) + geom_smooth(data=visual2, aes(x=ISSUE_DATE, y=COUNTED), fill=“red”, colour=“red”, size=1)