library("tidyverse")
## ── Attaching packages ─────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1 ✔ purrr 0.2.4
## ✔ tibble 1.3.4 ✔ dplyr 0.7.4
## ✔ tidyr 0.7.2 ✔ stringr 1.2.0
## ✔ readr 1.1.1 ✔ forcats 0.2.0
## ── Conflicts ────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library("dplyr", warn.conflicts = FALSE)
if (!require(car)) {
  install.packages("car")
  require(car)
}
if (!require(ggplot2)) {
  install.packages("ggplot2")
  require(ggplot2)
}
if (!require(GGally)) {
  install.packages("GGally")
  require(GGally)
}
if (!require(MASS)) {
  install.packages("MASS")
  require(MASS)
}
if (!require(olsrr)) {
  install.packages("olsrr")
  require(olsrr)
}
Part A (35 marks)
mydf <- read_csv("buildingenergy.csv")
## Parsed with column specification:
## cols(
## RC = col_double(),
## SA = col_double(),
## WA = col_double(),
## RA = col_double(),
## OH = col_double(),
## OR = col_integer(),
## GA = col_double(),
## GD = col_integer(),
## HL = col_double(),
## CL = col_double()
## )
head(mydf)
## # A tibble: 6 x 10
## RC SA WA RA OH OR GA GD HL CL
## <dbl> <dbl> <dbl> <dbl> <dbl> <int> <dbl> <int> <dbl> <dbl>
## 1 0.98 514.5 294.0 110.25 7 2 0 0 15.55 21.33
## 2 0.98 514.5 294.0 110.25 7 3 0 0 15.55 21.33
## 3 0.98 514.5 294.0 110.25 7 4 0 0 15.55 21.33
## 4 0.98 514.5 294.0 110.25 7 5 0 0 15.55 21.33
## 5 0.90 563.5 318.5 122.50 7 2 0 0 20.84 28.28
## 6 0.90 563.5 318.5 122.50 7 3 0 0 21.46 25.38
We start by specifying and estimating a full linear model for each response (HL and CL) using all eight predictors. This assumes the data are clean and free of other issues such as missing values or coding errors.
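Before fitting, a quick sanity check of the data (a minimal sketch) can confirm there are no missing values and that the variable ranges look sensible:
# Quick data-quality checks before modelling
dim(mydf)         # number of observations and variables
sum(is.na(mydf))  # total number of missing values
summary(mydf)     # range and quartiles of every column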
regModel1 <- lm(HL ~ RC+SA+WA+RA+OH+OR+GA+GD,data = mydf)
regModel2 <- lm(CL ~ RC+SA+WA+RA+OH+OR+GA+GD,data = mydf)
summary(regModel1)
##
## Call:
## lm(formula = HL ~ RC + SA + WA + RA + OH + OR + GA + GD, data = mydf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8965 -1.3196 -0.0252 1.3532 7.7052
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 84.014521 19.033607 4.414 1.16e-05 ***
## RC -64.773991 10.289445 -6.295 5.19e-10 ***
## SA -0.087290 0.017075 -5.112 4.04e-07 ***
## WA 0.060813 0.006648 9.148 < 2e-16 ***
## RA NA NA NA NA
## OH 4.169939 0.337990 12.337 < 2e-16 ***
## OR -0.023328 0.094705 -0.246 0.80550
## GA 19.932680 0.813986 24.488 < 2e-16 ***
## GD 0.203772 0.069918 2.914 0.00367 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.934 on 760 degrees of freedom
## Multiple R-squared: 0.9162, Adjusted R-squared: 0.9154
## F-statistic: 1187 on 7 and 760 DF, p-value: < 2.2e-16
summary(regModel2)
##
## Call:
## lm(formula = CL ~ RC + SA + WA + RA + OH + OR + GA + GD, data = mydf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6940 -1.5606 -0.2668 1.3968 11.1775
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 97.245749 20.764711 4.683 3.34e-06 ***
## RC -70.787707 11.225269 -6.306 4.85e-10 ***
## SA -0.088245 0.018628 -4.737 2.59e-06 ***
## WA 0.044682 0.007253 6.161 1.17e-09 ***
## RA NA NA NA NA
## OH 4.283843 0.368730 11.618 < 2e-16 ***
## OR 0.121510 0.103318 1.176 0.240
## GA 14.717068 0.888018 16.573 < 2e-16 ***
## GD 0.040697 0.076277 0.534 0.594
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.201 on 760 degrees of freedom
## Multiple R-squared: 0.8878, Adjusted R-squared: 0.8868
## F-statistic: 859.1 on 7 and 760 DF, p-value: < 2.2e-16
plot(regModel1)
plot(regModel2)
regModel1 <- lm(HL ~ RC+SA+WA+OH+OR+GA+GD,data = mydf)
regModel2 <- lm(CL ~ RC+SA+WA+OH+OR+GA+GD,data = mydf)
summary(regModel1)
##
## Call:
## lm(formula = HL ~ RC + SA + WA + OH + OR + GA + GD, data = mydf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.8965 -1.3196 -0.0252 1.3532 7.7052
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 84.014521 19.033607 4.414 1.16e-05 ***
## RC -64.773991 10.289445 -6.295 5.19e-10 ***
## SA -0.087290 0.017075 -5.112 4.04e-07 ***
## WA 0.060813 0.006648 9.148 < 2e-16 ***
## OH 4.169939 0.337990 12.337 < 2e-16 ***
## OR -0.023328 0.094705 -0.246 0.80550
## GA 19.932680 0.813986 24.488 < 2e-16 ***
## GD 0.203772 0.069918 2.914 0.00367 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.934 on 760 degrees of freedom
## Multiple R-squared: 0.9162, Adjusted R-squared: 0.9154
## F-statistic: 1187 on 7 and 760 DF, p-value: < 2.2e-16
summary(regModel2)
##
## Call:
## lm(formula = CL ~ RC + SA + WA + OH + OR + GA + GD, data = mydf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8.6940 -1.5606 -0.2668 1.3968 11.1775
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 97.245749 20.764711 4.683 3.34e-06 ***
## RC -70.787707 11.225269 -6.306 4.85e-10 ***
## SA -0.088245 0.018628 -4.737 2.59e-06 ***
## WA 0.044682 0.007253 6.161 1.17e-09 ***
## OH 4.283843 0.368730 11.618 < 2e-16 ***
## OR 0.121510 0.103318 1.176 0.240
## GA 14.717068 0.888018 16.573 < 2e-16 ***
## GD 0.040697 0.076277 0.534 0.594
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.201 on 760 degrees of freedom
## Multiple R-squared: 0.8878, Adjusted R-squared: 0.8868
## F-statistic: 859.1 on 7 and 760 DF, p-value: < 2.2e-16
plot(regModel1)
plot(regModel2)
hist(mydf$HL)
hist(mydf$CL)
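The histograms motivate checking whether a log transformation makes the responses more symmetric; a quick sketch of the transformed variables:
hist(log(mydf$HL), main = "Histogram of log(HL)", xlab = "log(HL)")
hist(log(mydf$CL), main = "Histogram of log(CL)", xlab = "log(CL)")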
Heating Load
regModel1 <- lm(log(HL) ~ RC+SA+WA+OH+OR+GA+GD,data = mydf)
summary(regModel1)
##
## Call:
## lm(formula = log(HL) ~ RC + SA + WA + OH + OR + GA + GD, data = mydf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35472 -0.05718 0.00176 0.06585 0.31204
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.5787874 0.7806251 -0.741 0.4587
## RC 0.5623864 0.4220009 1.333 0.1830
## SA 0.0014251 0.0007003 2.035 0.0422 *
## WA 0.0015991 0.0002727 5.865 6.70e-09 ***
## OH 0.2665273 0.0138620 19.227 < 2e-16 ***
## OR -0.0008389 0.0038841 -0.216 0.8290
## GA 1.0117131 0.0333840 30.305 < 2e-16 ***
## GD 0.0159934 0.0028675 5.577 3.39e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1203 on 760 degrees of freedom
## Multiple R-squared: 0.9368, Adjusted R-squared: 0.9362
## F-statistic: 1610 on 7 and 760 DF, p-value: < 2.2e-16
plot(regModel1)
Cooling Load
regModel2 <- lm(log(CL) ~ RC+SA+WA+OH+OR+GA+GD,data = mydf)
summary(regModel2)
##
## Call:
## lm(formula = log(CL) ~ RC + SA + WA + OH + OR + GA + GD, data = mydf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28014 -0.06793 -0.00703 0.04112 0.32881
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.0597218 0.7272638 4.207 2.89e-05 ***
## RC -1.1278100 0.3931541 -2.869 0.00424 **
## SA -0.0011130 0.0006524 -1.706 0.08842 .
## WA 0.0013456 0.0002540 5.298 1.54e-07 ***
## OH 0.2046199 0.0129144 15.844 < 2e-16 ***
## OR 0.0045359 0.0036186 1.254 0.21041
## GA 0.6341835 0.0311020 20.390 < 2e-16 ***
## GD 0.0030144 0.0026715 1.128 0.25953
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1121 on 760 degrees of freedom
## Multiple R-squared: 0.9192, Adjusted R-squared: 0.9185
## F-statistic: 1236 on 7 and 760 DF, p-value: < 2.2e-16
plot(regModel2)
vif(regModel1)
## RC SA WA OH OR GA
## 105.524054 201.531134 7.492984 31.205474 1.000000 1.047508
## GD
## 1.047508
library(GGally)
X <- subset(mydf,select=-c(HL, CL))
ggpairs(X)
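As a numeric complement to the pairs plot, the pairwise correlations between the predictors can be printed directly (a small sketch using the same predictor matrix X):
# Pairwise correlations between the predictors, rounded for readability
round(cor(X), 2)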
Heating Load
HL1 <- lm(log(HL) ~ RC+WA+OH+OR+GA+GD,data = mydf)
summary(HL1)
##
## Call:
## lm(formula = log(HL) ~ RC + WA + OH + OR + GA + GD, data = mydf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35568 -0.05824 0.00023 0.05892 0.31448
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9940620 0.1095921 9.071 < 2e-16 ***
## RC -0.2578535 0.1252015 -2.060 0.0398 *
## WA 0.0020209 0.0001775 11.387 < 2e-16 ***
## OH 0.2430700 0.0077149 31.507 < 2e-16 ***
## OR -0.0008389 0.0038921 -0.216 0.8294
## GA 1.0117131 0.0334528 30.243 < 2e-16 ***
## GD 0.0159934 0.0028734 5.566 3.61e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1206 on 761 degrees of freedom
## Multiple R-squared: 0.9365, Adjusted R-squared: 0.936
## F-statistic: 1870 on 6 and 761 DF, p-value: < 2.2e-16
plot(HL1)
Cooling Load
CL1 <- lm(log(CL) ~ RC+WA+OH+OR+GA+GD,data = mydf)
summary(CL1)
##
## Call:
## lm(formula = log(CL) ~ RC + WA + OH + OR + GA + GD, data = mydf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28207 -0.06701 -0.00228 0.04210 0.32691
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.8312778 0.1020184 17.950 < 2e-16 ***
## RC -0.4871773 0.1165491 -4.180 3.25e-05 ***
## WA 0.0010162 0.0001652 6.151 1.25e-09 ***
## OH 0.2229408 0.0071817 31.043 < 2e-16 ***
## OR 0.0045359 0.0036232 1.252 0.211
## GA 0.6341835 0.0311410 20.365 < 2e-16 ***
## GD 0.0030144 0.0026749 1.127 0.260
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1123 on 761 degrees of freedom
## Multiple R-squared: 0.9189, Adjusted R-squared: 0.9183
## F-statistic: 1438 on 6 and 761 DF, p-value: < 2.2e-16
plot(CL1)
vif(HL1)
## RC WA OH OR GA GD
## 9.250283 3.161934 9.626103 1.000000 1.047508 1.047508
vif(CL1)
## RC WA OH OR GA GD
## 9.250283 3.161934 9.626103 1.000000 1.047508 1.047508
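This manual drop-and-refit process can also be automated; the function below is a hypothetical helper (prune_by_vif is not part of any package) that repeatedly removes the predictor with the largest VIF until all remaining VIFs fall below a chosen threshold:
# Hypothetical helper: iteratively drop the predictor with the highest VIF
# until every remaining VIF is below `threshold`. Uses car::vif().
prune_by_vif <- function(response, predictors, data, threshold = 10) {
  repeat {
    fml <- as.formula(paste(response, "~", paste(predictors, collapse = " + ")))
    fit <- lm(fml, data = data)
    v <- car::vif(fit)
    if (max(v) < threshold || length(predictors) <= 2) return(fit)
    predictors <- setdiff(predictors, names(which.max(v)))  # drop the worst offender
  }
}
# Example (a sketch): start from the non-aliased predictors of the log(HL) model
# prune_by_vif("log(HL)", c("RC", "SA", "WA", "OH", "OR", "GA", "GD"), mydf)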
library(MASS)
fitstepHL <- lm(log(HL) ~ RC+SA+WA+RA+OH+OR+GA+GD,data = mydf)
step <- stepAIC(fitstepHL, direction="both")
## Start: AIC=-3244.36
## log(HL) ~ RC + SA + WA + RA + OH + OR + GA + GD
##
##
## Step: AIC=-3244.36
## log(HL) ~ RC + SA + WA + OH + OR + GA + GD
##
## Df Sum of Sq RSS AIC
## - OR 1 0.0007 11.008 -3246.3
## - RC 1 0.0257 11.033 -3244.6
## <none> 11.007 -3244.4
## - SA 1 0.0600 11.067 -3242.2
## - GD 1 0.4505 11.458 -3215.5
## - WA 1 0.4982 11.505 -3212.4
## - OH 1 5.3541 16.361 -2941.9
## - GA 1 13.3013 24.308 -2637.9
##
## Step: AIC=-3246.31
## log(HL) ~ RC + SA + WA + OH + GA + GD
##
## Df Sum of Sq RSS AIC
## - RC 1 0.0257 11.033 -3246.5
## <none> 11.008 -3246.3
## + OR 1 0.0007 11.007 -3244.4
## - SA 1 0.0600 11.068 -3244.1
## - GD 1 0.4505 11.458 -3217.5
## - WA 1 0.4982 11.506 -3214.3
## - OH 1 5.3541 16.362 -2943.9
## - GA 1 13.3013 24.309 -2639.9
##
## Step: AIC=-3246.52
## log(HL) ~ SA + WA + OH + GA + GD
##
## Df Sum of Sq RSS AIC
## <none> 11.033 -3246.5
## + RC 1 0.0257 11.008 -3246.3
## + OR 1 0.0007 11.033 -3244.6
## - SA 1 0.0959 11.129 -3241.9
## - GD 1 0.4505 11.484 -3217.8
## - WA 1 0.9421 11.976 -3185.6
## - OH 1 8.2720 19.305 -2818.9
## - GA 1 13.3013 24.335 -2641.0
step$anova # display results
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## log(HL) ~ RC + SA + WA + RA + OH + OR + GA + GD
##
## Final Model:
## log(HL) ~ SA + WA + OH + GA + GD
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 760 11.00704 -3244.356
## 2 - RA 0 0.0000000000 760 11.00704 -3244.356
## 3 - OR 1 0.0006756823 761 11.00772 -3246.308
## 4 - RC 1 0.0257217145 762 11.03344 -3246.516
fitstepCL <- lm(log(CL) ~ RC+SA+WA+RA+OH+OR+GA+GD,data = mydf)
step <- stepAIC(fitstepCL, direction="both")
## Start: AIC=-3353.11
## log(CL) ~ RC + SA + WA + RA + OH + OR + GA + GD
##
##
## Step: AIC=-3353.11
## log(CL) ~ RC + SA + WA + OH + OR + GA + GD
##
## Df Sum of Sq RSS AIC
## - GD 1 0.0160 9.5697 -3353.8
## - OR 1 0.0198 9.5734 -3353.5
## <none> 9.5537 -3353.1
## - SA 1 0.0366 9.5902 -3352.2
## - RC 1 0.1034 9.6571 -3346.8
## - WA 1 0.3528 9.9064 -3327.3
## - OH 1 3.1557 12.7094 -3135.9
## - GA 1 5.2265 14.7801 -3020.0
##
## Step: AIC=-3353.83
## log(CL) ~ RC + SA + WA + OH + OR + GA
##
## Df Sum of Sq RSS AIC
## - OR 1 0.0198 9.5894 -3354.2
## <none> 9.5697 -3353.8
## + GD 1 0.0160 9.5537 -3353.1
## - SA 1 0.0366 9.6062 -3352.9
## - RC 1 0.1034 9.6731 -3347.6
## - WA 1 0.3528 9.9224 -3328.0
## - OH 1 3.1557 12.7254 -3136.9
## - GA 1 5.6046 15.1743 -3001.8
##
## Step: AIC=-3354.24
## log(CL) ~ RC + SA + WA + OH + GA
##
## Df Sum of Sq RSS AIC
## <none> 9.5894 -3354.2
## + OR 1 0.0198 9.5697 -3353.8
## + GD 1 0.0160 9.5734 -3353.5
## - SA 1 0.0366 9.6260 -3353.3
## - RC 1 0.1034 9.6929 -3348.0
## - WA 1 0.3528 9.9422 -3328.5
## - OH 1 3.1557 12.7452 -3137.8
## - GA 1 5.6046 15.1940 -3002.8
step$anova # display results
## Stepwise Model Path
## Analysis of Deviance Table
##
## Initial Model:
## log(CL) ~ RC + SA + WA + RA + OH + OR + GA + GD
##
## Final Model:
## log(CL) ~ RC + SA + WA + OH + GA
##
##
## Step Df Deviance Resid. Df Resid. Dev AIC
## 1 760 9.553656 -3353.113
## 2 - RA 0 0.00000000 760 9.553656 -3353.113
## 3 - GD 1 0.01600475 761 9.569660 -3353.827
## 4 - OR 1 0.01975180 762 9.589412 -3354.244
HL2 <- lm(log(HL) ~ SA + WA + OH + GA + GD, data = mydf)
summary(HL2)
##
## Call:
## lm(formula = log(HL) ~ SA + WA + OH + GA + GD, data = mydf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.35668 -0.06061 0.00220 0.06112 0.31164
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4428329 0.1346474 3.289 0.00105 **
## SA 0.0005337 0.0002073 2.574 0.01024 *
## WA 0.0018063 0.0002239 8.066 2.81e-15 ***
## OH 0.2547154 0.0106568 23.902 < 2e-16 ***
## GA 1.0117131 0.0333801 30.309 < 2e-16 ***
## GD 0.0159934 0.0028672 5.578 3.38e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1203 on 762 degrees of freedom
## Multiple R-squared: 0.9367, Adjusted R-squared: 0.9363
## F-statistic: 2254 on 5 and 762 DF, p-value: < 2.2e-16
CL2 <- lm(log(CL) ~ RC + SA + WA + OH + GA, data = mydf)
summary(CL2)
##
## Call:
## lm(formula = log(CL) ~ RC + SA + WA + OH + GA, data = mydf)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.28460 -0.06733 -0.00694 0.04061 0.32844
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.0823240 0.7275319 4.237 2.55e-05 ***
## RC -1.1278100 0.3933719 -2.867 0.00426 **
## SA -0.0011130 0.0006528 -1.705 0.08860 .
## WA 0.0013456 0.0002542 5.295 1.56e-07 ***
## OH 0.2046199 0.0129216 15.836 < 2e-16 ***
## GA 0.6416573 0.0304053 21.103 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1122 on 762 degrees of freedom
## Multiple R-squared: 0.9189, Adjusted R-squared: 0.9184
## F-statistic: 1728 on 5 and 762 DF, p-value: < 2.2e-16
library("olsrr")
myModelHL <- lm(log(HL) ~ RC+SA+WA+RA+OH+OR+GA+GD, data = mydf)
HL3 <- ols_best_subset(myModelHL)
## Warning in b * sx: longer object length is not a multiple of shorter object
## length
HL3
## Best Subsets Regression
## --------------------------------------
## Model Index Predictors
## --------------------------------------
## 1 OH
## 2 OH GA
## 3 WA OH GA
## 4 WA OH GA GD
## 5 SA WA OH GA GD
## 6 RC WA RA OH GA GD
## 7 RC WA RA OH OR GA GD
## 8 RC SA WA RA OH OR GA GD
## --------------------------------------
##
## Subsets Regression Summary
## ---------------------------------------------------------------------------------------------------------------------------------
## Adj. Pred
## Model R-Square R-Square R-Square C(p) AIC SBIC SBC MSEP FPE HSP APC
## ---------------------------------------------------------------------------------------------------------------------------------
## 1 0.8058 0.8055 0.8047 1572.9415 -212.1870 NA -198.2556 0.0443 0.0443 1e-04 0.1953
## 2 0.8921 0.8918 0.8911 536.0051 -661.7821 NA -643.2070 0.0247 0.0247 0.0000 0.1087
## 3 0.9335 0.9333 0.9328 39.5542 -1031.9008 NA -1008.6819 0.0152 0.0152 0.0000 0.0672
## 4 0.9361 0.9358 0.9352 10.4467 -1060.3774 NA -1032.5147 0.0147 0.0147 0.0000 0.0647
## 5 0.9367 0.9363 0.9356 5.8227 -1065.0263 NA -1032.5198 0.0146 0.0146 0.0000 0.0643
## 6 0.9368 0.9363 0.9356 6.0467 -1064.8188 NA -1027.6685 0.0146 0.0146 0.0000 0.0643
## 7 0.9368 0.9362 0.9355 8.0000 -1062.8659 NA -1021.0718 0.0146 0.0146 0.0000 0.0645
## 8 0.9368 0.9362 0.9355 8.0000 -1060.8659 NA -1014.4280 0.0146 0.0146 0.0000 0.0645
## ---------------------------------------------------------------------------------------------------------------------------------
## AIC: Akaike Information Criteria
## SBIC: Sawa's Bayesian Information Criteria
## SBC: Schwarz Bayesian Criteria
## MSEP: Estimated error of prediction, assuming multivariate normality
## FPE: Final Prediction Error
## HSP: Hocking's Sp
## APC: Amemiya Prediction Criteria
myModelCL <- lm(log(CL) ~ RC+SA+WA+RA+OH+OR+GA+GD, data = mydf)
CL3 <- ols_best_subset(myModelCL)
## Warning in b * sx: longer object length is not a multiple of shorter object
## length
CL3
## Best Subsets Regression
## --------------------------------------
## Model Index Predictors
## --------------------------------------
## 1 OH
## 2 OH GA
## 3 WA OH GA
## 4 RC RA OH GA
## 5 RC RA OH OR GA
## 6 RC RA OH OR GA GD
## 7 RC SA WA OH OR GA GD
## 8 RC SA WA RA OH OR GA GD
## --------------------------------------
##
## Subsets Regression Summary
## ---------------------------------------------------------------------------------------------------------------------------------
## Adj. Pred
## Model R-Square R-Square R-Square C(p) AIC SBIC SBC MSEP FPE HSP APC
## ---------------------------------------------------------------------------------------------------------------------------------
## 1 0.8412 0.8410 0.8403 730.7225 -664.1655 NA -650.2342 0.0246 0.0246 0.0000 0.1597
## 2 0.8886 0.8883 0.8876 286.8731 -934.2097 NA -915.6346 0.0173 0.0173 0.0000 0.1123
## 3 0.9168 0.9164 0.916 23.2711 -1156.4601 NA -1133.2412 0.0130 0.0130 0.0000 0.0841
## 4 0.9189 0.9185 0.9179 5.0716 -1174.5257 NA -1146.6630 0.0127 0.0127 0.0000 0.0821
## 5 0.9191 0.9186 0.9179 5.5003 -1174.1088 NA -1141.6023 0.0127 0.0127 0.0000 0.0822
## 6 0.9192 0.9186 0.9178 6.2271 -1173.3939 NA -1136.2436 0.0127 0.0127 0.0000 0.0823
## 7 0.9192 0.9185 0.9176 8.0000 -1171.6234 NA -1129.8293 0.0127 0.0127 0.0000 0.0825
## 8 0.9192 0.9185 0.9176 8.0000 -1169.6234 NA -1123.1855 0.0127 0.0127 0.0000 0.0825
## ---------------------------------------------------------------------------------------------------------------------------------
## AIC: Akaike Information Criteria
## SBIC: Sawa's Bayesian Information Criteria
## SBC: Schwarz Bayesian Criteria
## MSEP: Estimated error of prediction, assuming multivariate normality
## FPE: Final Prediction Error
## HSP: Hocking's Sp
## APC: Amemiya Prediction Criteria
plot(HL3)
## Warning: Removed 8 rows containing missing values (geom_point).
plot(CL3)
## Warning: Removed 8 rows containing missing values (geom_point).
The candidate models so far are:
HL1: log(HL) ~ RC + WA + OH + OR + GA + GD
HL2: log(HL) ~ SA + WA + OH + GA + GD
CL1: log(CL) ~ RC + WA + OH + OR + GA + GD
CL2: log(CL) ~ RC + SA + WA + OH + GA
For the third set of candidates, ols_best_subset returned eight models for each response, and I selected the one with the lowest AIC in each case:
HL3: log(HL) ~ SA + WA + OH + GA + GD, with an AIC of -1065.0263
CL3: log(CL) ~ RC + RA + OH + GA, with an AIC of -1174.5257
Comparing the three HL candidates, the best model is the one with the lowest AIC:
Best HL: log(HL) ~ SA + WA + OH + GA + GD
Although CL3 has the lowest AIC among the CL candidates, it includes RA, which was flagged as singular (aliased) in the initial full regression, so I prefer CL2 as my best model for CL:
Best CL: log(CL) ~ RC + SA + WA + OH + GA
AIC(HL1)
## [1] -1060.693
AIC(HL2)
## [1] -1065.026
AIC(CL1)
## [1] -1170.688
AIC(CL2)
## [1] -1172.754
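For completeness, the same candidates can be compared with BIC, which penalises extra parameters more strongly than AIC (a quick sketch; lower is better):
# BIC comparison of the candidate models
BIC(HL1); BIC(HL2)
BIC(CL1); BIC(CL2)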
Part B (35 marks)
df <- read.csv("kicksfootball.csv")
head(df)
## yards goal practiceormatch
## 1 18 Y P
## 2 19 Y P
## 3 19 Y P
## 4 19 Y P
## 5 19 Y P
## 6 19 Y P
# geom_bar is designed to make it easy to
# create bar charts that show
# counts (or sums of weights)
g <- ggplot(df, aes(goal))
# Number of Ys and Ns in variable goal
g + geom_bar()
library(dplyr)
df$goal <- toupper(df$goal)
df$practiceormatch <- toupper(df$practiceormatch)
g <- ggplot(df, aes(goal))
# Number of Ys and Ns in variable goal
g + geom_bar()
df$goal <- ifelse(df$goal=="Y", 1,0)
subset1 <- df %>% filter(practiceormatch=="P")
subset2 <- df %>% filter(practiceormatch=="M")
subset1
## yards goal practiceormatch
## 1 18 1 P
## 2 19 1 P
## 3 19 1 P
## 4 19 1 P
## 5 19 1 P
## 6 19 1 P
## 7 20 0 P
## 8 20 1 P
## 9 20 1 P
## 10 20 1 P
## 11 20 1 P
## 12 20 1 P
## 13 20 1 P
## 14 20 1 P
## 15 20 1 P
## 16 20 1 P
## 17 20 1 P
## 18 20 1 P
## 19 20 1 P
## 20 20 1 P
## 21 20 1 P
## 22 20 1 P
## 23 20 1 P
## 24 20 1 P
## 25 20 1 P
## 26 21 0 P
## 27 21 1 P
## 28 21 1 P
## 29 21 1 P
## 30 21 1 P
## 31 21 1 P
## 32 21 1 P
## 33 21 1 P
## 34 21 1 P
## 35 21 1 P
## 36 21 1 P
## 37 21 1 P
## 38 21 1 P
## 39 21 1 P
## 40 21 1 P
## 41 21 1 P
## 42 21 1 P
## 43 21 1 P
## 44 22 1 P
## 45 22 1 P
## 46 22 1 P
## 47 22 1 P
## 48 22 1 P
## 49 22 1 P
## 50 22 1 P
## 51 22 1 P
## 52 22 1 P
## 53 22 1 P
## 54 22 1 P
## 55 22 1 P
## 56 22 1 P
## 57 22 1 P
## 58 22 1 P
## 59 22 1 P
## 60 22 1 P
## 61 22 1 P
## 62 22 1 P
## 63 22 1 P
## 64 22 1 P
## 65 22 1 P
## 66 22 1 P
## 67 22 1 P
## 68 22 1 P
## 69 22 1 P
## 70 22 1 P
## 71 22 1 P
## 72 22 1 P
## 73 22 1 P
## 74 22 1 P
## 75 22 1 P
## 76 22 1 P
## 77 22 1 P
## 78 22 1 P
## 79 23 1 P
## 80 23 1 P
## 81 23 1 P
## 82 23 1 P
## 83 23 1 P
## 84 23 1 P
## 85 23 1 P
## 86 23 1 P
## 87 23 1 P
## 88 23 1 P
## 89 23 1 P
## 90 23 1 P
## 91 23 1 P
## 92 23 1 P
## 93 23 1 P
## 94 23 1 P
## 95 23 1 P
## 96 23 1 P
## 97 23 1 P
## 98 23 1 P
## 99 23 1 P
## 100 23 1 P
## 101 23 1 P
## 102 23 1 P
## 103 23 1 P
## 104 23 1 P
## 105 23 1 P
## 106 23 1 P
## 107 23 1 P
## 108 24 0 P
## 109 24 1 P
## 110 24 1 P
## 111 24 1 P
## 112 24 1 P
## 113 24 1 P
## 114 24 1 P
## 115 24 1 P
## 116 24 1 P
## 117 24 1 P
## 118 24 1 P
## 119 24 1 P
## 120 24 1 P
## 121 24 1 P
## 122 24 1 P
## 123 24 1 P
## 124 24 1 P
## 125 24 1 P
## 126 24 1 P
## 127 24 1 P
## 128 24 1 P
## 129 24 1 P
## 130 24 1 P
## 131 24 1 P
## 132 24 1 P
## 133 25 1 P
## 134 25 1 P
## 135 25 1 P
## 136 25 1 P
## 137 25 1 P
## 138 25 1 P
## 139 25 1 P
## 140 25 1 P
## 141 25 1 P
## 142 25 1 P
## 143 25 1 P
## 144 25 1 P
## 145 25 1 P
## 146 25 1 P
## 147 25 1 P
## 148 26 0 P
## 149 26 1 P
## 150 26 1 P
## 151 26 1 P
## 152 26 1 P
## 153 26 1 P
## 154 26 1 P
## 155 26 1 P
## 156 26 1 P
## 157 26 1 P
## 158 26 1 P
## 159 26 1 P
## 160 26 1 P
## 161 26 1 P
## 162 26 1 P
## 163 26 1 P
## 164 26 1 P
## 165 26 1 P
## 166 26 1 P
## 167 26 1 P
## 168 26 1 P
## 169 26 1 P
## 170 26 1 P
## 171 26 1 P
## 172 26 1 P
## 173 26 1 P
## 174 27 1 P
## 175 27 1 P
## 176 27 1 P
## 177 27 1 P
## 178 27 1 P
## 179 27 1 P
## 180 27 1 P
## 181 27 1 P
## 182 27 1 P
## 183 27 1 P
## 184 27 1 P
## 185 27 1 P
## 186 27 1 P
## 187 27 1 P
## 188 27 1 P
## 189 27 1 P
## 190 27 1 P
## 191 27 1 P
## 192 27 1 P
## 193 27 1 P
## 194 27 1 P
## 195 27 1 P
## 196 27 1 P
## 197 27 1 P
## 198 27 1 P
## 199 27 1 P
## 200 27 1 P
## 201 27 1 P
## 202 27 1 P
## 203 27 1 P
## 204 27 1 P
## 205 28 1 P
## 206 28 1 P
## 207 28 1 P
## 208 28 1 P
## 209 28 1 P
## 210 28 1 P
## 211 28 1 P
## 212 28 1 P
## 213 28 1 P
## 214 28 1 P
## 215 28 1 P
## 216 28 1 P
## 217 28 1 P
## 218 28 1 P
## 219 28 1 P
## 220 28 1 P
## 221 28 1 P
## 222 28 1 P
## 223 28 1 P
## 224 28 1 P
## 225 28 1 P
## 226 28 1 P
## 227 28 1 P
## 228 28 1 P
## 229 28 1 P
## 230 29 0 P
## 231 29 0 P
## 232 29 0 P
## 233 29 0 P
## 234 29 1 P
## 235 29 1 P
## 236 29 1 P
## 237 29 1 P
## 238 29 1 P
## 239 29 1 P
## 240 29 1 P
## 241 29 1 P
## 242 29 1 P
## 243 29 1 P
## 244 29 1 P
## 245 29 1 P
## 246 29 1 P
## 247 29 1 P
## 248 29 1 P
## 249 29 1 P
## 250 29 1 P
## 251 29 1 P
## 252 29 1 P
## 253 29 1 P
## 254 29 1 P
## 255 29 1 P
## 256 29 1 P
## 257 29 1 P
## 258 29 1 P
## 259 29 1 P
## 260 29 1 P
## 261 29 1 P
## 262 29 1 P
## 263 29 1 P
## 264 30 0 P
## 265 30 0 P
## 266 30 0 P
## 267 30 1 P
## 268 30 1 P
## 269 30 1 P
## 270 30 1 P
## 271 30 1 P
## 272 30 1 P
## 273 30 1 P
## 274 30 1 P
## 275 30 1 P
## 276 30 1 P
## 277 30 1 P
## 278 30 1 P
## 279 30 1 P
## 280 30 1 P
## 281 30 1 P
## 282 30 1 P
## 283 30 1 P
## 284 30 1 P
## 285 31 0 P
## 286 31 0 P
## 287 31 0 P
## 288 31 1 P
## 289 31 1 P
## 290 31 1 P
## 291 31 1 P
## 292 31 1 P
## 293 31 1 P
## 294 31 1 P
## 295 31 1 P
## 296 31 1 P
## 297 31 1 P
## 298 31 1 P
## 299 31 1 P
## 300 31 1 P
## 301 31 1 P
## 302 31 1 P
## 303 31 1 P
## 304 31 1 P
## 305 31 1 P
## 306 31 1 P
## 307 31 1 P
## 308 31 1 P
## 309 31 1 P
## 310 32 0 P
## 311 32 0 P
## 312 32 1 P
## 313 32 1 P
## 314 32 1 P
## 315 32 1 P
## 316 32 1 P
## 317 32 1 P
## 318 32 1 P
## 319 32 1 P
## 320 32 1 P
## 321 32 1 P
## 322 32 1 P
## 323 32 1 P
## 324 32 1 P
## 325 32 1 P
## 326 32 1 P
## 327 32 1 P
## 328 33 0 P
## 329 33 0 P
## 330 33 0 P
## 331 33 1 P
## 332 33 1 P
## 333 33 1 P
## 334 33 1 P
## 335 33 1 P
## 336 33 1 P
## 337 33 1 P
## 338 33 1 P
## 339 33 1 P
## 340 33 1 P
## 341 33 1 P
## 342 33 1 P
## 343 33 1 P
## 344 33 1 P
## 345 33 1 P
## 346 33 1 P
## 347 33 1 P
## 348 33 1 P
## 349 33 1 P
## 350 33 1 P
## 351 33 1 P
## 352 33 1 P
## 353 33 1 P
## 354 33 1 P
## 355 33 1 P
## 356 33 1 P
## 357 33 1 P
## 358 33 1 P
## 359 34 0 P
## 360 34 0 P
## 361 34 0 P
## 362 34 0 P
## 363 34 0 P
## 364 34 1 P
## 365 34 1 P
## 366 34 1 P
## 367 34 1 P
## 368 34 1 P
## 369 34 1 P
## 370 34 1 P
## 371 34 1 P
## 372 34 1 P
## 373 34 1 P
## 374 34 1 P
## 375 34 1 P
## 376 34 1 P
## 377 34 1 P
## 378 34 1 P
## 379 35 0 P
## 380 35 0 P
## 381 35 0 P
## 382 35 0 P
## 383 35 0 P
## 384 35 0 P
## 385 35 0 P
## 386 35 1 P
## 387 35 1 P
## 388 35 1 P
## 389 35 1 P
## 390 35 1 P
## 391 35 1 P
## 392 35 1 P
## 393 35 1 P
## 394 35 1 P
## 395 35 1 P
## 396 35 1 P
## 397 35 1 P
## 398 35 1 P
## 399 35 1 P
## 400 35 1 P
## 401 35 1 P
## 402 35 1 P
## 403 35 1 P
## 404 35 1 P
## 405 35 1 P
## 406 36 0 P
## 407 36 0 P
## 408 36 0 P
## 409 36 0 P
## 410 36 1 P
## 411 36 1 P
## 412 36 1 P
## 413 36 1 P
## 414 36 1 P
## 415 36 1 P
## 416 36 1 P
## 417 36 1 P
## 418 36 1 P
## 419 36 1 P
## 420 36 1 P
## 421 36 1 P
## 422 36 1 P
## 423 36 1 P
## 424 36 1 P
## 425 36 1 P
## 426 36 1 P
## 427 36 1 P
## 428 36 1 P
## 429 36 1 P
## 430 36 1 P
## 431 37 0 P
## 432 37 0 P
## 433 37 0 P
## 434 37 0 P
## 435 37 0 P
## 436 37 0 P
## 437 37 1 P
## 438 37 1 P
## 439 37 1 P
## 440 37 1 P
## 441 37 1 P
## 442 37 1 P
## 443 37 1 P
## 444 37 1 P
## 445 37 1 P
## 446 37 1 P
## 447 37 1 P
## 448 37 1 P
## 449 37 1 P
## 450 37 1 P
## 451 37 1 P
## 452 37 1 P
## 453 37 1 P
## 454 37 1 P
## 455 37 1 P
## 456 37 1 P
## 457 37 1 P
## 458 37 1 P
## 459 37 1 P
## 460 38 0 P
## 461 38 0 P
## 462 38 0 P
## 463 38 0 P
## 464 38 0 P
## 465 38 0 P
## 466 38 1 P
## 467 38 1 P
## 468 38 1 P
## 469 38 1 P
## 470 38 1 P
## 471 38 1 P
## 472 38 1 P
## 473 38 1 P
## 474 38 1 P
## 475 38 1 P
## 476 38 1 P
## 477 38 1 P
## 478 38 1 P
## 479 38 1 P
## 480 38 1 P
## 481 38 1 P
## 482 38 1 P
## 483 38 1 P
## 484 38 1 P
## 485 38 1 P
## 486 38 1 P
## 487 38 1 P
## 488 38 1 P
## 489 38 1 P
## 490 39 0 P
## 491 39 0 P
## 492 39 0 P
## 493 39 0 P
## 494 39 0 P
## 495 39 0 P
## 496 39 0 P
## 497 39 1 P
## 498 39 1 P
## 499 39 1 P
## 500 39 1 P
## 501 39 1 P
## 502 39 1 P
## 503 39 1 P
## 504 39 1 P
## 505 39 1 P
## 506 39 1 P
## 507 39 1 P
## 508 39 1 P
## 509 39 1 P
## 510 39 1 P
## 511 39 1 P
## 512 39 1 P
## 513 39 1 P
## 514 39 1 P
## 515 39 1 P
## 516 39 1 P
## 517 39 1 P
## 518 39 1 P
## 519 39 1 P
## 520 40 0 P
## 521 40 0 P
## 522 40 0 P
## 523 40 0 P
## 524 40 0 P
## 525 40 1 P
## 526 40 1 P
## 527 40 1 P
## 528 40 1 P
## 529 40 1 P
## 530 40 1 P
## 531 40 1 P
## 532 40 1 P
## 533 40 1 P
## 534 40 1 P
## 535 40 1 P
## 536 40 1 P
## 537 40 1 P
## 538 40 1 P
## 539 40 1 P
## 540 40 1 P
## 541 40 1 P
## 542 40 1 P
## 543 40 1 P
## 544 40 1 P
## 545 40 1 P
## 546 41 0 P
## 547 41 0 P
## 548 41 0 P
## 549 41 0 P
## 550 41 0 P
## 551 41 0 P
## 552 41 0 P
## 553 41 0 P
## 554 41 0 P
## 555 41 0 P
## 556 41 1 P
## 557 41 1 P
## 558 41 1 P
## 559 41 1 P
## 560 41 1 P
## 561 41 1 P
## 562 41 1 P
## 563 41 1 P
## 564 41 1 P
## 565 41 1 P
## 566 41 1 P
## 567 41 1 P
## 568 41 1 P
## 569 41 1 P
## 570 41 1 P
## 571 41 1 P
## 572 41 1 P
## 573 42 0 P
## 574 42 0 P
## 575 42 1 P
## 576 42 1 P
## 577 42 1 P
## 578 42 1 P
## 579 42 1 P
## 580 42 1 P
## 581 42 1 P
## 582 42 1 P
## 583 42 1 P
## 584 42 1 P
## 585 42 1 P
## 586 42 1 P
## 587 42 1 P
## 588 42 1 P
## 589 42 1 P
## 590 42 1 P
## 591 42 1 P
## 592 42 1 P
## 593 42 1 P
## 594 42 1 P
## 595 42 1 P
## 596 42 1 P
## 597 42 1 P
## 598 42 1 P
## 599 42 1 P
## 600 43 0 P
## 601 43 0 P
## 602 43 0 P
## 603 43 0 P
## 604 43 0 P
## 605 43 0 P
## 606 43 1 P
## 607 43 1 P
## 608 43 1 P
## 609 43 1 P
## 610 43 1 P
## 611 43 1 P
## 612 43 1 P
## 613 43 1 P
## 614 43 1 P
## 615 43 1 P
## 616 43 1 P
## 617 43 1 P
## 618 43 1 P
## 619 43 1 P
## 620 43 1 P
## 621 43 1 P
## 622 43 1 P
## 623 43 1 P
## 624 43 1 P
## 625 43 1 P
## 626 44 0 P
## 627 44 0 P
## 628 44 0 P
## 629 44 0 P
## 630 44 0 P
## 631 44 0 P
## 632 44 0 P
## 633 44 0 P
## 634 44 0 P
## 635 44 0 P
## 636 44 0 P
## 637 44 1 P
## 638 44 1 P
## 639 44 1 P
## 640 44 1 P
## 641 44 1 P
## 642 44 1 P
## 643 44 1 P
## 644 44 1 P
## 645 44 1 P
## 646 44 1 P
## 647 44 1 P
## 648 44 1 P
## 649 44 1 P
## 650 44 1 P
## 651 44 1 P
## 652 44 1 P
## 653 44 1 P
## 654 44 1 P
## 655 45 0 P
## 656 45 0 P
## 657 45 0 P
## 658 45 0 P
## 659 45 0 P
## 660 45 0 P
## 661 45 0 P
## 662 45 0 P
## 663 45 1 P
## 664 45 1 P
## 665 45 1 P
## 666 45 1 P
## 667 45 1 P
## 668 45 1 P
## 669 45 1 P
## 670 45 1 P
## 671 45 1 P
## 672 45 1 P
## 673 45 1 P
## 674 45 1 P
## 675 46 0 P
## 676 46 0 P
## 677 46 0 P
## 678 46 0 P
## 679 46 0 P
## 680 46 0 P
## 681 46 0 P
## 682 46 0 P
## 683 46 1 P
## 684 46 1 P
## 685 46 1 P
## 686 46 1 P
## 687 46 1 P
## 688 46 1 P
## 689 46 1 P
## 690 46 1 P
## 691 46 1 P
## 692 46 1 P
## 693 46 1 P
## 694 46 1 P
## 695 46 1 P
## 696 46 1 P
## 697 46 1 P
## 698 46 1 P
## 699 46 1 P
## 700 46 1 P
## 701 46 1 P
## 702 46 1 P
## 703 46 1 P
## 704 47 0 P
## 705 47 0 P
## 706 47 0 P
## 707 47 0 P
## 708 47 0 P
## 709 47 0 P
## 710 47 1 P
## 711 47 1 P
## 712 47 1 P
## 713 47 1 P
## 714 47 1 P
## 715 47 1 P
## 716 47 1 P
## 717 47 1 P
## 718 47 1 P
## 719 47 1 P
## 720 47 1 P
## 721 47 1 P
## 722 47 1 P
## 723 47 1 P
## 724 47 1 P
## 725 47 1 P
## 726 47 1 P
## 727 48 0 P
## 728 48 0 P
## 729 48 0 P
## 730 48 0 P
## 731 48 0 P
## 732 48 0 P
## 733 48 0 P
## 734 48 0 P
## 735 48 0 P
## 736 48 0 P
## 737 48 0 P
## 738 48 0 P
## 739 48 0 P
## 740 48 1 P
## 741 48 1 P
## 742 48 1 P
## 743 48 1 P
## 744 48 1 P
## 745 48 1 P
## 746 48 1 P
## 747 48 1 P
## 748 48 1 P
## 749 48 1 P
## 750 48 1 P
## 751 48 1 P
## 752 48 1 P
## 753 48 1 P
## 754 48 1 P
## 755 48 1 P
## 756 49 0 P
## 757 49 0 P
## 758 49 0 P
## 759 49 0 P
## 760 49 0 P
## 761 49 0 P
## 762 49 0 P
## 763 49 0 P
## 764 49 0 P
## 765 49 0 P
## 766 49 0 P
## 767 49 1 P
## 768 49 1 P
## 769 49 1 P
## 770 49 1 P
## 771 49 1 P
## 772 49 1 P
## 773 49 1 P
## 774 49 1 P
## 775 49 1 P
## 776 49 1 P
## 777 49 1 P
## 778 49 1 P
## 779 49 1 P
## 780 50 0 P
## 781 50 0 P
## 782 50 0 P
## 783 50 0 P
## 784 50 0 P
## 785 50 0 P
## 786 50 0 P
## 787 50 0 P
## 788 50 0 P
## 789 50 1 P
## 790 50 1 P
## 791 50 1 P
## 792 50 1 P
## 793 50 1 P
## 794 50 1 P
## 795 50 1 P
## 796 50 1 P
## 797 50 1 P
## 798 50 1 P
## 799 50 1 P
## 800 50 1 P
## 801 51 0 P
## 802 51 0 P
## 803 51 0 P
## 804 51 0 P
## 805 51 0 P
## 806 51 0 P
## 807 51 1 P
## 808 51 1 P
## 809 51 1 P
## 810 51 1 P
## 811 51 1 P
## 812 51 1 P
## 813 51 1 P
## 814 51 1 P
## 815 51 1 P
## 816 51 1 P
## 817 51 1 P
## 818 52 0 P
## 819 52 0 P
## 820 52 0 P
## 821 52 0 P
## 822 52 0 P
## 823 52 0 P
## 824 52 0 P
## 825 52 0 P
## 826 52 0 P
## 827 52 1 P
## 828 52 1 P
## 829 52 1 P
## 830 52 1 P
## 831 52 1 P
## 832 53 0 P
## 833 53 0 P
## 834 53 0 P
## 835 53 0 P
## 836 53 0 P
## 837 53 0 P
## 838 53 0 P
## 839 53 1 P
## 840 53 1 P
## 841 53 1 P
## 842 53 1 P
## 843 53 1 P
## 844 53 1 P
## 845 53 1 P
## 846 54 0 P
## 847 54 0 P
## 848 54 0 P
## 849 54 0 P
## 850 54 1 P
## 851 54 1 P
## 852 55 0 P
## 853 55 0 P
## 854 55 0 P
## 855 55 0 P
## 856 55 1 P
## 857 56 1 P
## 858 57 0 P
## 859 57 1 P
## 860 60 0 P
## 861 60 0 P
## 862 62 0 P
subset2
## yards goal practiceormatch
## 1 22 1 M
## 2 23 1 M
## 3 23 1 M
## 4 23 1 M
## 5 23 1 M
## 6 24 0 M
## 7 24 1 M
## 8 24 1 M
## 9 24 1 M
## 10 24 1 M
## 11 25 1 M
## 12 25 1 M
## 13 26 1 M
## 14 27 1 M
## 15 28 0 M
## 16 28 1 M
## 17 28 1 M
## 18 28 1 M
## 19 28 1 M
## 20 28 1 M
## 21 28 1 M
## 22 29 1 M
## 23 29 1 M
## 24 29 1 M
## 25 30 1 M
## 26 31 1 M
## 27 33 1 M
## 28 33 1 M
## 29 33 1 M
## 30 34 0 M
## 31 34 1 M
## 32 34 1 M
## 33 34 1 M
## 34 34 1 M
## 35 37 0 M
## 36 37 1 M
## 37 37 1 M
## 38 37 1 M
## 39 37 1 M
## 40 37 1 M
## 41 38 0 M
## 42 38 1 M
## 43 38 1 M
## 44 39 1 M
## 45 39 1 M
## 46 39 1 M
## 47 40 1 M
## 48 40 1 M
## 49 40 1 M
## 50 41 1 M
## 51 41 1 M
## 52 41 1 M
## 53 41 1 M
## 54 41 1 M
## 55 42 1 M
## 56 43 0 M
## 57 43 1 M
## 58 44 1 M
## 59 44 1 M
## 60 45 1 M
## 61 46 0 M
## 62 46 1 M
## 63 46 1 M
## 64 46 1 M
## 65 46 1 M
## 66 47 0 M
## 67 47 1 M
## 68 47 1 M
## 69 47 1 M
## 70 48 0 M
## 71 48 0 M
## 72 48 1 M
## 73 48 1 M
## 74 48 1 M
## 75 48 1 M
## 76 48 1 M
## 77 49 0 M
## 78 49 0 M
## 79 49 1 M
## 80 49 1 M
## 81 49 1 M
## 82 50 0 M
## 83 50 0 M
## 84 50 1 M
## 85 50 1 M
## 86 51 1 M
## 87 54 0 M
## 88 54 1 M
## 89 55 0 M
## 90 58 1 M
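Rather than reading the individual rows, the success rate at each distance can be summarised with dplyr (a small sketch; the same idea works for subset2):
# Proportion of successful kicks at each distance (practice kicks)
subset1 %>%
  group_by(yards) %>%
  summarise(attempts = n(), success_rate = mean(goal))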
plot(goal ~ yards, data = subset1,
     xlab = "Yards",
     ylab = "Goal")
subset1.log <- glm(goal ~ yards,
                   data = subset1,
                   family = binomial)
subset2.log <- glm(goal ~ yards,
                   data = subset2,
                   family = binomial)
summary(subset1.log)
##
## Call:
## glm(formula = goal ~ yards, family = binomial, data = subset1)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6690 0.2538 0.3950 0.7057 1.4702
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 5.80284 0.47356 12.25 <2e-16 ***
## yards -0.11349 0.01115 -10.18 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 875.30 on 861 degrees of freedom
## Residual deviance: 741.28 on 860 degrees of freedom
## AIC: 745.28
##
## Number of Fisher Scoring iterations: 5
summary(subset2.log)
##
## Call:
## glm(formula = goal ~ yards, family = binomial, data = subset2)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4254 0.3293 0.4865 0.6999 1.1253
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.83766 1.50308 3.218 0.00129 **
## yards -0.08127 0.03455 -2.353 0.01864 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 84.241 on 89 degrees of freedom
## Residual deviance: 77.735 on 88 degrees of freedom
## AIC: 81.735
##
## Number of Fisher Scoring iterations: 5
AIC is useful for comparing models because it combines a measure of how well the model fits the data with a penalty for model complexity; lower values are better.
Comparing the AICs, the second (match) model has the lower value. Note, however, that the two models are fitted to different subsets of the data, so the AIC comparison is only indicative.
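For a binary-response GLM, the reported AIC can be reproduced as the residual deviance plus twice the number of estimated coefficients; a quick check against the summaries above:
# AIC = residual deviance + 2 * number of estimated parameters
deviance(subset1.log) + 2 * length(coef(subset1.log))   # should match 745.28
deviance(subset2.log) + 2 * length(coef(subset2.log))   # should match 81.735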
a <- predict(subset1.log, data.frame(yards=40), type="resp")
b <- predict(subset2.log, data.frame(yards=40), type="resp")
a
## 1
## 0.7795695
b
## 1
## 0.8301675
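These predicted probabilities can also be reproduced by hand from the fitted coefficients, applying the inverse-logit function to the linear predictor at 40 yards:
# plogis() is the inverse-logit: exp(x) / (1 + exp(x))
plogis(coef(subset1.log)[1] + coef(subset1.log)[2] * 40)   # practice model, ~0.78
plogis(coef(subset2.log)[1] + coef(subset2.log)[2] * 40)   # match model, ~0.83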
Part C (30 marks)
1. For the dataset in Part A, using ggplot, make three plots that a) plot HL against the most important predictor from your model in Part A, b) are faceted by GD and OR, and c) for each plot separately fit a linear, polynomial and loess model.
According to my best model (Best HL: log(HL) ~ SA + WA + OH + GA + GD), the most important predictor is GA.
1. Linear model
mydf1 <- subset(mydf, select = -CL)
# Base plot: HL against GA, faceted by GD (rows) and OR (columns)
p <- ggplot(mydf1) + facet_grid(GD ~ OR) + xlab("GA") + ylab("HL")
lmplot <- p + stat_smooth(aes(x = GA, y = HL), method = "lm",
                          formula = y ~ x,
                          size = 1)
print(lmplot)
2. Polynomial model
polyplot <- p + stat_smooth(aes(x = GA, y = HL), method = "lm",
                            formula = y ~ poly(x, 2),
                            size = 1)
print(polyplot)
3. Loess model
loessplot <- p + stat_smooth(aes(x = GA, y = HL), method = "loess",
                             formula = y ~ x,
                             size = 1)
print(loessplot)
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : There are other
## near singularities as well. 0.090902
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used
## at 0.0985
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
## 0.3015
## Warning in predLoess(object$y, object$x, newx = if
## (is.null(newdata)) object$x else if (is.data.frame(newdata))
## as.matrix(model.frame(delete.response(terms(object)), : reciprocal
## condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 0.0985
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 0.3015
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 0
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 0.090902
## (predLoess() raises the same four warnings, and the whole set is repeated for
## every panel of the smoothed plot; the remaining duplicates are omitted here
## for readability.)
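These warnings arise because loess smoothers are being fitted over predictors that take only a handful of distinct values, so the local fits are nearly singular. They do not affect the regression results, but the report is easier to read if loess is avoided. A minimal sketch, assuming the warnings come from a geom_smooth() call on mydf (the exact plotting call is not reproduced here): requesting a linear smoother, or wrapping the call in suppressWarnings(), keeps the knitted output clean.
# Sketch only: a linear smoother avoids the near-singular loess fits.
# mydf, RC and HL are the Part A objects; substitute the variables actually plotted.
suppressWarnings(
  print(
    ggplot(mydf, aes(x = RC, y = HL)) +
      geom_point() +
      geom_smooth(method = "lm", se = FALSE)
  )
)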
# Fitted probabilities from the two logistic models, evaluated at the observed yards values.
# predict.glm() takes the covariates through newdata, not through a yards argument.
y1 <- subset(subset1, select = "yards")
y2 <- subset(subset2, select = "yards")
prob1 <- as.vector(predict(subset1.log, newdata = y1, type = "response"))
prob2 <- as.vector(predict(subset2.log, newdata = y2, type = "response"))
subset1$prob <- prob1
subset2$prob <- prob2
ggplot() +
  geom_smooth(data = subset1, aes(x = yards, y = prob), colour = "red",
              method = "glm", method.args = list(family = "quasibinomial"), se = FALSE) +
  geom_smooth(data = subset2, aes(x = yards, y = prob),
              method = "glm", method.args = list(family = "quasibinomial"), se = FALSE)
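Because subset1$prob and subset2$prob already hold the fitted probabilities from the two logistic models, an alternative (a sketch only, not the required output) is to draw those fitted values directly rather than refitting a quasibinomial smoother to them:
# Sketch: plot the fitted probabilities themselves, ordered by yards
ggplot() +
  geom_line(data = subset1[order(subset1$yards), ], aes(x = yards, y = prob), colour = "red") +
  geom_line(data = subset2[order(subset2$yards), ], aes(x = yards, y = prob), colour = "blue")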
p <- ggplot() +
  # blue series
  geom_point(data = visual1, aes(x = ISSUE_DATE, y = COUNTED)) +
  geom_smooth(data = visual1, aes(x = ISSUE_DATE, y = COUNTED),
              fill = "blue", colour = "darkblue", size = 1) +
  # red series
  geom_point(data = visual2, aes(x = ISSUE_DATE, y = COUNTED)) +
  geom_smooth(data = visual2, aes(x = ISSUE_DATE, y = COUNTED),
              fill = "red", colour = "red", size = 1)
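Note that the plot is only stored in p above; nothing is drawn until the object is printed. Printing it, optionally with axis labels (the label text below is an illustrative placeholder), renders both series on one set of axes:
# Render the stored plot; the label text is a placeholder
p + labs(x = "Issue date", y = "Count")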