library(tidyverse)
library(broom)
library(data.table)
library(performance)
library(patchwork)
library(car)
library(rsample)
library(tidytuesdayR)
library(ggplot2)
library(see)
Lab 7: Multiple Regression and Bootstrapping
Lab 7: Multiple Regression and Bootstrapping
load packages
Essentials
1.) Load data ‘soccer’ from tidytuesday
soccer <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2023/2023-04-04/soccer21-22.csv')
Rows: 380 Columns: 22
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (6): Date, HomeTeam, AwayTeam, FTR, HTR, Referee
dbl (16): FTHG, FTAG, HTHG, HTAG, HS, AS, HST, AST, HF, AF, HC, AC, HY, AY, ...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first rows of the match data.
head(soccer)
# A tibble: 6 × 22
Date HomeT…¹ AwayT…² FTHG FTAG FTR HTHG HTAG HTR Referee HS AS
<chr> <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr> <chr> <dbl> <dbl>
1 13/08… Brentf… Arsenal 2 0 H 1 0 H M Oliv… 8 22
2 14/08… Man Un… Leeds 5 1 H 1 0 H P Tier… 16 10
3 14/08… Burnley Bright… 1 2 A 1 0 H D Coote 14 14
4 14/08… Chelsea Crysta… 3 0 H 2 0 H J Moss 13 4
5 14/08… Everton Southa… 3 1 H 0 1 A A Madl… 14 6
6 14/08… Leices… Wolves 1 0 H 1 0 H C Paws… 9 17
# … with 10 more variables: HST <dbl>, AST <dbl>, HF <dbl>, AF <dbl>, HC <dbl>,
# AC <dbl>, HY <dbl>, AY <dbl>, HR <dbl>, AR <dbl>, and abbreviated variable
# names ¹HomeTeam, ²AwayTeam
After you load the data, record which variables are categorical and which are numeric.
categorical Date, HomeTeam, AwayTeam, FTR, HTR, Referee
numeric all the others FTHG, FTAG, HTHG, HTAG, HS, AS, HST, AST, HF, AF, HC, AC, HY, AY, HR, AR
2.) Let’s consider the effects of home team shots (HS), home team (HomeTeam), and home team fouls (HF) on home team goals (full-time home goals, FTHG). Build a fully interactive multiple linear regression model. Assess model fit and then model assumptions. How well does the model fit the data? Is the model valid?
# Fully interactive model: main effects of home shots, home fouls and home
# team, plus every two-way and the three-way interaction.
lm_2 <- lm(formula = FTHG ~ HS * HF * HomeTeam, data = soccer)
summary(lm_2)
Call:
lm(formula = FTHG ~ HS * HF * HomeTeam, data = soccer)
Residuals:
Min 1Q Median 3Q Max
-2.7138 -0.6469 -0.0615 0.5152 3.9047
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.697352 3.708178 0.188 0.8510
HS 0.055902 0.190075 0.294 0.7689
HF -0.009934 0.365871 -0.027 0.9784
HomeTeamAston Villa 0.881435 4.996848 0.176 0.8601
HomeTeamBrentford -2.533050 5.203392 -0.487 0.6268
HomeTeamBrighton 3.717225 6.575577 0.565 0.5723
HomeTeamBurnley -0.276222 6.857200 -0.040 0.9679
HomeTeamChelsea 6.034787 4.925771 1.225 0.2215
HomeTeamCrystal Palace -2.318297 4.283831 -0.541 0.5888
HomeTeamEverton 0.319386 4.645420 0.069 0.9452
HomeTeamLeeds -3.624261 5.001969 -0.725 0.4693
HomeTeamLeicester -0.792986 4.114557 -0.193 0.8473
HomeTeamLiverpool 4.175053 5.026751 0.831 0.4069
HomeTeamMan City -10.661735 5.330983 -2.000 0.0464 *
HomeTeamMan United 7.326569 4.781359 1.532 0.1265
HomeTeamNewcastle 0.168113 4.800783 0.035 0.9721
HomeTeamNorwich 2.094630 5.334412 0.393 0.6948
HomeTeamSouthampton -0.310685 4.470009 -0.070 0.9446
HomeTeamTottenham -2.750661 4.805536 -0.572 0.5675
HomeTeamWatford -2.913636 4.142058 -0.703 0.4823
HomeTeamWest Ham -2.027195 4.796066 -0.423 0.6728
HomeTeamWolves -1.557139 4.102273 -0.380 0.7045
HS:HF 0.001280 0.019025 0.067 0.9464
HS:HomeTeamAston Villa 0.061853 0.328940 0.188 0.8510
HS:HomeTeamBrentford 0.182702 0.332776 0.549 0.5834
HS:HomeTeamBrighton -0.262377 0.425503 -0.617 0.5379
HS:HomeTeamBurnley 0.031909 0.467206 0.068 0.9456
HS:HomeTeamChelsea -0.391915 0.267354 -1.466 0.1437
HS:HomeTeamCrystal Palace 0.105923 0.251576 0.421 0.6740
HS:HomeTeamEverton 0.011687 0.286931 0.041 0.9675
HS:HomeTeamLeeds 0.243965 0.297532 0.820 0.4129
HS:HomeTeamLeicester 0.146672 0.235410 0.623 0.5337
HS:HomeTeamLiverpool -0.109337 0.242931 -0.450 0.6530
HS:HomeTeamMan City 0.558982 0.261335 2.139 0.0332 *
HS:HomeTeamMan United -0.414803 0.258899 -1.602 0.1102
HS:HomeTeamNewcastle -0.025273 0.307031 -0.082 0.9345
HS:HomeTeamNorwich -0.290202 0.363840 -0.798 0.4257
HS:HomeTeamSouthampton 0.053585 0.266735 0.201 0.8409
HS:HomeTeamTottenham 0.208991 0.306756 0.681 0.4962
HS:HomeTeamWatford 0.257706 0.248203 1.038 0.3000
HS:HomeTeamWest Ham 0.147201 0.312313 0.471 0.6378
HS:HomeTeamWolves 0.010637 0.248269 0.043 0.9659
HF:HomeTeamAston Villa -0.084654 0.465462 -0.182 0.8558
HF:HomeTeamBrentford 0.221657 0.505506 0.438 0.6613
HF:HomeTeamBrighton -0.268913 0.596677 -0.451 0.6525
HF:HomeTeamBurnley -0.085970 0.651113 -0.132 0.8950
HF:HomeTeamChelsea -0.450434 0.447628 -1.006 0.3151
HF:HomeTeamCrystal Palace 0.196644 0.417779 0.471 0.6382
HF:HomeTeamEverton -0.128741 0.491577 -0.262 0.7936
HF:HomeTeamLeeds 0.213802 0.446558 0.479 0.6324
HF:HomeTeamLeicester 0.048414 0.403071 0.120 0.9045
HF:HomeTeamLiverpool -0.289467 0.533930 -0.542 0.5881
HF:HomeTeamMan City 1.036326 0.560155 1.850 0.0653 .
HF:HomeTeamMan United -0.787337 0.499734 -1.576 0.1162
HF:HomeTeamNewcastle -0.073170 0.446549 -0.164 0.8700
HF:HomeTeamNorwich -0.254151 0.505784 -0.502 0.6157
HF:HomeTeamSouthampton 0.024514 0.429623 0.057 0.9545
HF:HomeTeamTottenham 0.187767 0.454811 0.413 0.6800
HF:HomeTeamWatford 0.117282 0.411028 0.285 0.7756
HF:HomeTeamWest Ham 0.451202 0.482874 0.934 0.3508
HF:HomeTeamWolves 0.302944 0.420129 0.721 0.4714
HS:HF:HomeTeamAston Villa -0.005724 0.028970 -0.198 0.8435
HS:HF:HomeTeamBrentford -0.018001 0.032204 -0.559 0.5766
HS:HF:HomeTeamBrighton 0.015377 0.038286 0.402 0.6882
HS:HF:HomeTeamBurnley 0.002547 0.044694 0.057 0.9546
HS:HF:HomeTeamChelsea 0.031442 0.024617 1.277 0.2025
HS:HF:HomeTeamCrystal Palace -0.008544 0.024429 -0.350 0.7268
HS:HF:HomeTeamEverton 0.006583 0.032144 0.205 0.8379
HS:HF:HomeTeamLeeds -0.017376 0.025839 -0.672 0.5018
HS:HF:HomeTeamLeicester -0.010627 0.024129 -0.440 0.6599
HS:HF:HomeTeamLiverpool 0.006820 0.025865 0.264 0.7922
HS:HF:HomeTeamMan City -0.048830 0.028501 -1.713 0.0877 .
HS:HF:HomeTeamMan United 0.046000 0.028069 1.639 0.1023
HS:HF:HomeTeamNewcastle 0.006266 0.028067 0.223 0.8235
HS:HF:HomeTeamNorwich 0.027014 0.033845 0.798 0.4254
HS:HF:HomeTeamSouthampton -0.006353 0.025158 -0.253 0.8008
HS:HF:HomeTeamTottenham -0.011332 0.028415 -0.399 0.6903
HS:HF:HomeTeamWatford -0.013823 0.024557 -0.563 0.5739
HS:HF:HomeTeamWest Ham -0.030930 0.030862 -1.002 0.3171
HS:HF:HomeTeamWolves -0.015949 0.027111 -0.588 0.5568
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.16 on 300 degrees of freedom
Multiple R-squared: 0.3949, Adjusted R-squared: 0.2355
F-statistic: 2.478 on 79 and 300 DF, p-value: 1.774e-08
# Fit indices for the fully interactive model.
model_performance(lm_2)
# Indices of model performance
AIC | AICc | BIC | R2 | R2 (adj.) | RMSE | Sigma
------------------------------------------------------------------
1263.266 | 1307.843 | 1582.420 | 0.395 | 0.236 | 1.031 | 1.160
check_model(lm_2)
3.) Run through a top-down modeling approach to find the best-fit model! Be sure to check assumptions after each change and compare performance. Which model is the best fit?
- One Additive predictor and two interactive predictors
# Shots and fouls interact; home team enters additively.
lm_3 <- lm(formula = FTHG ~ HS * HF + HomeTeam, data = soccer)
summary(lm_3)
Call:
lm(formula = FTHG ~ HS * HF + HomeTeam, data = soccer)
Residuals:
Min 1Q Median 3Q Max
-3.0156 -0.7730 -0.1622 0.7126 4.7874
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.054622 0.600204 0.091 0.92754
HS 0.107632 0.034405 3.128 0.00190 **
HF 0.047767 0.048015 0.995 0.32049
HomeTeamAston Villa 0.131380 0.388419 0.338 0.73538
HomeTeamBrentford -0.240752 0.388160 -0.620 0.53550
HomeTeamBrighton -0.568370 0.384571 -1.478 0.14031
HomeTeamBurnley -0.443570 0.388853 -1.141 0.25475
HomeTeamChelsea 0.204743 0.382748 0.535 0.59303
HomeTeamCrystal Palace 0.033537 0.389335 0.086 0.93140
HomeTeamEverton 0.044378 0.389247 0.114 0.90929
HomeTeamLeeds -0.506466 0.388565 -1.303 0.19327
HomeTeamLeicester 0.358504 0.387650 0.925 0.35569
HomeTeamLiverpool 0.512532 0.382760 1.339 0.18141
HomeTeamMan City 1.117645 0.382884 2.919 0.00373 **
HomeTeamMan United 0.095348 0.383545 0.249 0.80382
HomeTeamNewcastle -0.059758 0.388093 -0.154 0.87771
HomeTeamNorwich -0.657118 0.392205 -1.675 0.09472 .
HomeTeamSouthampton -0.192727 0.388611 -0.496 0.62024
HomeTeamTottenham 0.508326 0.386270 1.316 0.18902
HomeTeamWatford -0.431863 0.391792 -1.102 0.27108
HomeTeamWest Ham 0.356874 0.389661 0.916 0.36036
HomeTeamWolves -0.307251 0.389534 -0.789 0.43077
HS:HF -0.003699 0.003256 -1.136 0.25670
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.174 on 357 degrees of freedom
Multiple R-squared: 0.2624, Adjusted R-squared: 0.217
F-statistic: 5.773 on 22 and 357 DF, p-value: 5.697e-14
# Diagnostic plots for the previous candidate model.
check_model(lm_3)
# Fouls interact with home team; shots enter additively.
lm_4 <- lm(FTHG ~ HS + HF * HomeTeam, data = soccer)
summary(lm_4)
Call:
lm(formula = FTHG ~ HS + HF * HomeTeam, data = soccer)
Residuals:
Min 1Q Median 3Q Max
-3.0620 -0.7081 -0.1844 0.6738 4.4639
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.41144 0.97096 0.424 0.672
HS 0.07084 0.01255 5.644 3.51e-08 ***
HF 0.01453 0.09506 0.153 0.879
HomeTeamAston Villa 1.75669 1.42505 1.233 0.219
HomeTeamBrentford -0.05703 1.27030 -0.045 0.964
HomeTeamBrighton 0.01759 1.50664 0.012 0.991
HomeTeamBurnley 0.29620 1.47263 0.201 0.841
HomeTeamChelsea -0.38245 1.32782 -0.288 0.774
HomeTeamCrystal Palace -0.97143 1.32039 -0.736 0.462
HomeTeamEverton 0.58837 1.20401 0.489 0.625
HomeTeamLeeds -0.15823 1.46057 -0.108 0.914
HomeTeamLeicester 1.19835 1.26066 0.951 0.342
HomeTeamLiverpool 1.74348 1.19441 1.460 0.145
HomeTeamMan City 1.49448 1.25543 1.190 0.235
HomeTeamMan United 0.49891 1.25593 0.397 0.691
HomeTeamNewcastle 0.05715 1.27913 0.045 0.964
HomeTeamNorwich -1.28301 1.32997 -0.965 0.335
HomeTeamSouthampton 0.49871 1.34835 0.370 0.712
HomeTeamTottenham -0.13231 1.40490 -0.094 0.925
HomeTeamWatford -0.14920 1.15855 -0.129 0.898
HomeTeamWest Ham 0.24801 1.23164 0.201 0.841
HomeTeamWolves -1.57711 1.18681 -1.329 0.185
HF:HomeTeamAston Villa -0.16633 0.14027 -1.186 0.237
HF:HomeTeamBrentford -0.02116 0.12567 -0.168 0.866
HF:HomeTeamBrighton -0.05774 0.14151 -0.408 0.684
HF:HomeTeamBurnley -0.07258 0.14097 -0.515 0.607
HF:HomeTeamChelsea 0.04817 0.12455 0.387 0.699
HF:HomeTeamCrystal Palace 0.08678 0.12420 0.699 0.485
HF:HomeTeamEverton -0.06289 0.12408 -0.507 0.613
HF:HomeTeamLeeds -0.03365 0.12913 -0.261 0.795
HF:HomeTeamLeicester -0.09414 0.13124 -0.717 0.474
HF:HomeTeamLiverpool -0.13408 0.12195 -1.100 0.272
HF:HomeTeamMan City -0.03834 0.13411 -0.286 0.775
HF:HomeTeamMan United -0.04448 0.13024 -0.341 0.733
HF:HomeTeamNewcastle -0.01373 0.12106 -0.113 0.910
HF:HomeTeamNorwich 0.06212 0.13249 0.469 0.639
HF:HomeTeamSouthampton -0.06798 0.12771 -0.532 0.595
HF:HomeTeamTottenham 0.05412 0.13233 0.409 0.683
HF:HomeTeamWatford -0.02980 0.10943 -0.272 0.786
HF:HomeTeamWest Ham 0.01038 0.12896 0.080 0.936
HF:HomeTeamWolves 0.13434 0.11916 1.127 0.260
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.181 on 339 degrees of freedom
Multiple R-squared: 0.2907, Adjusted R-squared: 0.207
F-statistic: 3.473 on 40 and 339 DF, p-value: 2.481e-10
# Diagnostic plots for the previous candidate model.
check_model(lm_4)
# Shots interact with home team; fouls enter additively.
lm_5 <- lm(FTHG ~ HF + HS * HomeTeam, data = soccer)
summary(lm_5)
Call:
lm(formula = FTHG ~ HF + HS * HomeTeam, data = soccer)
Residuals:
Min 1Q Median 3Q Max
-3.1058 -0.7017 -0.0771 0.6253 4.8216
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.5875109 0.8115556 0.724 0.4696
HF 0.0018097 0.0185305 0.098 0.9223
HS 0.0677618 0.0406653 1.666 0.0966 .
HomeTeamAston Villa 0.4099977 1.1178407 0.367 0.7140
HomeTeamBrentford -0.2757562 1.1303731 -0.244 0.8074
HomeTeamBrighton 0.6371700 1.1085324 0.575 0.5658
HomeTeamBurnley -1.2589357 1.2039312 -1.046 0.2965
HomeTeamChelsea 0.6758702 1.1390581 0.593 0.5533
HomeTeamCrystal Palace -0.3339769 1.0450447 -0.320 0.7495
HomeTeamEverton -0.7725122 1.1957637 -0.646 0.5187
HomeTeamLeeds -0.9315106 1.1081056 -0.841 0.4011
HomeTeamLeicester -0.4377626 1.0262928 -0.427 0.6700
HomeTeamLiverpool 1.6366174 1.3217309 1.238 0.2165
HomeTeamMan City -1.5272285 1.3092599 -1.166 0.2442
HomeTeamMan United 0.5861582 1.0724096 0.547 0.5850
HomeTeamNewcastle -0.7039699 1.0539149 -0.668 0.5046
HomeTeamNorwich -0.6814923 1.1089671 -0.615 0.5393
HomeTeamSouthampton -0.0199162 1.0172364 -0.020 0.9844
HomeTeamTottenham -0.7742871 1.2066158 -0.642 0.5215
HomeTeamWatford -1.8307843 1.1284529 -1.622 0.1057
HomeTeamWest Ham 1.9358478 1.2533784 1.545 0.1234
HomeTeamWolves 1.0335377 1.0473375 0.987 0.3244
HS:HomeTeamAston Villa -0.0264704 0.0727988 -0.364 0.7164
HS:HomeTeamBrentford -0.0002157 0.0744791 -0.003 0.9977
HS:HomeTeamBrighton -0.0843524 0.0642159 -1.314 0.1899
HS:HomeTeamBurnley 0.0649601 0.0828363 0.784 0.4335
HS:HomeTeamChelsea -0.0290800 0.0608248 -0.478 0.6329
HS:HomeTeamCrystal Palace 0.0266226 0.0660586 0.403 0.6872
HS:HomeTeamEverton 0.0653342 0.0825903 0.791 0.4295
HS:HomeTeamLeeds 0.0273476 0.0664777 0.411 0.6811
HS:HomeTeamLeicester 0.0624101 0.0630716 0.990 0.3231
HS:HomeTeamLiverpool -0.0518453 0.0631318 -0.821 0.4121
HS:HomeTeamMan City 0.1415647 0.0670106 2.113 0.0354 *
HS:HomeTeamMan United -0.0340900 0.0614591 -0.555 0.5795
HS:HomeTeamNewcastle 0.0487005 0.0655296 0.743 0.4579
HS:HomeTeamNorwich -0.0015015 0.0797687 -0.019 0.9850
HS:HomeTeamSouthampton -0.0175801 0.0622761 -0.282 0.7779
HS:HomeTeamTottenham 0.0905933 0.0757403 1.196 0.2325
HS:HomeTeamWatford 0.1185025 0.0785813 1.508 0.1325
HS:HomeTeamWest Ham -0.1334385 0.0867371 -1.538 0.1249
HS:HomeTeamWolves -0.1176284 0.0677485 -1.736 0.0834 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.157 on 339 degrees of freedom
Multiple R-squared: 0.3198, Adjusted R-squared: 0.2395
F-statistic: 3.985 on 40 and 339 DF, p-value: 1.197e-12
check_model(lm_5)
- Additive model with three predictors
# Purely additive model with all three predictors (no interactions).
lm_6 <- lm(formula = FTHG ~ HS + HF + HomeTeam, data = soccer)
summary(lm_6)
Call:
lm(formula = FTHG ~ HS + HF + HomeTeam, data = soccer)
Residuals:
Min 1Q Median 3Q Max
-2.9901 -0.7640 -0.1282 0.7084 4.6443
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.567460 0.395711 1.434 0.15244
HS 0.071115 0.012273 5.794 1.51e-08 ***
HF -0.002563 0.018519 -0.138 0.89000
HomeTeamAston Villa 0.104901 0.387877 0.270 0.78697
HomeTeamBrentford -0.256439 0.388072 -0.661 0.50916
HomeTeamBrighton -0.587418 0.384361 -1.528 0.12732
HomeTeamBurnley -0.450105 0.388968 -1.157 0.24797
HomeTeamChelsea 0.188451 0.382634 0.493 0.62266
HomeTeamCrystal Palace 0.017308 0.389231 0.044 0.96456
HomeTeamEverton 0.026478 0.389086 0.068 0.94578
HomeTeamLeeds -0.523754 0.388424 -1.348 0.17838
HomeTeamLeicester 0.357605 0.387806 0.922 0.35709
HomeTeamLiverpool 0.522552 0.382813 1.365 0.17310
HomeTeamMan City 1.155428 0.381591 3.028 0.00264 **
HomeTeamMan United 0.095275 0.383700 0.248 0.80404
HomeTeamNewcastle -0.065402 0.388219 -0.168 0.86631
HomeTeamNorwich -0.670737 0.392181 -1.710 0.08808 .
HomeTeamSouthampton -0.212473 0.388379 -0.547 0.58467
HomeTeamTottenham 0.487441 0.385989 1.263 0.20747
HomeTeamWatford -0.452190 0.391542 -1.155 0.24890
HomeTeamWest Ham 0.322879 0.388668 0.831 0.40668
HomeTeamWolves -0.325622 0.389356 -0.836 0.40354
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.174 on 358 degrees of freedom
Multiple R-squared: 0.2597, Adjusted R-squared: 0.2163
F-statistic: 5.982 on 21 and 358 DF, p-value: 3.927e-14
check_model(lm_6)
- Interactive models with two variables
# Shots-by-fouls interaction model; home team is left out.
lm_7 <- lm(formula = FTHG ~ HS * HF, data = soccer)
summary(lm_7)
Call:
lm(formula = FTHG ~ HS * HF, data = soccer)
Residuals:
Min 1Q Median 3Q Max
-2.8901 -0.8349 -0.1590 0.7448 4.9689
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.321706 0.508989 -0.632 0.5277
HS 0.148330 0.033657 4.407 1.37e-05 ***
HF 0.057670 0.048804 1.182 0.2381
HS:HF -0.005795 0.003298 -1.757 0.0797 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.214 on 376 degrees of freedom
Multiple R-squared: 0.169, Adjusted R-squared: 0.1623
F-statistic: 25.48 on 3 and 376 DF, p-value: 4.983e-15
# Diagnostic plots for the previous candidate model.
check_model(lm_7)
# Shots-by-team interaction model; fouls are left out.
lm_8 <- lm(FTHG ~ HS * HomeTeam, data = soccer)
summary(lm_8)
Call:
lm(formula = FTHG ~ HS * HomeTeam, data = soccer)
Residuals:
Min 1Q Median 3Q Max
-3.0969 -0.6949 -0.0787 0.6219 4.8281
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.6062793 0.7873245 0.770 0.4418
HS 0.0676677 0.0405946 1.667 0.0965 .
HomeTeamAston Villa 0.4058636 1.1154107 0.364 0.7162
HomeTeamBrentford -0.2765702 1.1286947 -0.245 0.8066
HomeTeamBrighton 0.6385711 1.1068239 0.577 0.5644
HomeTeamBurnley -1.2562178 1.2018551 -1.045 0.2967
HomeTeamChelsea 0.6815947 1.1358908 0.600 0.5489
HomeTeamCrystal Palace -0.3342486 1.0435177 -0.320 0.7489
HomeTeamEverton -0.7733524 1.1939899 -0.648 0.5176
HomeTeamLeeds -0.9274281 1.1057028 -0.839 0.4022
HomeTeamLeicester -0.4366315 1.0247316 -0.426 0.6703
HomeTeamLiverpool 1.6351886 1.3197234 1.239 0.2162
HomeTeamMan City -1.5220021 1.3062589 -1.165 0.2448
HomeTeamMan United 0.5860863 1.0708462 0.547 0.5845
HomeTeamNewcastle -0.7014382 1.0520603 -0.667 0.5054
HomeTeamNorwich -0.6813070 1.1073490 -0.615 0.5388
HomeTeamSouthampton -0.0206024 1.0157294 -0.020 0.9838
HomeTeamTottenham -0.7750313 1.2048330 -0.643 0.5205
HomeTeamWatford -1.8348507 1.1260407 -1.629 0.1041
HomeTeamWest Ham 1.9270235 1.2482950 1.544 0.1236
HomeTeamWolves 1.0319744 1.0456887 0.987 0.3244
HS:HomeTeamAston Villa -0.0260963 0.0725920 -0.359 0.7194
HS:HomeTeamBrentford -0.0001333 0.0743657 -0.002 0.9986
HS:HomeTeamBrighton -0.0842826 0.0641184 -1.314 0.1896
HS:HomeTeamBurnley 0.0648599 0.0827092 0.784 0.4335
HS:HomeTeamChelsea -0.0292309 0.0607165 -0.481 0.6305
HS:HomeTeamCrystal Palace 0.0268406 0.0659246 0.407 0.6842
HS:HomeTeamEverton 0.0652591 0.0824664 0.791 0.4293
HS:HomeTeamLeeds 0.0274150 0.0663773 0.413 0.6799
HS:HomeTeamLeicester 0.0621918 0.0629401 0.988 0.3238
HS:HomeTeamLiverpool -0.0517961 0.0630378 -0.822 0.4118
HS:HomeTeamMan City 0.1411931 0.0668049 2.114 0.0353 *
HS:HomeTeamMan United -0.0341729 0.0613637 -0.557 0.5780
HS:HomeTeamNewcastle 0.0486838 0.0654339 0.744 0.4574
HS:HomeTeamNorwich -0.0015321 0.0796519 -0.019 0.9847
HS:HomeTeamSouthampton -0.0173620 0.0621453 -0.279 0.7801
HS:HomeTeamTottenham 0.0908180 0.0755950 1.201 0.2304
HS:HomeTeamWatford 0.1191048 0.0782248 1.523 0.1288
HS:HomeTeamWest Ham -0.1328951 0.0864323 -1.538 0.1251
HS:HomeTeamWolves -0.1175638 0.0676465 -1.738 0.0831 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.155 on 340 degrees of freedom
Multiple R-squared: 0.3198, Adjusted R-squared: 0.2418
F-statistic: 4.098 on 39 and 340 DF, p-value: 5.84e-13
# Diagnostic plots for the previous candidate model.
check_model(lm_8)
# Fouls-by-team interaction model; shots are left out.
lm_9 <- lm(FTHG ~ HF * HomeTeam, data = soccer)
summary(lm_9)
Call:
lm(formula = FTHG ~ HF * HomeTeam, data = soccer)
Residuals:
Min 1Q Median 3Q Max
-3.3190 -0.8813 -0.1339 0.6537 4.9591
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.886998 0.976617 1.932 0.0542 .
HF -0.004765 0.099214 -0.048 0.9617
HomeTeamAston Villa 0.851439 1.478853 0.576 0.5652
HomeTeamBrentford -0.656366 1.322047 -0.496 0.6199
HomeTeamBrighton -0.369585 1.571892 -0.235 0.8143
HomeTeamBurnley -0.164821 1.535638 -0.107 0.9146
HomeTeamChelsea -0.315325 1.386713 -0.227 0.8203
HomeTeamCrystal Palace -1.706709 1.372278 -1.244 0.2145
HomeTeamEverton 0.027988 1.253175 0.022 0.9822
HomeTeamLeeds -0.614952 1.523067 -0.404 0.6866
HomeTeamLeicester 0.972261 1.315959 0.739 0.4605
HomeTeamLiverpool 1.812562 1.247365 1.453 0.1471
HomeTeamMan City 1.827336 1.309713 1.395 0.1639
HomeTeamMan United 0.284559 1.311083 0.217 0.8283
HomeTeamNewcastle -0.429867 1.332867 -0.323 0.7473
HomeTeamNorwich -1.919152 1.384016 -1.387 0.1665
HomeTeamSouthampton -0.261520 1.401160 -0.187 0.8521
HomeTeamTottenham -0.749793 1.462807 -0.513 0.6086
HomeTeamWatford -0.959873 1.200644 -0.799 0.4246
HomeTeamWest Ham -0.548536 1.277840 -0.429 0.6680
HomeTeamWolves -2.203054 1.234075 -1.785 0.0751 .
HF:HomeTeamAston Villa -0.116447 0.146201 -0.796 0.4263
HF:HomeTeamBrentford -0.002625 0.131200 -0.020 0.9840
HF:HomeTeamBrighton -0.042499 0.147762 -0.288 0.7738
HF:HomeTeamBurnley -0.068475 0.147230 -0.465 0.6422
HF:HomeTeamChelsea 0.038278 0.130069 0.294 0.7687
HF:HomeTeamCrystal Palace 0.117025 0.129597 0.903 0.3672
HF:HomeTeamEverton -0.051431 0.129571 -0.397 0.6917
HF:HomeTeamLeeds -0.017137 0.134825 -0.127 0.8989
HF:HomeTeamLeicester -0.116223 0.137002 -0.848 0.3969
HF:HomeTeamLiverpool -0.119023 0.127332 -0.935 0.3506
HF:HomeTeamMan City -0.074306 0.139908 -0.531 0.5957
HF:HomeTeamMan United -0.050025 0.136023 -0.368 0.7133
HF:HomeTeamNewcastle -0.003299 0.126416 -0.026 0.9792
HF:HomeTeamNorwich 0.072932 0.138357 0.527 0.5984
HF:HomeTeamSouthampton -0.033507 0.133227 -0.252 0.8016
HF:HomeTeamTottenham 0.083578 0.138099 0.605 0.5454
HF:HomeTeamWatford 0.001903 0.114136 0.017 0.9867
HF:HomeTeamWest Ham 0.051489 0.134472 0.383 0.7020
HF:HomeTeamWolves 0.151686 0.124414 1.219 0.2236
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.234 on 340 degrees of freedom
Multiple R-squared: 0.224, Adjusted R-squared: 0.135
F-statistic: 2.517 on 39 and 340 DF, p-value: 5.184e-06
check_model(lm_9)
- Additive models with two variables
# Additive two-predictor model: shots plus home team.
lm_10 <- lm(formula = FTHG ~ HS + HomeTeam, data = soccer)
summary(lm_10)
Call:
lm(formula = FTHG ~ HS + HomeTeam, data = soccer)
Residuals:
Min 1Q Median 3Q Max
-2.9814 -0.7704 -0.1274 0.7145 4.6367
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.54200 0.34986 1.549 0.12222
HS 0.07119 0.01225 5.813 1.35e-08 ***
HomeTeamAston Villa 0.10384 0.38727 0.268 0.78875
HomeTeamBrentford -0.25709 0.38751 -0.663 0.50749
HomeTeamBrighton -0.59108 0.38293 -1.544 0.12357
HomeTeamBurnley -0.45262 0.38801 -1.167 0.24418
HomeTeamChelsea 0.18394 0.38073 0.483 0.62929
HomeTeamCrystal Palace 0.01357 0.38776 0.035 0.97211
HomeTeamEverton 0.02855 0.38827 0.074 0.94142
HomeTeamLeeds -0.53113 0.38423 -1.382 0.16773
HomeTeamLeicester 0.35951 0.38703 0.929 0.35358
HomeTeamLiverpool 0.52328 0.38225 1.369 0.17188
HomeTeamMan City 1.15807 0.38059 3.043 0.00252 **
HomeTeamMan United 0.09688 0.38300 0.253 0.80045
HomeTeamNewcastle -0.06904 0.38680 -0.178 0.85844
HomeTeamNorwich -0.67100 0.39164 -1.713 0.08752 .
HomeTeamSouthampton -0.21569 0.38715 -0.557 0.57779
HomeTeamTottenham 0.48386 0.38459 1.258 0.20917
HomeTeamWatford -0.45655 0.38974 -1.171 0.24221
HomeTeamWest Ham 0.32561 0.38764 0.840 0.40148
HomeTeamWolves -0.32488 0.38879 -0.836 0.40392
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.173 on 359 degrees of freedom
Multiple R-squared: 0.2597, Adjusted R-squared: 0.2185
F-statistic: 6.297 on 20 and 359 DF, p-value: 1.542e-14
# Diagnostic plots for the previous candidate model.
check_model(lm_10)
# Additive two-predictor model: fouls plus home team.
lm_11 <- lm(FTHG ~ HF + HomeTeam, data = soccer)
summary(lm_11)
Call:
lm(formula = FTHG ~ HF + HomeTeam, data = soccer)
Residuals:
Min 1Q Median 3Q Max
-3.0766 -0.9307 -0.0940 0.7621 5.0725
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.90919 0.33513 5.697 2.54e-08 ***
HF -0.00712 0.01932 -0.368 0.71273
HomeTeamAston Villa -0.31167 0.39807 -0.783 0.43418
HomeTeamBrentford -0.68121 0.39800 -1.712 0.08783 .
HomeTeamBrighton -0.83124 0.39901 -2.083 0.03793 *
HomeTeamBurnley -0.88649 0.39854 -2.224 0.02675 *
HomeTeamChelsea 0.11800 0.39942 0.295 0.76782
HomeTeamCrystal Palace -0.40943 0.39916 -1.026 0.30571
HomeTeamEverton -0.42555 0.39810 -1.069 0.28581
HomeTeamLeeds -0.82074 0.40212 -2.041 0.04198 *
HomeTeamLeicester -0.05675 0.39807 -0.143 0.88671
HomeTeamLiverpool 0.73422 0.39798 1.845 0.06588 .
HomeTeamMan City 1.20303 0.39843 3.019 0.00271 **
HomeTeamMan United -0.16164 0.39804 -0.406 0.68492
HomeTeamNewcastle -0.46244 0.39908 -1.159 0.24732
HomeTeamNorwich -1.20828 0.39796 -3.036 0.00257 **
HomeTeamSouthampton -0.62146 0.39886 -1.558 0.12009
HomeTeamTottenham 0.16876 0.39901 0.423 0.67258
HomeTeamWatford -0.93388 0.39960 -2.337 0.01999 *
HomeTeamWest Ham -0.11163 0.39829 -0.280 0.77942
HomeTeamWolves -0.79022 0.39792 -1.986 0.04781 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.226 on 359 degrees of freedom
Multiple R-squared: 0.1903, Adjusted R-squared: 0.1452
F-statistic: 4.219 on 20 and 359 DF, p-value: 9.816e-09
# Diagnostic plots for the previous candidate model.
check_model(lm_11)
# Additive two-predictor model: shots plus fouls. The formula previously read
# `FTHG ~ HF + HF`, which collapses to a single HF term and so fitted a
# one-predictor model instead of the remaining two-variable combination.
# NOTE(review): the printed summary below and the lm_12 row of the comparison
# table were generated from the old HF-only formula; re-run to refresh them.
lm_12 <- lm(FTHG ~ HS + HF, data = soccer)
summary(lm_12)
Call:
lm(formula = FTHG ~ HF + HF, data = soccer)
Residuals:
Min 1Q Median 3Q Max
-1.7761 -0.7517 -0.4170 0.5503 5.6156
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 1.84143 0.20977 8.778 <2e-16 ***
HF -0.03265 0.01974 -1.654 0.099 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 1.324 on 378 degrees of freedom
Multiple R-squared: 0.007185, Adjusted R-squared: 0.004558
F-statistic: 2.736 on 1 and 378 DF, p-value: 0.09897
# Diagnostic plots for the last candidate model.
check_model(lm_12)
# Tabulate the fit indices of every candidate model side by side.
compare_performance(
  lm_2, lm_3, lm_4, lm_5, lm_6, lm_7, lm_8, lm_9, lm_10, lm_11, lm_12
)
# Comparison of Model Performance Indices
Name | Model | AIC (weights) | AICc (weights) | BIC (weights) | R2 | R2 (adj.) | RMSE | Sigma
----------------------------------------------------------------------------------------------------
lm_2 | lm | 1263.3 (<.001) | 1307.8 (<.001) | 1582.4 (<.001) | 0.395 | 0.236 | 1.031 | 1.160
lm_3 | lm | 1224.5 (0.157) | 1227.9 (0.132) | 1319.1 (<.001) | 0.262 | 0.217 | 1.138 | 1.174
lm_4 | lm | 1245.6 (<.001) | 1256.4 (<.001) | 1411.1 (<.001) | 0.291 | 0.207 | 1.116 | 1.181
lm_5 | lm | 1229.7 (0.012) | 1240.4 (<.001) | 1395.2 (<.001) | 0.320 | 0.240 | 1.093 | 1.157
lm_6 | lm | 1223.9 (0.216) | 1227.0 (0.208) | 1314.5 (<.001) | 0.260 | 0.216 | 1.140 | 1.174
lm_7 | lm | 1231.8 (0.004) | 1232.0 (0.017) | 1251.5 (>.999) | 0.169 | 0.162 | 1.208 | 1.214
lm_8 | lm | 1227.7 (0.031) | 1237.9 (<.001) | 1389.3 (<.001) | 0.320 | 0.242 | 1.093 | 1.155
lm_9 | lm | 1277.8 (<.001) | 1288.0 (<.001) | 1439.3 (<.001) | 0.224 | 0.135 | 1.167 | 1.234
lm_10 | lm | 1221.9 (0.580) | 1224.7 (0.641) | 1308.6 (<.001) | 0.260 | 0.218 | 1.140 | 1.173
lm_11 | lm | 1255.9 (<.001) | 1258.8 (<.001) | 1342.6 (<.001) | 0.190 | 0.145 | 1.192 | 1.226
lm_12 | lm | 1295.4 (<.001) | 1295.5 (<.001) | 1307.2 (<.001) | 0.007 | 0.005 | 1.320 | 1.324
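For all models, R² is low (at most 0.395). Model 6 (lm_6, the additive model with all three predictors) is taken as the best model; note that lm_10 has a marginally lower AIC (1221.9 vs. 1223.9). For lm_6, VIF < 5 for all variables, which indicates no problematic collinearity between the predictors.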
4.) After identifying the best fit model, build the appropriate graph! See our multiple regression tutorial. Next, Build a coef plot for the model. Using patchwork, show me a 2-panel figure with the coef plot and the graph for the model
# make a pretty graph!
# Observed home goals (points) and lm_6 fitted values (lines) against home
# shots, coloured by home team and drawn in one panel per team.
lm6g2 <- ggplot(
  augment(lm_6),
  aes(x = HS, y = FTHG, color = HomeTeam)
) +
  geom_point() +
  geom_line(aes(y = .fitted)) +
  theme_classic() +
  facet_wrap(~HomeTeam)
lm6g2
OR
# Same plot as a single panel: observed home goals (points) and lm_6 fitted
# values (lines) against home shots, coloured by home team.
lm6g3 <- ggplot(
  augment(lm_6),
  aes(x = HS, y = FTHG, color = HomeTeam)
) +
  geom_point() +
  geom_line(aes(y = .fitted)) +
  theme_classic()
# Display the single-panel plot.
lm6g3
# Coefficient table for lm_6. This statement was fused onto the line above,
# which assigned to `lm6g3coefs_lm6` and left `coefs_lm6` undefined.
coefs_lm6 <- tidy(lm_6, quick = FALSE)
coefs_lm6
# A tibble: 22 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 0.567 0.396 1.43 0.152
2 HS 0.0711 0.0123 5.79 0.0000000151
3 HF -0.00256 0.0185 -0.138 0.890
4 HomeTeamAston Villa 0.105 0.388 0.270 0.787
5 HomeTeamBrentford -0.256 0.388 -0.661 0.509
6 HomeTeamBrighton -0.587 0.384 -1.53 0.127
7 HomeTeamBurnley -0.450 0.389 -1.16 0.248
8 HomeTeamChelsea 0.188 0.383 0.493 0.623
9 HomeTeamCrystal Palace 0.0173 0.389 0.0445 0.965
10 HomeTeamEverton 0.0265 0.389 0.0681 0.946
# … with 12 more rows
# Confidence limits for each lm_6 coefficient, with the coefficient names
# kept in a `term` column.
ci1 <- data.table(confint(lm_6), keep.rownames = "term")
ci1
term 2.5 % 97.5 %
1: (Intercept) -0.21075026 1.34567111
2: HS 0.04697836 0.09525245
3: HF -0.03898277 0.03385659
4: HomeTeamAston Villa -0.65790223 0.86770425
5: HomeTeamBrentford -1.01962630 0.50674849
6: HomeTeamBrighton -1.34330647 0.16847085
7: HomeTeamBurnley -1.21505431 0.31484437
8: HomeTeamChelsea -0.56404249 0.94094445
9: HomeTeamCrystal Palace -0.74815835 0.78277336
10: HomeTeamEverton -0.73870205 0.79165904
11: HomeTeamLeeds -1.28763336 0.24012543
12: HomeTeamLeicester -0.40505935 1.12026992
13: HomeTeamLiverpool -0.23029360 1.27539679
14: HomeTeamMan City 0.40498557 1.90586943
15: HomeTeamMan United -0.65931506 0.84986417
16: HomeTeamNewcastle -0.82887822 0.69807362
17: HomeTeamNorwich -1.44200427 0.10053024
18: HomeTeamSouthampton -0.97626468 0.55131870
19: HomeTeamTottenham -0.27164995 1.24653097
20: HomeTeamWatford -1.22220073 0.31782064
21: HomeTeamWest Ham -0.44147985 1.08723763
22: HomeTeamWolves -1.09133427 0.44009047
term 2.5 % 97.5 %
# Column-bind the estimates and the confidence limits. Rows are matched by
# position, and both inputs carry their own `term` column.
new_df <- cbind(coefs_lm6, ci1)
new_df
term estimate std.error statistic p.value
1 (Intercept) 0.56746042 0.39571125 1.43402653 1.524375e-01
2 HS 0.07111540 0.01227341 5.79426619 1.505048e-08
3 HF -0.00256309 0.01851899 -0.13840334 8.899995e-01
4 HomeTeamAston Villa 0.10490101 0.38787673 0.27044935 7.869704e-01
5 HomeTeamBrentford -0.25643891 0.38807207 -0.66080227 5.091641e-01
6 HomeTeamBrighton -0.58741781 0.38436075 -1.52829811 1.273216e-01
7 HomeTeamBurnley -0.45010497 0.38896800 -1.15717736 2.479714e-01
8 HomeTeamChelsea 0.18845098 0.38263433 0.49250932 6.226612e-01
9 HomeTeamCrystal Palace 0.01730751 0.38923064 0.04446594 9.645578e-01
10 HomeTeamEverton 0.02647849 0.38908557 0.06805314 9.457813e-01
11 HomeTeamLeeds -0.52375396 0.38842394 -1.34840803 1.783796e-01
12 HomeTeamLeicester 0.35760529 0.38780626 0.92212356 3.570851e-01
13 HomeTeamLiverpool 0.52255160 0.38281318 1.36503032 1.731007e-01
14 HomeTeamMan City 1.15542750 0.38159115 3.02792008 2.640982e-03
15 HomeTeamMan United 0.09527456 0.38370020 0.24830468 8.040410e-01
16 HomeTeamNewcastle -0.06540230 0.38821878 -0.16846763 8.663105e-01
17 HomeTeamNorwich -0.67073702 0.39218059 -1.71027591 8.808125e-02
18 HomeTeamSouthampton -0.21247299 0.38837935 -0.54707591 5.846674e-01
19 HomeTeamTottenham 0.48744051 0.38598883 1.26283581 2.074701e-01
20 HomeTeamWatford -0.45219004 0.39154164 -1.15489644 2.489031e-01
21 HomeTeamWest Ham 0.32287889 0.38866769 0.83073252 4.066784e-01
22 HomeTeamWolves -0.32562190 0.38935599 -0.83630895 4.035390e-01
term 2.5 % 97.5 %
1 (Intercept) -0.21075026 1.34567111
2 HS 0.04697836 0.09525245
3 HF -0.03898277 0.03385659
4 HomeTeamAston Villa -0.65790223 0.86770425
5 HomeTeamBrentford -1.01962630 0.50674849
6 HomeTeamBrighton -1.34330647 0.16847085
7 HomeTeamBurnley -1.21505431 0.31484437
8 HomeTeamChelsea -0.56404249 0.94094445
9 HomeTeamCrystal Palace -0.74815835 0.78277336
10 HomeTeamEverton -0.73870205 0.79165904
11 HomeTeamLeeds -1.28763336 0.24012543
12 HomeTeamLeicester -0.40505935 1.12026992
13 HomeTeamLiverpool -0.23029360 1.27539679
14 HomeTeamMan City 0.40498557 1.90586943
15 HomeTeamMan United -0.65931506 0.84986417
16 HomeTeamNewcastle -0.82887822 0.69807362
17 HomeTeamNorwich -1.44200427 0.10053024
18 HomeTeamSouthampton -0.97626468 0.55131870
19 HomeTeamTottenham -0.27164995 1.24653097
20 HomeTeamWatford -1.22220073 0.31782064
21 HomeTeamWest Ham -0.44147985 1.08723763
22 HomeTeamWolves -1.09133427 0.44009047
nw_df<-new_df[,-6]
nw_df
term estimate std.error statistic p.value
1 (Intercept) 0.56746042 0.39571125 1.43402653 1.524375e-01
2 HS 0.07111540 0.01227341 5.79426619 1.505048e-08
3 HF -0.00256309 0.01851899 -0.13840334 8.899995e-01
4 HomeTeamAston Villa 0.10490101 0.38787673 0.27044935 7.869704e-01
5 HomeTeamBrentford -0.25643891 0.38807207 -0.66080227 5.091641e-01
6 HomeTeamBrighton -0.58741781 0.38436075 -1.52829811 1.273216e-01
7 HomeTeamBurnley -0.45010497 0.38896800 -1.15717736 2.479714e-01
8 HomeTeamChelsea 0.18845098 0.38263433 0.49250932 6.226612e-01
9 HomeTeamCrystal Palace 0.01730751 0.38923064 0.04446594 9.645578e-01
10 HomeTeamEverton 0.02647849 0.38908557 0.06805314 9.457813e-01
11 HomeTeamLeeds -0.52375396 0.38842394 -1.34840803 1.783796e-01
12 HomeTeamLeicester 0.35760529 0.38780626 0.92212356 3.570851e-01
13 HomeTeamLiverpool 0.52255160 0.38281318 1.36503032 1.731007e-01
14 HomeTeamMan City 1.15542750 0.38159115 3.02792008 2.640982e-03
15 HomeTeamMan United 0.09527456 0.38370020 0.24830468 8.040410e-01
16 HomeTeamNewcastle -0.06540230 0.38821878 -0.16846763 8.663105e-01
17 HomeTeamNorwich -0.67073702 0.39218059 -1.71027591 8.808125e-02
18 HomeTeamSouthampton -0.21247299 0.38837935 -0.54707591 5.846674e-01
19 HomeTeamTottenham 0.48744051 0.38598883 1.26283581 2.074701e-01
20 HomeTeamWatford -0.45219004 0.39154164 -1.15489644 2.489031e-01
21 HomeTeamWest Ham 0.32287889 0.38866769 0.83073252 4.066784e-01
22 HomeTeamWolves -0.32562190 0.38935599 -0.83630895 4.035390e-01
2.5 % 97.5 %
1 -0.21075026 1.34567111
2 0.04697836 0.09525245
3 -0.03898277 0.03385659
4 -0.65790223 0.86770425
5 -1.01962630 0.50674849
6 -1.34330647 0.16847085
7 -1.21505431 0.31484437
8 -0.56404249 0.94094445
9 -0.74815835 0.78277336
10 -0.73870205 0.79165904
11 -1.28763336 0.24012543
12 -0.40505935 1.12026992
13 -0.23029360 1.27539679
14 0.40498557 1.90586943
15 -0.65931506 0.84986417
16 -0.82887822 0.69807362
17 -1.44200427 0.10053024
18 -0.97626468 0.55131870
19 -0.27164995 1.24653097
20 -1.22220073 0.31782064
21 -0.44147985 1.08723763
22 -1.09133427 0.44009047
# Store the coefficient names as a factor before plotting them on the y axis.
nw_df$term <- as.factor(nw_df$term)

# Rename the confidence-limit columns to syntactic names so they can be used
# directly inside aes(). The exported generic dplyr::rename() dispatches to the
# data-frame method; the internal method should not be reached with `:::`.
nw_df2 <- nw_df %>%
  dplyr::rename(lower = `2.5 %`, upper = `97.5 %`)
nw_df2
 term estimate std.error statistic p.value
1 (Intercept) 0.56746042 0.39571125 1.43402653 1.524375e-01
2 HS 0.07111540 0.01227341 5.79426619 1.505048e-08
3 HF -0.00256309 0.01851899 -0.13840334 8.899995e-01
4 HomeTeamAston Villa 0.10490101 0.38787673 0.27044935 7.869704e-01
5 HomeTeamBrentford -0.25643891 0.38807207 -0.66080227 5.091641e-01
6 HomeTeamBrighton -0.58741781 0.38436075 -1.52829811 1.273216e-01
7 HomeTeamBurnley -0.45010497 0.38896800 -1.15717736 2.479714e-01
8 HomeTeamChelsea 0.18845098 0.38263433 0.49250932 6.226612e-01
9 HomeTeamCrystal Palace 0.01730751 0.38923064 0.04446594 9.645578e-01
10 HomeTeamEverton 0.02647849 0.38908557 0.06805314 9.457813e-01
11 HomeTeamLeeds -0.52375396 0.38842394 -1.34840803 1.783796e-01
12 HomeTeamLeicester 0.35760529 0.38780626 0.92212356 3.570851e-01
13 HomeTeamLiverpool 0.52255160 0.38281318 1.36503032 1.731007e-01
14 HomeTeamMan City 1.15542750 0.38159115 3.02792008 2.640982e-03
15 HomeTeamMan United 0.09527456 0.38370020 0.24830468 8.040410e-01
16 HomeTeamNewcastle -0.06540230 0.38821878 -0.16846763 8.663105e-01
17 HomeTeamNorwich -0.67073702 0.39218059 -1.71027591 8.808125e-02
18 HomeTeamSouthampton -0.21247299 0.38837935 -0.54707591 5.846674e-01
19 HomeTeamTottenham 0.48744051 0.38598883 1.26283581 2.074701e-01
20 HomeTeamWatford -0.45219004 0.39154164 -1.15489644 2.489031e-01
21 HomeTeamWest Ham 0.32287889 0.38866769 0.83073252 4.066784e-01
22 HomeTeamWolves -0.32562190 0.38935599 -0.83630895 4.035390e-01
lower upper
1 -0.21075026 1.34567111
2 0.04697836 0.09525245
3 -0.03898277 0.03385659
4 -0.65790223 0.86770425
5 -1.01962630 0.50674849
6 -1.34330647 0.16847085
7 -1.21505431 0.31484437
8 -0.56404249 0.94094445
9 -0.74815835 0.78277336
10 -0.73870205 0.79165904
11 -1.28763336 0.24012543
12 -0.40505935 1.12026992
13 -0.23029360 1.27539679
14 0.40498557 1.90586943
15 -0.65931506 0.84986417
16 -0.82887822 0.69807362
17 -1.44200427 0.10053024
18 -0.97626468 0.55131870
19 -0.27164995 1.24653097
20 -1.22220073 0.31782064
21 -0.44147985 1.08723763
22 -1.09133427 0.44009047
# Coefficient plot: one row per term, a point at the estimate and a horizontal
# bar spanning lower..upper, with a dashed reference line at zero.
ciplot <- nw_df2 %>%
  ggplot(mapping = aes(x = estimate, y = term)) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_point(size = 3) +
  geom_errorbarh(mapping = aes(xmin = lower, xmax = upper), height = 0.2) +
  theme_classic()
ciplot# Two panel graph
ciplot +lm6g2# OR
ciplot +lm6g3
Depth
1.) Bootstrap the coef plot from Essential #4, above.
tidy(lm_6)# A tibble: 22 × 5
term estimate std.error statistic p.value
<chr> <dbl> <dbl> <dbl> <dbl>
1 (Intercept) 0.567 0.396 1.43 0.152
2 HS 0.0711 0.0123 5.79 0.0000000151
3 HF -0.00256 0.0185 -0.138 0.890
4 HomeTeamAston Villa 0.105 0.388 0.270 0.787
5 HomeTeamBrentford -0.256 0.388 -0.661 0.509
6 HomeTeamBrighton -0.587 0.384 -1.53 0.127
7 HomeTeamBurnley -0.450 0.389 -1.16 0.248
8 HomeTeamChelsea 0.188 0.383 0.493 0.623
9 HomeTeamCrystal Palace 0.0173 0.389 0.0445 0.965
10 HomeTeamEverton 0.0265 0.389 0.0681 0.946
# … with 12 more rows
# Fix the RNG seed so the resampling below is repeatable (any number is fine).
set.seed(200)

# Percentile bootstrap intervals for the coefficients of FTHG ~ HS + HF + HomeTeam.
soccer_intervals <- reg_intervals(
  FTHG ~ HS + HF + HomeTeam,
  data = soccer,
  type = "percentile",
  keep_reps = FALSE
)
soccer_intervals# A tibble: 21 × 6
term .lower .estimate .upper .alpha .method
<chr> <dbl> <dbl> <dbl> <dbl> <chr>
1 HF -0.0372 -0.00257 0.0313 0.05 percentile
2 HomeTeamAston Villa -0.608 0.117 0.851 0.05 percentile
3 HomeTeamBrentford -0.904 -0.238 0.443 0.05 percentile
4 HomeTeamBrighton -1.36 -0.571 0.206 0.05 percentile
5 HomeTeamBurnley -1.16 -0.448 0.277 0.05 percentile
6 HomeTeamChelsea -0.602 0.198 1.14 0.05 percentile
7 HomeTeamCrystal Palace -0.702 0.0228 0.723 0.05 percentile
8 HomeTeamEverton -0.689 0.0343 0.763 0.05 percentile
9 HomeTeamLeeds -1.15 -0.508 0.105 0.05 percentile
10 HomeTeamLeicester -0.425 0.376 1.14 0.05 percentile
# … with 11 more rows
# Plot the bootstrap results: a bar from .lower to .upper with the point
# estimate drawn on top, and a dashed reference line at zero.
soccerboots <- soccer_intervals %>%
  ggplot(mapping = aes(x = .estimate, y = term)) +
  geom_vline(xintercept = 0, linetype = "dashed") +
  geom_errorbarh(mapping = aes(xmin = .lower, xmax = .upper), height = 0.2) +
  geom_point(size = 3) +
  theme_classic()
soccerboots
2.) Calculate means and 95% CIs of full time home goals and full time away goals (using bootstrapping). Plot the results and interpret the plot (is there a home advantage or not?)
# Number of matches to draw from soccer.
n <- 100

# Take n rows from soccer without replacement.
orig_sample <- slice_sample(soccer, n = n, replace = FALSE)
orig_sample# A tibble: 100 × 22
Date HomeT…¹ AwayT…² FTHG FTAG FTR HTHG HTAG HTR Referee HS AS
<chr> <chr> <chr> <dbl> <dbl> <chr> <dbl> <dbl> <chr> <chr> <dbl> <dbl>
1 03/1… West H… Brentf… 1 2 A 0 1 A P Bank… 18 13
2 10/1… Brentf… Watford 2 1 H 0 1 A M Oliv… 14 6
3 28/1… Chelsea Man Un… 1 1 D 0 0 D A Tayl… 24 3
4 11/0… Southa… West H… 0 0 D 0 0 D D Coote 11 13
5 15/1… Crysta… Southa… 2 2 D 1 2 A S Hoop… 9 15
6 16/1… Aston … Wolves 2 3 A 0 0 D M Oliv… 14 9
7 12/1… Crysta… Everton 3 1 H 1 0 H A Madl… 17 12
8 17/0… Southa… Liverp… 1 2 A 1 1 D M Atki… 4 24
9 21/0… Burnley Southa… 2 0 H 2 0 H S Attw… 16 11
10 26/0… Brentf… Newcas… 0 2 A 0 2 A M Dean 6 26
# … with 90 more rows, 10 more variables: HST <dbl>, AST <dbl>, HF <dbl>,
# AF <dbl>, HC <dbl>, AC <dbl>, HY <dbl>, AY <dbl>, HR <dbl>, AR <dbl>, and
# abbreviated variable names ¹HomeTeam, ²AwayTeam
# A single bootstrap resample: draw n rows from orig_sample with replacement
# and average the home (FTHG) and away (FTAG) goals.
orig_sample %>%
slice_sample(n=n, replace=TRUE) %>%
summarize(mean_FTHG=mean(FTHG), mean_FTAG=mean(FTAG))# A tibble: 1 × 2
mean_FTHG mean_FTAG
<dbl> <dbl>
1 1.31 1.48
# Repeat the resample-and-average step 1000 times (1000 = number of trials /
# resamples) and stack the one-row results; the final column records n.
soccer_100_bs <- map_dfr(1:1000, function(trial) {
  resampled <- slice_sample(orig_sample, n = n, replace = TRUE)
  summarize(resampled, mean_FTHG = mean(FTHG), mean_FTAG = mean(FTAG))
}) %>%
  mutate(n = n)
soccer_100_bs #you will see we now have means for 1000 trials!# A tibble: 1,000 × 3
mean_FTHG mean_FTAG n
<dbl> <dbl> <dbl>
1 1.16 1.65 100
2 1.39 1.5 100
3 1.6 1.55 100
4 1.2 1.71 100
5 1.52 1.46 100
6 1.33 1.74 100
7 1.49 1.41 100
8 1.42 1.76 100
9 1.3 1.42 100
10 1.37 1.58 100
# … with 990 more rows
# Summarise the bootstrap replicates already held in soccer_100_bs. The
# resampling is deliberately not repeated here: re-running it would replace the
# replicates with a new random draw, so the summary would no longer describe
# the bootstrap that was just built.
calc_CIs <- soccer_100_bs %>%
  summarize(
    mean_FTHG_boot = mean(mean_FTHG),
    mean_FTAG_boot = mean(mean_FTAG),
    # 1.96 * sd of the bootstrap means, used as the half-width of the 95% CI.
    CI_1 = 1.96 * sd(mean_FTHG),
    CI_2 = 1.96 * sd(mean_FTAG)
  )
calc_CIs# A tibble: 1 × 4
mean_FTHG_boot mean_FTAG_boot CI_1 CI_2
<dbl> <dbl> <dbl> <dbl>
1 1.42 1.53 0.231 0.261
#
# Split the summary into one (mean, CI half-width) pair per side; these are
# columns 1 and 3 (home) and 2 and 4 (away) of calc_CIs.
fthg <- calc_CIs %>% dplyr::select(mean_FTHG_boot, CI_1)
ftag <- calc_CIs %>% dplyr::select(mean_FTAG_boot, CI_2)
head(fthg)# A tibble: 1 × 2
mean_FTHG_boot CI_1
<dbl> <dbl>
1 1.42 0.231
head(ftag)# A tibble: 1 × 2
mean_FTAG_boot CI_2
<dbl> <dbl>
1 1.53 0.261
# Tag the home summary row with its side.
fthg_1 <- add_column(fthg, team = "home")
fthg_1# A tibble: 1 × 3
mean_FTHG_boot CI_1 team
<dbl> <dbl> <chr>
1 1.42 0.231 home
# Tag the away summary row with its side.
ftag_1 <- add_column(ftag, team = "away")
ftag_1# A tibble: 1 × 3
mean_FTAG_boot CI_2 team
<dbl> <dbl> <chr>
1 1.53 0.261 away
# Give both one-row summaries the same column names so they can be stacked.
fthg_1 <- fthg_1 %>% rename(mean = mean_FTHG_boot, Ci = CI_1)
ftag_1 <- ftag_1 %>% rename(mean = mean_FTAG_boot, Ci = CI_2)

# Stack home and away into a single table for plotting.
all_goals <- bind_rows(fthg_1, ftag_1)
head(all_goals)# A tibble: 2 × 3
mean Ci team
<dbl> <dbl> <chr>
1 1.42 0.231 home
2 1.53 0.261 away
# Bootstrapped mean goals per side with error bars at mean +/- Ci.
ggplot(data = all_goals, aes(x = team, y = mean, color = team)) +
  geom_point() +
  # geom_errorbar() sizes its caps with `width`; `height` is not one of its
  # parameters and was being ignored with a warning.
  geom_errorbar(aes(ymin = mean - Ci, ymax = mean + Ci), width = 0.001) +
  labs(title = "95% CI plot of team goals") +
  theme_classic()
In this bootstrap sample the mean number of goals is slightly higher for the away team (1.53) than for the home team (1.42). However, the 95% confidence intervals (error bars) overlap, so the plot provides no evidence of a statistically significant home advantage.
3.) Add raw data behind your 95% CI plot above!
# Keep only the home team name and the two full-time goal columns
# (columns 2, 4 and 5 of soccer).
soccer_gls <- soccer %>% dplyr::select(HomeTeam, FTHG, FTAG)
head(soccer_gls)# A tibble: 6 × 3
HomeTeam FTHG FTAG
<chr> <dbl> <dbl>
1 Brentford 2 0
2 Man United 5 1
3 Burnley 1 2
4 Chelsea 3 0
5 Everton 3 1
6 Leicester 1 0
# Rename the goal columns by side; these names become the values of `team`.
soccer_gls <- soccer_gls %>%
  rename(home = FTHG, away = FTAG)

# Reshape to long format: one row per match and side, with the side in `team`
# and the goals scored in `mean`.
# NOTE(review): the value column holds raw goal counts, not means; presumably
# it is called `mean` to line up with the y aesthetic of the CI plot - confirm.
soccer_gls_1 <- soccer_gls %>%
  pivot_longer(
    cols = c("away", "home"),
    names_to = "team",
    values_to = "mean"
  )
soccer_gls_1# A tibble: 760 × 3
HomeTeam team mean
<chr> <chr> <dbl>
1 Brentford away 0
2 Brentford home 2
3 Man United away 1
4 Man United home 5
5 Burnley away 2
6 Burnley home 1
7 Chelsea away 0
8 Chelsea home 3
9 Everton away 1
10 Everton home 3
# … with 750 more rows
# 95% CI plot with the raw per-match goals behind it. Layers are drawn in the
# order they are added, so the raw data comes first and the bootstrapped mean
# and error bars sit on top of it.
ggplot(data = all_goals, aes(x = team, y = mean)) +
  # Goals are whole numbers: jitter horizontally only (height = 0) so the y
  # values stay exact, and use transparency to reduce overplotting.
  geom_jitter(
    data = soccer_gls_1,
    aes(x = team, y = mean),
    height = 0,
    alpha = 0.3
  ) +
  geom_point(color = "red") +
  # geom_errorbar() sizes its caps with `width`; `height` is not one of its
  # parameters and was being ignored with a warning.
  geom_errorbar(aes(ymin = mean - Ci, ymax = mean + Ci), width = 0.1) +
  labs(title = "95% CI plot of team goals") +
  theme_classic()