# Fit a logistic regression (binomial GLM) for the Won outcome, with Year and
# Team entered as ordinary fixed-effect predictors alongside the match-level
# covariates.
Model1 <- glm(
  Won ~ TossWon + BatFrist + HomeMatch + PPRuns + PPWickets +
    FourCount + SixCount + WicketsLost + TotelRuns + Year + Team,
  data = IPLData,
  family = binomial()
)
# Print the coefficient table, deviances and AIC of the fitted model
summary(Model1)##
## Call:
## glm(formula = Won ~ TossWon + BatFrist + HomeMatch + PPRuns +
## PPWickets + FourCount + SixCount + WicketsLost + TotelRuns +
## Year + Team, family = binomial(), data = IPLData)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3241 -0.8824 -0.2143 0.8511 2.2801
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.765059 0.872891 0.876 0.3808
## TossWon1 0.175730 0.311800 0.564 0.5730
## BatFrist1 -0.352356 0.315292 -1.118 0.2638
## HomeMatch1 0.802882 0.205678 3.904 9.48e-05 ***
## PPRuns 0.022711 0.010665 2.129 0.0332 *
## PPWickets -0.025834 0.113906 -0.227 0.8206
## FourCount 0.072760 0.037190 1.956 0.0504 .
## SixCount 0.042510 0.047148 0.902 0.3673
## WicketsLost -0.423578 0.052777 -8.026 1.01e-15 ***
## TotelRuns -0.003345 0.006855 -0.488 0.6256
## YearB2018 0.218858 0.316903 0.691 0.4898
## YearC2017 0.262339 0.350292 0.749 0.4539
## YearD2016 0.193095 0.351213 0.550 0.5825
## YearE2015 0.267262 0.372516 0.717 0.4731
## YearF2014 0.449776 0.324002 1.388 0.1651
## TeamDC -0.999901 0.434966 -2.299 0.0215 *
## TeamGL -0.859330 0.618370 -1.390 0.1646
## TeamKKR -0.544720 0.442733 -1.230 0.2186
## TeamKXIP -0.700140 0.436910 -1.602 0.1090
## TeamMI -0.329963 0.436539 -0.756 0.4497
## TeamRCB -0.882062 0.439100 -2.009 0.0446 *
## TeamRPS -0.701445 0.608046 -1.154 0.2487
## TeamRR -0.553484 0.473415 -1.169 0.2424
## TeamSH 12.541240 882.743514 0.014 0.9887
## TeamSPS 11.577445 882.743529 0.013 0.9895
## TeamSRH -0.329923 0.442357 -0.746 0.4558
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 848.41 on 611 degrees of freedom
## Residual deviance: 654.29 on 586 degrees of freedom
## AIC: 706.29
##
## Number of Fisher Scoring iterations: 13
## Loading required package: Matrix
# Mixed-effects logistic regression: the same covariates and Year as fixed
# effects, with Team modelled as a random intercept, (1 | Team).
# nAGQ = 1 selects the Laplace approximation; bobyqa is the optimizer.
Model2 <- glmer(
  Won ~ TossWon + BatFrist + HomeMatch + PPRuns + PPWickets +
    FourCount + SixCount + WicketsLost + TotelRuns + Year +
    (1 | Team),
  data = IPLData,
  family = binomial,
  control = glmerControl(optimizer = "bobyqa"),
  nAGQ = 1
)## boundary (singular) fit: see ?isSingular
# print the model results, including the correlations among fixed effects
# (corr = TRUE asks print() to show the fixed-effect correlation matrix)
print(summary(Model2), corr = TRUE)## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula:
## Won ~ TossWon + BatFrist + HomeMatch + PPRuns + PPWickets + FourCount +
## SixCount + WicketsLost + TotelRuns + Year + (1 | Team)
## Data: IPLData
## Control: glmerControl(optimizer = "bobyqa")
##
## AIC BIC logLik deviance df.resid
## 695.6 766.2 -331.8 663.6 596
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9607 -0.7157 -0.1722 0.6838 3.2551
##
## Random effects:
## Groups Name Variance Std.Dev.
## Team (Intercept) 0 0
## Number of obs: 612, groups: Team, 12
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.267304 0.819656 0.326 0.744336
## TossWon1 0.153016 0.306873 0.499 0.618041
## BatFrist1 -0.315705 0.310701 -1.016 0.309579
## HomeMatch1 0.747774 0.200822 3.724 0.000196 ***
## PPRuns 0.022951 0.010443 2.198 0.027966 *
## PPWickets -0.029972 0.113054 -0.265 0.790924
## FourCount 0.060284 0.036544 1.650 0.099019 .
## SixCount 0.025291 0.045808 0.552 0.580878
## WicketsLost -0.434106 0.052018 -8.345 < 2e-16 ***
## TotelRuns -0.001389 0.006775 -0.205 0.837603
## YearB2018 0.219835 0.314966 0.698 0.485199
## YearC2017 0.139841 0.324555 0.431 0.666562
## YearD2016 0.036444 0.325688 0.112 0.910904
## YearE2015 0.255970 0.369062 0.694 0.487952
## YearF2014 0.393555 0.321133 1.226 0.220380
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of fixed effects could have been required in summary()
##
## Correlation of Fixed Effects:
## (Intr) TssWn1 BtFrs1 HmMtc1 PPRuns PPWckt ForCnt SixCnt WcktsL
## TossWon1 -0.353
## BatFrist1 -0.275 0.763
## HomeMatch1 -0.019 -0.033 -0.140
## PPRuns -0.291 -0.031 0.090 0.003
## PPWickets -0.276 -0.052 0.025 -0.062 0.359
## FourCount 0.206 -0.068 -0.038 0.018 -0.164 -0.051
## SixCount 0.286 -0.100 -0.060 -0.022 -0.003 -0.063 0.532
## WicketsLost -0.219 -0.036 -0.089 -0.053 -0.204 -0.362 0.096 0.155
## TotelRuns -0.549 0.109 -0.018 -0.031 -0.161 0.063 -0.699 -0.743 -0.024
## YearB2018 -0.106 0.011 0.010 0.052 -0.041 -0.061 0.066 -0.019 -0.048
## YearC2017 -0.071 0.001 -0.004 0.036 -0.085 -0.097 0.080 0.101 -0.025
## YearD2016 -0.177 -0.012 -0.008 0.016 0.031 -0.007 0.090 0.158 0.052
## YearE2015 -0.172 -0.008 -0.005 0.020 0.048 0.000 0.028 0.098 -0.017
## YearF2014 -0.209 -0.020 -0.031 0.150 0.081 -0.009 0.156 0.169 0.019
## TtlRns YB2018 YC2017 YD2016 YE2015
## TossWon1
## BatFrist1
## HomeMatch1
## PPRuns
## PPWickets
## FourCount
## SixCount
## WicketsLost
## TotelRuns
## YearB2018 -0.056
## YearC2017 -0.094 0.492
## YearD2016 -0.115 0.469 0.472
## YearE2015 -0.050 0.414 0.415 0.423
## YearF2014 -0.149 0.487 0.484 0.502 0.437
## convergence code: 0
## boundary (singular) fit: see ?isSingular
## df AIC
## Model1 26 706.2949
## Model2 16 695.5801
## df BIC
## Model1 26 821.1299
## Model2 16 766.2479
## 'log Lik.' 654.2949 (df=26)
## 'log Lik.' 663.5801 (df=16)
The logistic regression method assumes that:
The outcome is a binary or dichotomous variable like yes vs no, positive vs negative, 1 vs 0.
There is a linear relationship between the logit of the outcome and each predictor variable. Recall that the logit function is logit(p) = log(p/(1-p)), where p is the probability of the outcome.
There are no influential values (extreme values or outliers) in the continuous predictors.
There are no high intercorrelations (i.e. multicollinearity) among the predictors.
library(tidyverse)
library(broom)
# Linearity-of-the-logit check: plot every numeric column of IPLData against
# the fitted log-odds from Model2 and overlay a loess smoother. A roughly
# straight smoother in a panel is consistent with a linear relationship
# between that predictor and the logit.

# Fitted probabilities on the response scale
probabilities <- predict(Model2, type = "response")
# Fitted log-odds: logit(p) = log(p / (1 - p))
logit <- log(probabilities / (1 - probabilities))
# Keep only the numeric columns; where() replaces the superseded select_if()
mydata <- IPLData %>%
  dplyr::select(where(is.numeric))
predictors <- colnames(mydata)
# Add the log-odds, then reshape to long form (one row per observation and
# predictor) so each predictor gets its own facet. pivot_longer() replaces
# the superseded gather(); only the row order of the long table differs.
mydata <- mydata %>%
  mutate(logit = log(probabilities / (1 - probabilities))) %>%
  pivot_longer(
    cols = -logit,
    names_to = "predictors",
    values_to = "predictor.value"
  )
# Create the scatter plots:
ggplot(mydata, aes(logit, predictor.value)) +
  geom_point(size = 0.5, alpha = 0.5) +
  geom_smooth(method = "loess") +
  theme_bw() +
  facet_wrap(~predictors, scales = "free_y")## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
## GVIF Df GVIF^(1/(2*Df))
## TossWon 2.658700 1 1.630552
## BatFrist 2.719590 1 1.649118
## HomeMatch 1.121307 1 1.058918
## PPRuns 1.617735 1 1.271902
## PPWickets 1.397512 1 1.182164
## FourCount 2.534929 1 1.592146
## SixCount 2.746477 1 1.657250
## WicketsLost 1.277906 1 1.130445
## TotelRuns 4.349602 1 2.085570
## Year 1.678021 5 1.053125
## Team 1.750530 11 1.025777
## GVIF Df GVIF^(1/(2*Df))
## TossWon 2.615618 1 1.617287
## BatFrist 2.683231 1 1.638057
## HomeMatch 1.087224 1 1.042700
## PPRuns 1.582959 1 1.258157
## PPWickets 1.388087 1 1.178171
## FourCount 2.468067 1 1.571008
## SixCount 2.630357 1 1.621837
## WicketsLost 1.251017 1 1.118488
## TotelRuns 4.353196 1 2.086431
## Year 1.220991 5 1.020167
# attach() places IPLData's columns on the search path, so a bare name such as
# PPRuns resolves to IPLData$PPRuns unless a global variable of the same name
# masks it.
# NOTE(review): attach() is discouraged because of that masking risk; prefer
# IPLData$col or with(IPLData, ...). Confirm nothing else in the file relies on
# bare column names before removing this call.
attach(IPLData)
# Build a one-row data frame describing a hypothetical observation to pass to
# predict() as newdata.
#
# The categorical predictors TossWon, BatFrist, Year and Team are held at the
# fixed values used throughout this section ("1", "1", "A2019", "KKR").
# Numeric predictors default to their sample means in `data`. Callers override
# one argument at a time to see how the predicted probability responds.
# Columns are read from `data` explicitly, so the result does not depend on
# attach() or on anything else on the search path.
#
# Args:
#   data:        data frame containing the numeric predictor columns.
#   HomeMatch:   value for the HomeMatch column ("1" or "0").
#   PPRuns:      value for the PPRuns column; default mean(data$PPRuns).
#   FourCount:   value for the FourCount column; default mean(data$FourCount).
#   WicketsLost: value for the WicketsLost column; default
#                mean(data$WicketsLost).
#
# Returns:
#   A one-row data.frame with one column per model predictor.
scenario_row <- function(data,
                         HomeMatch = "1",
                         PPRuns = mean(data$PPRuns),
                         FourCount = mean(data$FourCount),
                         WicketsLost = mean(data$WicketsLost)) {
  data.frame(
    TossWon = "1",
    BatFrist = "1",
    HomeMatch = HomeMatch,
    PPRuns = PPRuns,
    PPWickets = mean(data$PPWickets),
    FourCount = FourCount,
    SixCount = mean(data$SixCount),
    WicketsLost = WicketsLost,
    TotelRuns = mean(data$TotelRuns),
    Year = "A2019",
    Team = "KKR"
  )
}

# ---- Effect of HomeMatch ----
# Baseline: HomeMatch = "1", all numeric predictors at their means
Fdata <- scenario_row(IPLData, HomeMatch = "1")
# predicted probability of Won for this scenario
ProbF1 <- predict(Model2, Fdata, type = "response")
ProbF1## 1
## 0.5420654
# Same scenario with HomeMatch = "0"
Fdata <- scenario_row(IPLData, HomeMatch = "0")
# predicted probability of Won for this scenario
ProbF2 <- predict(Model2, Fdata, type = "response")
ProbF2## 1
## 0.3591366
homeMatch <- c("0", "1")
Probability <- c(round(ProbF2, 2), round(ProbF1, 2))
cbind(homeMatch, Probability)## homeMatch Probability
## 1 "0" "0.36"
## 1 "1" "0.54"

# ---- Effect of PPRuns (mean -/+ one standard deviation) ----
Fdata <- scenario_row(
  IPLData,
  PPRuns = mean(IPLData$PPRuns) - sd(IPLData$PPRuns)
)
# predicted probability of Won for this scenario
ProbG1 <- predict(Model2, Fdata, type = "response")
ProbG1## 1
## 0.4732919
Fdata <- scenario_row(
  IPLData,
  PPRuns = mean(IPLData$PPRuns) + sd(IPLData$PPRuns)
)
# predicted probability of Won for this scenario
ProbG2 <- predict(Model2, Fdata, type = "response")
ProbG2## 1
## 0.6092723
PowerPlayRuns <- c("mean-sd", "mean", "mean + sd")
probability <- c(round(ProbG1, 2), round(ProbF1, 2), round(ProbG2, 2))
cbind(PowerPlayRuns, probability)## PowerPlayRuns probability
## 1 "mean-sd" "0.47"
## 1 "mean" "0.54"
## 1 "mean + sd" "0.61"

# ---- Effect of FourCount (mean -/+ one standard deviation) ----
Fdata <- scenario_row(
  IPLData,
  FourCount = mean(IPLData$FourCount) - sd(IPLData$FourCount)
)
# predicted probability of Won for this scenario
ProbH1 <- predict(Model2, Fdata, type = "response")
ProbH1## 1
## 0.4783956
Fdata <- scenario_row(
  IPLData,
  FourCount = mean(IPLData$FourCount) + sd(IPLData$FourCount)
)
# predicted probability of Won for this scenario
ProbH2 <- predict(Model2, Fdata, type = "response")
ProbH2## 1
## 0.6043903
FourCounts <- c("mean-sd", "mean", "mean + sd")
probability <- c(round(ProbH1, 2), round(ProbF1, 2), round(ProbH2, 2))
cbind(FourCounts, probability)## FourCounts probability
## 1 "mean-sd" "0.48"
## 1 "mean" "0.54"
## 1 "mean + sd" "0.6"

# ---- Effect of WicketsLost (mean -/+ one standard deviation) ----
Fdata <- scenario_row(
  IPLData,
  WicketsLost = mean(IPLData$WicketsLost) - sd(IPLData$WicketsLost)
)
# predicted probability of Won for this scenario
ProbI1 <- predict(Model2, Fdata, type = "response")
ProbI1## 1
## 0.7710066
Fdata <- scenario_row(
  IPLData,
  WicketsLost = mean(IPLData$WicketsLost) + sd(IPLData$WicketsLost)
)
# predicted probability of Won for this scenario
ProbI2 <- predict(Model2, Fdata, type = "response")
ProbI2## 1
## 0.2938655
WicketLost <- c("mean-sd", "mean", "mean + sd")
probability <- c(round(ProbI1, 2), round(ProbF1, 2), round(ProbI2, 2))
cbind(WicketLost, probability)## WicketLost probability
## 1 "mean-sd" "0.77"
## 1 "mean" "0.54"
## 1 "mean + sd" "0.29"