Basic framework
# read data
dat <- read.table("data/2015-2016NBA.txt",h=T)
names(dat)[25] <- "DIFF"
dat <- subset(dat,!dat$PS=="??")
datgp <- subset(dat,GP > 30)
head(datgp)
Player Team PS GP Min FGM FGA X3M X3A FTM FTA OR TR AS ST
1 acy,quincy sac SF 59 877 119 214 19 49 50 68 65 188 27 29
3 adams,steven okl C 80 2019 261 426 0 0 114 196 218 531 61 42
4 afflalo,arron nyk SG 71 2359 354 799 91 238 110 131 23 266 145 25
5 ajinca,alexis nor C 59 863 150 314 0 1 52 62 75 269 32 19
6 aldrich,cole lac C 60 802 134 225 0 0 60 84 86 288 50 47
7 aldridge,lamarcu san PF 74 2260 536 1045 0 16 259 302 175 631 110 38
TO BK PF DQ PTS TC EJ FF Sta DIFF
1 27 24 103 0 307 3 0 0 29 -128
3 84 89 223 2 636 2 0 0 80 477
4 82 10 142 1 909 1 0 0 57 -154
5 54 36 134 0 352 2 0 0 17 -68
6 64 68 139 1 328 0 0 0 5 -28
7 99 81 151 0 1331 0 0 0 74 510
# conditional plots for each explanatory variable
fit1 <- lm(DIFF ~ X3M + AS + TO + BK, data = datgp)
summary(fit1)
Call:
lm(formula = DIFF ~ X3M + AS + TO + BK, data = datgp)
Residuals:
Min 1Q Median 3Q Max
-799.30 -89.14 -1.20 86.52 708.90
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -89.1880 18.8750 -4.725 3.26e-06 ***
X3M 1.3110 0.2166 6.053 3.47e-09 ***
AS 1.0456 0.1614 6.479 2.91e-10 ***
TO -2.3321 0.4045 -5.765 1.71e-08 ***
BK 2.5581 0.3728 6.863 2.82e-11 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 191.5 on 374 degrees of freedom
Multiple R-squared: 0.2135, Adjusted R-squared: 0.2051
F-statistic: 25.38 on 4 and 374 DF, p-value: < 2.2e-16
library(visreg)
visreg(fit1,"AS")
visreg(fit1,"TO")
# change color and CI(default alpha=.05)
visreg(fit1, "AS", line=list(col="orange"), points=list(col="plum"))
visreg(fit1, "AS", line=list(col="orange"),
points=list(col="plum"),alpha=.01,fill=list(col="yellow"))
# factor variable
fit2 <- lm(DIFF ~ X3M + AS + TO + BK + PS, data = datgp)
visreg(fit2, "PS")
fit3 <- lm(DIFF ~ X3M + AS + TO + BK + Team, data = datgp)
visreg(fit3, "Team")
Cross-sectional plots
# interaction(x1,x2=continus)
fit4 <- lm(DIFF ~ FGM*PTS, data = datgp)
# overlay these cross-sections
visreg(fit4,"FGM",by="PTS", overlay=T, partial=FALSE,band=F)
# interaction(x1=continus,x2=discrete)
fit5 <- lm(DIFF ~ AS*PS, data = datgp)
# overlay these cross-sections
visreg(fit5,"AS",by="PS", overlay=T, partial=FALSE,band=F)
# main effect
visreg(fit5, "AS", cond = list(PS = "SF"))
Please note that you are attempting to plot a 'main effect' in a
model that contains an interaction. This is potentially
misleading; you may wish to consider using the 'by' argument.
Conditions used in construction of plot
PS: SF
visreg(fit5, "AS", cond = list(PS = "SG"))
Please note that you are attempting to plot a 'main effect' in a
model that contains an interaction. This is potentially
misleading; you may wish to consider using the 'by' argument.
Conditions used in construction of plot
PS: SG
Interplot
library(interplot)
Warning: package 'interplot' was built under R version 3.2.5
Loading required package: ggplot2
Loading required package: abind
Loading required package: arm
Warning: package 'arm' was built under R version 3.2.5
Loading required package: MASS
Loading required package: Matrix
Loading required package: lme4
arm (Version 1.8-6, built: 2015-7-7)
Working directory is C:/Users/Mini/Documents/R project/Data M
# draw interaction
fit6 <- lm(DIFF ~ AS*TO, data = datgp)
summary(fit6)
Call:
lm(formula = DIFF ~ AS * TO, data = datgp)
Residuals:
Min 1Q Median 3Q Max
-665.62 -90.96 -11.19 80.44 827.38
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -1.277e+01 2.528e+01 -0.505 0.6137
AS 3.216e-01 2.272e-01 1.415 0.1578
TO -6.293e-01 3.594e-01 -1.751 0.0807 .
AS:TO 1.478e-03 8.707e-04 1.698 0.0904 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 206 on 375 degrees of freedom
Multiple R-squared: 0.08703, Adjusted R-squared: 0.07972
F-statistic: 11.92 on 3 and 375 DF, p-value: 1.807e-07
interplot(m = fit6, var1 = "AS", var2 = "TO",hist = TRUE) +
# Add labels for X and Y axes
xlab("TO") +
ylab("Estimated Coefficient for\nAS scores") +
# Change the background
theme_bw() +
# Add the title
ggtitle("Estimated Coefficient of AS scores on TO") +
theme(plot.title = element_text(face="bold"))
# Plot interactions with a factor term
fit7 <- lm(DIFF ~ BK*PS, data = datgp)
summary(fit7)
Call:
lm(formula = DIFF ~ BK * PS, data = datgp)
Residuals:
Min 1Q Median 3Q Max
-781.43 -96.11 0.55 78.50 985.30
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -33.9107 48.3869 -0.701 0.4839
BK 0.4634 0.5998 0.773 0.4403
PSPF -33.2831 59.7485 -0.557 0.5778
PSPG -17.1197 59.7002 -0.287 0.7745
PSSF -28.8054 61.4915 -0.468 0.6397
PSSG -37.8908 60.7323 -0.624 0.5331
BK:PSPF 1.1681 0.8690 1.344 0.1797
BK:PSPG 3.7072 2.1293 1.741 0.0825 .
BK:PSSF 2.3330 1.1623 2.007 0.0454 *
BK:PSSG 4.1791 1.7198 2.430 0.0156 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 209.6 on 369 degrees of freedom
Multiple R-squared: 0.07059, Adjusted R-squared: 0.04793
F-statistic: 3.114 on 9 and 369 DF, p-value: 0.001263
interplot(m = fit7, var1 = "BK", var2 = "PS") +
xlab("PS") +
ylab("Estimated Coefficient for\nBK scores") +
theme_bw()
Nonlinear models
# read data
ucla <- read.csv("http://www.ats.ucla.edu/stat/data/binary.csv")
head(ucla)
admit gre gpa rank
1 0 380 3.61 3
2 1 660 3.67 3
3 1 800 4.00 1
4 1 640 3.19 4
5 0 520 2.93 4
6 1 760 3.00 2
str(ucla)
'data.frame': 400 obs. of 4 variables:
$ admit: int 0 1 1 1 0 1 1 0 1 0 ...
$ gre : int 380 660 800 640 520 760 560 400 540 700 ...
$ gpa : num 3.61 3.67 4 3.19 2.93 3 2.98 3.08 3.39 3.92 ...
$ rank : int 3 3 1 4 4 2 1 2 3 2 ...
# logistic regression
ucla$rank <- factor(ucla$rank)
fit8 <- glm(admit ~ gre + gpa + rank, data = ucla, family = "binomial")
summary(fit8)
Call:
glm(formula = admit ~ gre + gpa + rank, family = "binomial",
data = ucla)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.6268 -0.8662 -0.6388 1.1490 2.0790
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.989979 1.139951 -3.500 0.000465 ***
gre 0.002264 0.001094 2.070 0.038465 *
gpa 0.804038 0.331819 2.423 0.015388 *
rank2 -0.675443 0.316490 -2.134 0.032829 *
rank3 -1.340204 0.345306 -3.881 0.000104 ***
rank4 -1.551464 0.417832 -3.713 0.000205 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 499.98 on 399 degrees of freedom
Residual deviance: 458.52 on 394 degrees of freedom
AIC: 470.52
Number of Fisher Scoring iterations: 4
visreg(fit8, "gre", scale = "response", rug = 2,xlab = "GRE score", ylab = "P(Admit)")
fit9 <- glm(admit ~ gre + gpa + rank, data = ucla, family = "binomial")
visreg(fit9, "rank", scale = "response", rug = 2, xlab = "Rank", ylab = "P(Admit)")