Visreg_Tutorial

Basic framework

# read data
dat <- read.table("data/2015-2016NBA.txt",h=T)
names(dat)[25] <- "DIFF"
dat <- subset(dat,!dat$PS=="??")
datgp <- subset(dat,GP > 30)
head(datgp)

            Player Team PS GP  Min FGM  FGA X3M X3A FTM FTA  OR  TR  AS ST
1       acy,quincy  sac SF 59  877 119  214  19  49  50  68  65 188  27 29
3     adams,steven  okl  C 80 2019 261  426   0   0 114 196 218 531  61 42
4    afflalo,arron  nyk SG 71 2359 354  799  91 238 110 131  23 266 145 25
5    ajinca,alexis  nor  C 59  863 150  314   0   1  52  62  75 269  32 19
6     aldrich,cole  lac  C 60  802 134  225   0   0  60  84  86 288  50 47
7 aldridge,lamarcu  san PF 74 2260 536 1045   0  16 259 302 175 631 110 38
  TO BK  PF DQ  PTS TC EJ FF Sta DIFF
1 27 24 103  0  307  3  0  0  29 -128
3 84 89 223  2  636  2  0  0  80  477
4 82 10 142  1  909  1  0  0  57 -154
5 54 36 134  0  352  2  0  0  17  -68
6 64 68 139  1  328  0  0  0   5  -28
7 99 81 151  0 1331  0  0  0  74  510

# conditional plots for each explanatory variable
fit1 <- lm(DIFF ~ X3M + AS + TO + BK, data = datgp)
summary(fit1)


Call:
lm(formula = DIFF ~ X3M + AS + TO + BK, data = datgp)

Residuals:
    Min      1Q  Median      3Q     Max 
-799.30  -89.14   -1.20   86.52  708.90 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) -89.1880    18.8750  -4.725 3.26e-06 ***
X3M           1.3110     0.2166   6.053 3.47e-09 ***
AS            1.0456     0.1614   6.479 2.91e-10 ***
TO           -2.3321     0.4045  -5.765 1.71e-08 ***
BK            2.5581     0.3728   6.863 2.82e-11 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 191.5 on 374 degrees of freedom
Multiple R-squared:  0.2135,    Adjusted R-squared:  0.2051 
F-statistic: 25.38 on 4 and 374 DF,  p-value: < 2.2e-16

library(visreg)
visreg(fit1,"AS")

visreg(fit1,"TO")

# change color and CI(default alpha=.05)
visreg(fit1, "AS", line=list(col="orange"), points=list(col="plum"))

visreg(fit1, "AS", line=list(col="orange"),
       points=list(col="plum"),alpha=.01,fill=list(col="yellow"))

# factor variable
fit2 <- lm(DIFF ~ X3M + AS + TO + BK + PS, data = datgp)
visreg(fit2, "PS")

fit3 <- lm(DIFF ~ X3M + AS + TO + BK + Team, data = datgp)
visreg(fit3, "Team")

Cross-sectional plots

# interaction(x1,x2=continus) 
fit4 <- lm(DIFF ~ FGM*PTS, data = datgp)
# overlay these cross-sections
visreg(fit4,"FGM",by="PTS", overlay=T, partial=FALSE,band=F)

# interaction(x1=continus,x2=discrete) 
fit5 <- lm(DIFF ~ AS*PS, data = datgp)
# overlay these cross-sections
visreg(fit5,"AS",by="PS", overlay=T, partial=FALSE,band=F)

# main effect
visreg(fit5, "AS", cond = list(PS = "SF"))

Please note that you are attempting to plot a 'main effect' in a
model that contains an interaction.  This is potentially
misleading; you may wish to consider using the 'by' argument.

Conditions used in construction of plot
PS: SF

visreg(fit5, "AS", cond = list(PS = "SG"))

Please note that you are attempting to plot a 'main effect' in a
model that contains an interaction.  This is potentially
misleading; you may wish to consider using the 'by' argument.

Conditions used in construction of plot
PS: SG

Interplot

library(interplot)

Warning: package 'interplot' was built under R version 3.2.5

Loading required package: ggplot2

Loading required package: abind

Loading required package: arm

Warning: package 'arm' was built under R version 3.2.5

Loading required package: MASS

Loading required package: Matrix

Loading required package: lme4


arm (Version 1.8-6, built: 2015-7-7)

Working directory is C:/Users/Mini/Documents/R project/Data M

# draw interaction 
fit6 <- lm(DIFF ~ AS*TO, data = datgp)
summary(fit6)


Call:
lm(formula = DIFF ~ AS * TO, data = datgp)

Residuals:
    Min      1Q  Median      3Q     Max 
-665.62  -90.96  -11.19   80.44  827.38 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)  
(Intercept) -1.277e+01  2.528e+01  -0.505   0.6137  
AS           3.216e-01  2.272e-01   1.415   0.1578  
TO          -6.293e-01  3.594e-01  -1.751   0.0807 .
AS:TO        1.478e-03  8.707e-04   1.698   0.0904 .
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 206 on 375 degrees of freedom
Multiple R-squared:  0.08703,   Adjusted R-squared:  0.07972 
F-statistic: 11.92 on 3 and 375 DF,  p-value: 1.807e-07

interplot(m = fit6, var1 = "AS", var2 = "TO",hist = TRUE) +
    # Add labels for X and Y axes
    xlab("TO") +
    ylab("Estimated Coefficient for\nAS scores") +
    # Change the background
    theme_bw() +
    # Add the title
    ggtitle("Estimated Coefficient of AS scores on TO") +
    theme(plot.title = element_text(face="bold"))

# Plot interactions with a factor term
fit7 <- lm(DIFF ~ BK*PS, data = datgp)
summary(fit7)


Call:
lm(formula = DIFF ~ BK * PS, data = datgp)

Residuals:
    Min      1Q  Median      3Q     Max 
-781.43  -96.11    0.55   78.50  985.30 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)  
(Intercept) -33.9107    48.3869  -0.701   0.4839  
BK            0.4634     0.5998   0.773   0.4403  
PSPF        -33.2831    59.7485  -0.557   0.5778  
PSPG        -17.1197    59.7002  -0.287   0.7745  
PSSF        -28.8054    61.4915  -0.468   0.6397  
PSSG        -37.8908    60.7323  -0.624   0.5331  
BK:PSPF       1.1681     0.8690   1.344   0.1797  
BK:PSPG       3.7072     2.1293   1.741   0.0825 .
BK:PSSF       2.3330     1.1623   2.007   0.0454 *
BK:PSSG       4.1791     1.7198   2.430   0.0156 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 209.6 on 369 degrees of freedom
Multiple R-squared:  0.07059,   Adjusted R-squared:  0.04793 
F-statistic: 3.114 on 9 and 369 DF,  p-value: 0.001263

interplot(m = fit7, var1 = "BK", var2 = "PS") +
    xlab("PS") +
    ylab("Estimated Coefficient for\nBK scores") +
    theme_bw()

Nonlinear models

# read data
ucla <- read.csv("http://www.ats.ucla.edu/stat/data/binary.csv")
head(ucla)

  admit gre  gpa rank
1     0 380 3.61    3
2     1 660 3.67    3
3     1 800 4.00    1
4     1 640 3.19    4
5     0 520 2.93    4
6     1 760 3.00    2

str(ucla)

'data.frame':   400 obs. of  4 variables:
 $ admit: int  0 1 1 1 0 1 1 0 1 0 ...
 $ gre  : int  380 660 800 640 520 760 560 400 540 700 ...
 $ gpa  : num  3.61 3.67 4 3.19 2.93 3 2.98 3.08 3.39 3.92 ...
 $ rank : int  3 3 1 4 4 2 1 2 3 2 ...

# logistic regression
ucla$rank <- factor(ucla$rank)
fit8 <- glm(admit ~ gre + gpa + rank, data = ucla, family = "binomial")
summary(fit8)


Call:
glm(formula = admit ~ gre + gpa + rank, family = "binomial", 
    data = ucla)

Deviance Residuals: 
    Min       1Q   Median       3Q      Max  
-1.6268  -0.8662  -0.6388   1.1490   2.0790  

Coefficients:
             Estimate Std. Error z value Pr(>|z|)    
(Intercept) -3.989979   1.139951  -3.500 0.000465 ***
gre          0.002264   0.001094   2.070 0.038465 *  
gpa          0.804038   0.331819   2.423 0.015388 *  
rank2       -0.675443   0.316490  -2.134 0.032829 *  
rank3       -1.340204   0.345306  -3.881 0.000104 ***
rank4       -1.551464   0.417832  -3.713 0.000205 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

(Dispersion parameter for binomial family taken to be 1)

    Null deviance: 499.98  on 399  degrees of freedom
Residual deviance: 458.52  on 394  degrees of freedom
AIC: 470.52

Number of Fisher Scoring iterations: 4

visreg(fit8, "gre", scale = "response", rug = 2,xlab = "GRE score", ylab = "P(Admit)")

fit9 <- glm(admit ~ gre + gpa + rank, data = ucla, family = "binomial")
visreg(fit9, "rank", scale = "response", rug = 2, xlab = "Rank", ylab = "P(Admit)")