I have three continuous variables (two predictors and one outcome), each ranging from 1 to 7. The two predictor variables have a negative interaction in the model below, and I am trying to understand that interaction better.
Download this text file: https://www.dropbox.com/s/2bb747j20ho8wr5/interaction-data.txt?dl=0
# Load the tab-delimited data file (point the path at wherever the download
# was saved) and preview the first rows.
mydata <- read.delim(
  "C:/Users/abc/Downloads/interaction-data.txt",
  header = TRUE,
  stringsAsFactors = FALSE
)
head(mydata)
## pred1 pred2 out
## 1 3.8 5.6875 5.000000
## 2 6.0 6.1250 6.333333
## 3 5.4 5.7500 6.333333
## 4 4.0 6.1250 6.666667
## 5 6.0 5.6875 7.000000
## 6 5.0 4.8125 7.000000
# Per-column descriptive statistics (n, mean, sd, median, range, skew, ...).
psych::describe(x = mydata)
## vars n mean sd median trimmed mad min max range skew kurtosis
## pred1 1 200 4.46 1.46 4.80 4.55 1.48 1 7 6 -0.48 -0.45
## pred2 2 200 4.74 1.15 4.81 4.80 1.11 1 7 6 -0.48 0.23
## out 3 200 5.56 1.81 6.00 5.92 1.48 1 7 6 -1.32 0.69
## se
## pred1 0.10
## pred2 0.08
## out 0.13
# Linear model of the outcome on both predictors and their interaction
# (pred1 * pred2 expands to pred1 + pred2 + pred1:pred2).
mod1 <- lm(formula = out ~ pred1 * pred2, data = mydata)
summary(object = mod1)
##
## Call:
## lm(formula = out ~ pred1 * pred2, data = mydata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.8828 -0.4969 0.0669 0.7127 3.6755
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2.13336 0.81334 -2.623 0.009402 **
## pred1 0.88200 0.21371 4.127 5.43e-05 ***
## pred2 1.47723 0.20750 7.119 2.01e-11 ***
## pred1:pred2 -0.14453 0.04292 -3.367 0.000914 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.242 on 196 degrees of freedom
## Multiple R-squared: 0.5387, Adjusted R-squared: 0.5316
## F-statistic: 76.3 on 3 and 196 DF, p-value: < 2.2e-16
library(rsm)
# Show the fitted surface of mod1 over pred1 (x) and pred2 (y) in three
# side-by-side panels: image, contour and perspective views.
# par() returns the settings it replaces; keep them and restore them once the
# panels are drawn so the 1 x 3 layout does not leak into later base plots.
old_par <- par(mfrow = c(1, 3))
image(mod1, pred2 ~ pred1)
contour(mod1, pred2 ~ pred1)
persp(
  mod1, pred2 ~ pred1,
  zlab = "Outcome",
  col = rainbow(50),
  contours = "colors"
)
par(old_par)
# Bin pred2 into five intervals and tabulate how many rows fall in each bin.
mydata[["pred2_level"]] <- cut(mydata[["pred2"]], breaks = 5)
dplyr::count(mydata, pred2_level)
## Source: local data frame [5 x 2]
##
## pred2_level n
## (fctr) (int)
## 1 (0.994,2.2] 7
## 2 (2.2,3.4] 16
## 3 (3.4,4.6] 65
## 4 (4.6,5.8] 79
## 5 (5.8,7.01] 33
# Scatter out against pred1 (jittered, semi-transparent) with a separate
# linear fit for each pred2 bin; both axes are zoomed to the 1-7 range.
library(ggplot2)
ggplot(
  data = mydata,
  mapping = aes(x = pred1, y = out, colour = factor(pred2_level))
) +
  geom_jitter(alpha = 0.3) +
  geom_smooth(method = lm) +
  coord_cartesian(xlim = c(1, 7), ylim = c(1, 7))