Analysis problem

I have three continuous variables that range from 1 to 7. There is a negative interaction between the two predictor variables that I am trying to understand better.

Getting the data

Download this text file: https://www.dropbox.com/s/2bb747j20ho8wr5/interaction-data.txt?dl=0

# Load the tab-separated data file (first row holds the column names)
# and preview the first few rows.
mydata <- read.delim(
  "C:/Users/abc/Downloads/interaction-data.txt",
  stringsAsFactors = FALSE
)
head(mydata)
##   pred1  pred2      out
## 1   3.8 5.6875 5.000000
## 2   6.0 6.1250 6.333333
## 3   5.4 5.7500 6.333333
## 4   4.0 6.1250 6.666667
## 5   6.0 5.6875 7.000000
## 6   5.0 4.8125 7.000000
# Descriptive statistics for each column of the data
psych::describe(x = mydata)
##       vars   n mean   sd median trimmed  mad min max range  skew kurtosis
## pred1    1 200 4.46 1.46   4.80    4.55 1.48   1   7     6 -0.48    -0.45
## pred2    2 200 4.74 1.15   4.81    4.80 1.11   1   7     6 -0.48     0.23
## out      3 200 5.56 1.81   6.00    5.92 1.48   1   7     6 -1.32     0.69
##         se
## pred1 0.10
## pred2 0.08
## out   0.13
# Linear model of the outcome on both predictors and their interaction
mod1 <- lm(formula = out ~ pred1 * pred2, data = mydata)
summary(mod1)
## 
## Call:
## lm(formula = out ~ pred1 * pred2, data = mydata)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.8828 -0.4969  0.0669  0.7127  3.6755 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.13336    0.81334  -2.623 0.009402 ** 
## pred1        0.88200    0.21371   4.127 5.43e-05 ***
## pred2        1.47723    0.20750   7.119 2.01e-11 ***
## pred1:pred2 -0.14453    0.04292  -3.367 0.000914 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.242 on 196 degrees of freedom
## Multiple R-squared:  0.5387, Adjusted R-squared:  0.5316 
## F-statistic:  76.3 on 3 and 196 DF,  p-value: < 2.2e-16

Interaction plots with rsm

library(rsm)
# Draw the three views of the fitted surface side by side. par() returns the
# previous settings, which are kept so the layout can be restored afterwards
# instead of leaking a 1x3 grid into every later base-graphics plot.
old_par <- par(mfrow = c(1, 3))
image(mod1, pred2 ~ pred1)
contour(mod1, pred2 ~ pred1)
persp(
  mod1, pred2 ~ pred1,
  zlab = "Outcome",
  col = rainbow(50),
  contours = "colors"
)
# Put the graphics settings back the way they were before this section.
par(old_par)

Interaction plots with ggplot2

# Bin pred2 into five intervals and store the bin as a new column,
# then tabulate how many rows land in each bin.
mydata[["pred2_level"]] <- cut(mydata[["pred2"]], breaks = 5)
dplyr::count(mydata, pred2_level)
## Source: local data frame [5 x 2]
## 
##   pred2_level     n
##        (fctr) (int)
## 1 (0.994,2.2]     7
## 2   (2.2,3.4]    16
## 3   (3.4,4.6]    65
## 4   (4.6,5.8]    79
## 5  (5.8,7.01]    33
# Outcome against pred1: jittered points plus a linear fit per pred2 bin,
# with both axes restricted to the 1-7 range.
library(ggplot2)
ggplot(
  data = mydata,
  mapping = aes(x = pred1, y = out, colour = factor(pred2_level))
) +
  geom_jitter(alpha = 0.3) +
  geom_smooth(method = lm) +
  coord_cartesian(xlim = c(1, 7), ylim = c(1, 7))