#Load Custom Functions

### Johnson Neyman Technique ###

  jnt = function(.lm, predictor, moderator, alpha=.05) {
    require(stringi)
    b1 = coef(.lm)[predictor]
    b3 = coef(.lm)[stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor))]
    se_b1 = coef(summary(.lm))[predictor, 2]
    se_b3 = coef(summary(.lm))[stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor)), 2]
    COV_b1b3 = vcov(.lm)[predictor, stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor))]
    t_crit = qt(1-alpha/2, .lm$df.residual)
    # see Bauer & Curran, 2005
    a = t_crit^2 * se_b3^2 - b3^2
    b = 2 * (t_crit^2 * COV_b1b3 - b1 * b3)
    c = t_crit^2 * se_b1^2 - b1^2
    jn = c(
      (-b - sqrt(b^2 - 4 * a * c)) / (2 * a),
      (-b + sqrt(b^2 - 4 * a * c)) / (2 * a)
    )
    JN = sort(unname(jn))
    JN = JN[JN>=min(.lm$model[,moderator]) & JN<=max(.lm$model[,moderator])]
    JN
  }
2*qf(0.95,2,306)
## [1] 6.050506
### Plot Johnson Neyman Technique ###

  theta_plot = function(.lm, predictor, moderator, alpha=.05, jn=F) {
    require(dplyr)
    require(ggplot2); theme_set(theme_minimal())
    require(stringi)
    .data = data_frame(b1 = coef(.lm)[predictor],
                       b3 = coef(.lm)[stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor))],
                       Z = quantile(.lm$model[,moderator], seq(0,1,.01)),
                       theta = b1 + Z * b3,
                       se_b1 = coef(summary(.lm))[predictor, 2],
                       COV_b1b3 = vcov(.lm)[predictor, stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor))],
                       se_b3 = coef(summary(.lm))[stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor)), 2],
                       se_theta = sqrt(se_b1^2 + 2 * Z * COV_b1b3 + Z^2 * se_b3^2),
                       #ci.lo_theta = theta+qt(alpha/2, .lm$df.residual)*se_theta,
                       #ci.hi_theta = theta+qt(1-alpha/2, .lm$df.residual)*se_theta)
                      #USe for simulataneous Cit
                       ci.lo_theta = theta-2*qf(1-alpha,df1=2,df2=(length(.lm$residuals)-4))*se_theta, #This does not seem quite right
                       ci.hi_theta = theta+2*qf(1-alpha,df1=2,df2=(length(.lm$residuals)-4))*se_theta) #This does not seem quite right
                
    if (jn) {
      JN = jnt(.lm=.lm, predictor=predictor, moderator=moderator, alpha=alpha)
      JN_lines = geom_vline(xintercept=JN, linetype=2)
      JN_regions = ifelse(length(JN) == 0, "no significance regions", paste(round(JN,2), collapse = "; "))
      Xlab = paste0(moderator, " (JN Significance Regions: ", JN_regions,")")
    }
    else {
      Xlab = moderator
      JN_lines = NULL
    }
    .data %>%
      ggplot(aes(Z, theta, ymin=ci.lo_theta, ymax=ci.hi_theta)) + geom_ribbon(alpha = .2) + geom_line() + ggtitle(paste("Conditional Effect of", predictor, "as function of", moderator)) + geom_hline(yintercept=0, linetype=2) + labs(x = Xlab, y=expression(theta)) + JN_lines
  }

Exercise 14

library(readr)
## Warning: package 'readr' was built under R version 3.3.2
library(ggplot2)
library(plyr)
library(car)
library(lsmeans)
ch9e_3 <- read_csv("~/R/DataHouse/Delaney/Week8_stat_survey/ch9e_3.csv")
ch9e_3$Tx <- ifelse(ch9e_3$Treatment==0,"Control",ifelse(ch9e_3$Treatment==1,"Bloomers",NA))
#Means of the IQ PRe
  IQpreMEans <- ddply(ch9e_3,c("Tx"),summarise,m=mean(IQPre))
  IQpreMEans
##         Tx        m
## 1 Bloomers 101.1875
## 2  Control  97.7561
#ANCOHET Regression MODEL
options(contrasts=c("contr.treatment", "contr.poly"))
  mod.het <- lm(IQ8~IQPre*Treatment,data=ch9e_3)
  summary(mod.het)
## 
## Call:
## lm(formula = IQ8 ~ IQPre * Treatment, data = ch9e_3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.334  -8.629  -0.719   7.590  57.597 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      30.36587    4.56698   6.649 1.36e-10 ***
## IQPre             0.77799    0.04591  16.945  < 2e-16 ***
## Treatment       -14.83283    9.91252  -1.496   0.1356    
## IQPre:Treatment   0.19096    0.09695   1.970   0.0498 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.24 on 306 degrees of freedom
## Multiple R-squared:  0.5846, Adjusted R-squared:  0.5806 
## F-statistic: 143.6 on 3 and 306 DF,  p-value: < 2.2e-16
#ANCOHET ANOVA Model  
  options(contrasts=c("contr.sum", "contr.poly"))
  mod.het2 <- lm(IQ8~IQPre*Tx,data=ch9e_3)
  Anova(mod.het2,type=3)
## Anova Table (Type III tests)
## 
## Response: IQ8
##             Sum Sq  Df  F value    Pr(>F)    
## (Intercept)   3759   1  21.4406 5.402e-06 ***
## IQPre        56921   1 324.6586 < 2.2e-16 ***
## Tx             393   1   2.2391   0.13559    
## IQPre:Tx       680   1   3.8793   0.04979 *  
## Residuals    53649 306                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#ANCOVA Model
  mod.ANCOVA <- lm(IQ8~Tx+IQPre,data=ch9e_3)
  Anova(mod.ANCOVA,type=3)
## Anova Table (Type III tests)
## 
## Response: IQ8
##             Sum Sq  Df  F value    Pr(>F)    
## (Intercept)   8268   1  46.7226 4.414e-11 ***
## Tx             953   1   5.3827   0.02099 *  
## IQPre        72234   1 408.1712 < 2.2e-16 ***
## Residuals    54330 307                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Regression for Control

\(\hat { Y } =30.36+0.779{ x }_{ i }^{ }\)

Regression for Bloomers

\(\hat { Y } =15.53+0.96{ x }_{ i }^{ }\)

Plot of the Regresson Lines

p1 <- ggplot(ch9e_3,aes(x=IQPre,y=IQ8,color=Tx)) + geom_point(size =1.5) +
  geom_abline(slope=0.77799,intercept=30.36,size=2,color="#00BFC4") + 
  geom_abline(slope=0.96,intercept=15.53,size=2,color="#F8766D")
print(p1)

Plot with the center of accuracy and computation of Center of Accuracy

Control <- subset(ch9e_3,Tx=="Control")
Bloomer <- subset(ch9e_3,Tx=="Bloomers")
.n1 <- sum((Bloomer$IQPre-mean(Bloomer$IQPre))^2)*mean(Control$IQPre)
.n2 <- sum((Control$IQPre-mean(Control$IQPre))^2)*mean(Bloomer$IQPre)
.d1 <- sum((Control$IQPre-mean(Control$IQPre))^2)+sum((Bloomer$IQPre-mean(Bloomer$IQPre))^2)
Ca <- (.n1+.n2)/.d1
Ca
## [1] 100.418
p1 <- p1 + geom_vline(xintercept=Ca,size=1.25)
print(p1)

The Center of Accuracy is 100.42

Test at the center of accuracy

S_het <- sqrt(175)
ddply(ch9e_3,"Tx",summarise,length(Tx))
##         Tx ..1
## 1 Bloomers  64
## 2  Control 246
n.1 <- 246
n.2 <- 64

  a <-(1/n.1)+(1/n.2)
  .na <- (mean(Bloomer$IQPre)-mean(Control$IQPre))^2
  .na1 <- (Ca-mean(Control$IQPre))^2
  .na2 <- (Ca-mean(Bloomer$IQPre))^2
  .da1 <- sum((Control$IQPre-mean(Control$IQPre)^2))
  .da2 <- sum((Bloomer$IQPre-mean(Bloomer$IQPre)^2))
  
  b <- (.na1/.da1)+(.na2/.da2)
  
Sigma_t <- S_het*sqrt(a+b)
#Sigma_t

Y_control <- 30.36+0.77799*Ca
Y_Bloomer <- 15.53+0.96895*Ca
t <- (Y_control-Y_Bloomer)/Sigma_t
#t

sig <- 1-pf(t^2,df1=1,df2=n.1+n.2-4)

Test statistic at the Center of Accuracy for the Anchoet Model

jnt(mod.het,predictor="Treatment",moderator="IQPre")
## [1] 97.15018
theta_plot(mod.het,predictor="Treatment",moderator="IQPre")

Exercise 14

ch9e_14 <- read_csv("~/R/DataHouse/Delaney/Week8_stat_survey/ch9e_14.csv")
ch9e_14$Cond <- with(ch9e_14,ifelse(Condition==1,"If_What",ifelse(Condition==2,"If_Why",ifelse(Condition==3,"Dist_What",ifelse(Condition==4,"Dist_Why",NA)))))

ANOVA/ANCOVA

#ANOVA
  options(contrasts=c("contr.sum","contr.poly"))
  mod19.ANOVA <- lm(Anger~Cond,data=ch9e_14)
  Anova(mod19.ANOVA,type=3)
## Anova Table (Type III tests)
## 
## Response: Anger
##              Sum Sq  Df  F value  Pr(>F)    
## (Intercept) 1738.08   1 807.5203 < 2e-16 ***
## Cond          16.18   3   2.5058 0.06125 .  
## Residuals    325.01 151                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#ANCOVA
  options(contrasts=c("contr.sum","contr.poly"))
  mod19.ANCOVA <- lm(Anger~EmotClose+Cond,data=ch9e_14)
  Anova(mod19.ANCOVA,type=3)
## Anova Table (Type III tests)
## 
## Response: Anger
##              Sum Sq  Df  F value    Pr(>F)    
## (Intercept) 207.020   1 109.4007 < 2.2e-16 ***
## EmotClose    41.161   1  21.7516 6.812e-06 ***
## Cond         16.450   3   2.8977   0.03709 *  
## Residuals   283.846 150                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Means for ANOVA model
  lsmean.ANOVA <-   lsmeans(mod19.ANOVA, specs="Cond")
  print(lsmean.ANOVA)
##  Cond        lsmean        SE  df lower.CL upper.CL
##  Dist_What 3.461538 0.2349229 151 2.997378 3.925699
##  Dist_Why  2.820513 0.2349229 151 2.356352 3.284673
##  If_What   3.421053 0.2379939 151 2.950824 3.891281
##  If_Why    3.692308 0.2349229 151 3.228147 4.156468
## 
## Confidence level used: 0.95
  ddply(ch9e_14,"Cond",summarise,MEAN=mean(Anger))
##        Cond     MEAN
## 1 Dist_What 3.461538
## 2  Dist_Why 2.820513
## 3   If_What 3.421053
## 4    If_Why 3.692308
#Means for ANCOVA model
  lsmean.ANCOVA <-  lsmeans(mod19.ANCOVA, specs="Cond")
  print(lsmean.ANCOVA)
##  Cond        lsmean        SE  df lower.CL upper.CL
##  Dist_What 3.470766 0.2202830 150 3.035507 3.906024
##  Dist_Why  2.823355 0.2202750 150 2.388113 3.258598
##  If_What   3.392643 0.2232368 150 2.951548 3.833738
##  If_Why    3.707920 0.2202995 150 3.272628 4.143211
## 
## Confidence level used: 0.95
  contrast(lsmean.ANCOVA,list('Custom Contrast'=c(-1/3,1,-1/3,-1/3)))
##  contrast          estimate        SE  df t.ratio p.value
##  Custom Contrast -0.7004209 0.2546308 150  -2.751  0.0067
  SD <- sqrt((1.50^2*38+1.52^2*39+1.50^2*39+1.33^2*39)/sum(38+38+39+39))
  
  d = 0.70/SD