#Load Custom Functions

### Johnson Neyman Technique ###

  jnt = function(.lm, predictor, moderator, alpha=.05) {
    require(stringi)
    b1 = coef(.lm)[predictor]
    b3 = coef(.lm)[stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor))]
    se_b1 = coef(summary(.lm))[predictor, 2]
    se_b3 = coef(summary(.lm))[stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor)), 2]
    COV_b1b3 = vcov(.lm)[predictor, stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor))]
    t_crit = qt(1-alpha/2, .lm$df.residual)
    # see Bauer & Curran, 2005
    a = t_crit^2 * se_b3^2 - b3^2
    b = 2 * (t_crit^2 * COV_b1b3 - b1 * b3)
    c = t_crit^2 * se_b1^2 - b1^2
    jn = c(
      (-b - sqrt(b^2 - 4 * a * c)) / (2 * a),
      (-b + sqrt(b^2 - 4 * a * c)) / (2 * a)
    )
    JN = sort(unname(jn))
    JN = JN[JN>=min(.lm$model[,moderator]) & JN<=max(.lm$model[,moderator])]
    JN
  }
2*qf(0.95,2,306)

## [1] 6.050506

### Plot Johnson Neyman Technique ###

  theta_plot = function(.lm, predictor, moderator, alpha=.05, jn=F) {
    require(dplyr)
    require(ggplot2); theme_set(theme_minimal())
    require(stringi)
    .data = data_frame(b1 = coef(.lm)[predictor],
                       b3 = coef(.lm)[stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor))],
                       Z = quantile(.lm$model[,moderator], seq(0,1,.01)),
                       theta = b1 + Z * b3,
                       se_b1 = coef(summary(.lm))[predictor, 2],
                       COV_b1b3 = vcov(.lm)[predictor, stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor))],
                       se_b3 = coef(summary(.lm))[stri_startswith_fixed(names(coef(.lm)), paste0(predictor,":")) | stri_endswith_fixed(names(coef(.lm)), paste0(":",predictor)), 2],
                       se_theta = sqrt(se_b1^2 + 2 * Z * COV_b1b3 + Z^2 * se_b3^2),
                       #ci.lo_theta = theta+qt(alpha/2, .lm$df.residual)*se_theta,
                       #ci.hi_theta = theta+qt(1-alpha/2, .lm$df.residual)*se_theta)
                      #USe for simulataneous Cit
                       ci.lo_theta = theta-2*qf(1-alpha,df1=2,df2=(length(.lm$residuals)-4))*se_theta, #This does not seem quite right
                       ci.hi_theta = theta+2*qf(1-alpha,df1=2,df2=(length(.lm$residuals)-4))*se_theta) #This does not seem quite right
                
    if (jn) {
      JN = jnt(.lm=.lm, predictor=predictor, moderator=moderator, alpha=alpha)
      JN_lines = geom_vline(xintercept=JN, linetype=2)
      JN_regions = ifelse(length(JN) == 0, "no significance regions", paste(round(JN,2), collapse = "; "))
      Xlab = paste0(moderator, " (JN Significance Regions: ", JN_regions,")")
    }
    else {
      Xlab = moderator
      JN_lines = NULL
    }
    .data %>%
      ggplot(aes(Z, theta, ymin=ci.lo_theta, ymax=ci.hi_theta)) + geom_ribbon(alpha = .2) + geom_line() + ggtitle(paste("Conditional Effect of", predictor, "as function of", moderator)) + geom_hline(yintercept=0, linetype=2) + labs(x = Xlab, y=expression(theta)) + JN_lines
  }

Exercise 14

library(readr)

## Warning: package 'readr' was built under R version 3.3.2

library(ggplot2)
library(plyr)
library(car)
library(lsmeans)
ch9e_3 <- read_csv("~/R/DataHouse/Delaney/Week8_stat_survey/ch9e_3.csv")
ch9e_3$Tx <- ifelse(ch9e_3$Treatment==0,"Control",ifelse(ch9e_3$Treatment==1,"Bloomers",NA))

#Means of the IQ PRe
  IQpreMEans <- ddply(ch9e_3,c("Tx"),summarise,m=mean(IQPre))
  IQpreMEans

##         Tx        m
## 1 Bloomers 101.1875
## 2  Control  97.7561

#ANCOHET Regression MODEL
options(contrasts=c("contr.treatment", "contr.poly"))
  mod.het <- lm(IQ8~IQPre*Treatment,data=ch9e_3)
  summary(mod.het)

## 
## Call:
## lm(formula = IQ8 ~ IQPre * Treatment, data = ch9e_3)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -27.334  -8.629  -0.719   7.590  57.597 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      30.36587    4.56698   6.649 1.36e-10 ***
## IQPre             0.77799    0.04591  16.945  < 2e-16 ***
## Treatment       -14.83283    9.91252  -1.496   0.1356    
## IQPre:Treatment   0.19096    0.09695   1.970   0.0498 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.24 on 306 degrees of freedom
## Multiple R-squared:  0.5846, Adjusted R-squared:  0.5806 
## F-statistic: 143.6 on 3 and 306 DF,  p-value: < 2.2e-16

#ANCOHET ANOVA Model  
  options(contrasts=c("contr.sum", "contr.poly"))
  mod.het2 <- lm(IQ8~IQPre*Tx,data=ch9e_3)
  Anova(mod.het2,type=3)

## Anova Table (Type III tests)
## 
## Response: IQ8
##             Sum Sq  Df  F value    Pr(>F)    
## (Intercept)   3759   1  21.4406 5.402e-06 ***
## IQPre        56921   1 324.6586 < 2.2e-16 ***
## Tx             393   1   2.2391   0.13559    
## IQPre:Tx       680   1   3.8793   0.04979 *  
## Residuals    53649 306                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#ANCOVA Model
  mod.ANCOVA <- lm(IQ8~Tx+IQPre,data=ch9e_3)
  Anova(mod.ANCOVA,type=3)

## Anova Table (Type III tests)
## 
## Response: IQ8
##             Sum Sq  Df  F value    Pr(>F)    
## (Intercept)   8268   1  46.7226 4.414e-11 ***
## Tx             953   1   5.3827   0.02099 *  
## IQPre        72234   1 408.1712 < 2.2e-16 ***
## Residuals    54330 307                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Regression for Control

\(\hat { Y } =30.36+0.779{ x }_{ i }^{ }\)

Regression for Bloomers

\(\hat { Y } =15.53+0.96{ x }_{ i }^{ }\)

Plot of the Regresson Lines

p1 <- ggplot(ch9e_3,aes(x=IQPre,y=IQ8,color=Tx)) + geom_point(size =1.5) +
  geom_abline(slope=0.77799,intercept=30.36,size=2,color="#00BFC4") + 
  geom_abline(slope=0.96,intercept=15.53,size=2,color="#F8766D")
print(p1)

Plot with the center of accuracy and computation of Center of Accuracy

Control <- subset(ch9e_3,Tx=="Control")
Bloomer <- subset(ch9e_3,Tx=="Bloomers")
.n1 <- sum((Bloomer$IQPre-mean(Bloomer$IQPre))^2)*mean(Control$IQPre)
.n2 <- sum((Control$IQPre-mean(Control$IQPre))^2)*mean(Bloomer$IQPre)
.d1 <- sum((Control$IQPre-mean(Control$IQPre))^2)+sum((Bloomer$IQPre-mean(Bloomer$IQPre))^2)
Ca <- (.n1+.n2)/.d1
Ca

## [1] 100.418

p1 <- p1 + geom_vline(xintercept=Ca,size=1.25)
print(p1)

The Center of Accuracy is 100.42

Test at the center of accuracy

S_het <- sqrt(175)
ddply(ch9e_3,"Tx",summarise,length(Tx))

##         Tx ..1
## 1 Bloomers  64
## 2  Control 246

n.1 <- 246
n.2 <- 64

  a <-(1/n.1)+(1/n.2)
  .na <- (mean(Bloomer$IQPre)-mean(Control$IQPre))^2
  .na1 <- (Ca-mean(Control$IQPre))^2
  .na2 <- (Ca-mean(Bloomer$IQPre))^2
  .da1 <- sum((Control$IQPre-mean(Control$IQPre)^2))
  .da2 <- sum((Bloomer$IQPre-mean(Bloomer$IQPre)^2))
  
  b <- (.na1/.da1)+(.na2/.da2)
  
Sigma_t <- S_het*sqrt(a+b)
#Sigma_t

Y_control <- 30.36+0.77799*Ca
Y_Bloomer <- 15.53+0.96895*Ca
t <- (Y_control-Y_Bloomer)/Sigma_t
#t

sig <- 1-pf(t^2,df1=1,df2=n.1+n.2-4)

Test statistic at the Center of Accuracy for the Anchoet Model

SE: 1.86
T-statistic: -2.34
p-value: 0.02

jnt(mod.het,predictor="Treatment",moderator="IQPre")

## [1] 97.15018

theta_plot(mod.het,predictor="Treatment",moderator="IQPre")

Exercise 14

ch9e_14 <- read_csv("~/R/DataHouse/Delaney/Week8_stat_survey/ch9e_14.csv")
ch9e_14$Cond <- with(ch9e_14,ifelse(Condition==1,"If_What",ifelse(Condition==2,"If_Why",ifelse(Condition==3,"Dist_What",ifelse(Condition==4,"Dist_Why",NA)))))

ANOVA/ANCOVA

#ANOVA
  options(contrasts=c("contr.sum","contr.poly"))
  mod19.ANOVA <- lm(Anger~Cond,data=ch9e_14)
  Anova(mod19.ANOVA,type=3)

## Anova Table (Type III tests)
## 
## Response: Anger
##              Sum Sq  Df  F value  Pr(>F)    
## (Intercept) 1738.08   1 807.5203 < 2e-16 ***
## Cond          16.18   3   2.5058 0.06125 .  
## Residuals    325.01 151                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#ANCOVA
  options(contrasts=c("contr.sum","contr.poly"))
  mod19.ANCOVA <- lm(Anger~EmotClose+Cond,data=ch9e_14)
  Anova(mod19.ANCOVA,type=3)

## Anova Table (Type III tests)
## 
## Response: Anger
##              Sum Sq  Df  F value    Pr(>F)    
## (Intercept) 207.020   1 109.4007 < 2.2e-16 ***
## EmotClose    41.161   1  21.7516 6.812e-06 ***
## Cond         16.450   3   2.8977   0.03709 *  
## Residuals   283.846 150                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Means for ANOVA model
  lsmean.ANOVA <-   lsmeans(mod19.ANOVA, specs="Cond")
  print(lsmean.ANOVA)

##  Cond        lsmean        SE  df lower.CL upper.CL
##  Dist_What 3.461538 0.2349229 151 2.997378 3.925699
##  Dist_Why  2.820513 0.2349229 151 2.356352 3.284673
##  If_What   3.421053 0.2379939 151 2.950824 3.891281
##  If_Why    3.692308 0.2349229 151 3.228147 4.156468
## 
## Confidence level used: 0.95

  ddply(ch9e_14,"Cond",summarise,MEAN=mean(Anger))

##        Cond     MEAN
## 1 Dist_What 3.461538
## 2  Dist_Why 2.820513
## 3   If_What 3.421053
## 4    If_Why 3.692308

#Means for ANCOVA model
  lsmean.ANCOVA <-  lsmeans(mod19.ANCOVA, specs="Cond")
  print(lsmean.ANCOVA)

##  Cond        lsmean        SE  df lower.CL upper.CL
##  Dist_What 3.470766 0.2202830 150 3.035507 3.906024
##  Dist_Why  2.823355 0.2202750 150 2.388113 3.258598
##  If_What   3.392643 0.2232368 150 2.951548 3.833738
##  If_Why    3.707920 0.2202995 150 3.272628 4.143211
## 
## Confidence level used: 0.95

  contrast(lsmean.ANCOVA,list('Custom Contrast'=c(-1/3,1,-1/3,-1/3)))

##  contrast          estimate        SE  df t.ratio p.value
##  Custom Contrast -0.7004209 0.2546308 150  -2.751  0.0067

  SD <- sqrt((1.50^2*38+1.52^2*39+1.50^2*39+1.33^2*39)/sum(38+38+39+39))
  
  d = 0.70/SD

Unstanderdized Effect Size Custom Contrast = \(0.70\)
Weighted SD = \(1.4690369\)
Estimated Effect Size = \(0.4765027\)

ch3ex9

Eric Kruger

March 1, 2017

Exercise 14

Regression for Control

Regression for Bloomers

Plot of the Regresson Lines

Plot with the center of accuracy and computation of Center of Accuracy

Test at the center of accuracy

Exercise 14

ANOVA/ANCOVA