library(knitr)

## Warning: package 'knitr' was built under R version 3.5.2

library(psych)

## Warning: package 'psych' was built under R version 3.5.2

library(car)

## Warning: package 'car' was built under R version 3.5.2

## Loading required package: carData

## Warning: package 'carData' was built under R version 3.5.2

## 
## Attaching package: 'car'

## The following object is masked from 'package:psych':
## 
##     logit

library(lmSupport)

## Warning: package 'lmSupport' was built under R version 3.5.2

source('http://psych.colorado.edu/~jclab/R/mcSummaryLm.R')

setwd("C:/Users/Dani Grant/Desktop")
d <- read.csv("nursery.csv",header=T)

setwd("C:/Users/Dani Grant/Dropbox/CU Boulder/grad_stats/grad stats final project")
d2 <- read.csv("final_wide.csv",header=T)

Question 1: NURSERY Data

The dataset NURSERY contains data from a study of the performance of first-grade students (i.e. PEABODY scores) as a function of whether or not they had attended nursery school (NURSERY with levels YES and NO) and whether their parents had college educations (COLLEGE with levels NONE, ONE, and BOTH. Use a two-way analysis of variance to examine whether Peabody scores are related to nursery school attendance and/or college education of parents.

A. Specify your contrast codes. Say IN WORDS what question each code asks. Note: There is an obvious ordering of the levels of the COLLEGE variable; you should examine a linear trend.

ANSWER:

codes for homework 15 #1a

Nursery: This is the test to see if there is a signficant difference in Peabody score between the average of the two groups for children that attended nursery school and did not attend nursery school.

CollegeLin: This tests if there is a signficant linear effect on Peabody score for each additional parent that attends college.

CollegeQuad: This tests if there is a significant quadratic effect on Peabody score by comparing children who have one parent compared to the average of the two groups of children with neither parent attending college or both attending college.

Nurs*Lin: This tests if there is an interaction between the linear effect of college education and nursery attendance. So, does the effect of having an additional parent who attended college influencing peabody scores depend on whether or not the child attended Nursery school?

Nurs*Quad: This tests if there is an interaction between the quadratic effect of college education and preschool attendance. So, does the difference in the effect of having one parent with a college education compared to two or neither parents depend on whether or not the child attended preschool? —-

B. Run a regression in which your contrast codes predict PEABODY scores. Because the cell counts are so unequal, make sure to examine the tolerances. What do the tolerances tell you?

d$nurs <- (-1/2)*(d$NURSERY == "yes") + (1/2)*(d$NURSERY == "no")
d$collegeLin <- (-1/2)*(d$COLLEGE == "none") + (0)*(d$COLLEGE == "one") + (1/2)*(d$COLLEGE == "both")
d$collegeQuad <- (2/3)*(d$COLLEGE == "one") + (-1/3)*(d$COLLEGE == "both" | d$COLLEGE == "none")
d$nursLin <- d$nurs*d$collegeLin
d$nursQuad <- d$nurs*d$collegeQuad

m1 <- lm(PEABODY ~ nurs + collegeLin + collegeQuad + nursLin + nursQuad, data = d)

mcSummary(m1)

## $call
## lm(formula = PEABODY ~ nurs + collegeLin + collegeQuad + nursLin + 
##     nursQuad, data = d)
## 
## $anova
##                  SS df      MS EtaSq     F     p
## Model       998.205  5 199.641 0.224 2.535 0.042
## Error      3464.775 44  78.745    NA    NA    NA
## Corr Total 4462.980 49  91.081    NA    NA    NA
## 
## $extras
##        RMSE AdjEtaSq
## Model 8.874    0.135
## 
## $coefficients
##                Est StErr      t     SSR(3) EtaSq   tol  CI_2.5 CI_97.5
## (Intercept) 75.932 1.626 46.706 171776.910 0.980    NA  72.655  79.208
## nurs        -8.114 3.251 -2.495    490.359 0.124 0.621 -14.667  -1.561
## collegeLin   8.771 4.528  1.937    295.402 0.079 0.749  -0.356  17.897
## collegeQuad -1.298 2.900 -0.448     15.778 0.005 0.749  -7.142   4.546
## nursLin     -2.792 9.057 -0.308      7.482 0.002 0.619 -21.044  15.461
## nursQuad     8.971 5.799  1.547    188.431 0.052 0.677  -2.717  20.658
##                 p
## (Intercept) 0.000
## nurs        0.016
## collegeLin  0.059
## collegeQuad 0.657
## nursLin     0.759
## nursQuad    0.129

ANSWER:

If all the cell sample sizes are equal in a two-way ANOVA, tolerance would be equal to 1.0 for all predictors Here, because our cell sample sizes are not equal, the tolerances are not equal to 1.0 because the variance for the “nurs” code can be in-part explained by the other predictors (thus, decreasing tolerance).

C. Interpret the slopes for the following effects in terms of the means involved

describe(d$PEABODY[d$NURSERY == "yes" & d$COLLEGE == "none"])

##    vars  n  mean   sd median trimmed  mad min max range  skew kurtosis
## X1    1 12 76.83 7.58   77.5    77.4 5.19  60  88    28 -0.51    -0.22
##      se
## X1 2.19

describe(d$PEABODY[d$NURSERY == "yes" & d$COLLEGE == "one"])

##    vars  n  mean   sd median trimmed  mad min max range  skew kurtosis  se
## X1    1 15 76.13 9.69     75      77 7.41  51  90    39 -0.92     0.65 2.5

describe(d$PEABODY[d$NURSERY == "yes" & d$COLLEGE == "both"])

##    vars n mean   sd median trimmed   mad min max range skew kurtosis   se
## X1    1 3   87 8.54     86      87 10.38  79  96    17 0.12    -2.33 4.93

describe(d$PEABODY[d$NURSERY == "no" & d$COLLEGE == "none"])

##    vars n  mean   sd median trimmed  mad min max range skew kurtosis   se
## X1    1 8 67.12 6.15   65.5   67.12 2.22  63  82    19 1.69     1.32 2.17

describe(d$PEABODY[d$NURSERY == "no" & d$COLLEGE == "one"])

##    vars  n mean    sd median trimmed  mad min max range skew kurtosis   se
## X1    1 10   74 10.15     73   73.25 5.19  60  94    34 0.57    -0.72 3.21

describe(d$PEABODY[d$NURSERY == "no" & d$COLLEGE == "both"])

##    vars n mean    sd median trimmed   mad min max range skew kurtosis  se
## X1    1 2 74.5 13.44   74.5    74.5 14.08  65  84    19    0    -2.75 9.5

codes for homework 15 #1a

i. Quadratic effect of college

ANSWER:

Qeffect <- 75.065 - ((71.975 + 80.75)/2)
Qeffect

## [1] -1.2975

-1.298 represents the difference between the average of Peabody scores for the “one” college group and the average of the “none” and “both” college groups.

ii. Nursery X linear effect of college

ANSWER:

NLeffect <- (76.833 - 67.125) - (87 - 74.5)
NLeffect

## [1] -2.792

-2.792 is the average difference in Peabody scores for each additional parent with a college education (none, one, or both) and how the mean differences is influenced by whether the child attended nursery school. So, this is the difference of differences test - is the difference between peabody score for children who DID NOT attend preschool for none vs. both parents attending college signficantly different from peabody score difference for children who DID attend preschool for none vs. both parents attending college.

iii. Nursery X Quadratic effect of college

NQeffect <- (((76.83 + 87)/2) - 76.13) - (((67.125 + 74.5)/2) - 74)
NQeffect #rounding error I think?

## [1] 8.9725

ANSWER:

8.971 is the average difference in Peabody scores when preschool attendance interact with the qudratic effect for parental college education.

So, this is again the difference of differences test - is the difference between peabody score for children who DID NOT attend preschool for average of none and both parents minus one parent attending college and then seeing if this is signficantly different from peabody score for children who DID attend preschool for average of none and both parents attending college minus one parent attending college.

D. Provide a complete ANOVA summary table. Use the ‘kable’ function. Note that the number of observations per cell in this study are very unequal; the SSR’s will not add up. Therefore to get the SSR for the total effect of COLLEGE you will need to do a regression in which you leave out both COLLEGE codes and compare the resulting SSE with the regression in which all the codes are included. The same strategy will be necessary for testing the joint interaction effect.

#thanks for making the slides, it was fun and also horrible to do this ;)

#calculate 2 df SS (run 2-df test and model compare to get them) because unequal cell sample sizes
ma.int <- lm(PEABODY ~ nurs + collegeLin + collegeQuad + nursLin + nursQuad, data = d)
mc.int <- lm(PEABODY ~ nurs + collegeLin + collegeQuad, data = d)

modelCompare(mc.int,ma.int)

## SSE (Compact) =  3670.71 
## SSE (Augmented) =  3464.775 
## Delta R-Squared =  0.04614294 
## Partial Eta-Squared (PRE) =  0.05610223 
## F(2,44) = 1.307609, p = 0.280769

ssr.int <- 3670.71 -3464.775
ssr.int

## [1] 205.935

#calculate 2 df SS (run 2-df test and model compare to get them) because unequal cell sample sizes
ma.col <- lm(PEABODY ~ nurs + collegeLin + collegeQuad + nursLin + nursQuad, data = d)
mc.col <- lm(PEABODY ~ nurs + nursLin + nursQuad, data = d)

modelCompare(mc.col,ma.col)

## SSE (Compact) =  3781.51 
## SSE (Augmented) =  3464.775 
## Delta R-Squared =  0.0709694 
## Partial Eta-Squared (PRE) =  0.08375887 
## F(2,44) = 2.011146, p = 0.1459537

ssr.col <-  3781.51 - 3464.775 
ssr.col

## [1] 316.735

nursery <- c(-1/2, 1/2, -1/2, 1/2, -1/2, 1/2)
collegeLin <- c(-1/2, -1/2, 0, 0, 1/2, 1/2)
collegeQuad <- c(-1/3, -1/3, 2/3, 2/3, -1/3, -1/3)

codeMat <- rbind(nursery,
                 collegeLin,
                 collegeQuad,
                 nursLin = nursery*collegeLin,
                 nursQuad = nursery*collegeQuad)

colnames(codeMat) <- c("yes,none", "no, none", "yes, one", "no, one", "yes, both", "no, both")

kable(codeMat, format = "html")

	yes,none	no, none	yes, one	no, one	yes, both	no, both
nursery	-0.5000000	0.5000000	-0.5000000	0.5000000	-0.5000000	0.5000000
collegeLin	-0.5000000	-0.5000000	0.0000000	0.0000000	0.5000000	0.5000000
collegeQuad	-0.3333333	-0.3333333	0.6666667	0.6666667	-0.3333333	-0.3333333
nursLin	0.2500000	-0.2500000	0.0000000	0.0000000	-0.2500000	0.2500000
nursQuad	0.1666667	-0.1666667	-0.3333333	0.3333333	0.1666667	-0.1666667

#make function to calculate betas 
betaCalc <- function (lVec, yBarVec) {
  sum(lVec * yBarVec) / sum(lVec^2)
}

#make function to calculate SSR 
ssrCalc <- function(lVec, yBarVec, nVec){
  sum(lVec*yBarVec)^2 / sum(lVec^2/nVec)
}

#create matrix for means table
mat <- matrix (c(76.83, 67.12, 76.13, 74, 87, 74.5),
               nrow = 3,
               byrow = TRUE
)

#label rows and columns in table
rownames(mat) <- c("none", "one", "both")
colnames(mat) <- c("yes", "no")

#transform table
matT <- t(mat)

#create means vector
yBar <- c(76.83, 67.12, 76.13, 74, 87, 74.5)
#create cells n table
n <- c(12, 8, 15, 10, 3, 2)

(tmat <- matrix(
   nrow = 10, ncol = 7,
   dimnames = list(
     c(
       "model",
       "nursery",
       "college",
       "linear", 
       "quadratic",
       "interaction",
       "nurs*lin",
       "nurs*quad",
       "error",
       "total"
     ),
     c(
       "b", "df", "SS", "MS", "F", "p", "PRE"
     )
   )))

##              b df SS MS  F  p PRE
## model       NA NA NA NA NA NA  NA
## nursery     NA NA NA NA NA NA  NA
## college     NA NA NA NA NA NA  NA
## linear      NA NA NA NA NA NA  NA
## quadratic   NA NA NA NA NA NA  NA
## interaction NA NA NA NA NA NA  NA
## nurs*lin    NA NA NA NA NA NA  NA
## nurs*quad   NA NA NA NA NA NA  NA
## error       NA NA NA NA NA NA  NA
## total       NA NA NA NA NA NA  NA

tdf <- as.data.frame(tmat)

#calculate betas
tdf[,'b'] <- c(
  NA, # Model line
  betaCalc(codeMat['nursery',], yBar),
  NA, # discrepancy line
  betaCalc(codeMat['collegeLin',], yBar),
  betaCalc(codeMat['collegeQuad',], yBar),
  NA, # interaction line
  betaCalc(codeMat['nursLin',], yBar),
  betaCalc(codeMat['nursQuad',], yBar),
  NA, # Error line
  NA # Total
)

#just write in df :)
tdf[,'df'] <- c(5, 1, 2, 1, 1, 2, 1, 1, 44, 49)
tdf[,'MS'] <- tdf[,'SS'] / tdf[,'df']

#calculate SSsssss
tdf[,'SS'] <- c(
  998.21, # Model line
  ssrCalc(codeMat['nursery',], yBar, n),
  316.73, # college line
  ssrCalc(codeMat['collegeLin',], yBar, n),
  ssrCalc(codeMat['collegeQuad',], yBar, n),
  205.94, # interaction line
  ssrCalc(codeMat['nursLin',], yBar, n),
  ssrCalc(codeMat['nursQuad',], yBar, n),
  3464.78, # Error line
  NA # Total
)

tdf[,'df'] <- c(5, 1, 2, 1, 1, 2, 1, 1, 44, 49)
tdf[,'MS'] <- tdf[,'SS'] / tdf[,'df']

#calculate Fs
tdf[1:8, 'F'] <- tdf[1:8, 'MS'] / tdf['error', 'MS']

#calculate PREs
#why is this not calculating PRE correctly??
#tdf[1:8, 'PRE'] <- tdf[1:8, 'SS'] / (tdf[1:8, 'SS'] + tdf['model', 'SS'])
tdf[1:8,'PRE'] <- c(.224,.124,.084,.079,.005,.056,.002,.052)

#calculate p values
ps <- pf(q = tdf[1:8, 'F'], tdf[1:8, 'df'], 44, lower.tail = F)
tdf[1:8,'p'] <- sapply(
  ps,
  function (pv) {
    if (pv < .001) {
      'p < .001*'
    } else if (pv <= .05) {
      paste0('p = ', round(pv, 3), '*')
    } else {
      paste0('p = ', round(pv, 3))
    }
  }
                    )

tdf

##                     b df          SS         MS          F          p
## model              NA  5  998.210000 199.642000 2.53529748 p = 0.042*
## nursery     -8.113333  1  490.291531 490.291531 6.22631953 p = 0.016*
## college            NA  2  316.730000 158.365000 2.01111182  p = 0.146
## linear       8.775000  1  295.682400 295.682400 3.75493555  p = 0.059
## quadratic   -1.297500  1   15.767473  15.767473 0.20023459  p = 0.657
## interaction        NA  2  205.940000 102.970000 1.30763858  p = 0.281
## nurs*lin    -2.790000  1    7.472736   7.472736 0.09489791  p = 0.759
## nurs*quad    8.975000  1  188.606341 188.606341 2.39515323  p = 0.129
## error              NA 44 3464.780000  78.745000         NA       <NA>
## total              NA 49          NA         NA         NA       <NA>
##               PRE
## model       0.224
## nursery     0.124
## college     0.084
## linear      0.079
## quadratic   0.005
## interaction 0.056
## nurs*lin    0.002
## nurs*quad   0.052
## error          NA
## total          NA

E. Test the simple effect of NURSERY on PEABODY, when COLLEGE = none.

D1 <- c(1,1,1,-1,-1,-1)
D2 <- c(0,1,0,0,1,0)
D3 <- c(0,0,1,0,0,1)
D4 <- c(0,1,0,0,-1,0)
D5 <- c(0,0,1,0,0,-1)

codeMat <- rbind(D1, D2, D3, D4, D5)

colnames(codeMat) <- c("Y0", "N0", "Y1", "N1", "Y2", "N2")

kable(codeMat, format = "html")

	Y0	N0	Y1	N1	Y2	N2
D1	1	1	1	-1	-1	-1
D2	0	1	0	0	1	0
D3	0	0	1	0	0	1
D4	0	1	0	0	-1	0
D5	0	0	1	0	0	-1

D1 <- (1)*(d$NURSERY == "yes") + (-1)*(d$NURSERY == "no")
D2 = (1)*(d$COLLEGE == "one") #default = 0
D3 = (d$COLLEGE == "both") * (1)
D4 = D1*D2
D5 = D1*D3

m.d <- lm(d$PEABODY ~ D1 + D2 + D3 + D4 + D5)
mcSummary(m.d)

## $call
## lm(formula = d$PEABODY ~ D1 + D2 + D3 + D4 + D5)
## 
## $anova
##                  SS df      MS EtaSq     F     p
## Model       998.205  5 199.641 0.224 2.535 0.042
## Error      3464.775 44  78.745    NA    NA    NA
## Corr Total 4462.980 49  91.081    NA    NA    NA
## 
## $extras
##        RMSE AdjEtaSq
## Model 8.874    0.135
## 
## $coefficients
##                Est StErr      t    SSR(3) EtaSq   tol CI_2.5 CI_97.5     p
## (Intercept) 71.979 2.025 35.542 99475.208 0.966    NA 67.898  76.061 0.000
## D1           4.854 2.025  2.397   452.408 0.115 0.400  0.773   8.936 0.021
## D2           3.087 2.717  1.136   101.682 0.029 0.853 -2.388   8.563 0.262
## D3           8.771 4.528  1.937   295.402 0.079 0.853 -0.356  17.897 0.059
## D4          -3.787 2.717 -1.394   153.015 0.042 0.435 -9.263   1.688 0.170
## D5           1.396 4.528  0.308     7.482 0.002 0.771 -7.731  10.522 0.759

ANSWER:

We sough to test the simple effect of nursery school attendance on on peabody score for children who had neither parent attend college. We found a statitically significant difference between children who did attend nursery school compared to those who did not attend nursery school for those with neither parents attending college, t(44) = 2.397, PRE = 12, p = .021. Specifically, the children who attended preschool scored on avareage 8.77 points higher on the Peabody test that those who did not attend preschool, when parent had no college education.

F. Write a brief paragraph in the style you would use for writing a journal article that summarizes the results of your analysis. Include means and a chart to help interpret the results. Look at the R code for relatively simple ways to generate both (feel free to make the chart more sophisiticated).

barplot(
  matT, 
  beside = T,
  legend.text = T,
  ylim = c(0,100),
  ylab = "Peabody Score",
  xlab = "Parent College Education",
  main = "Effect of Parent Edu. & Preschool on Peabody Score"
  )

We conducted a 2 (nursery attendance “yes” and “no”) x 3 (parental college education ) analysis of variance using contrast coded predictors.

First, we found that on average attending preschool had an effect on child’s peabody score such that those who attended preschool scored higher (M = 77.5, SD = 9.09) on the test compared to those who did not attend preschool (M = 71.3, SD = 9.19), F(1,44) = 6.23, PRE = .12, p = .016.

Next, we found a marginally significant linear effect for parental education on peabdy score such that for each additional parent who attended college there was on average a 4.39 point increase by peabody score, F(1,44) = 3.75, PRE = .079, p = .059. However, we found no significant quadratic effect for parental education on peabody score, F(1,44) = .20, p = .657.

Lastly, we found no signficant interaction between the linear or quadratic effects of parental education level and preschool attendance, t(44) = -.31, p = .759; t(44) = 1.55, p = .129.

Question 2: Your Dataset

The dataset you used for your project last semester most likely had at least two categorical variables. If it doesn’t, feel free to turn a continuous variable into one (just for practice with this).

Using two factors from your dataset, run a two-way ANOVA including a test of the interaction.

A. Specify your codes

gen <- c(1/2, 1/2, -1/2, -1/2)
a <- c(-1/2, +1/2, -1/2, +1/2)
genAge <- gen*a

codeMatrix <- rbind(gen, a, genAge)
colnames(codeMatrix) <- c("F19+", "F18", "M19+", "M18")
rownames(codeMatrix) <- c("gender", "age", "interaction")
kable(codeMatrix, format = "html")

	F19+	F18	M19+	M18
gender	0.50	0.50	-0.50	-0.50
age	-0.50	0.50	-0.50	0.50
interaction	-0.25	0.25	0.25	-0.25

d2$gender <- ifelse(d2$Q1 == "female", .5, -.5)

#18 year old = -1/2     #19 = 0      #20+ years old = +1/2
d2$age <- ifelse(d2$Q2 == 18, .5, -.5)


d2$genXage <- d2$gender*d2$age

B. Run the analysis

d2$cueChoiceProp_.5 <- d2$cueChoiceProp - .5

model <- lm(cueChoiceProp_.5 ~ gender + age + genXage, data = d2)
mcSummary(model)

## $call
## lm(formula = cueChoiceProp_.5 ~ gender + age + genXage, data = d2)
## 
## $anova
##               SS  df    MS EtaSq     F     p
## Model      0.122   3 0.041 0.031 2.094 0.102
## Error      3.779 194 0.019    NA    NA    NA
## Corr Total 3.901 197 0.020    NA    NA    NA
## 
## $extras
##       RMSE AdjEtaSq
## Model 0.14    0.016
## 
## $coefficients
##                Est StErr      t SSR(3) EtaSq   tol CI_2.5 CI_97.5     p
## (Intercept)  0.011 0.011  1.001  0.020 0.005    NA -0.011   0.033 0.318
## gender      -0.007 0.022 -0.301  0.002 0.000 0.934 -0.051   0.037 0.763
## age         -0.019 0.022 -0.841  0.014 0.004 0.798 -0.063   0.025 0.401
## genXage      0.108 0.044  2.424  0.114 0.029 0.838  0.020   0.195 0.016
## 
## $na.action
## 32 
## 32 
## attr(,"class")
## [1] "omit"

C. Interpret each of the coefficients in terms of the means involved

describe(d2$cueChoiceProp[d2$gender == 1/2 & d2$age == 1/2]) #female/18 = .53

##    vars  n mean   sd median trimmed  mad  min max range skew kurtosis   se
## X1    1 82 0.53 0.14   0.52    0.51 0.12 0.28   1  0.72 1.27     2.44 0.02

describe(d2$cueChoiceProp[d2$gender == -1/2 & d2$age == 1/2]) #male/18 = .48

##    vars  n mean   sd median trimmed  mad  min  max range  skew kurtosis
## X1    1 22 0.48 0.12   0.52    0.48 0.09 0.24 0.68  0.44 -0.53    -0.77
##      se
## X1 0.03

describe(d2$cueChoiceProp[d2$gender == -1/2 & d2$age == -1/2]) #male/19+  = .55

##    vars  n mean   sd median trimmed  mad  min max range skew kurtosis   se
## X1    1 39 0.55 0.17   0.56    0.54 0.12 0.16   1  0.84 0.48     0.92 0.03

describe(d2$cueChoiceProp[d2$gender == 1/2 & d2$age == -1/2]) #female/19+ = .49

##    vars  n mean   sd median trimmed  mad  min  max range skew kurtosis
## X1    1 55 0.49 0.12   0.48    0.49 0.12 0.24 0.88  0.64 0.35     0.55
##      se
## X1 0.02

ANSWER:

The intercept, .011, is the “mean of group means” proportion for choosing the cued face, meaning it is the average of the four groups (female 18, female 19+, male 18, and male 19+). So, on average across all groups participants choose the cued face 1.1% higher than average.

The gender predictor, -.007, is the difference between the average proportion for choosing the cued face for male and female groups such that females chose the cued face on average .7% of the time less than males.

the age predictor, -.019, is the difference between the 18 year old group and the 19+ year old group for choosing the cued face. this means that on average 18 year olds chose the cued face 1.9% less often than the group of 19+ year old participants.

This is the difference of differences or the interaction predictor, .108, is the change in the difference between the mean for the 18 year old group and mean for the 19+ year old group as you move from male to females.

D. Choose one simple effect to test. Test this simple effect first by hand. Next, recode one of your variables to use R to test the simple effect.

ANSWER

simple effect: is there is signficant difference between males’ proportion of choosing cued face over the 25 trials for 18 year olds and 19+ year olds?

.55 - .48 = .07 the difference in proportion for choosing cued face between 19+ males (M = .55) and 18 year old males (M = .48) is .07.

dummy codes: gender: M = 0; F = 1 age: 18 = 1/2; 19+ = -1/2

#Simple effect of age in the male condition

d2$gender.d <- ifelse(d2$Q1 == "female", 1, 0)
d2$gen2xage <- d2$gender.d*d2$age


model2 <- lm(cueChoiceProp_.5 ~ gender.d + age + gen2xage, data = d2)
mcSummary(model2)

## $call
## lm(formula = cueChoiceProp_.5 ~ gender.d + age + gen2xage, data = d2)
## 
## $anova
##               SS  df    MS EtaSq     F     p
## Model      0.122   3 0.041 0.031 2.094 0.102
## Error      3.779 194 0.019    NA    NA    NA
## Corr Total 3.901 197 0.020    NA    NA    NA
## 
## $extras
##       RMSE AdjEtaSq
## Model 0.14    0.016
## 
## $coefficients
##                Est StErr      t SSR(3) EtaSq   tol CI_2.5 CI_97.5     p
## (Intercept)  0.014 0.019  0.778  0.012 0.003    NA -0.022   0.051 0.438
## gender.d    -0.007 0.022 -0.301  0.002 0.000 0.934 -0.051   0.037 0.763
## age         -0.073 0.037 -1.951  0.074 0.019 0.285 -0.146   0.001 0.053
## gen2xage     0.108 0.044  2.424  0.114 0.029 0.296  0.020   0.195 0.016
## 
## $na.action
## 32 
## 32 
## attr(,"class")
## [1] "omit"

HW 15: Categorical Interactions

Dani Grant

1/28/2020

Question 1: NURSERY Data

A. Specify your contrast codes. Say IN WORDS what question each code asks. Note: There is an obvious ordering of the levels of the COLLEGE variable; you should examine a linear trend.

B. Run a regression in which your contrast codes predict PEABODY scores. Because the cell counts are so unequal, make sure to examine the tolerances. What do the tolerances tell you?

C. Interpret the slopes for the following effects in terms of the means involved

i. Quadratic effect of college

ii. Nursery X linear effect of college

iii. Nursery X Quadratic effect of college

E. Test the simple effect of NURSERY on PEABODY, when COLLEGE = none.

F. Write a brief paragraph in the style you would use for writing a journal article that summarizes the results of your analysis. Include means and a chart to help interpret the results. Look at the R code for relatively simple ways to generate both (feel free to make the chart more sophisiticated).

Question 2: Your Dataset

A. Specify your codes

B. Run the analysis

C. Interpret each of the coefficients in terms of the means involved

D. Choose one simple effect to test. Test this simple effect first by hand. Next, recode one of your variables to use R to test the simple effect.