Standardizing

We start by simulating some data. 2000 observations, equally allocated to treatment and control. 3 ordinal variables with 4 response options.

We want to show a treatment effect, so we use the MultiOrd package to simulate data for treatment and control observations separately. In both simulations, variables are set to correlate at 0.4. Treatment observations are simulated in such a way that total scores will be greater for treatment observations than for control observations. The probabilities of each response option for the TREATMENT simulation are as follows:

item 1: 0.1, 0.2, 0.4, 0.3

item 2: 0.1, 0.1, 0.1, 0.7

item 3: 0.1, 0.4, 0.4, 0.1

Notice that item 2 is left-skewed for treatment observations.

The probabilities of each response option for the CONTROL simulation are as follows:

item 1: 0.3, 0.2, 0.4, 0.1

item 2: 0.1, 0.2, 0.2, 0.5

item 3: 0.2, 0.4, 0.3, 0.1

The simulation code is hidden, but we can summarize the resulting dataset.

  library(likert)
  myitems <- mydata[,2:4]
  myitems <- data.frame(lapply(myitems, factor))
  dat.likert <- likert(myitems, grouping = factor(mydata$trt,
                                                  levels=c(0,1),
                                                  labels=c("control",
                                                           "treat")))
  summary(dat.likert)

##     Group  Item  low neutral high  mean     sd
## 1 control item1 49.4       0 50.6 2.320 0.9770
## 2 control item2 30.1       0 69.9 3.059 1.0875
## 3 control item3 60.8       0 39.2 2.284 0.8967
## 4   treat item1 31.5       0 68.5 2.885 0.9416
## 5   treat item2 22.3       0 77.7 3.352 1.0711
## 6   treat item3 51.2       0 48.8 2.474 0.8210

  plot(dat.likert)

plot of chunk plot-ctr

One method of creating a composite score is to just sum. There is no missing data, so we can just sum across columns.

  library(psych)
  mydata$sum <- rowSums(mydata[,2:4])
  describeBy(mydata$sum, group=mydata$trt)

## group: 0
##   vars    n mean   sd median trimmed  mad min max range  skew kurtosis
## 1    1 1000 7.66 2.29      8    7.72 2.97   3  12     9 -0.19    -1.09
##     se
## 1 0.07
## -------------------------------------------------------- 
## group: 1
##   vars    n mean  sd median trimmed  mad min max range  skew kurtosis   se
## 1    1 1000 8.71 2.2      9     8.9 1.48   3  12     9 -0.66    -0.63 0.07

Another method is to average so that the score remains on the same metric as the input variables. This is possible since the inputs share the same response scale.

  mydata$avg <- rowSums(mydata[,2:4])/3
  describeBy(mydata$avg, group=mydata$trt)

## group: 0
##   vars    n mean   sd median trimmed  mad min max range  skew kurtosis
## 1    1 1000 2.55 0.76   2.67    2.57 0.99   1   4     3 -0.19    -1.09
##     se
## 1 0.02
## -------------------------------------------------------- 
## group: 1
##   vars    n mean   sd median trimmed  mad min max range  skew kurtosis
## 1    1 1000  2.9 0.73      3    2.97 0.49   1   4     3 -0.66    -0.63
##     se
## 1 0.02

Finally, we can standardize each input before summing and then standardizing the sum.

  mydata[,2:4] <- scale(mydata[,2:4])
  mydata$stand <- rowSums(mydata[,2:4])
  mydata$stand <- scale(mydata$stand)
  describeBy(mydata$stand, group=mydata$trt)

## INDICES: 0
##    vars    n  mean   sd median trimmed  mad   min  max range  skew
## V1    1 1000 -0.23 0.99  -0.13   -0.21 1.37 -2.23 1.71  3.95 -0.12
##    kurtosis   se
## V1     -1.1 0.03
## -------------------------------------------------------- 
## INDICES: 1
##    vars    n mean   sd median trimmed  mad   min  max range  skew kurtosis
## V1    1 1000 0.23 0.95    0.4     0.3 0.88 -2.23 1.71  3.95 -0.58    -0.71
##      se
## V1 0.03

Now we can look at the results of regressions of our DVs on assignment to treatment.

  library(arm)
# sum
  m.sum <- lm(sum ~ trt, data=mydata)
  summary(m.sum)

## 
## Call:
## lm(formula = sum ~ trt, data = mydata)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -5.711 -1.663  0.337  2.289  4.337 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    7.663      0.071   108.0   <2e-16 ***
## trt            1.048      0.100    10.4   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.24 on 1998 degrees of freedom
## Multiple R-squared:  0.0517, Adjusted R-squared:  0.0513 
## F-statistic:  109 on 1 and 1998 DF,  p-value: <2e-16

  sum.trt <- coefficients(m.sum)[2]
  sum.csd <- sd(mydata$sum[mydata$trt==0]) 
  sum.glass <- round(sum.trt/sum.csd, 3)
# avg
  m.avg <- lm(avg ~ trt, data=mydata)
  summary(m.avg)

## 
## Call:
## lm(formula = avg ~ trt, data = mydata)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -1.904 -0.554  0.112  0.763  1.446 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.5543     0.0237   108.0   <2e-16 ***
## trt           0.3493     0.0335    10.4   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.748 on 1998 degrees of freedom
## Multiple R-squared:  0.0517, Adjusted R-squared:  0.0513 
## F-statistic:  109 on 1 and 1998 DF,  p-value: <2e-16

  avg.trt <- coefficients(m.avg)[2]
  avg.csd <- sd(mydata$avg[mydata$trt==0]) 
  avg.glass <- round(avg.trt/avg.csd, 3)
# stand
  m.stand <- lm(stand ~ trt, data=mydata)
  summary(m.stand)

## 
## Call:
## lm(formula = stand ~ trt, data = mydata)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -2.460 -0.784  0.127  0.948  1.938 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.2255     0.0308   -7.32  3.7e-13 ***
## trt           0.4509     0.0436   10.35  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.974 on 1998 degrees of freedom
## Multiple R-squared:  0.0509, Adjusted R-squared:  0.0504 
## F-statistic:  107 on 1 and 1998 DF,  p-value: <2e-16

  stand.trt <- coefficients(m.stand)[2]
  stand.csd <- sd(mydata$stand[mydata$trt==0]) 
  stand.glass <- round(stand.trt/stand.csd, 3)

The standardized treatment effects for the sum and average are of course equal: 0.459 and 0.459. The value we get for the standardized DV with standardized inputs is very similar: 0.453.