AwarenessRefinedReport-20130917

# Read the raw 'Pre' and 'Post' survey exports. The literal "NA", empty cells
# and single-space cells are all treated as missing values. The argument is
# spelled out as `na.strings` rather than relying on partial matching.
pre.raw <- read.csv("20130614-CC2013Pre-raw.csv", na.strings = c("NA", "", " "))
post.raw <- read.csv("20130614-CC2013Post-raw.csv", na.strings = c("NA", "", 
    " "))

# Stack the pre and post responses into one long table. Columns 1-19, 38 and
# 39 of each raw table are dropped; every row is tagged with its survey wave
# ('time') and with the matching key taken from names(best.matches).
# NOTE(review): the same index vector best.matches selects rows in BOTH raw
# tables -- confirm that this is the intended pairing.
dropped.cols <- c(1:19, 38, 39)
pre.rows <- data.frame(pre.raw[best.matches, -dropped.cols], time = "pre", 
    key = names(best.matches))
post.rows <- data.frame(post.raw[best.matches, -dropped.cols], time = "post", 
    key = names(best.matches))
AwarenessData <- rbind(pre.rows, post.rows)
dim(AwarenessData)
## [1] 306  20


################################# score the agreement items (columns 1-9) numerically
# 'Strongly Disagree' = 0, 'Disagree' = 1, 'Neutral' = 2, 'Agree' = 3,
# 'Strongly Agree' = 4; any other value becomes NA
likert.levels <- c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")
AwarenessData[1:9] <- lapply(AwarenessData[1:9], function(answers) {
    as.numeric(ordered(answers, levels = likert.levels)) - 1
})

# the 'negatively worded' items are reverse-scored (0 <-> 4, 1 <-> 3)
neg.connotations <- c(1, 3)
AwarenessData[neg.connotations] <- 4 - AwarenessData[neg.connotations]

# in the yes/no section (columns 10-18) answers are first coded
# 'No' = 0, "Don't Know" = 1, 'Yes' = 2; any other value becomes NA
yes.no.levels <- c("No", "Don't Know", "Yes")
AwarenessData[10:18] <- lapply(AwarenessData[10:18], function(x) {
    as.numeric(ordered(x, levels = yes.no.levels)) - 1
})

# the coding is then reversed (0 <-> 2) for the three items matched below --
# presumably the statements whose correct answer is 'No' -- so that every item
# ends up scored wrong answer = 0, don't know = 1, right answer = 2
# (this is the 0-2 range that max.k = 4 + 9 * 2 relies on further down)
reversed.items <- c(grep("Access.to.foreign", names(AwarenessData)), grep("Increase.in.cash.reserves", 
    names(AwarenessData)), grep("Decrease.in.cost.of.staffing", names(AwarenessData)))
AwarenessData[reversed.items] <- 2 - AwarenessData[reversed.items]

############################ column positions of the items in each sub-section of 'awareness'
action.potential <- c(2, 5)  # would attend a future workshop?, knows how to help?
empathy <- c(3, 4, 6:9)  # money spending issues, then the 'gender diversity benefits the society...' qs
knowledge <- c(1, 10:18)  # 'i feel underinformed', + the actual knowledge questions

# rename every item after its sub-section: a1, a2 / e1..e6 / k1..k10
names(AwarenessData)[action.potential] <- paste0("a", seq_along(action.potential))
names(AwarenessData)[empathy] <- paste0("e", seq_along(empathy))
names(AwarenessData)[knowledge] <- paste0("k", seq_along(knowledge))


########################### add the 'knowledge quiz' score and other partial totals
# (an earlier variant, left commented out in the original, summed all items:
#  AwarenessData <- data.frame(AwarenessData, TOT = rowSums(AwarenessData[,-c(19,20)])) )
# rowSums() is called without na.rm, so a missing item makes that total NA.
quiz.items <- paste0("k", 2:10)
empathy.items <- paste0("e", seq_along(empathy))
AwarenessData <- data.frame(AwarenessData, k.quiz = rowSums(AwarenessData[, quiz.items]))
AwarenessData <- data.frame(AwarenessData, a.tot = rowSums(AwarenessData[, c("a1", "a2")]))
AwarenessData <- data.frame(AwarenessData, e.tot = rowSums(AwarenessData[, empathy.items]))
AwarenessData <- data.frame(AwarenessData, k.tot = rowSums(AwarenessData[, c("k1", "k.quiz")]))

# highest attainable score of each part: 4 points per agreement item,
# 2 points per quiz item
max.a <- 4 * length(action.potential)
max.e <- 4 * length(empathy)
max.k <- 4 + 2 * 9

# the total weights the three parts equally: each part is rescaled to 0-10,
# so the maximum total is 30 points
part.weights <- 1 / c(max.a, max.e, max.k)
part.totals <- as.matrix(AwarenessData[, c("a.tot", "e.tot", "k.tot")])
AwarenessData <- data.frame(AwarenessData, TOT = 10 * part.totals %*% part.weights)


# change in every score (post minus pre); columns 19 and 20 are left out of
# the subtraction. The two halves are subtracted row by row in stored order.
score.cols <- names(AwarenessData)[-c(19, 20)]
post.scores <- subset(AwarenessData, time == "post", select = score.cols)
pre.scores <- subset(AwarenessData, time == "pre", select = score.cols)
Awareness.diff <- post.scores - pre.scores
###################### all scores combined descriptive statistics
# location of the change in total score, ignoring rows where TOT is NA
# (TRUE is spelled out: T is an ordinary variable that can be reassigned)
mean(Awareness.diff$TOT, na.rm = TRUE)
## [1] 1.142
median(Awareness.diff$TOT, na.rm = TRUE)  # more robust measure of location
## [1] 1.364
# single boxplot of the change in total score
qplot(x = 1, y = TOT, data = Awareness.diff, geom = "boxplot")

plot of chunk Awareness

# histogram of the change (post minus pre) in total score
qplot(x = TOT, data = Awareness.diff, geom = "histogram")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
## this.

plot of chunk Awareness


# mean change of every item and every partial total, NAs dropped per column
colMeans(Awareness.diff, na.rm = TRUE)
##       k1       a1       e1       e2       a2       e3       e4       e5 
##  0.42017  0.07563  0.20339  0.04202  0.52101  0.12605  0.04202 -0.03361 
##       e6       k2       k3       k4       k5       k6       k7       k8 
## -0.03361 -0.22881  0.07692  0.04202 -0.02542  0.16807  0.05042  0.11017 
##       k9      k10   k.quiz    a.tot    e.tot    k.tot      TOT 
##  0.06723 -0.02521  0.20175  0.59664  0.35593  0.61404  1.14173

####################### do Finite Population Inference, assuming missing at random responses
library(survey)
## Attaching package: 'survey'
## 
## The following object is masked from 'package:ipred':
## 
## cv
## 
## The following object is masked from 'package:graphics':
## 
## dotchart
# design without clusters (ids = ~0); fpc gives the population size, N = 300,
# for every row so that the finite population correction is applied
survey.Awareness <- svydesign(ids = ~0, data = Awareness.diff, fpc = rep(300, nrow(Awareness.diff)))
# 99% confidence interval for the mean change in total score
confint(svymean(~TOT, survey.Awareness, na.rm = TRUE), level = 0.99, df = degf(survey.Awareness))
##      0.5 % 99.5 %
## TOT 0.4499  1.834
# compare with t.test, basically the same
# NOTE(review): this call uses t.test's default 95% level and applies no
# finite population correction, whereas the survey interval above is a 99%
# interval with fpc -- the two intervals are not like-for-like.
t.test(x = Awareness.diff$TOT)
## 
##  One Sample t-test
## 
## data:  Awareness.diff$TOT
## t = 3.01, df = 112, p-value = 0.003229
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
##  0.3902 1.8933
## sample estimates:
## mean of x 
##     1.142

# how about the different components? Because several intervals are examined
# at once, each one is built at the 99.9% level (alpha = 0.001, not 0.01).
confint(svymean(~a.tot, survey.Awareness, na.rm = TRUE), level = 0.999, df = degf(survey.Awareness))
##       0.05 % 99.95 %
## a.tot 0.2012   0.992
confint(svymean(~e.tot, survey.Awareness, na.rm = TRUE), level = 0.999, df = degf(survey.Awareness))
##        0.05 % 99.95 %
## e.tot -0.6855   1.397
confint(svymean(~k.tot, survey.Awareness, na.rm = TRUE), level = 0.999, df = degf(survey.Awareness))
##       0.05 % 99.95 %
## k.tot 0.1183    1.11
confint(svymean(~k.quiz, survey.Awareness, na.rm = TRUE), level = 0.999, df = degf(survey.Awareness))
##         0.05 % 99.95 %
## k.quiz -0.2291  0.6326

Summary

The sample size was n=153.

The Awareness measure combines its 3 parts with equal weighting and is then scaled to a maximum of 30 points for easy interpretation.

To put things into perspective: in the validity studies, the sample average score for non-experts was found to be 17.67, while the experts scored on average 23.27 points. This is a difference of ~6 points.

Statistical Significance

The total score

Note that we in fact have a “finite population” on which we wish to infer change in awareness. The recorded number of participants is N=300. The rationale for finite population inference and adjustments is that a sample size of n=153 out of 300 participants (~50%) gives us more confidence about our statistic than if we had captured only 1% of the participants.

The 99% confidence interval for the mean change in total score is: (0.45, 1.83)

The exact p-val << 0.001

Looking at the sub-components of awareness

The results in this section are adjusted for “data fishing” as best as we can, by constructing 99.9% confidence intervals.