# Read the raw "Pre" and "Post" survey CSV files. Cells that are empty, hold a
# single space, or hold the literal text "NA" are treated as missing values.
# The argument is spelled `na.strings` in full; the shorter `na.string` only
# worked through partial argument matching.
pre.raw <- read.csv("20130614-CC2013Pre-raw.csv",
  na.strings = c("NA", "", " "))
post.raw <- read.csv("20130614-CC2013Post-raw.csv",
  na.strings = c("NA", "", " "))
# Stack the pre and post responses into one long table. For each raw file the
# rows are selected with `best.matches` and columns 1-19, 38 and 39 are
# dropped; `time` labels the source file and `key` holds names(best.matches).
# NOTE(review): the same `best.matches` index is applied to both pre.raw and
# post.raw -- confirm this is the intended row pairing.
AwarenessData <- do.call(rbind, Map(
  function(raw, label) {
    data.frame(raw[best.matches, -c(1:19, 38, 39)],
      time = label, key = names(best.matches))
  },
  list(pre.raw, post.raw), c("pre", "post")
))
dim(AwarenessData)
## [1] 306 20
################################# make columns appropriate factors
# Likert items (columns 1-9): score the five agreement labels as 0..4, from
# "Strongly Disagree" (0) up to "Strongly Agree" (4). Any response that is not
# one of these labels becomes NA.
AwarenessData[1:9] <- lapply(AwarenessData[1:9], function(x) {
  as.numeric(ordered(x, levels = c("Strongly Disagree", "Disagree", "Neutral",
    "Agree", "Strongly Agree"))) - 1
})
# some columns were 'negatively worded', they need to be flipped (x -> 4 - x)
neg.connotations <- c(1, 3)
AwarenessData[, neg.connotations] <- 4 - AwarenessData[, neg.connotations]
# in the yes/no section (columns 10-18): No = 0, Don't Know = 1, Yes = 2.
# Each column is converted on its own, so the data frame is never coerced to
# a character matrix first.
AwarenessData[10:18] <- lapply(AwarenessData[10:18], function(x) {
  as.numeric(ordered(x, levels = c("No", "Don't Know", "Yes"))) - 1
})
# Reverse the scale (x -> 2 - x) for the three items matched below, so that
# "No" scores 2 and "Yes" scores 0 on them; presumably these are the items
# whose correct answer is "No" -- confirm against the questionnaire.
# The column positions are looked up once and reused on both sides.
reversed.items <- c(
  grep("Access.to.foreign", names(AwarenessData)),
  grep("Increase.in.cash.reserves", names(AwarenessData)),
  grep("Decrease.in.cost.of.staffing", names(AwarenessData))
)
AwarenessData[, reversed.items] <- 2 - AwarenessData[, reversed.items]
############################ define the questions that belong to each sub-section of 'awareness'
# future attend workshop?, knows how to help?
action.potential <- c(2, 5)
# money spending issues, followed by the 'gender diversity benefits the
# society...' qs
empathy <- c(3, 4, 6:9)
# i feel underinformed, + actual knowledge
knowledge <- c(1, 10:18)
# Rename the columns of each sub-section to a one-letter prefix plus a running
# index: a1..a2 (action potential), e1..e6 (empathy), k1..k10 (knowledge).
names(AwarenessData)[action.potential] <-
  paste0("a", seq_along(action.potential))
names(AwarenessData)[empathy] <- paste0("e", seq_along(empathy))
names(AwarenessData)[knowledge] <- paste0("k", seq_along(knowledge))
########################### add the 'knowledge quiz' score and other partial totals
# (disabled earlier variant, kept for reference:)
# AwarenessData <- data.frame(AwarenessData, TOT = rowSums(AwarenessData[,-c(19,20)]))
# Row sums per sub-section: k.quiz over k2..k10, a.tot over a1..a2 and e.tot
# over all empathy items. A row with any NA among the summed items gets NA.
AwarenessData <- data.frame(AwarenessData,
  k.quiz = rowSums(AwarenessData[, paste0("k", 2:10)]),
  a.tot = rowSums(AwarenessData[, c("a1", "a2")]),
  e.tot = rowSums(AwarenessData[, paste0("e", seq_along(empathy))]))
# k.tot needs k.quiz to exist already, hence the separate step.
AwarenessData <- data.frame(AwarenessData,
  k.tot = rowSums(AwarenessData[, c("k1", "k.quiz")]))
# Highest attainable score of each part: 4 points per Likert item, and for the
# knowledge part one Likert item (4) plus nine quiz items worth 2 points each.
max.a <- 4 * length(action.potential)
max.e <- 4 * length(empathy)
max.k <- 4 + 9 * 2
# construct the total as an equal weighting of all parts, scaled to a
# maximum of 30 points (each part contributes at most 10)
AwarenessData <- data.frame(AwarenessData,
  TOT = 10 * as.matrix(AwarenessData[, c("a.tot", "e.tot", "k.tot")]) %*%
    c(1 / max.a, 1 / max.e, 1 / max.k))
# Change in every score column (post minus pre). Columns 19 and 20 are left
# out before subtracting. The subtraction is positional, so it relies on the
# "post" rows and the "pre" rows being stored in the same order.
scores.at <- function(when) {
  AwarenessData[AwarenessData$time == when, -c(19, 20)]
}
Awareness.diff <- scores.at("post") - scores.at("pre")
###################### all scores combined descriptive statistics
# Location of the change in total score (post - pre); missing values are
# dropped. TRUE is written out because `T` is an ordinary, reassignable
# variable.
mean(Awareness.diff$TOT, na.rm = TRUE)
## [1] 1.142
median(Awareness.diff$TOT, na.rm = TRUE) # more robust measure of location
## [1] 1.364
# Distribution of the change in total score: boxplot, then histogram.
# NOTE(review): qplot() is not defined in this section; presumably ggplot2 is
# attached earlier in the script -- confirm.
qplot(x = 1, y = TOT, data = Awareness.diff, geom = "boxplot")
qplot(x = TOT, data = Awareness.diff, geom = "histogram")
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust
## this.
# Mean change for every item and every partial total, ignoring NAs.
colMeans(Awareness.diff, na.rm = TRUE)
## k1 a1 e1 e2 a2 e3 e4 e5
## 0.42017 0.07563 0.20339 0.04202 0.52101 0.12605 0.04202 -0.03361
## e6 k2 k3 k4 k5 k6 k7 k8
## -0.03361 -0.22881 0.07692 0.04202 -0.02542 0.16807 0.05042 0.11017
## k9 k10 k.quiz a.tot e.tot k.tot TOT
## 0.06723 -0.02521 0.20175 0.59664 0.35593 0.61404 1.14173
####################### do Finite Population Inference, assuming missing at random responses
library(survey)
## Attaching package: 'survey'
##
## The following object is masked from 'package:ipred':
##
## cv
##
## The following object is masked from 'package:graphics':
##
## dotchart
# Design without clusters (~0); every row carries a population size of 300 for
# the finite population correction.
# NOTE(review): the fpc vector has one entry per row of Awareness.diff,
# including rows whose score is NA and later dropped by na.rm = TRUE --
# confirm this is the intended sampling fraction.
survey.Awareness <- svydesign(~0, data = Awareness.diff,
  fpc = rep(300, nrow(Awareness.diff)))
# 99% confidence interval for the mean change in total score
confint(svymean(~TOT, survey.Awareness, na.rm = TRUE), level = 0.99,
  df = degf(survey.Awareness))
## 0.5 % 99.5 %
## TOT 0.4499 1.834
# compare with t.test: similar, but note that t.test reports its default 95%
# interval (not 99%) and applies no finite population correction
t.test(x = Awareness.diff$TOT)
##
## One Sample t-test
##
## data: Awareness.diff$TOT
## t = 3.01, df = 112, p-value = 0.003229
## alternative hypothesis: true mean is not equal to 0
## 95 percent confidence interval:
## 0.3902 1.8933
## sample estimates:
## mean of x
## 1.142
# how about for the different components? confidence level raised to 0.999
# (i.e. alpha = 0.001) because several intervals are examined
confint(svymean(~a.tot, survey.Awareness, na.rm = TRUE), level = 0.999,
  df = degf(survey.Awareness))
## 0.05 % 99.95 %
## a.tot 0.2012 0.992
confint(svymean(~e.tot, survey.Awareness, na.rm = TRUE), level = 0.999,
  df = degf(survey.Awareness))
## 0.05 % 99.95 %
## e.tot -0.6855 1.397
confint(svymean(~k.tot, survey.Awareness, na.rm = TRUE), level = 0.999,
  df = degf(survey.Awareness))
## 0.05 % 99.95 %
## k.tot 0.1183 1.11
confint(svymean(~k.quiz, survey.Awareness, na.rm = TRUE), level = 0.999,
  df = degf(survey.Awareness))
## 0.05 % 99.95 %
## k.quiz -0.2291 0.6326
The sample size was n=153.
The Awareness measure was constructed by giving equal weight to each of its 3 parts, then scaled to a maximum of 30 points for easy interpretation.
To put things into perspective, in the validity studies the sample average score for non-experts was found to be 17.67, while experts scored 23.27 points on average. This is a difference of ~6 points.
Note that we in fact have a “finite population” on which we wish to infer change in awareness. The recorded number of participants is N=300. The rationale for finite population inference and adjustments is that a sample size of n=153 out of 300 participants (~50%) gives us more confidence in our statistic than if we had only captured 1% of the participants.
The 99% confidence interval for the mean change in total score is: (0.45, 1.83)
The exact p-value is << 0.001.
The results in this section are adjusted for “data fishing” as best as we can, by constructing 99.9% confidence intervals.
The 99.9% confidence interval for the change in “action potential” component of awareness of the values of gender diversity is: (0.2, 0.99)
The 99.9% confidence interval for the change in “empathy” component of awareness of the values of gender diversity is: (-0.69, 1.4)
The 99.9% confidence interval for the change in “knowledge” component of awareness of the values of gender diversity is: (0.12, 1.11)
The 99.9% confidence interval for the change in “knowledge of the business case” for the values of gender diversity is: (-0.23, 0.63)