Preprocessing
# Load the pre- and post-course CLASS output tables.
# stringsAsFactors = TRUE is set explicitly: the levels() call further down
# expects `grade` to be a factor, which read.csv() only produced by default
# before R 4.0.0. On older R versions this is a no-op.
CLASS_output_pre <- read.csv("CLASS_output_pre.csv", stringsAsFactors = TRUE)
CLASS_output_post <- read.csv("CLASS_output_post.csv", stringsAsFactors = TRUE)
library(plyr)
# Shorten the two column names used throughout the analysis
# (rename() takes a named vector of old = new pairs).
CLASS_output_pre <- rename(
  CLASS_output_pre,
  c("Gender" = "gender", "Letter.Grade" = "grade")
)
CLASS_output_post <- rename(
  CLASS_output_post,
  c("Gender" = "gender", "Letter.Grade" = "grade")
)
# Levels as read: alphabetical, i.e. not in grade order.
levels(CLASS_output_pre$grade)
## [1] "A " "A-" "A+" "B " "B-" "B+" "C " "C-" "C+" "D " "F "
# Re-code grade as an ordered factor running from F (lowest) to A+ (highest).
# The labels keep the trailing space that the single-character grades carry.
CLASS_output_pre$grade <- ordered(
  CLASS_output_pre$grade,
  levels = c("F ", "D ", "C-", "C ", "C+", "B-", "B ", "B+", "A-", "A ", "A+")
)
CLASS_output_post$grade <- ordered(
  CLASS_output_post$grade,
  levels = c("F ", "D ", "C-", "C ", "C+", "B-", "B ", "B+", "A-", "A ", "A+")
)
levels(CLASS_output_pre$grade)
## [1] "F " "D " "C-" "C " "C+" "B-" "B " "B+" "A-" "A " "A+"
library(ggplot2)
library(RColorBrewer)
library(scales)
library(gridExtra)
Combine pre and post
# Join the pre and post tables on student ID, gender and grade (merge() keeps
# only rows present in both). Columns shared by the two tables get merge()'s
# default suffixes: ".x" for pre and ".y" for post.
combined <- merge(
  CLASS_output_pre,
  CLASS_output_post,
  by = c("studentdata.uniqueID", "gender", "grade")
)
# Post-minus-pre change in the overall favourable / unfavourable scores.
combined[["diff_fav"]] <-
  combined[["percent_favourable.y"]] - combined[["percent_favourable.x"]]
combined[["diff_unfav"]] <-
  combined[["un_percent_favourable.y"]] - combined[["un_percent_favourable.x"]]
Grades vs. Gender

Chi-square test of independence: does the distribution of grades differ between genders?
# Cross-tabulate gender against grade in the pre-course table, then test
# the two for independence.
tbl <- table(
  CLASS_output_pre[["gender"]],
  CLASS_output_pre[["grade"]]
)
chisq.test(tbl)
##
## Pearson's Chi-squared test
##
## data: tbl
## X-squared = 52.415, df = 10, p-value = 9.56e-08
Histogram of Percent_favourable
library(reshape2)
# Pre (.x) and post (.y) overall percent-favourable scores from the merged
# table.
combined_fav <- combined[, c("percent_favourable.x", "percent_favourable.y")]
# Row identifier for melt(). Derived from the data rather than hard-coded
# (previously seq(1, 695)) so the assignment keeps working if the number of
# merged rows changes.
combined_fav$id <- seq_len(nrow(combined_fav))
names(combined_fav) <- c("pre_percent_favorable", "post_percent_favorable", "id")
# Long format: one row per (id, pre/post variable) pair.
combined_fav_reshape <- melt(combined_fav, id.vars = "id")
# Density-scaled histogram with a density-curve overlay, one panel per
# variable (pre / post).
ggplot(combined_fav_reshape, aes(x = value)) +
  geom_histogram(aes(y = ..density..), colour = "black", fill = "white") +
  geom_density(fill = "light blue", alpha = 0.2) +
  facet_wrap(~variable)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# Overlay the pre and post percent-favourable densities on a single panel,
# one coloured curve per variable.
ggplot(combined_fav_reshape, aes(x = value)) +
  geom_density(aes(group = variable, colour = variable))

Histogram of Percent_unfavourable
# Pre (.x) and post (.y) overall percent-unfavourable scores from the merged
# table.
combined_unfav <- combined[, c("un_percent_favourable.x", "un_percent_favourable.y")]
# Row identifier for melt(). Derived from the data rather than hard-coded
# (previously seq(1, 695)) so the assignment keeps working if the number of
# merged rows changes.
combined_unfav$id <- seq_len(nrow(combined_unfav))
names(combined_unfav) <- c("pre_percent_unfavorable", "post_percent_unfavorable", "id")
# Long format: one row per (id, pre/post variable) pair.
combined_unfav_reshape <- melt(combined_unfav, id.vars = "id")
# Density-scaled histogram with a density-curve overlay, one panel per
# variable (pre / post).
ggplot(combined_unfav_reshape, aes(x = value)) +
  geom_histogram(aes(y = ..density..), colour = "black", fill = "white") +
  geom_density(fill = "light blue", alpha = 0.2) +
  facet_wrap(~variable)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

# Overlay the pre and post percent-unfavourable densities on a single panel,
# one coloured curve per variable.
ggplot(combined_unfav_reshape, aes(x = value)) +
  geom_density(aes(group = variable, colour = variable))

Percent_favourable & Percent_unfavourable vs. Grades
# Pre-course table: box plots of the favourable and unfavourable scores per
# grade, drawn side by side.
pre_fav_grade <- ggplot(CLASS_output_pre, aes(x = grade, y = percent_favourable)) +
  geom_boxplot() +
  ylab("Percent favourable")
pre_unfav_grade <- ggplot(CLASS_output_pre, aes(x = grade, y = un_percent_favourable)) +
  geom_boxplot() +
  ylab("Percent unfavourable")
grid.arrange(
  pre_fav_grade,
  pre_unfav_grade,
  ncol = 2,
  top = "Distribution of grades before the course"
)

# Same pair of box plots for the post-course table.
post_fav_grade <- ggplot(CLASS_output_post, aes(x = grade, y = percent_favourable)) +
  geom_boxplot() +
  ylab("Percent favourable")
post_unfav_grade <- ggplot(CLASS_output_post, aes(x = grade, y = un_percent_favourable)) +
  geom_boxplot() +
  ylab("Percent unfavourable")
grid.arrange(
  post_fav_grade,
  post_unfav_grade,
  ncol = 2,
  top = "Distribution of grades after the course"
)

ANOVA
# Before the class, favourable: one-way ANOVA of percent_favourable on grade.
aov.prefav <- aov(percent_favourable ~ grade, data = CLASS_output_pre)
summary(aov.prefav)
## Df Sum Sq Mean Sq F value Pr(>F)
## grade 10 3.137 0.31368 9.824 1.21e-15 ***
## Residuals 823 26.277 0.03193
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise t tests between grades, Bonferroni-adjusted.
pairwise.t.test(
  CLASS_output_pre$percent_favourable,
  CLASS_output_pre$grade,
  p.adjust.method = "bonferroni"
)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: CLASS_output_pre$percent_favourable and CLASS_output_pre$grade
##
## F D C- C C+ B- B B+ A-
## D 1.00000 - - - - - - - -
## C- 1.00000 1.00000 - - - - - - -
## C 0.26506 1.00000 0.59766 - - - - - -
## C+ 0.76026 1.00000 1.00000 1.00000 - - - - -
## B- 1.00000 1.00000 1.00000 1.00000 1.00000 - - - -
## B 0.00359 1.00000 0.01805 1.00000 1.00000 0.15197 - - -
## B+ 0.03702 1.00000 0.09135 1.00000 1.00000 0.45530 1.00000 - -
## A- 9.3e-07 1.00000 2.0e-05 0.44835 0.18645 0.00048 1.00000 1.00000 -
## A 3.0e-07 0.55575 5.4e-06 0.13543 0.05421 0.00013 1.00000 1.00000 1.00000
## A+ 1.5e-08 0.02281 1.4e-07 0.00249 0.00096 2.5e-06 0.05471 0.11542 1.00000
## A
## D -
## C- -
## C -
## C+ -
## B- -
## B -
## B+ -
## A- -
## A -
## A+ 1.00000
##
## P value adjustment method: bonferroni
# Before the class, unfavourable: one-way ANOVA of un_percent_favourable on
# grade.
aov.preunfav <- aov(un_percent_favourable ~ grade, data = CLASS_output_pre)
summary(aov.preunfav)
## Df Sum Sq Mean Sq F value Pr(>F)
## grade 10 1.465 0.14646 7.739 6.46e-12 ***
## Residuals 823 15.576 0.01893
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise t tests between grades, Bonferroni-adjusted.
pairwise.t.test(
  CLASS_output_pre$un_percent_favourable,
  CLASS_output_pre$grade,
  p.adjust.method = "bonferroni"
)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: CLASS_output_pre$un_percent_favourable and CLASS_output_pre$grade
##
## F D C- C C+ B- B B+ A-
## D 1.00000 - - - - - - - -
## C- 1.00000 1.00000 - - - - - - -
## C 0.13800 1.00000 0.78375 - - - - - -
## C+ 1.00000 1.00000 1.00000 1.00000 - - - - -
## B- 1.00000 1.00000 1.00000 1.00000 1.00000 - - - -
## B 0.00154 1.00000 0.02655 1.00000 1.00000 1.00000 - - -
## B+ 0.10365 1.00000 0.46758 1.00000 1.00000 1.00000 1.00000 - -
## A- 0.00017 0.70479 0.00450 1.00000 1.00000 0.76187 1.00000 1.00000 -
## A 1.4e-06 0.07434 7.4e-05 0.49968 0.06150 0.03517 1.00000 1.00000 1.00000
## A+ 2.2e-07 0.00447 4.9e-06 0.02272 0.00269 0.00156 0.33180 0.19867 1.00000
## A
## D -
## C- -
## C -
## C+ -
## B- -
## B -
## B+ -
## A- -
## A -
## A+ 1.00000
##
## P value adjustment method: bonferroni
# After the class, favourable: one-way ANOVA of percent_favourable on grade.
aov.postfav <- aov(percent_favourable ~ grade, data = CLASS_output_post)
summary(aov.postfav)
## Df Sum Sq Mean Sq F value Pr(>F)
## grade 10 4.728 0.4728 14.47 <2e-16 ***
## Residuals 894 29.200 0.0327
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise t tests between grades, Bonferroni-adjusted.
pairwise.t.test(
  CLASS_output_post$percent_favourable,
  CLASS_output_post$grade,
  p.adjust.method = "bonferroni"
)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: CLASS_output_post$percent_favourable and CLASS_output_post$grade
##
## F D C- C C+ B- B B+ A-
## D 1.00000 - - - - - - - -
## C- 1.00000 1.00000 - - - - - - -
## C 0.39792 1.00000 1.00000 - - - - - -
## C+ 0.01128 1.00000 1.00000 1.00000 - - - - -
## B- 0.17807 1.00000 1.00000 1.00000 1.00000 - - - -
## B 0.00090 1.00000 0.33119 1.00000 1.00000 1.00000 - - -
## B+ 7.9e-05 1.00000 0.04087 1.00000 1.00000 1.00000 1.00000 - -
## A- 9.5e-12 0.57769 1.7e-06 0.00098 0.01791 0.02079 0.19031 1.00000 -
## A 4.1e-13 0.05290 3.2e-08 2.0e-05 0.00041 0.00058 0.00575 0.23419 1.00000
## A+ 1.1e-14 0.00069 1.8e-10 8.1e-08 1.6e-06 2.4e-06 2.4e-05 0.00166 0.20537
## A
## D -
## C- -
## C -
## C+ -
## B- -
## B -
## B+ -
## A- -
## A -
## A+ 1.00000
##
## P value adjustment method: bonferroni
# After the class, unfavourable: one-way ANOVA of un_percent_favourable on
# grade.
aov.postunfav <- aov(un_percent_favourable ~ grade, data = CLASS_output_post)
summary(aov.postunfav)
## Df Sum Sq Mean Sq F value Pr(>F)
## grade 10 2.748 0.27480 13.75 <2e-16 ***
## Residuals 894 17.866 0.01998
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise t tests between grades, Bonferroni-adjusted.
pairwise.t.test(
  CLASS_output_post$un_percent_favourable,
  CLASS_output_post$grade,
  p.adjust.method = "bonferroni"
)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: CLASS_output_post$un_percent_favourable and CLASS_output_post$grade
##
## F D C- C C+ B- B B+ A-
## D 1.00000 - - - - - - - -
## C- 1.00000 1.00000 - - - - - - -
## C 0.20635 1.00000 1.00000 - - - - - -
## C+ 0.27660 1.00000 1.00000 1.00000 - - - - -
## B- 0.03074 1.00000 0.63732 1.00000 1.00000 - - - -
## B 2.0e-05 1.00000 0.00572 1.00000 1.00000 1.00000 - - -
## B+ 0.00016 1.00000 0.01475 1.00000 1.00000 1.00000 1.00000 - -
## A- 1.2e-10 0.82677 8.8e-07 0.00870 0.00242 0.29219 1.00000 1.00000 -
## A 2.2e-12 0.06747 1.1e-08 0.00014 3.4e-05 0.00845 0.13396 0.30155 1.00000
## A+ 8.9e-13 0.00298 7.4e-10 4.1e-06 1.1e-06 0.00022 0.00359 0.00933 0.51043
## A
## D -
## C- -
## C -
## C+ -
## B- -
## B -
## B+ -
## A- -
## A -
## A+ 1.00000
##
## P value adjustment method: bonferroni
CLASS fav/unfav distribution grouped by grades
# Pre-course table: jittered scatter of favourable against unfavourable
# score, coloured by gender, one panel per grade.
ggplot(
  CLASS_output_pre,
  aes(x = un_percent_favourable, y = percent_favourable, colour = gender)
) +
  geom_jitter(size = 1.25) +
  facet_wrap(~grade) +
  labs(x = "Percentage unfavourable", y = "Percentage favourable") +
  ggtitle("CLASS fav/unfav distribution before the course grouped by grades")

# Same plot for the post-course table.
ggplot(
  CLASS_output_post,
  aes(x = un_percent_favourable, y = percent_favourable, colour = gender)
) +
  geom_jitter(size = 1.25) +
  facet_wrap(~grade) +
  labs(x = "Percentage unfavourable", y = "Percentage favourable") +
  ggtitle("CLASS fav/unfav distribution after the course grouped by grades")

Distribution of difference between Pre/Post percentage favourable and percentage unfavourable
# Density-scaled histograms (with density-curve overlay) of the post-minus-pre
# change in the favourable and unfavourable scores, shown side by side.
fav <- ggplot(combined, aes(x = diff_fav)) +
  geom_histogram(aes(y = ..density..), colour = "black", fill = "white") +
  geom_density(fill = "light blue", alpha = 0.2)
unfav <- ggplot(combined, aes(x = diff_unfav)) +
  geom_histogram(aes(y = ..density..), colour = "black", fill = "white") +
  geom_density(fill = "light blue", alpha = 0.2)
grid.arrange(fav, unfav, ncol = 2)
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

Difference between Pre/Post percentage favourable and percentage unfavourable grouped by grades
# Change in favourable score against change in unfavourable score, coloured by
# gender, one panel per grade. A small explicit jitter (0.01 in each
# direction) is applied to the points.
ggplot(combined, aes(x = diff_unfav, y = diff_fav, colour = gender)) +
  geom_jitter(
    position = position_jitter(width = 0.01, height = 0.01),
    size = 1.25
  ) +
  facet_wrap(~grade) +
  labs(
    x = "Post percentage unfavourable - Pre percentage unfavourable",
    y = "Post percentage favourable - Pre percentage favourable"
  ) +
  ggtitle("Difference between Pre/Post percentage favourable and\n percentage unfavourable grouped by grades")

# Box plots of the same two differences per grade, side by side.
diff_fav_grade <- ggplot(combined, aes(x = grade, y = diff_fav)) +
  geom_boxplot() +
  ylab("Post percentage favourable - Pre percentage favourable")
diff_unfav_grade <- ggplot(combined, aes(x = grade, y = diff_unfav)) +
  geom_boxplot() +
  ylab("Post percentage unfavourable - Pre percentage unfavourable")
grid.arrange(
  diff_fav_grade,
  diff_unfav_grade,
  ncol = 2,
  top = "Difference between Pre/Post percentage favourable and percentage unfavourable grouped by grades"
)

ANOVA
# Post-minus-pre difference in percent favourable: one-way ANOVA of diff_fav
# on grade.
aov.difffav <- aov(diff_fav ~ grade, data = combined)
summary(aov.difffav)
## Df Sum Sq Mean Sq F value Pr(>F)
## grade 10 0.487 0.04873 2.714 0.00283 **
## Residuals 684 12.281 0.01795
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise t tests between grades, Bonferroni-adjusted.
pairwise.t.test(
  combined$diff_fav,
  combined$grade,
  p.adjust.method = "bonferroni"
)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: combined$diff_fav and combined$grade
##
## F D C- C C+ B- B B+ A- A
## D 1.00 - - - - - - - - -
## C- 1.00 1.00 - - - - - - - -
## C 1.00 1.00 1.00 - - - - - - -
## C+ 1.00 1.00 1.00 1.00 - - - - - -
## B- 0.64 1.00 1.00 1.00 1.00 - - - - -
## B 1.00 1.00 1.00 1.00 1.00 0.70 - - - -
## B+ 1.00 1.00 1.00 1.00 1.00 1.00 1.00 - - -
## A- 0.27 1.00 1.00 0.68 1.00 1.00 0.33 1.00 - -
## A 0.19 1.00 1.00 0.45 1.00 1.00 0.22 1.00 1.00 -
## A+ 0.54 1.00 1.00 0.95 1.00 1.00 0.56 1.00 1.00 1.00
##
## P value adjustment method: bonferroni
# Post-minus-pre difference in percent unfavourable: one-way ANOVA of
# diff_unfav on grade.
aov.diffunfav <- aov(diff_unfav ~ grade, data = combined)
summary(aov.diffunfav)
## Df Sum Sq Mean Sq F value Pr(>F)
## grade 10 0.328 0.03280 2.912 0.0014 **
## Residuals 684 7.706 0.01127
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Pairwise t tests between grades, Bonferroni-adjusted.
pairwise.t.test(
  combined$diff_unfav,
  combined$grade,
  p.adjust.method = "bonferroni"
)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: combined$diff_unfav and combined$grade
##
## F D C- C C+ B- B B+ A- A
## D 1.0000 - - - - - - - - -
## C- 1.0000 1.0000 - - - - - - - -
## C 1.0000 1.0000 1.0000 - - - - - - -
## C+ 1.0000 1.0000 1.0000 1.0000 - - - - - -
## B- 1.0000 1.0000 1.0000 1.0000 1.0000 - - - - -
## B 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 - - - -
## B+ 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 - - -
## A- 0.0024 1.0000 0.0184 0.4610 1.0000 1.0000 1.0000 1.0000 - -
## A 0.0475 1.0000 0.1810 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 -
## A+ 0.2805 1.0000 0.6583 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000 1.0000
##
## P value adjustment method: bonferroni
Clustering
# Drop the first three columns of the merged table (the merge keys: student
# ID, gender, grade), keeping the remaining columns for clustering.
combined_sub <- combined[, -(1:3)]
# Hierarchical clustering of the rows, using the dist() and hclust() defaults.
distMatrix <- dist(combined_sub)
hclustering <- hclust(distMatrix)
# heatmap() computes its own dendrograms from the matrix; it does not reuse
# `hclustering`.
combined_matrix <- as.matrix(combined_sub)
heatmap(combined_matrix, margins = c(8, 4))

# Cut the tree into three clusters and append the membership to `combined`.
groups <- cutree(hclustering, k = 3)
combined <- cbind(combined, groups)
# Cluster x grade counts, then row proportions (grade mix within each cluster).
mytable <- table(combined$groups, combined$grade)
prop.table(mytable, margin = 1)
##
## F D C- C C+ B-
## 1 0.03225806 0.01290323 0.05161290 0.07741935 0.07096774 0.04516129
## 2 0.14760915 0.02494802 0.10810811 0.12058212 0.12058212 0.09563410
## 3 0.40677966 0.03389831 0.18644068 0.08474576 0.10169492 0.06779661
##
## B B+ A- A A+
## 1 0.10322581 0.10322581 0.18709677 0.16129032 0.15483871
## 2 0.12681913 0.07276507 0.09563410 0.06444906 0.02286902
## 3 0.05084746 0.05084746 0.01694915 0.00000000 0.00000000
# Column means within each cluster, again dropping the first three columns.
# `groups` is itself a column of `combined` at this point, so it is averaged
# as well and appears as a constant in each cluster's output.
by(combined[, -(1:3)], combined$groups, FUN = colMeans)
## combined$groups: 1
## percent_favourable.x personal_interest.x
## 0.74881720 0.75591398
## real_world.x PS_general.x
## 0.73548387 0.84129032
## PS_confidence.x PS_sophistication.x
## 0.85645161 0.71059908
## PS_sensemaking_effort.x conceptual_connections.x
## 0.83870968 0.75852535
## conceptual_learning.x atomic_molecular.x
## 0.67741935 0.83548387
## un_percent_favourable.x un_personal_interest.x
## 0.10365591 0.07096774
## un_real_world.x un_PS_general.x
## 0.10161290 0.04838710
## un_PS_confidence.x un_PS_sophistication.x
## 0.02903226 0.11152074
## un_PS_sensemaking_effort.x un_conceptual_connections.x
## 0.04731183 0.09861751
## un_conceptual_learning.x un_atomic_molecular.x
## 0.15391705 0.05053763
## percent_favourable.y personal_interest.y
## 0.78394265 0.79892473
## real_world.y PS_general.y
## 0.75645161 0.86451613
## PS_confidence.y PS_sophistication.y
## 0.88709677 0.74377880
## PS_sensemaking_effort.y conceptual_connections.y
## 0.87240143 0.78064516
## conceptual_learning.y atomic_molecular.y
## 0.68940092 0.90752688
## un_percent_favourable.y un_personal_interest.y
## 0.08315412 0.04731183
## un_real_world.y un_PS_general.y
## 0.09838710 0.03032258
## un_PS_confidence.y un_PS_sophistication.y
## 0.01451613 0.08663594
## un_PS_sensemaking_effort.y un_conceptual_connections.y
## 0.04157706 0.07926267
## un_conceptual_learning.y un_atomic_molecular.y
## 0.15391705 0.01075269
## diff_fav diff_unfav
## 0.03512545 -0.02050179
## groups
## 1.00000000
## --------------------------------------------------------
## combined$groups: 2
## percent_favourable.x personal_interest.x
## 0.51374451 0.49203049
## real_world.x PS_general.x
## 0.52546778 0.51767152
## PS_confidence.x PS_sophistication.x
## 0.57120582 0.35937036
## PS_sensemaking_effort.x conceptual_connections.x
## 0.60452760 0.43688744
## conceptual_learning.x atomic_molecular.x
## 0.38699139 0.62716563
## un_percent_favourable.x un_personal_interest.x
## 0.22143682 0.19126819
## un_real_world.x un_PS_general.x
## 0.18762994 0.17027027
## un_PS_confidence.x un_PS_sophistication.x
## 0.13669439 0.31868132
## un_PS_sensemaking_effort.x un_conceptual_connections.x
## 0.15523216 0.27739828
## un_conceptual_learning.x un_atomic_molecular.x
## 0.35313335 0.13374913
## percent_favourable.y personal_interest.y
## 0.47294987 0.41580042
## real_world.y PS_general.y
## 0.39604990 0.46652807
## PS_confidence.y PS_sophistication.y
## 0.50207900 0.28927829
## PS_sensemaking_effort.y conceptual_connections.y
## 0.56086856 0.36441936
## conceptual_learning.y atomic_molecular.y
## 0.32462132 0.63825364
## un_percent_favourable.y un_personal_interest.y
## 0.24901825 0.24774775
## un_real_world.y un_PS_general.y
## 0.26663202 0.20415800
## un_PS_confidence.y un_PS_sophistication.y
## 0.18711019 0.38550639
## un_PS_sensemaking_effort.y un_conceptual_connections.y
## 0.16562717 0.31838432
## un_conceptual_learning.y un_atomic_molecular.y
## 0.41877042 0.12820513
## diff_fav diff_unfav
## -0.04079464 0.02758143
## groups
## 2.00000000
## --------------------------------------------------------
## combined$groups: 3
## percent_favourable.x personal_interest.x
## 0.31525424 0.22598870
## real_world.x PS_general.x
## 0.27542373 0.23728814
## PS_confidence.x PS_sophistication.x
## 0.27966102 0.07990315
## PS_sensemaking_effort.x conceptual_connections.x
## 0.37099812 0.18644068
## conceptual_learning.x atomic_molecular.x
## 0.16707022 0.47740113
## un_percent_favourable.x un_personal_interest.x
## 0.47419962 0.56779661
## un_real_world.x un_PS_general.x
## 0.50847458 0.49661017
## un_PS_confidence.x un_PS_sophistication.x
## 0.45338983 0.72639225
## un_PS_sensemaking_effort.x un_conceptual_connections.x
## 0.39548023 0.60290557
## un_conceptual_learning.x un_atomic_molecular.x
## 0.63438257 0.34180791
## percent_favourable.y personal_interest.y
## 0.27683616 0.14971751
## real_world.y PS_general.y
## 0.17796610 0.17966102
## PS_confidence.y PS_sophistication.y
## 0.19067797 0.04358354
## PS_sensemaking_effort.y conceptual_connections.y
## 0.30696798 0.15012107
## conceptual_learning.y atomic_molecular.y
## 0.12348668 0.45197740
## un_percent_favourable.y un_personal_interest.y
## 0.51148776 0.66384181
## un_real_world.y un_PS_general.y
## 0.61016949 0.58305085
## un_PS_confidence.y un_PS_sophistication.y
## 0.61016949 0.81113801
## un_PS_sensemaking_effort.y un_conceptual_connections.y
## 0.41807910 0.60048426
## un_conceptual_learning.y un_atomic_molecular.y
## 0.70944310 0.37853107
## diff_fav diff_unfav
## -0.03841808 0.03728814
## groups
## 3.00000000