Load the data and calculate the mean score of each question
# 2012: read the raw multiple-choice scores, then express each column's mean
# (NAs ignored) as a fraction of that column's value in row 1. The first
# column of the file is excluded from the calculation.
# NOTE(review): row 1 looks like the per-question maximum score and is itself
# included in the mean -- confirm against the CSV layout.
mc_2012 <- read.csv("2012_MC.csv")
mc2012_percent <- sapply(mc_2012[, -1], mean, na.rm = TRUE) / mc_2012[1, -1]
# Clear the 2012 column names.
names(mc2012_percent) <- NULL

# 2013: same computation on the 2013 file.
mc_2013 <- read.csv("2013_MC.csv")
mc2013_percent <- sapply(mc_2013[, -1], mean, na.rm = TRUE) / mc_2013[1, -1]
Make sure all the questions in year 2012 and 2013 are the same
# Replace column 46 with the average of columns 46 and 47.
mc2013_percent[, 46] <- (mc2013_percent[, 46] + mc2013_percent[, 47]) / 2
# Remove column 47 and the column that sits at position 48 once 47 is gone,
# i.e. positions 47 and 49 counted before either removal.
# NOTE(review): confirm that dropping the original column 49 (not 48) is
# what is intended.
mc2013_percent <- mc2013_percent[, -c(47, 49), drop = FALSE]
# Rotate the values held in columns 57-59: 57 receives 59's values,
# 58 receives 57's, and 59 receives 58's.
mc2013_percent[, c(57, 58, 59)] <- mc2013_percent[, c(59, 57, 58)]
# Clear the 2013 column names.
names(mc2013_percent) <- NULL
Add a year column and concatenate the two years; then add a Difference column indicating whether each question's mean in 2013 is higher than in 2012
# Build one 59-row matrix per year: the per-question average (row 60 of the
# transposed percentages is left out), the year, and a question ID 1..59.
# NOTE(review): the column name "Avereage" is misspelled, but the plotting
# code further down refers to it by this exact name, so it is kept as is.
mc2012 <- cbind(
  Avereage = t(mc2012_percent)[-60, ],
  Year = rep(2012, 59),
  ID = seq_len(59)
)
mc2013 <- cbind(
  Avereage = t(mc2013_percent)[-60, ],
  Year = rep(2013, 59),
  ID = seq_len(59)
)
# Stack both years (2012 rows first, then 2013) and treat Year as a factor.
mc <- as.data.frame(rbind(mc2012, mc2013))
mc$Year <- as.factor(mc$Year)
# TRUE where the 2013 average exceeds the 2012 average for the same question.
improved <- mc2013[, 1] - mc2012[, 1] > 0
# Number of questions whose average went up. The previous call was
# `sum(<condition>, 2)`, which added the literal 2 to the count and
# therefore printed 42 instead of 40.
sum(improved)
## [1] 40
# mc holds the 2012 rows followed by the 2013 rows, so the per-question flags
# are repeated once for each year.
mc$Difference <- rep(improved, 2)
Plot the difference and compare
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.2.3
# One point per question and year, joined by a line per question (group = ID)
# that is coloured by whether the 2013 average is higher than the 2012 one.
# The aesthetic was previously spelled `lable`, which is not a ggplot2
# aesthetic; it is now `label`, and geom_text() inherits it from here.
ggplot(mc, aes(x = Year, y = Avereage, group = ID, label = ID)) +
  geom_point() +
  geom_line(aes(color = Difference)) +
  geom_text(hjust = 2, size = 3)

Enlarge: restrict the y-axis to the range 0.45–0.95
# Same plot as above with the y-axis limited to [0.45, 0.95].
# The mapping was previously written `label-ID`, which R parses as the
# subtraction `label - ID` passed as an unnamed aesthetic; it is now the
# intended `label = ID`, which geom_text() inherits.
# Setting limits on the scale turns values outside the range into NA, so
# those rows are dropped from the layers with a "Removed ... rows" warning.
ggplot(mc, aes(x = Year, y = Avereage, group = ID, label = ID)) +
  geom_point() +
  geom_line(aes(color = Difference)) +
  scale_y_continuous(limits = c(0.45, 0.95)) +
  geom_text(hjust = 2, size = 3)
## Warning: Removed 10 rows containing missing values (geom_point).
## Warning: Removed 10 rows containing missing values (geom_text).

The difference of mean for each question between year 2013 and 2012 (2013-2012)
# Per-question change in mean score (2013 minus 2012).
# Row 60 of the transposed percentages is dropped here, exactly as it is when
# the per-year tables (mc2012 / mc2013, 59 question IDs) are built from the
# same objects, so the histogram covers the same 59 questions.
# NOTE(review): row 60 is presumably the exam total rather than a question --
# confirm against the CSV layout.
df <- data.frame(X1 = (t(mc2013_percent) - t(mc2012_percent))[-60, ])
# Density-scaled histogram with a kernel density estimate drawn on top.
ggplot(df, aes(x = X1)) +
  geom_histogram(aes(y = ..density..), colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#FF6666") +
  xlab("Difference of mean")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Data munging: concatenate the final grades from 2012 and 2013
# Final grade per row as a fraction, tagged with its year.
# 2012 totals are divided by 80 and 2013 totals by 81.5 (the same figures
# that appear in the respective column names).
# NOTE(review): if row 1 of each file is a maximum-score row, it is included
# here as well -- confirm.
mc2012_total <- data.frame(final = mc_2012$Total.MC...80. / 80, year = 2012)
mc2013_total <- data.frame(final = mc_2013$FinalMC..81.5 / 81.5, year = 2013)

# Stack the two years and make year a factor so it can drive facets/groups.
final_all <- rbind(mc2012_total, mc2013_total)
final_all$year <- factor(final_all$year)
Histogram and density plots of final grades from 2012 and 2013
# Density-scaled histogram of the final grade with a density curve overlaid,
# drawn in a separate panel for each year.
ggplot(final_all, aes(x = final)) +
  geom_histogram(aes(y = ..density..), colour = "black", fill = "white") +
  geom_density(alpha = 0.2, fill = "light blue") +
  facet_wrap(~year)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 102 rows containing non-finite values (stat_bin).
## Warning: Removed 102 rows containing non-finite values (stat_density).

Compare the density plots of the final grades for 2012 and 2013
# Both years' final-grade densities on one set of axes, one coloured curve
# per year.
ggplot(final_all, aes(x = final, group = year, color = year)) +
  geom_density()
## Warning: Removed 102 rows containing non-finite values (stat_density).

t-test
# Two-sample t-test comparing the 2012 and 2013 final-grade fractions.
# Called with the defaults, so this is the two-sided Welch test (unequal
# variances), as the printed header below confirms.
t.test(mc2012_total$final, mc2013_total$final)
##
## Welch Two Sample t-test
##
## data: mc2012_total$final and mc2013_total$final
## t = -3.8782, df = 2265.8, p-value = 0.0001082
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03493932 -0.01147174
## sample estimates:
## mean of x mean of y
## 0.7106277 0.7338333