library(tidyverse)
library(ggthemes)
library(plyr)
library(kableExtra)
library(magrittr)
library(here)
library(ggrepel)
library(tippy)

#' Complete ggplot2 theme used for all graphs in this report
#'
#' Builds a complete theme (`complete = TRUE`): white panel with a thin grey
#' border, light grey grid lines, blank axis lines, legend on the right and
#' left-aligned plot titles.
#'
#' @param base_size Base font size in points; text elements declared with
#'   `rel()` scale from it, and margins/spacings are derived from
#'   `base_size / 2`.
#' @param base_family Base font family passed to the root text element.
#' @return A ggplot2 theme object that can be added to a plot with `+`.
#'
# NOTE(review): recent ggplot2 releases deprecate `size` on line/rect elements
# (in favour of `linewidth`) and the `legend.*.align` settings; they are kept
# here for compatibility -- confirm against the ggplot2 version in use.
theme_alan <- function(base_size = 12, base_family = "")
{
  # Half the base size is the unit for most margins and spacings below
  half_line <- base_size/2
  
  theme(
    line = element_line(colour = "black", size = 0.5, linetype = 1, lineend = "butt"),
    rect = element_rect(fill = "white", 
                        colour = "black", size = 0.5, linetype = 1),
    text = element_text(family = base_family, face = "plain", colour = "black", 
                        size = base_size, lineheight = 0.9, hjust = 0.5, vjust = 0.5,
                        angle = 0, margin = margin(), debug = FALSE),
    
    axis.line = element_blank(),
    axis.line.x = NULL,
    axis.line.y = NULL, 
    axis.text = element_text(size = rel(0.8), colour = "grey30"),
    axis.text.x = element_text(margin = margin(t = 0.8 * half_line/2), vjust = 1),
    axis.text.x.top = element_text(margin = margin(b = 0.8 * half_line/2), vjust = 0),
    axis.text.y = element_text(margin = margin(r = 0.8 * half_line/2), hjust = 1),
    axis.text.y.right = element_text(margin = margin(l = 0.8 * half_line/2), hjust = 0), 
    axis.ticks = element_line(colour = "grey20"), 
    axis.ticks.length = unit(half_line/2, "pt"),
    axis.title.x = element_text(margin = margin(t = half_line), vjust = 1),
    axis.title.x.top = element_text(margin = margin(b = half_line), vjust = 0),
    axis.title.y = element_text(angle = 90, margin = margin(r = half_line), vjust = 1),
    axis.title.y.right = element_text(angle = -90, margin = margin(l = half_line), vjust = 0),
    
    legend.background = element_rect(colour = NA),
    legend.spacing = unit(0.4, "cm"), 
    legend.spacing.x = NULL, 
    legend.spacing.y = NULL,
    legend.margin = margin(0.2, 0.2, 0.2, 0.2, "cm"),
    legend.key = element_rect(fill = "white", colour = NA), 
    legend.key.size = unit(1.2, "lines"), 
    legend.key.height = NULL,
    legend.key.width = NULL,
    legend.text = element_text(size = rel(0.8)), 
    legend.text.align = NULL,
    legend.title = element_text(hjust = 0),
    legend.title.align = NULL,
    legend.position = "right", 
    legend.direction = NULL,
    legend.justification = "center", 
    legend.box = NULL,
    legend.box.margin = margin(0, 0, 0, 0, "cm"),
    legend.box.background = element_blank(),
    legend.box.spacing = unit(0.4, "cm"),
    
    panel.background = element_rect(fill = "white", colour = NA),
    panel.border = element_rect(fill = NA, colour = "grey20"),
    panel.grid.major = element_line(colour = "grey92"),
    panel.grid.minor = element_line(colour = "grey92", size = 0.25),
    panel.spacing = unit(half_line, "pt"),
    panel.spacing.x = NULL,
    panel.spacing.y = NULL,
    panel.ontop = FALSE,
    
    strip.background = element_rect(fill = "NA", colour = "NA"),
    strip.text = element_text(colour = "grey10", size = rel(0.8)),
    strip.text.x = element_text(margin = margin(t = half_line, b = half_line)),
    strip.text.y = element_text(angle = 0, margin = margin(l = half_line, r = half_line)),
    strip.placement = "inside",
    strip.placement.x = NULL, 
    strip.placement.y = NULL,
    strip.switch.pad.grid = unit(0.1, "cm"), 
    strip.switch.pad.wrap = unit(0.1, "cm"), 
    
    plot.background = element_rect(colour = "white"),
    plot.title = element_text(size = rel(1.2), hjust = 0, vjust = 1, margin = margin(b = half_line * 1.2)),
    plot.subtitle = element_text(size = rel(0.9), hjust = 0, vjust = 1, margin = margin(b = half_line * 0.9)),
    plot.caption = element_text(size = rel(0.9), hjust = 1, vjust = 1, margin = margin(t = half_line * 0.9)), 
    plot.margin = margin(half_line, half_line, half_line, half_line),
    
    complete = TRUE)
}

# Wrap a long graph title: split `x` into lines with strwrap() (extra
# arguments such as `width` are forwarded to it) and join the pieces with
# newline characters so the title renders on several lines.
wrapper <- function(x, ...) {
  wrapped_lines <- strwrap(x, ...)
  paste(wrapped_lines, collapse = "\n")
}


pd <- position_dodge(width = 0.8)       # Standard dodge position (width 0.8) shared by the bar charts


# RDFZ reds: hex colours used for table headers, histogram fills and curves
RDFZPink <- "#cf8f8d"
RDFZRed1 <- "#ae002b"
RDFZRed2 <- "#991815"
RDFZRed3 <- "#78011e"
RDFZRed4 <- "#4b0315"

# Grade colours: seven hex colours indexed by the letter-grade bar charts
# NOTE(review): entries 4 and 5 are both "#94df00", so two adjacent grades
# share a colour -- looks like a copy-paste slip; confirm the intended value.
GradeColors <- c("#8900df", "#0092df", "#00df76", "#94df00", "#94df00", "#df5300", "#9b0f00")

# Show an inline R code chunk verbatim: returns an HTML <code> snippet in
# which `code` is wrapped as a literal inline-R expression.
#
# `code` is inserted with fixed = TRUE so it is treated as literal text;
# without it sub() interprets backslashes in the replacement (e.g. "\\d" or
# "\\1"), silently mangling any code that contains them.
rinline <- function(code) {
  html <- '<code  class="r">``` `r CODE` ```</code>'
  sub("CODE", code, html, fixed = TRUE)
}
## Read in the data file (long format: one row per paper/student/question;
## this section uses the Paper, Teacher, Student, Anon, Question, Value and
## Grade columns)
MockGrades <- read.csv(here("MockGrades.csv"))

## Make a unique key column for grouping (the original columns are kept)
MockGrades %<>%
  unite(Combo, Paper, Teacher, Student, Anon, sep = "-", remove = FALSE) 

## Split by paper (this obtains us some values we need)
Paper1 <- subset(MockGrades, Paper == 1)
Paper2 <- subset(MockGrades, Paper == 2)

## Obtain maximum values (total score of each paper).
## Sum one Value per distinct question rather than taking the first N rows,
## so the total no longer depends on the row order of the CSV.
P1MaxVal <- as.numeric(sum(Paper1$Value[!duplicated(Paper1$Question)]))
P2MaxVal <- as.numeric(sum(Paper2$Value[!duplicated(Paper2$Question)]))

## Summarize the data for each participant: total marks (`sum`) and the
## percentage of the paper maximum rounded to one decimal (`Grade`). The key
## is split back into its parts, which are therefore character columns.
Paper1Totals <- 
  Paper1 %>%
    group_by(Combo) %>%
    dplyr::summarise(sum = sum(Grade)) %>%
    mutate(Grade = round(sum/P1MaxVal*100, 1)) %>%
    separate(Combo, into = c("Paper", "Teacher", "Student", "Anon"), sep = "-", remove = TRUE) %>%
    arrange(Student) 

Paper2Totals <- 
  Paper2 %>%
    group_by(Combo) %>%
    dplyr::summarise(sum = sum(Grade)) %>%
    mutate(Grade = round(sum/P2MaxVal*100, 1)) %>%
    separate(Combo, into = c("Paper", "Teacher", "Student", "Anon"), sep = "-", remove = TRUE) %>%
    arrange(Student) 
# FILL THIS IN WHEN SETTING UP THE SHEET: these two labels are pasted into the
# titles and file names of the tables and images that are output, saving you time.

Class <- "AS Psychology"            # Name of the class goes here
Eval <- "Cambridge Mock Exam"       # Name of what is being evaluated goes here

Paper 1

Questions

Paper 1 consisted of questions testing students’ knowledge of the 12 core studies of AS Psychology. The questions and their values can be seen in the table below.

## Knitted table of questions and point values (Paper 1)
QTitle <- paste(Class, Eval, "Paper 1", "Questions and Values", sep = " - ")

# Question list: the first rows of Paper1 (as many as there are distinct
# questions), reduced to number, text and value
QT1 <- setNames(
  subset(
    head(Paper1, length(unique(Paper1$Question))),
    select = c(Qnum, Question, Value)
  ),
  c("Number", "Question", "Value")
)

# Styled HTML table with a dark red header row
QuestionsTableP1 <-
  knitr::kable(QT1, caption = QTitle, row.names = FALSE) %>%
  row_spec(0, bold = TRUE, color = "white", background = RDFZRed3) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed"),
    fixed_thead = TRUE
  )

# Save an image of the table named after its title, then print it
save_kable(QuestionsTableP1, paste0(QTitle, ".png"))

QuestionsTableP1
AS Psychology - Cambridge Mock Exam - Paper 1 - Questions and Values
Number Question Value
1a Outline the aim of the study by Baron-Cohen et al. (eyes test) 2
1b Describe the results for the Autism Spectrum Quotient (AQ) test for two groups of participants. You must use data in your answer. 3
2a Duncan believes that the results of Pepperberg (parrot learning) support the nurture side of the nature-nurture debate. Outline what is meant by the nature-nurture debate 2
2b Outline why you think Duncan is correct, using evidence in your answer 4
3a In the study by Dement and Kleitman (sleep and dreams) the procedure that the researchers first used to measure participants’ estimations of REM sleep duration was unsuccesful and had to be revised. Describe how the researchers first attempted to measure participants’ estimations of REM sleep duration. 2
3b Describe the revised procedure used to measure participants’ estimations of REM sleep duration 2
4a From the study by Andrade (doodling): Identify two examples where a response would be recorded as a “false alarm” for the recall of names 2
4b Suggest one problem with the sample used in this study 2
4c Outline one conclusion from this study 2
5a From the study by Laney et al (false memory). Outline the rating scale used in the Food History Inventory 2
5b Outline one result from Experiment 1, using data collected from the Food History Inventory 2
6 Two friends, Lok and Hiruni, are discussing the ethics of the study by Schachter and Singer (two factors in emotion). Lok thinks the study is ethical but Hiruni thinks that it is unethical. Explain one reason why Lok is correct and one reason why Hiruni is correct, using evidence from this study 6
7 Outline two quantiative results about “imitative aggression” from the study by Bandura et al. (aggresion) 4
8 Evaluate the study by Milgram (obedience) in terms of two strengths and two weaknesses. At least one of your evaluation points must be about observation 10

Overall Performance

# Single histogram of Paper 1 grades (density scale) with a normal curve
# fitted to the sample mean and standard deviation drawn on top

OverallTitle <- paste(Class, Eval, "Paper 1", "Histogram of Grades", sep = " - ")

ggplot(data=Paper1Totals, aes(x=Grade)) +
  geom_histogram(aes(y=..density..), alpha = 1, position = "identity", fill = RDFZRed2) +
  labs(x="Grade (Percentage)", y="Density") +
  stat_function(fun=dnorm, args = list(mean=mean(Paper1Totals$Grade), sd=sd(Paper1Totals$Grade)), 
                color=RDFZRed4, size   = 1.4) +
  scale_x_continuous(limits = c(0,100)) + 
  theme_alan() +
  theme(legend.position = "none") +
  ggtitle(wrapper(OverallTitle, width = 45))

# units must be a single value: the old c("in", "cm", "mm") vector fails
# argument matching in ggplot2 versions that also accept "px"
ggsave(here(paste0(OverallTitle, ".png")), plot = last_plot(), device = NULL, path = NULL,
       width = 10, height = 6, units = "in",
       dpi = 600)

Overall performance can be seen in the graph above. The distribution of grades was roughly bell-shaped, although a Shapiro-Wilk test indicates a significant departure from normality (W = 0.89, p = 0.008). The mean score was 79.5% (median = 82.2%) with a standard deviation of 13.7%.

The high score on the test was 44 out of 45 marks (97.8%), which was achieved by 1 student(s).

The low score on the test was 15 out of 45 marks (33.3%), which was achieved by 1 student(s).

Letter Grades (Raw)

Typical raw letter grade boundaries for the school can be seen in the table below:

# School letter-grade boundaries in percent. The bottoms already include
# rounding up any grade above 0.5 to the next letter grade (e.g. 79.5% = A).
Letters <- c("A*", "A", "B", "C", "D", "E", "U")
MinVal <- c(89.5, 79.5, 69.5, 59.5, 49.5, 39.5, 0)
MaxVal <- c(100, 89.49, 79.49, 69.49, 59.49, 49.49, 39.49)

# One row per letter, ordered from the highest grade to the lowest
LetterBoundaries <- setNames(
  cbind.data.frame(Letters, MinVal, MaxVal),
  c("Letter", "Bottom", "Top")
)

# Styled table of the boundaries with an explanatory footnote
LetterGradesTable <-
  knitr::kable(
    setNames(LetterBoundaries, c("Letter Grade", "Bottom Boundary", "Top Boundary")),
    caption = "RDFZ Letter Grade Boundaries",
    row.names = FALSE
  ) %>%
  row_spec(0, bold = TRUE, color = "white", background = RDFZRed3) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed"),
    fixed_thead = TRUE
  ) %>%
  footnote(general = "Note this includes rounding of grades like 79.5% up a grade boundary")

LetterGradesTable
RDFZ Letter Grade Boundaries
Letter Grade Bottom Boundary Top Boundary
A* 89.5 100.00
A 79.5 89.49
B 69.5 79.49
C 59.5 69.49
D 49.5 59.49
E 39.5 49.49
U 0.0 39.49
Note:
Note this includes rounding of grades like 79.5% up a grade boundary

If we assign grade boundaries based on these we can see the following distribution of grades

GB1Title <- paste(Class, Eval, "Paper 1", "Letter Grades (Raw)", sep = " - ")
  
# Assign a raw letter grade to every Paper 1 percentage.
# Bins are closed on the left (right = FALSE) so a grade sitting exactly on a
# bottom boundary (e.g. 79.5) earns the higher letter, as the boundary table
# states; include.lowest = TRUE keeps a grade of exactly 100 inside the A* bin.
# Labels run from the lowest bin (U) to the highest (A*).
Paper1Totals %<>%
  mutate(Letter = cut(x = Grade, 
                      breaks = sort(c(LetterBoundaries$Bottom, 100)), 
                      labels = rev(as.character(LetterBoundaries$Letter)),
                      right = FALSE, include.lowest = TRUE)) %>%
  mutate(Letter = factor(Letter, levels = c("U", "E", "D", "C", "B", "A", "A*")))
    

# Bar chart of letter-grade counts. Colours are mapped to letters by name, so
# each letter keeps its colour even when some grades have no students.
ggplot(data=Paper1Totals, aes(x=Letter, fill = Letter)) +
  geom_bar(stat = "count", position = pd, width = 0.8) +
  scale_x_discrete(drop = FALSE) +
  scale_fill_manual(values = setNames(rev(GradeColors), c("U", "E", "D", "C", "B", "A", "A*"))) +
  labs(x="Letter Grade", y="Count") +
  theme_alan() +
  ggtitle(wrapper(GB1Title, width = 45))

# units must be a single value: the old c("in", "cm", "mm") vector fails
# argument matching in ggplot2 versions that also accept "px"
ggsave(here(paste0(GB1Title, ".png")), plot = last_plot(), device = NULL, path = NULL,
       width = 8, height = 6, units = "in",
       dpi = 600)


# Frequency of each letter (data frame with columns `x` and `freq`);
# namespaced because dplyr also exports a count() with a different interface
LetterCounts <- plyr::count(Paper1Totals$Letter)

# Attach the counts to the boundary table by matching on the letter text.
# Matching on character values avoids converting a factor to its integer
# codes; letters that no student earned get a count of 0.
LetterBoundaries2 <-
  LetterBoundaries  %>%
    mutate(Count = LetterCounts$freq[match(as.character(Letter), as.character(LetterCounts$x))])  %>%
    mutate(Count = replace_na(as.numeric(Count), 0))

This is actually a pretty good distribution of Grade Boundaries, suggesting that performance on this assessment was at a pretty high level: A total of 3 students earned an A or A-star, with only 13 students earning an E or a U on the exam. The total counts of students in each grade category can be seen in the table below

# Styled table of the boundaries together with the Paper 1 letter counts
knitr::kable(
  setNames(LetterBoundaries2, c("Letter Grade", "Bottom Boundary", "Top Boundary", "Count")),
  caption = "RDFZ Letter Grade Boundaries",
  row.names = FALSE
) %>%
  row_spec(0, bold = TRUE, color = "white", background = RDFZRed3) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed"),
    fixed_thead = TRUE
  )
RDFZ Letter Grade Boundaries
Letter Grade Bottom Boundary Top Boundary Count
A* 89.5 100.00 2
A 79.5 89.49 1
B 69.5 79.49 3
C 59.5 69.49 4
D 49.5 59.49 5
E 39.5 49.49 6
U 0.0 39.49 7

Between Class Performance

How do the two classes for AS Psychology compare to one another on Cambridge Mock Exam - Paper 1?

Below, you can see a histogram of performance between the two classes.

BetweenTitle <- paste(Class, Eval, "Paper 1", "Histogram of Grades by Class", sep = " - ")

# Per-teacher subsets, used for the fitted normal curves and the t-test
JuneP1Grades <- subset(Paper1Totals, Teacher == "June")
AlanP1Grades <- subset(Paper1Totals, Teacher == "Alan")

# Overlaid density histograms by class, with one normal curve per class
ggplot(data=Paper1Totals, aes(x=Grade, fill = Teacher)) +
  geom_histogram(aes(y=..density..), alpha = 0.75, position = "identity") +
  labs(x="Grade (Percentage)", y="Density") +
  stat_function(fun=dnorm, args = list(mean=mean(JuneP1Grades$Grade), sd=sd(JuneP1Grades$Grade)), color=RDFZRed1, size = 1.2) +
  stat_function(fun=dnorm, args = list(mean=mean(AlanP1Grades$Grade), sd=sd(AlanP1Grades$Grade)), color=RDFZRed4, size = 1.2) +
  scale_fill_manual(values= c(RDFZRed4, RDFZRed1)) +
  scale_x_continuous(limits = c(0,100)) +
  theme_alan() +
  ggtitle(wrapper(BetweenTitle, width = 45))

# units must be a single value: the old c("in", "cm", "mm") vector fails
# argument matching in ggplot2 versions that also accept "px"
ggsave(here(paste0(BetweenTitle, ".png")), plot = last_plot(), device = NULL, path = NULL,
       width = 10, height = 4, units = "in",
       dpi = 600)

# Two-sample t-test comparing the classes (June first, then Alan).
# `warning`/`message` are knitr chunk options, not t.test() arguments; they
# were silently ignored and have been dropped.
testcompP1 <- t.test(JuneP1Grades$Grade, AlanP1Grades$Grade)

We can see that there isn’t much difference at all between the classes. Alan’s AS Psychology class averaged a grade of 80.4% (sd = 10.1%), while June’s class averaged a grade of 78.6% (sd = 17%). This difference is not significant: t(19.5) = -0.32, p= 0.75.

Paper 2

Questions

Paper 2 consisted of 20 questions testing students’ knowledge of the application of research methodology. The questions and their values can be seen in the table below.

## Knitted table of questions and point values (Paper 2)
# The title now names Paper 2 and is used for both the caption and the saved
# file; previously the Paper 1 title was reused, which mislabelled this table
# and overwrote the Paper 1 image.
QTitle2 <- paste(Class, Eval, "Paper 2", "Questions and Values", sep = " - ")

# Question list: the first rows of Paper2 (as many as there are distinct
# questions), reduced to number, text and value
QT2 <- 
  Paper2 %>%
    head(length(unique(Paper2$Question))) %>%
    subset(select = c(Qnum, Question, Value)) %>%
    setNames(c("Number", "Question", "Value"))
    
# Styled HTML table with a dark red header row
QuestionsTableP2 <-
  QT2 %>%
    knitr::kable(caption = QTitle2, row.names = FALSE) %>%
    row_spec(0, bold = TRUE, color = "white", background = RDFZRed3)%>%
    kable_styling(full_width = FALSE, 
                  bootstrap_options = c("striped", "hover", "condensed"),
                  fixed_thead = TRUE) 

save_kable(QuestionsTableP2, paste0(QTitle2, ".png"))

QuestionsTableP2
AS Psychology - Cambridge Mock Exam - Paper 2 - Questions and Values
Number Question Value
1a Outline the sampling technique used in the study by Milgram (obedience) 2
1b Suggest one advantage of using this sampling technique in this study 2
2a A hypothesis in a study states “Recall will be better after a short delay than after a long delay”. Is this a directional (one-tailed) or non-directional (two-tailed) hypothesis? Include a reason for your answer. 1
2bi Suggest one way to operationalise ‘recall’ in this study 1
2bii Suggest one way to operationalise ‘short delay’ and ‘long delay’ in this study 1
3ai State what is meant by the term ‘demand characteristics’ 1
3aii State why demand characteristics are a problem in research 1
3b Describe one way in which demand characteristics could be avoided in this study 1
4a Suggest one advantage of using an fMRI scanner to investigate emotions 2
4b Suggest one disadvantage of using an fMRI scanner to investigate emotions 2
5 Describe inter-rater reliability and test-retest reliability, using any examples 6
6a Freya is planning a semi-structured interview to find out whether people sleep better in the winter or the summer. State what is meant by a ‘semi-structured’ interview 1
6b Suggest one open question that Freya could ask 1
6c Explain one advantage of using a semi-structured interview in Freya’s study 2
7a Olivia is conducting an experiment to investigate whether students concentrate better in class before or after eating. Her experimental design is a repeated measures design. Identify the independent variable in this experiment 1
7b Explain what is meant by a ‘repeated measures design’, using this experiment as an example 2
7ci Suggest how Olivia could counterbalance the conditions in her experiment 2
7cii Explain one advantage of counterbalancing in this experiment 2
8a Describe how Fazli could conduct a correlational study to investigate this relationship. Your study must be ethical 10
8b Identify one practical weakness/limitation with the procedure you have described in your answer to part (a) and suggest how your study might be done differently to overcome the problem. Do not refer to ethics or sampling in your answer. 4

Overall Performance

# Single histogram of Paper 2 grades (density scale) with a normal curve
# fitted to the sample mean and standard deviation drawn on top

OverallTitle2 <- paste(Class, Eval, "Paper 2", "Histogram of Grades", sep = " - ")

ggplot(data=Paper2Totals, aes(x=Grade)) +
  geom_histogram(aes(y=..density..), alpha = 1, position = "identity", fill = RDFZRed2) +
  labs(x="Grade (Percentage)", y="Density") +
  stat_function(fun=dnorm, args = list(mean=mean(Paper2Totals$Grade), sd=sd(Paper2Totals$Grade)), 
                color=RDFZRed4, size   = 1.4) +
  scale_x_continuous(limits = c(0,100)) + 
  theme_alan() +
  theme(legend.position = "none") +
  ggtitle(wrapper(OverallTitle2, width = 45))

# units must be a single value: the old c("in", "cm", "mm") vector fails
# argument matching in ggplot2 versions that also accept "px"
ggsave(here(paste0(OverallTitle2, ".png")), plot = last_plot(), device = NULL, path = NULL,
       width = 10, height = 6, units = "in",
       dpi = 600)

Overall performance can be seen in the graph above. The distribution of grades was roughly bell-shaped, although a Shapiro-Wilk test indicates a significant departure from normality (W = 0.91, p = 0.022). The mean score was 73.8% (median = 73.3%) with a standard deviation of 15.4%.

The high score on the test was 42 out of 45 marks (93.3%), which was achieved by 2 student(s).

The low score on the test was 12 out of 45 marks (26.7%), which was achieved by 1 student(s).

Letter Grades (Raw)

Typical raw letter grade boundaries for the school can be seen in the table below:

# Print the LetterGradesTable object (school letter-grade boundaries)
LetterGradesTable
RDFZ Letter Grade Boundaries
Letter Grade Bottom Boundary Top Boundary
A* 89.5 100.00
A 79.5 89.49
B 69.5 79.49
C 59.5 69.49
D 49.5 59.49
E 39.5 49.49
U 0.0 39.49
Note:
Note this includes rounding of grades like 79.5% up a grade boundary

If we assign grade boundaries based on these we can see the following distribution of grades

# Title (and output file name) for the Paper 2 chart; it previously said
# "Paper 1", which mislabelled the plot and overwrote the Paper 1 image.
GB1Title2 <- paste(Class, Eval, "Paper 2", "Letter Grades (Raw)", sep = " - ")
  
# Assign a raw letter grade to every Paper 2 percentage.
# Bins are closed on the left (right = FALSE) so a grade sitting exactly on a
# bottom boundary (e.g. 79.5) earns the higher letter, as the boundary table
# states; include.lowest = TRUE keeps a grade of exactly 100 inside the A* bin.
# Labels run from the lowest bin (U) to the highest (A*).
Paper2Totals %<>%
  mutate(Letter = cut(x = Grade, 
                      breaks = sort(c(LetterBoundaries$Bottom, 100)), 
                      labels = rev(as.character(LetterBoundaries$Letter)),
                      right = FALSE, include.lowest = TRUE)) %>%
  mutate(Letter = factor(Letter, levels = c("U", "E", "D", "C", "B", "A", "A*")))
    
# Bar chart of letter-grade counts. Colours are mapped to letters by name, so
# each letter keeps its colour even when some grades have no students.
ggplot(data=Paper2Totals, aes(x=Letter, fill = Letter)) +
  geom_bar(stat = "count", position = pd, width = 0.8) +
  scale_x_discrete(drop = FALSE) +
  scale_fill_manual(values = setNames(rev(GradeColors), c("U", "E", "D", "C", "B", "A", "A*"))) +
  labs(x="Letter Grade", y="Count") +
  theme_alan() +
  ggtitle(wrapper(GB1Title2, width = 45))

# units must be a single value: the old c("in", "cm", "mm") vector fails
# argument matching in ggplot2 versions that also accept "px"
ggsave(here(paste0(GB1Title2, ".png")), plot = last_plot(), device = NULL, path = NULL,
       width = 8, height = 6, units = "in",
       dpi = 600)

# Frequency of each letter (data frame with columns `x` and `freq`);
# namespaced because dplyr also exports a count() with a different interface
LetterCounts2 <- plyr::count(Paper2Totals$Letter)

# Add the Paper 2 counts to the boundary table by matching on the letter text.
# Matching on character values avoids converting a factor to its integer
# codes; letters that no student earned get a count of 0.
LetterBoundaries2  %<>%
  mutate(Count2 = LetterCounts2$freq[match(as.character(Letter), as.character(LetterCounts2$x))])  %>%
  mutate(Count2 = replace_na(as.numeric(Count2), 0))

This is actually a pretty good distribution of Grade Boundaries, suggesting that performance on this assessment was at a pretty high level: A total of 3 students earned an A or A-star, with only 11 students earning an E or a U on the exam. The total counts of students in each grade category can be seen in the table below

# Styled table of the boundaries with the letter counts for both papers
knitr::kable(
  setNames(
    LetterBoundaries2,
    c("Letter Grade", "Bottom Boundary", "Top Boundary", "Paper 1", "Paper 2")
  ),
  caption = "RDFZ Letter Grade Boundaries",
  row.names = FALSE
) %>%
  row_spec(0, bold = TRUE, color = "white", background = RDFZRed3) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed"),
    fixed_thead = TRUE
  )
RDFZ Letter Grade Boundaries
Letter Grade Bottom Boundary Top Boundary Paper 1 Paper 2
A* 89.5 100.00 2 2
A 79.5 89.49 1 1
B 69.5 79.49 3 3
C 59.5 69.49 4 4
D 49.5 59.49 5 2
E 39.5 49.49 6 5
U 0.0 39.49 7 6

Between Class Performance

How do the two classes for AS Psychology compare to one another on Cambridge Mock Exam - Paper 2?

Below, you can see a histogram of performance between the two classes.

# Title (and output file name); a stray ")" after "Class" has been removed
BetweenTitle2 <- paste(Class, Eval, "Paper 2", "Histogram of Grades by Class", sep = " - ")

# Per-teacher subsets, used for the fitted normal curves and the t-test
JuneP2Grades <- subset(Paper2Totals, Teacher == "June")
AlanP2Grades <- subset(Paper2Totals, Teacher == "Alan")

# Overlaid density histograms by class, with one normal curve per class
ggplot(data=Paper2Totals, aes(x=Grade, fill = Teacher)) +
  geom_histogram(aes(y=..density..), alpha = 0.75, position = "identity") +
  labs(x="Grade (Percentage)", y="Density") +
  stat_function(fun=dnorm, args = list(mean=mean(JuneP2Grades$Grade), sd=sd(JuneP2Grades$Grade)), color=RDFZRed1, size = 1.2) +
  stat_function(fun=dnorm, args = list(mean=mean(AlanP2Grades$Grade), sd=sd(AlanP2Grades$Grade)), color=RDFZRed4, size = 1.2) +
  scale_fill_manual(values= c(RDFZRed4, RDFZRed1)) +
  scale_x_continuous(limits = c(0,100)) +
  theme_alan() +
  ggtitle(wrapper(BetweenTitle2, width = 45))

# units must be a single value: the old c("in", "cm", "mm") vector fails
# argument matching in ggplot2 versions that also accept "px"
ggsave(here(paste0(BetweenTitle2, ".png")), plot = last_plot(), device = NULL, path = NULL,
       width = 10, height = 4, units = "in",
       dpi = 600)

# Two-sample t-test comparing the classes (June first, then Alan).
# `warning`/`message` are knitr chunk options, not t.test() arguments; they
# were silently ignored and have been dropped.
testcompP2 <- t.test(JuneP2Grades$Grade, AlanP2Grades$Grade)

We can see that there isn’t much difference at all between the classes. Alan’s AS Psychology class averaged a grade of 73.7% (sd = 11.8%), while June’s class averaged a grade of 73.8% (sd = 18.8%). This difference is not significant: t(20.2) = 0.03, p= 0.98.

Between Paper Comparison

It should be fairly obvious from the above analyses that the results of Paper 1 and Paper 2 were quite different from one another. Recall that the median grade on Paper 1 was 82% (mean 79.5%), while for Paper 2 the median was 73% (mean 73.8%). The distributions of grades in the two papers can be seen relative to one another in the histogram below:

# Stack the per-student totals of both papers (one row per student and paper)
MockTotals <- rbind.data.frame(Paper1Totals, Paper2Totals)

ComparisonTitle <- paste(Class, Eval, "Comparison of Paper 1 and Paper 2", sep = " - ")

# Overlaid density histograms of the two papers, one normal curve per paper
ggplot(data=MockTotals, aes(x=Grade, fill = Paper)) +
  geom_histogram(aes(y=..density..), alpha = 0.6, position = "identity") +
  labs(x="Grade (Percentage)", y="Density") +
  stat_function(fun=dnorm, args = list(mean=mean(Paper1Totals$Grade), sd=sd(Paper1Totals$Grade)), color=RDFZRed4, size = 1.2) +
  stat_function(fun=dnorm, args = list(mean=mean(Paper2Totals$Grade), sd=sd(Paper2Totals$Grade)), color=RDFZRed1, size = 1.2) +
  scale_fill_manual(values= c(RDFZRed4, RDFZRed1)) +
  scale_x_continuous(limits = c(0,100)) +
  theme_alan() +
  ggtitle(wrapper(ComparisonTitle, width = 45))

# units must be a single value: the old c("in", "cm", "mm") vector fails
# argument matching in ggplot2 versions that also accept "px"
ggsave(here(paste0(ComparisonTitle, ".png")), plot = last_plot(), device = NULL, path = NULL,
       width = 10, height = 4, units = "in",
       dpi = 600)

# Maximum marks across both papers, for the omnibus score
TestMaxVal <- P1MaxVal + P2MaxVal

# One row per student with both papers side by side.
# Paper 2 values are looked up by Student with match(): a student missing from
# Paper 2 gets NA, instead of the student ID being carried through as a score.
# NOTE: the positional setNames() assumes Paper1Totals has exactly the columns
# Paper, Teacher, Student, Anon, sum, Grade, Letter at this point.
# Letter bins are closed on the left so a grade exactly on a bottom boundary
# earns the higher letter; include.lowest keeps 100 inside the top bin.
MockTotals2 <-
  Paper1Totals %>%
    mutate(sum2 = as.numeric(Paper2Totals$sum[match(Student, Paper2Totals$Student)]))  %>%
    mutate(Grade2 = as.numeric(Paper2Totals$Grade[match(Student, Paper2Totals$Student)])) %>%
    setNames(c("Paper", "Teacher", "Student", "Anon", "Score1", "Grade1", "Letter1", "Score2", "Grade2")) %>%
    mutate(Score3 = Score1 + Score2) %>%
    mutate(Grade3 = round(Score3/TestMaxVal*100, 1)) %>%
    mutate(Letter = cut(x = Grade3, 
                      breaks = sort(c(LetterBoundaries$Bottom, 100)), 
                      labels = rev(as.character(LetterBoundaries$Letter)),
                      right = FALSE, include.lowest = TRUE)) %>%
    mutate(Letter = factor(Letter, levels = c("U", "E", "D", "C", "B", "A", "A*"))) %>%
    mutate(Diff = Score1 - Score2)

# Paired t-test comparing each student's Paper 1 and Paper 2 grades.
# `warning`/`message` are knitr chunk options, not t.test() arguments; they
# were silently ignored and have been dropped.
testcompP1P2 <- t.test(MockTotals2$Grade1, MockTotals2$Grade2, paired = TRUE)

# Correlation test between the two papers' grades (cor.test defaults)
P1P2Corr <- cor.test(MockTotals2$Grade1, MockTotals2$Grade2)

The difference between the scores on the two tests is significant: t(25) = 3.22, p= 0.004. This is a highly robust difference - of the 26 students in AS Psychology, a total of 16 scored lower on Paper 2 than on Paper 1 by an average of 5.2 points, compared to the 7 students who scored higher on Paper 2 by an average of 2.3 points.

Despite Paper 2 being more difficult than Paper 1, scores on the two papers were highly correlated with one another (Pearson’s r = 0.81; t(24) = 6.85, p < 0.001). A graph of the correlation can be seen below:

ComparisonTitle2 <- paste(Class, Eval, "Correlation of Paper 1 and Paper 2 Scores", sep = " - ")

# Scatter plot of Paper 1 vs Paper 2 grades with a linear fit (no confidence
# band); each point is labelled with the student's Anon value
ggplot(data=MockTotals2, aes(x=Grade1, y = Grade2)) +
  geom_point(color = RDFZRed3, size = 2) +
  geom_smooth(method = "lm", se = FALSE, size = 1.2, color = RDFZRed4) +
  geom_label_repel(aes(label = Anon)) +
  scale_x_continuous(breaks = c(25, 50, 75, 100), limits= c(20,100) ) +
  scale_y_continuous(breaks = c(25, 50, 75, 100), limits= c(20,100) ) +
  labs(x="Paper 1 Grade", y="Paper 2 Grade") +
  theme_alan() +
  ggtitle(wrapper(ComparisonTitle2, width = 45))

# units must be a single value: the old c("in", "cm", "mm") vector fails
# argument matching in ggplot2 versions that also accept "px"
ggsave(here(paste0(ComparisonTitle2, ".png")), plot = last_plot(), device = NULL, path = NULL,
       width = 6, height = 6, units = "in",
       dpi = 600)

The difficulty of Paper 2 relative to Paper 1 is a bit surprising - for AS Psychology results are typically pretty similar between the two papers, with Paper 2 actually being slightly easier. This might suggest that we chose a difficult set of questions for Paper 2, or that we have given our students less practice handling Paper 2 style questions. Likely both of these are true.

Grade Correction

There are broadly two ways that we can correct grades on these tests. First, we can correct for any mistakes we made as teachers - either by giving unfair questions, questions that are too hard, or by not preparing our students adequately for certain types of questions. The second is to curve the entire grade upward to be in line with achievement goals for the course and to align our results with what students could expect from their actual CAIE exams.

Curving Paper 2

As noted above, our students performed significantly worse on Paper 2 than on Paper 1, counter to what is typical with CAIE examinations in AS Psychology. As such, we will be “correcting” Paper 2 Grades to make them more in line with the grades from Paper 1. On average, students scored 2.6 points higher (out of 45) on Paper 1. To be generous, we decided to round this up, and awarded all students an additional 3 points on their Paper 2 scores (capped at a maximum score of 45).

This change results in the Distribution of Grades seen below:

# Curve Paper 2 and assign letter grades to the combined score.
# Every student gets 3 extra Paper 2 marks, capped at the Paper 2 maximum
# (P2MaxVal) as the report text describes; the cap was previously missing.
# Letter bins are closed on the left so a grade exactly on a bottom boundary
# earns the higher letter; include.lowest keeps 100 inside the top bin.
MockTotalsCurved <-
  MockTotals2 %>%
    subset(select = c("Teacher", "Student", "Anon", "Score1", "Score2")) %>%
    mutate(Score2 = pmin(Score2 + 3, P2MaxVal)) %>%
    mutate(SumScore = Score1 + Score2) %>%
    mutate(Grade = round(SumScore/TestMaxVal*100,1)) %>%
    mutate(Letter = cut(x = Grade, 
                      breaks = sort(c(LetterBoundaries$Bottom, 100)), 
                      labels = rev(as.character(LetterBoundaries$Letter)),
                      right = FALSE, include.lowest = TRUE)) %>%
    mutate(Letter = factor(Letter, levels = c("U", "E", "D", "C", "B", "A", "A*"))) 


# Plot the distribution of letter grades after the curve
CurveTitle1 <- paste(Class, Eval, "Distribution of Grades after Curving Paper 2", sep = " - ")
  

# Colours are mapped to letters by name, so each letter keeps its colour even
# when some grades have no students.
ggplot(data=MockTotalsCurved, aes(x=Letter, fill = Letter)) +
  geom_bar(stat = "count", position = pd, width = 0.8) +
  scale_x_discrete(drop = FALSE) +
  scale_fill_manual(values = setNames(rev(GradeColors), c("U", "E", "D", "C", "B", "A", "A*"))) +
  labs(x="Letter Grade", y="Count") +
  theme_alan() +
  ggtitle(wrapper(CurveTitle1, width = 45))

# units must be a single value: the old c("in", "cm", "mm") vector fails
# argument matching in ggplot2 versions that also accept "px"
ggsave(here(paste0(CurveTitle1, ".png")), plot = last_plot(), device = NULL, path = NULL,
       width = 8, height = 6, units = "in",
       dpi = 600)

Unsurprisingly, when we curve this up the overall distribution looks a lot like the distribution did for Paper 1 alone. So there isn’t much to say about this.

Applying an Overall Curve

Rather than correcting for poor performance on individual questions, Cambridge curved performance on both Papers as a whole (for letter grades, not including A-star) and for overall performance.

Here, we will take the second of these approaches.

It should be noted that for SY Option students (AS Psychology Students taking only Paper 1 and Paper 2) Cambridge does not set an A-star level. We will, however, do so here.

In 2019, Cambridge set the boundaries below for their grade levels:

# Raw-mark boundaries (out of 120 marks) for each letter
Letters <- c("A*", "A", "B", "C", "D", "E", "U")
MinVal <- c(94, 85, 75, 66, 57, 49, 0)
MaxVal <- c(120, 93, 84, 74, 65, 56, 48)

# Convert the raw boundaries to percentages of 120, rounded to two decimals
CambridgeBoundaries <- setNames(
  cbind.data.frame(Letters, round(MinVal/120*100,2), round(MaxVal/120*100,2)),
  c("Letter", "Bottom", "Top")
)

TableNote <- "These are transformed from the Raw Score Boundaries out of 120 marks from the 2019 Cambridge Psychology Exam" #to be wrapped

# Styled table of the percentage boundaries; the note is wrapped to 40
# characters and shown as a footnote
CambridgeGradesTable <-
  knitr::kable(
    setNames(CambridgeBoundaries, c("Letter Grade", "Bottom Boundary", "Top Boundary")),
    caption = "Cambridge Letter Grade Boundaries",
    row.names = FALSE
  ) %>%
  row_spec(0, bold = TRUE, color = "white", background = RDFZRed3) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed"),
    fixed_thead = TRUE
  ) %>%
  footnote(general = wrapper(TableNote, width = 40))

CambridgeGradesTable
Cambridge Letter Grade Boundaries
Letter Grade Bottom Boundary Top Boundary
A* 78.33 100.00
A 70.83 77.50
B 62.50 70.00
C 55.00 61.67
D 47.50 54.17
E 40.83 46.67
U 0.00 40.00
Note:
These are transformed from the Raw
Score Boundaries out of 120 marks from
the 2019 Cambridge Psychology Exam

These move every grade boundary down by approximately 10 percent. If we were to apply this curve to our AS Psychology grades, they would be distributed thusly:

# Assign letter grades using the Cambridge percentage boundaries.
# Bins are closed on the left (right = FALSE) so a grade exactly on a bottom
# boundary earns the higher letter; include.lowest = TRUE keeps a grade of
# exactly 100 inside the A* bin. Labels run from the lowest bin (U) to A*.
MockTotalsCurved %<>%
    mutate(`Letter (Revised- Cambridge)` = cut(x = Grade, 
                      breaks = sort(c(CambridgeBoundaries$Bottom, 100)), 
                      labels = rev(as.character(CambridgeBoundaries$Letter)),
                      right = FALSE, include.lowest = TRUE)) %>%
    mutate(`Letter (Revised- Cambridge)` = factor(`Letter (Revised- Cambridge)`, levels = c("U", "E", "D", "C", "B", "A", "A*"))) 


# Plot the distribution of letter grades under the Cambridge curve
CurveTitle2 <- paste(Class, Eval, "Distribution of Grades after Applying Full Cambridge Curve", sep = " - ")
  

# Colours are mapped to letters by name, so each letter keeps its colour even
# when some grades have no students.
ggplot(data=MockTotalsCurved, aes(x=`Letter (Revised- Cambridge)`, fill = `Letter (Revised- Cambridge)`)) +
  geom_bar(stat = "count", position = pd, width = 0.8) +
  scale_x_discrete(drop = FALSE) +
  scale_fill_manual(values = setNames(rev(GradeColors), c("U", "E", "D", "C", "B", "A", "A*"))) +
  labs(x="Letter Grade  (Cambridge Curved)", y="Count") +
  theme_alan() +
  ggtitle(wrapper(CurveTitle2, width = 45))

# units must be a single value: the old c("in", "cm", "mm") vector fails
# argument matching in ggplot2 versions that also accept "px"
ggsave(here(paste0(CurveTitle2, ".png")), plot = last_plot(), device = NULL, path = NULL,
       width = 8, height = 6, units = "in",
       dpi = 600)

This would almost certainly be too much of a curve for a variety of reasons

  • We almost certainly grade the longer-format questions easier than Cambridge does (i.e. we doubt that Cambridge gives many 10s to Evaluate/Design questions, but obviously with Cambridge’s data never being opened we cannot know)
  • We have already curved up the Paper 2 Grades
  • Students were provided with a number of practice tests, included in which were some of the exact questions given on these Mock Exams
  • We want students to remain motivated to continue to study for their CAIE examination

Collectively, the first three of these gave students an advantage over their probable Cambridge Exam grades. Because of this, we chose to apply a lesser curve (and one that would help our lowest-performing student, who was unaffected by the Cambridge Curve) and to award all of our students an additional 2 marks, capped at a total of 90 (100%). After applying these corrections we kept the actual grade boundaries at their standard school values (e.g. A* = 90%+).

The distribution of grades after this adjustment can be seen below:

# Curving and Assigning Letter Boundaries
#
# Final adjustment: add 2 marks to every total, cap the total at 90, recompute
# the percentage grade, assign letters from LetterBoundaries, then keep and
# rename the columns used in the published tables.
# NOTE(review): cut() keeps its default right-closed bins here because
# LetterBoundaries is defined outside this section - confirm that its Bottom
# values are set up for that convention before changing it.
MockTotalsCurvedFinal <-
  MockTotalsCurved %>%
    mutate(SumScore = pmin(SumScore + 2, 90)) %>%
    mutate(Grade = round(SumScore / TestMaxVal * 100, 1)) %>%
    mutate(`Letter (Revised)` = factor(
      cut(x = Grade,
          breaks = c(LetterBoundaries$Bottom, 100),
          labels = as.character(rev(LetterBoundaries$Letter))),
      levels = c("U", "E", "D", "C", "B", "A", "A*")
    )) %>%
    subset(select = c("Teacher", "Student", "Anon", "Score1", "Score2", "SumScore", "Grade", "Letter (Revised)")) %>%
    setNames(c("Teacher", "Student", "Anon", "Paper 1", "Paper 2 (R)", "Total (R)", "Grade (R)", "Letter"))

# Plotting Distribution of Letter Grades
CurveTitle3 <- paste(Class, Eval, "Distribution of Grades after Final Curve", sep = " - ")

ggplot(data = MockTotalsCurvedFinal, aes(x = `Letter`, fill = `Letter`)) +
  geom_bar(stat = "count", position = pd, width = 0.8) +
  scale_x_discrete(drop = FALSE) +  # keep letters that no student obtained on the axis
  scale_fill_manual(values = (c(GradeColors[7], rev(GradeColors[1:4]))  )) +  #Janky because of missing grade boundaries (no students in bins)
  labs(x = "Letter Grade  (Revised)", y = "Count") +
  theme_alan() +
  ggtitle(wrapper(CurveTitle3, width = 45))

# `units` has to be a single value: handing ggsave() a copy of an older default
# vector can fail argument matching in ggplot2 releases whose unit choices
# differ (e.g. once "px" was added). `device` and `path` are left at their
# NULL defaults.
ggsave(here(paste0(CurveTitle3, ".png")), plot = last_plot(),
       width = 8, height = 6, units = "in", dpi = 600)

Tables

Tables for Engage

# Alan
# Final-grade table for Alan's students, best grade first, saved as a PNG
# named after the table caption.
AlanTitle <- paste(Class, Eval, "Final Grades - Alan", sep = " - ")

AlanTable <-
  MockTotalsCurvedFinal %>%
  subset(Teacher == "Alan", select = -Teacher) %>%  # the Teacher column is constant here, so drop it
  arrange(-`Grade (R)`) %>%
  knitr::kable(caption = AlanTitle, row.names = FALSE) %>%
  row_spec(0, bold = TRUE, color = "white", background = RDFZRed3) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE,
                fixed_thead = TRUE)

save_kable(AlanTable, file = paste0(AlanTitle, ".png"))

# June
# Final-grade table for June's students, best grade first, saved as a PNG
# named after the table caption.
JuneTitle <- paste(Class, Eval, "Final Grades - June", sep = " - ")

JuneTable <-
  MockTotalsCurvedFinal %>%
  subset(Teacher == "June", select = -Teacher) %>%  # the Teacher column is constant here, so drop it
  arrange(-`Grade (R)`) %>%
  knitr::kable(caption = JuneTitle, row.names = FALSE) %>%
  row_spec(0, bold = TRUE, color = "white", background = RDFZRed3) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE,
                fixed_thead = TRUE)

save_kable(JuneTable, file = paste0(JuneTitle, ".png"))

Anonymised Tables

# Anonymised
# Final grades for every student with the Student and Teacher columns removed,
# so rows are identified only by the Anon code. Sorted best grade first, saved
# as a PNG named after the caption, then printed.
AnonTitle <- paste(Class, Eval, "Final Grades - Anonymised", sep = " - ")

AnonTable <-
  MockTotalsCurvedFinal %>%
  subset(select = -c(Student, Teacher)) %>%
  arrange(-`Grade (R)`) %>%
  knitr::kable(caption = AnonTitle, row.names = FALSE) %>%
  row_spec(0, bold = TRUE, color = "white", background = RDFZRed3) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE,
                fixed_thead = TRUE)

save_kable(AnonTable, file = paste0(AnonTitle, ".png"))

AnonTable
AS Psychology - Cambridge Mock Exam - Final Grades - Anonymised
Anon Paper 1 Paper 2 (R) Total (R) Grade (R) Letter
A15 44 43 89 98.9 A*
A18 42 45 89 98.9 A*
A23 41 44 87 96.7 A*
A14 43 42 87 96.7 A*
A20 39 45 86 95.6 A*
A24 39 43 84 93.3 A*
A5 42 39 83 92.2 A*
A10 39 42 83 92.2 A*
A11 41 39 82 91.1 A*
A26 38 42 82 91.1 A*
A6 37 42 81 90.0 A*
A21 40 34 76 84.4 A
A22 38 36 76 84.4 A
A9 35 36 73 81.1 A
A3 35 35 72 80.0 A
A1 37 33 72 80.0 A
A19 35 34 71 78.9 B
A17 34 34 70 77.8 B
A2 29 36 67 74.4 B
A16 29 35 66 73.3 B
A7 31 32 65 72.2 B
A8 28 33 63 70.0 B
A25 35 26 63 70.0 B
A12 32 29 63 70.0 B
A13 32 27 61 67.8 C
A4 15 15 32 35.6 U

By-Question Analysis and Comments

In the sections below, we analyse the AS Psychology - Cambridge Mock Exam responses on a question-by-question basis. For each question, we:

  • Show the distribution of student grades obtained
  • Show the Cambridge Mark Scheme for the question
  • Discuss any general comments on student responses, including where students typically went wrong
  • Provide two exemplar responses
    • One exemplar response chosen from a student who gave a strong response to the question
    • One exemplar response provided by the Teacher (you should note that this exemplar response will be at a much higher level than you would be expected to provide in either AS or A2 Psychology)

Paper 1

Question 1a-

# Paper 1, Question 1a: bar chart of the marks students received.
QNumber <- 1

# Rows for this question only. Every mark from 0 up to the question's maximum
# becomes a factor level so that unused marks still get a slot on the axis.
# The maximum is read from QT1$Value (the figure quoted in the subtitle); a
# bare `Value` inside mutate() is looked up among the data's columns first and
# would then be indexed by row position rather than by question.
FocalData <-
  Paper1 %>%
    subset(Qnum == QT1$Number[QNumber]) %>%
    mutate(GradeF = factor(Grade, levels = 0:QT1$Value[QNumber]))

FocalTitle <- paste(Class, Eval, "Paper 1 - ", sep = " - ")
FocalSubTitle <- paste0("Question ", QT1$Number[QNumber], " - ", QT1$Question[QNumber],
                        " [", QT1$Value[QNumber], " marks]")

ggplot(data = FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +  # keep marks that no student received
  labs(x = "Question Score", y = "Count") +
  theme_alan() +
  ggtitle(wrapper(FocalTitle, width = 50),
          subtitle = wrapper(FocalSubTitle, width = 100))

Cambridge Mark Scheme

  • 1 mark for a brief aim
    • “To improve the eyes test”
    • “To test for theory of mind”
  • 2 marks for a full aim/detailed aim
    • To test people on a revised Eyes Test to see if some of the original deficits were no longer seen
    • To see if females would score higher on the eyes test compared to males

Comments

Every single one of you guys got the full two marks here - Good Work

However, this probably means that I was too lenient with the grading

So pay attention, especially if you received a “2, probably 1” as your grade here

Exemplar Response (Student)

To test the performance and score of Autism Spectrum participants on the revised eye test. To swee whether the deficit on the previous eye test is replicated.

Exemplar Response (Teacher)

The main aim of Baron-Cohen et al. was to investigate the relationship between autism (Autism Quotient Scores) and Theory of Mind (Eyes test scores) to investigate whether the Improved Eyes Test was a valid measure for assessing Theory of Mind.

Question 1b-

# Paper 1, Question 1b: bar chart of the marks students received.
QNumber <- 2

# Rows for this question only. Every mark from 0 up to the question's maximum
# becomes a factor level so that unused marks still get a slot on the axis.
# The maximum is read from QT1$Value (the figure quoted in the subtitle); a
# bare `Value` inside mutate() is looked up among the data's columns first and
# would then be indexed by row position rather than by question.
FocalData <-
  Paper1 %>%
    subset(Qnum == QT1$Number[QNumber]) %>%
    mutate(GradeF = factor(Grade, levels = 0:QT1$Value[QNumber]))

FocalTitle <- paste(Class, Eval, "Paper 1 - ", sep = " - ")
FocalSubTitle <- paste0("Question ", QT1$Number[QNumber], " - ", QT1$Question[QNumber],
                        " [", QT1$Value[QNumber], " marks]")

ggplot(data = FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +  # keep marks that no student received
  labs(x = "Question Score", y = "Count") +
  theme_alan() +
  ggtitle(wrapper(FocalTitle, width = 50),
          subtitle = wrapper(FocalSubTitle, width = 100))

Cambridge Mark Scheme

  • 2 marks available for the results of two groups (a comparison is needed)
    • The AS/HFA group scored the highest (1 mark) compared to students/IQ matched controls (1 mark)
  • 1 mark for the correct use of data for at least one group
    • The average score for the AS/HFA group was 34.4/50

Comments

Performance was okay here, other than many students losing marks for not supplying data

You know how I feel about memorising data (it’s useless, not something psychologists do, and doesn’t test your understanding of anything), but it’s unfortunately part of Cambridge

  • However you should remember that data is only ever at most worth one mark on any given question - i.e. memorising it shouldn’t be what you prioritize

Note: You cannot simply refer to groups with non-descriptive identifiers like “Group 1”, “Group 3”, etc.

Exemplar Response (Student)

The AQ test score for the AS/HFA group ishigher than the score from Group 3, which are the students from University of Cambridge. AQ score for AS/HFA group has a mean of 34, which is higher than Group 3’s mean.

Exemplar Response (Teacher)

Baron-Cohen et al. found that participants who had been screened for High Functioning Autism scored the highest on the Autism Spectrum Quotient (M= 34.4/50); higher than both non-autistic students (M=19/50) and IQ-matched controls from the normal population (M=18/50).

Question 2a-

# Paper 1, Question 2a: bar chart of the marks students received.
QNumber <- 3

# Rows for this question only. Every mark from 0 up to the question's maximum
# becomes a factor level so that unused marks still get a slot on the axis.
# The maximum is read from QT1$Value (the figure quoted in the subtitle); a
# bare `Value` inside mutate() is looked up among the data's columns first and
# would then be indexed by row position rather than by question.
FocalData <-
  Paper1 %>%
    subset(Qnum == QT1$Number[QNumber]) %>%
    mutate(GradeF = factor(Grade, levels = 0:QT1$Value[QNumber]))

FocalTitle <- paste(Class, Eval, "Paper 1 - ", sep = " - ")
FocalSubTitle <- paste0("Question ", QT1$Number[QNumber], " - ", QT1$Question[QNumber],
                        " [", QT1$Value[QNumber], " marks]")

ggplot(data = FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +  # keep marks that no student received
  labs(x = "Question Score", y = "Count") +
  theme_alan() +
  ggtitle(wrapper(FocalTitle, width = 50),
          subtitle = wrapper(FocalSubTitle, width = 100))

Cambridge Mark Scheme

  • 1 mark for the nature side of the argument
    • The nature side of the debate is about what behaviors we are born with (1 mark)
  • 1 mark for the nurture side of the argument
    • The nurture side of the debate is about what we learn in our lives

Comments

Most of you got this, so good work

Make sure you are completely explicit about what is nature and what is nurture
  • i.e. “The nature/nurture debate is about whether behaviors are inborn or are learned” isn’t a creditable full-mark response

Exemplar Response (Student)

Nature refers to the behaviors can be thought from genetic, that people are born with certain characteristics. However, nurture refers to the behaviors can be determined from previous experience after people born. Therefore, this lets a debate.

Exemplar Response (Teacher)

The nature-nurture debate refers to the debate about the best explanation for animal and human behavior. According to the nature side of the debate, behaviors are in-born - determined by our biology and fixed at birth. According to the nurture side of the debate, behaviors are acquired from the environment - our interactions with other organisms and the things we learn along the way.

Question 2b-

# Paper 1, Question 2b: bar chart of the marks students received.
QNumber <- 4

# Rows for this question only. Every mark from 0 up to the question's maximum
# becomes a factor level so that unused marks still get a slot on the axis.
# The maximum is read from QT1$Value (the figure quoted in the subtitle); a
# bare `Value` inside mutate() is looked up among the data's columns first and
# would then be indexed by row position rather than by question.
FocalData <-
  Paper1 %>%
    subset(Qnum == QT1$Number[QNumber]) %>%
    mutate(GradeF = factor(Grade, levels = 0:QT1$Value[QNumber]))

FocalTitle <- paste(Class, Eval, "Paper 1 - ", sep = " - ")
FocalSubTitle <- paste0("Question ", QT1$Number[QNumber], " - ", QT1$Question[QNumber],
                        " [", QT1$Value[QNumber], " marks]")

ggplot(data = FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +  # keep marks that no student received
  labs(x = "Question Score", y = "Count") +
  theme_alan() +
  ggtitle(wrapper(FocalTitle, width = 50),
          subtitle = wrapper(FocalSubTitle, width = 100))

Cambridge Mark Scheme

  • 1 mark for each relevant point made
    • Alex was taught/trained how to use same/different, so this was learned
    • Alex was rewarded for his efforts and got better at the skill of same/different
    • Alex was taught to label colors and materials
    • Alex learned skills through operant conditioning/social learning/Model-Rival technique
    • Alex could transfer his knowledge to novel objects which could only happen if he had truly learned labels
    • Alex demonstrated behaviors that are not common among wild parrots
    • This shows that Alex was not born with these abilities
    • Alex had already learned some vocabulary in previous studies

Comments

Everyone got at least two marks here, which is good

However, you need to pay attention to what a question is worth points-wise
  • For a question like this that is worth 4 marks, you need to be certain that you’re saying 4 things!

Exemplar Response (Student)

Duncan is correct, because in the study by Pepperberg the African Grey parrot Alex does not born with the skill of labelling the concepts vocally. There is a long training process. Alex was trained for years to learn how to speak, and then spend months to understand the concept of same and different. THe study proves that learning is important. Through operant learning, parrots can gain skills taht they are not born with. Another African Gray Parrot cannot perform the task like Alex does. For example getting 77.6% correct in the first trial, because Alex has been trained. This supports nature.

Exemplar Response (Teacher)

Duncan suggests that Pepperberg’s results with Alex support the nurture side of the debate for a number of reasons. First, Alex has a history of learning language in Pepperberg’s laboratory, including having already learned the words for different colors and shapes of objects. Much like in the current experiment, these skills were gained through conditioning. Second, Alex acquired the linguistic categories taught in this study through a specific form of learning known as the Model-Rival approach. Third, Alex could transfer his knowledge to novel objects. These all support the nurture debate because Alex is an African Gray parrot, who is not normally able to use language in this way. Alex’s acquisition of abilities that are not shared with other parrots suggests that Alex acquired them via learning and interaction with his environment.

Question 3a-

# Paper 1, Question 3a: bar chart of the marks students received.
QNumber <- 5

# Rows for this question only. Every mark from 0 up to the question's maximum
# becomes a factor level so that unused marks still get a slot on the axis.
# The maximum is read from QT1$Value (the figure quoted in the subtitle); a
# bare `Value` inside mutate() is looked up among the data's columns first and
# would then be indexed by row position rather than by question.
FocalData <-
  Paper1 %>%
    subset(Qnum == QT1$Number[QNumber]) %>%
    mutate(GradeF = factor(Grade, levels = 0:QT1$Value[QNumber]))

FocalTitle <- paste(Class, Eval, "Paper 1 - ", sep = " - ")
FocalSubTitle <- paste0("Question ", QT1$Number[QNumber], " - ", QT1$Question[QNumber],
                        " [", QT1$Value[QNumber], " marks]")

ggplot(data = FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +  # keep marks that no student received
  labs(x = "Question Score", y = "Count") +
  theme_alan() +
  ggtitle(wrapper(FocalTitle, width = 50),
          subtitle = wrapper(FocalSubTitle, width = 100))

Cambridge Mark Scheme

  • 1 mark for each correct point made
    • Participants were woken at different increments of time (during REM sleep)
    • They were asked to estimate the amount of time that they had been dreaming
    • To the nearest minute
    • They were not given a fixed choice of how to respond

Comments

A tough question that many of you missed entirely

For my class, this is a part of the design I covered explicitly in the review despite no one asking me about it directly

Pay attention to your teachers - if we are voluntarily covering something again, it is probably important

Exemplar Response (Student)

Researchers at first just simply asked the participants about their estimation of sleep duration in open-ended questions and record the answer in qualitative data and correctness in quantitative data.

Exemplar Response (Teacher)

In the first version of their study, Dement & Kleitman woke their participants at random times during REM sleep and asked the participants to estimate how long they had been dreaming for, rounded to the nearest minute.

Question 3b-

# Paper 1, Question 3b: bar chart of the marks students received.
QNumber <- 6

# Rows for this question only. Every mark from 0 up to the question's maximum
# becomes a factor level so that unused marks still get a slot on the axis.
# The maximum is read from QT1$Value (the figure quoted in the subtitle); a
# bare `Value` inside mutate() is looked up among the data's columns first and
# would then be indexed by row position rather than by question.
FocalData <-
  Paper1 %>%
    subset(Qnum == QT1$Number[QNumber]) %>%
    mutate(GradeF = factor(Grade, levels = 0:QT1$Value[QNumber]))

FocalTitle <- paste(Class, Eval, "Paper 1 - ", sep = " - ")
FocalSubTitle <- paste0("Question ", QT1$Number[QNumber], " - ", QT1$Question[QNumber],
                        " [", QT1$Value[QNumber], " marks]")

ggplot(data = FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +  # keep marks that no student received
  labs(x = "Question Score", y = "Count") +
  theme_alan() +
  ggtitle(wrapper(FocalTitle, width = 50),
          subtitle = wrapper(FocalSubTitle, width = 100))

Cambridge Mark Scheme

  • 1 mark for each correct point made
    • Participants were woken at either 5 or 15 minutes after the onset of REM sleep
    • They were asked to choose whether they had been dreaming for 5 or 15 minutes

Comments

No additional comments

Unfortunately some of you put this revised version of the measure as your answer for the previous question, then talked about something else (incorrect) here

Exemplar Response (Student)

Participants get into REM sleep with electrode attached on the scalep. After 5 or 15 minutes woke by door bell. THey were asked to estimate their length of REM sleep by choose 5 minutes or 15 minutes

Exemplar Response (Teacher)

In their initial procedure, Dement & Kleitman found that participants could not accurately determine how long they had been dreaming. To make this question both easier to answer and to analyse, Dement & Kleitman instead woke participants after either 5 or 15 minutes of REM sleep and asked them whether they had been dreaming for 5 or 15 minutes, meaning they could score responses as simply correct or incorrect.

Question 4a-

# Paper 1, Question 4a: bar chart of the marks students received.
QNumber <- 7

# Rows for this question only. Every mark from 0 up to the question's maximum
# becomes a factor level so that unused marks still get a slot on the axis.
# The maximum is read from QT1$Value (the figure quoted in the subtitle); a
# bare `Value` inside mutate() is looked up among the data's columns first and
# would then be indexed by row position rather than by question.
FocalData <-
  Paper1 %>%
    subset(Qnum == QT1$Number[QNumber]) %>%
    mutate(GradeF = factor(Grade, levels = 0:QT1$Value[QNumber]))

FocalTitle <- paste(Class, Eval, "Paper 1 - ", sep = " - ")
FocalSubTitle <- paste0("Question ", QT1$Number[QNumber], " - ", QT1$Question[QNumber],
                        " [", QT1$Value[QNumber], " marks]")

ggplot(data = FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +  # keep marks that no student received
  labs(x = "Question Score", y = "Count") +
  theme_alan() +
  ggtitle(wrapper(FocalTitle, width = 50),
          subtitle = wrapper(FocalSubTitle, width = 100))

Cambridge Mark Scheme

  • 1 mark for each correct point made
    • Names mentioned on the tape as lures
    • Any new name
    • Words like “sister” were explicitly just ignored (not counted as responses OR false alarms), as were other things like names of places!

Comments

This question is poorly worded by Cambridge, which led to many of you guys giving weird responses like “Jeff”

That’s technically correct - but so would be any name other than the 8 names in the study (you could just write down your own name)

However, the mark scheme suggests that Cambridge was looking for something else (which makes more sense to ask)

Revised Question: Identify two criteria that were used for determining whether a name provided during the Recall Stage of Andrade (doodling) would be recorded as a false alarm

Exemplar Response (Student)

False alarm oppose when participant named a lure in the recording or a named a new name (e.g. Dog, Alex)

Exemplar Response (Teacher)

In the recall phase of Andrade, participants were asked to recall the eight names that had been mentioned in the recording. If participants provided either any of the names that had been included in the recording as “lures” (names that were mentioned as NOT attending the party), or any name not mentioned in the recording, this was scored as a “False Alarm”. Both other descriptors like “sister” and non-names (e.g. location of places) were not included as either correct recall or as false alarms.

Question 4b-

# Paper 1, Question 4b: bar chart of the marks students received.
QNumber <- 8

# Rows for this question only. Every mark from 0 up to the question's maximum
# becomes a factor level so that unused marks still get a slot on the axis.
# The maximum is read from QT1$Value (the figure quoted in the subtitle); a
# bare `Value` inside mutate() is looked up among the data's columns first and
# would then be indexed by row position rather than by question.
FocalData <-
  Paper1 %>%
    subset(Qnum == QT1$Number[QNumber]) %>%
    mutate(GradeF = factor(Grade, levels = 0:QT1$Value[QNumber]))

FocalTitle <- paste(Class, Eval, "Paper 1 - ", sep = " - ")
FocalSubTitle <- paste0("Question ", QT1$Number[QNumber], " - ", QT1$Question[QNumber],
                        " [", QT1$Value[QNumber], " marks]")

ggplot(data = FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +  # keep marks that no student received
  labs(x = "Question Score", y = "Count") +
  theme_alan() +
  ggtitle(wrapper(FocalTitle, width = 50),
          subtitle = wrapper(FocalSubTitle, width = 100))

Cambridge Mark Scheme

  • 1 mark for suggesting problem
    • Might not be a representative sample
    • The sample was gender biased
  • 1 mark for contextualising with the study
    • All were members of a Psychology Unit participant panel and so might have been more motivated
    • There were many more females so the results may not generalise to males, for whom doodling might differentially affect memory

Comments

Pretty good here

Most of the students who lost marks here were just incorrect (i.e. they wrote something wrong about the sample)

But the responses could be better and the link to the study itself could be stronger (especially as we prepare for A2 Psychology)

Exemplar Response (Student)

Samples were recruited from the psychological panel, they may mroe interested in psychology and show more motivation toward the study. The result therefore cannot generalize to normal people.

Exemplar Response (Teacher)

One problem with the sample used in Andrade is a problem common to opportunity samples obtained from university populations : generalisability. In addition to the issue that university students tend to be of similar ages and socioeconomic status, they are also often more knowledgeable about psychology, and so might be more likely to be highly motivated to participate, or to figure out the aims of an experiment and thus to produce demand characteristics. For example in this case they may have been more likely to realize that the deception was false and that they would indeed be tested on their memory for the names or places mentioned in the recording.

Question 4c-

# Paper 1, Question 4c: bar chart of the marks students received.
QNumber <- 9

# Rows for this question only. Every mark from 0 up to the question's maximum
# becomes a factor level so that unused marks still get a slot on the axis.
# The maximum is read from QT1$Value (the figure quoted in the subtitle); a
# bare `Value` inside mutate() is looked up among the data's columns first and
# would then be indexed by row position rather than by question.
FocalData <-
  Paper1 %>%
    subset(Qnum == QT1$Number[QNumber]) %>%
    mutate(GradeF = factor(Grade, levels = 0:QT1$Value[QNumber]))

FocalTitle <- paste(Class, Eval, "Paper 1 - ", sep = " - ")
FocalSubTitle <- paste0("Question ", QT1$Number[QNumber], " - ", QT1$Question[QNumber],
                        " [", QT1$Value[QNumber], " marks]")

ggplot(data = FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +  # keep marks that no student received
  labs(x = "Question Score", y = "Count") +
  theme_alan() +
  ggtitle(wrapper(FocalTitle, width = 50),
          subtitle = wrapper(FocalSubTitle, width = 100))

Cambridge Mark Scheme

  • 1 mark for a brief conclusion
    • Doodling aids concentration
  • 2 marks for a detailed conclusion
    • Recall performance was better for doodlers than for controls both in terms of monitored (names) and incidental (places) information

Comments

Good work here

Just be certain to earn that second mark!

Exemplar Response (Student)

Doodling on a piece of paper can actually aid concentration towards the primary boring task

Exemplar Response (Teacher)

One conclusion of Andrade was that doodling aided recall of both monitored information (names) and incidental information (places) mentioned in the recording. However, doodlers also had higher monitoring performance: they wrote down more correct names while listening. This makes it unclear whether doodling aided initial attention, memory, or both.

Question 5a-

# Paper 1, Question 5a: bar chart of the marks students received.
QNumber <- 10

# Rows for this question only. Every mark from 0 up to the question's maximum
# becomes a factor level so that unused marks still get a slot on the axis.
# The maximum is read from QT1$Value (the figure quoted in the subtitle); a
# bare `Value` inside mutate() is looked up among the data's columns first and
# would then be indexed by row position rather than by question.
FocalData <-
  Paper1 %>%
    subset(Qnum == QT1$Number[QNumber]) %>%
    mutate(GradeF = factor(Grade, levels = 0:QT1$Value[QNumber]))

FocalTitle <- paste(Class, Eval, "Paper 1 - ", sep = " - ")
FocalSubTitle <- paste0("Question ", QT1$Number[QNumber], " - ", QT1$Question[QNumber],
                        " [", QT1$Value[QNumber], " marks]")

ggplot(data = FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +  # keep marks that no student received
  labs(x = "Question Score", y = "Count") +
  theme_alan() +
  ggtitle(wrapper(FocalTitle, width = 50),
          subtitle = wrapper(FocalSubTitle, width = 100))

Cambridge Mark Scheme

  • 1 mark for each correct point
    • Each item had to be rated on a scale from 1-8
    • 1= Definitely did not happen (before the age of 10)
    • 8= Definitely did happen (before the age of 10)

Comments

Most of you identified that this was an 8 point scale

However, many of you wrote that the FHI rates how much people like food, which isn’t right!

To remember this, focus on the word “History” - which refers to things that happened in the past.

Exemplar Response (Student)

The rating scale was used to find out the food preference in childhood. 1 (definitely not happened) to 8 (definitely did happen before the age of 10)

Exemplar Response (Teacher)

Laney et al. administered a Food History Inventory to their participants twice. Participants were asked to provide a rating from 1 to 8 about their history (before age 10) with a large number of foods to statements like “Loved asparagus the first time you tried it”. On this scale, a 1 represented “Definitely did not happen” and an 8 represented “Definitely did happen”.

Question 5b-

# Paper 1, Question 5b: bar chart of the marks students received.
QNumber <- 11

# Rows for this question only. Every mark from 0 up to the question's maximum
# becomes a factor level so that unused marks still get a slot on the axis.
# The maximum is read from QT1$Value (the figure quoted in the subtitle); a
# bare `Value` inside mutate() is looked up among the data's columns first and
# would then be indexed by row position rather than by question.
FocalData <-
  Paper1 %>%
    subset(Qnum == QT1$Number[QNumber]) %>%
    mutate(GradeF = factor(Grade, levels = 0:QT1$Value[QNumber]))

FocalTitle <- paste(Class, Eval, "Paper 1 - ", sep = " - ")
FocalSubTitle <- paste0("Question ", QT1$Number[QNumber], " - ", QT1$Question[QNumber],
                        " [", QT1$Value[QNumber], " marks]")

ggplot(data = FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +  # keep marks that no student received
  labs(x = "Question Score", y = "Count") +
  theme_alan() +
  ggtitle(wrapper(FocalTitle, width = 50),
          subtitle = wrapper(FocalSubTitle, width = 100))

Cambridge Mark Scheme

  • 1 mark for a result
    • Only the Love group’s ratings changed after manipulation
    • The Control group’s ratings on the FHI did not change
  • 1 mark for correct data
    • The Love group’s average rating increased by 2.6 points
    • The Control group’s average rating increased by 0.2 points

Comments

Not much to say here - data use was again weak (but again, not something you should focus on)

Exemplar Response (Student)

In Food History Inventory, Love group has significant higher score by increasing 2.6 point on false memory believe (like asparagus as a child) than control group which only increase by 0.2 point.

Exemplar Response (Teacher)

Laney et al. were particularly interested in how participants would respond to their focal question on their Food History Inventory: “I loved Asparagus the first time I had it” after having a false memory suggested to them. They found that FHI ratings on this item remained the same for participants in the control group (they increased by a non-significant 0.2 points), but increased significantly for participants in the false memory condition of the experiment (by 2.6 points).

Question 6-

# Paper 1, Question 6: bar chart of the marks students received.
QNumber <- 12

# Rows for this question only. Every mark from 0 up to the question's maximum
# becomes a factor level so that unused marks still get a slot on the axis.
# The maximum is read from QT1$Value (the figure quoted in the subtitle); a
# bare `Value` inside mutate() is looked up among the data's columns first and
# would then be indexed by row position rather than by question.
FocalData <-
  Paper1 %>%
    subset(Qnum == QT1$Number[QNumber]) %>%
    mutate(GradeF = factor(Grade, levels = 0:QT1$Value[QNumber]))

FocalTitle <- paste(Class, Eval, "Paper 1 - ", sep = " - ")
FocalSubTitle <- paste0("Question ", QT1$Number[QNumber], " - ", QT1$Question[QNumber],
                        " [", QT1$Value[QNumber], " marks]")

ggplot(data = FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +  # keep marks that no student received
  labs(x = "Question Score", y = "Count") +
  theme_alan() +
  ggtitle(wrapper(FocalTitle, width = 50),
          subtitle = wrapper(FocalSubTitle, width = 100))

Cambridge Mark Scheme

  • 3 marks for the answer from Lok
    • Data collected was kept confidential
    • No individual data was published
    • All we know is that these were students from the University of Minnesota
  • 3 marks for the answer from Hiruni
    • The participants were deliberately deceived about the purpose of the experiment (test of Suproxin)
    • The participants were told the wrong information about the injection
    • For example, one group was told that they would experience numbness and/or headaches after injection

Comments

Responses were very strong here - just slightly lacking in sufficient detail for some of you

You should make sure to use the appropriate psychological terminology

Pay attention to the question - you are being asked to explain one reason, not to list multiple reasons, so you can’t get credit just for listing a bunch of things.

Exemplar Response (Student)

The study is ethical because participants were give informed consent. They were told that during the experiment they will get injection of suproxin and researchers asked if they agree or not. THis gives participants an overview of the study, so if they don’t want to get injection, they still have chance to withdraw. For example, there is one participant drop out before experiment.

However, the study is also unethical becuase there is protection problem. First, participants were given an injection which may cause physiological pain. Also, participants were under euphoria and anger condition, so they may have psychological stress and emotional change. Thus, they may not have the same emotional state as they enter in the study.

Exemplar Response (Teacher)

Hiruni might be correct that Schachter and Singer’s experiment was unethical because of the possibility that they failed to adequately protect their experimental participants. For example, the participants in this study were injected with Epinephrine, which could produce a dangerous physiological response for which they were unprepared. This was made worse by the fact that many of the participants were either ignorant to, or deliberately misinformed, about the physiological response that would likely be produced by the injection, which they were told was a vision-enhancing drug called Suproxin.

However, Lok might respond that the study was actually ethical because Schachter & Singer did everything they could to protect their participants without revealing the purpose of the experiment. They obtained ethical approval because they consulted with the University Health center to ensure that their participants were in good enough health that they would be unlikely to experience significant negative effects from the administered epinephrine. Additionally, the epinephrine injection was given by a trained medical professional who was on site in case of any adverse reaction to the drug

Question 7-

# Paper 1, Question 7: bar chart of the marks students received.
QNumber <- 13

# Rows for this question only. Every mark from 0 up to the question's maximum
# becomes a factor level so that unused marks still get a slot on the axis.
# The maximum is read from QT1$Value (the figure quoted in the subtitle); a
# bare `Value` inside mutate() is looked up among the data's columns first and
# would then be indexed by row position rather than by question.
FocalData <-
  Paper1 %>%
    subset(Qnum == QT1$Number[QNumber]) %>%
    mutate(GradeF = factor(Grade, levels = 0:QT1$Value[QNumber]))

FocalTitle <- paste(Class, Eval, "Paper 1 - ", sep = " - ")
FocalSubTitle <- paste0("Question ", QT1$Number[QNumber], " - ", QT1$Question[QNumber],
                        " [", QT1$Value[QNumber], " marks]")

ggplot(data = FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +  # keep marks that no student received
  labs(x = "Question Score", y = "Count") +
  theme_alan() +
  ggtitle(wrapper(FocalTitle, width = 50),
          subtitle = wrapper(FocalSubTitle, width = 100))

Cambridge Mark Scheme

  • 1 mark for each partial answer/partially correct answer
    • Girls were more verbally aggressive (with a same sex model)
    • Boys had a higher aggression rate than girls
  • 2 marks for each full answer/fully correct answer
    • Boys who witnessed an aggressive male model had the highest imitative aggression score
    • Children exposed to the same sex model imitated them more than children exposed to opposite sex models

Comments

Note that this question did not require you to provide data in your response - it was just asking you to discuss the quantitative data, rather than some of the qualitative results from the study

Overall, responses were pretty strong here. Good work

Exemplar Response (Student)

The quantitative results are observation that recorded via behavioural checklists. It was found that the aggressive group were likely to reach higher physical aggression score than non-aggressive group. And boys are more likely to show physical aggression with a male model than with a female model.

Exemplar Response (Teacher)

The study by Bandura et al. found a number of differences on their measures of “imitative aggression”. First, they found that overall, children were more likely to imitate the behavior of a same sex model; for example, boys imitated more behavior when they observed a male model than when they observed a female model. Second, girls were more likely to imitate verbal aggression than boys, who were more likely to imitate physical aggression.

Question 8-

# Row of the Paper 1 question table (QT1) that this section reports on.
QNumber <- 14

# Responses to this question only, with the grade recoded as a factor whose
# levels run from 0 up to Value[QNumber].
# NOTE(review): Value is resolved by mutate() - presumably a column of Paper1,
# otherwise an object in the calling environment; confirm which.
FocalData <- mutate(
  subset(Paper1, Qnum == QT1$Number[QNumber]),
  GradeF = factor(Grade, levels = 0:Value[QNumber])
)

# Heading: Class, Eval and the paper label joined with " - ".
FocalTitle <- paste(Class, Eval, "Paper 1 - ", sep = " - ")
# Sub-heading: question number, question text and the marks available.
FocalSubTitle <- paste0(
  "Question ", QT1$Number[QNumber],
  " - ", QT1$Question[QNumber],
  " [", QT1$Value[QNumber], " marks]"
)

# Bar chart counting responses per score; drop = FALSE keeps factor levels
# that received no responses on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

Cambridge 10 Point Evaluate Mark Scheme

Cambridge 10 Point Evaluate Mark Scheme

Comments

Many of you are getting really strong with these responses - a total of 6 of you got 10/10 - great work

Some of the rest of you are still struggling a lot with structure - make totally explicit what it is that you’re talking about - i.e. have topic sentences to name your evaluation points and whether they are a strength or weakness right away.

For the love of all that is holy, you must discuss the named issue

Exemplar Response (Student)

The study by Milgram has lots of advantages. First, it has lots of controls including the scream from the tape recorder, the 45 volts shock trial, and the draw paper to eliminate demand characteristic. In the study, participants all are given a 45 volts electric shock before the experiment starts to let them believe that the electricity was real and study indeed test the aim of punishment and learning. Therefore, participant will not guess the aim of the true study is not about electric punishment and behave in the way that they thought about the aim of the experiment. Because behavior of participants are nature without demand characteristic, researchers can get valid result, and make sure that the authority have indeed affect the level of electric shock given by the participants to learner, increase the validity of the experiment.

Second, the study had covert and objective observation. Participants did not know that they were being observed, so they will behave more naturally, so the researchers can get the valid data and test whether obedience due to the order increase the validity. Moreover, observer make observation through the mind perspective can provide objective data which really reveal the mental state of participants giving a backup to the result of self-report of the pain level of learner. Researchers will confident that all the participants believed the experiment and give a valid result, researchers confident that the authoring higher affect the level of electric shock given to learner, increase validity.

However, there are some disadvantage. The study is generally unethical. It ignore the ethical guideline of protection, deception, and right to withdraw. Participants believe that they give the real electricity to the helpless learner. They experience the extreme stress, according to the observation, they were trembling and sweating, therefore their psychological state change from before to after the experiment. Moreover, if the participants wanted to give up the experiment, researchers will give the probe like “please continue” to ban participant to leave, which rebel the guideline that participants can leave the study anytime and do not give the reason. Problem the ethical guideline makes the participants felt that they were not well protected and there will be less people want to participate the psychological experiment. However, rebelling some guideline is necessary to researchers because they wants to study obedience which happen at extreme stress and hard condition, so they have to do that.

Second, the study lacks mundane realism, which means that the task in the experiment is the daily task. In the study, asking people to give the electric shocks to a helpless learner because he give the wrong pairs of words does not happen in real life, so it is not sure whether the result of the study didn’t obedience can fit to the real life task

Exemplar Response (Teacher)

The study by Milgram is an important one in the history of psychology. The features that make it important, however, are not all positive - the study has both strengths and weaknesses that are important to consider when evaluating it. For strengths, I will discuss the use of observation as a research method, and the applicability of the study results to real life. For weaknesses, I will discuss the ethics of the study, as well as the validity of the results (specifically with reference to demand characteristics).

The first strength of Milgram et al. is that the observation of participant responses was as unobtrusive as possible. Although participants understood that their behavior was being watched (because of the presence of a stooge acting in the role of “experimenter”), they could not see the actual covert observer making notes on their behaviors while administering shocks to the “learner”, both in terms of the quantitative data (the level of shock they administered) collected and in terms of notes about their physiological and behavioral responses (qualitative data) to administering shocks, such as sweating, expressing doubt/remorse, etc. This method of observation thus avoided some possible demand characteristics while also enabling the researchers to record rich qualitative descriptions of how participants responded to the obedience requested of them.

Despite the fact that the use of semi-covert observation avoided some demand characteristics, a number of issues with the design and interpretation of the data in Milgram et al. make its validity questionable. First, there is substantial evidence that participants were aware that they were unlikely to be administering potentially fatal shocks to the “learner”. Part of this is almost certainly to do with their understanding that they would not be asked to seriously harm someone else in the context of a study about learning. At the same time, some parts of the experimental apparatus like the shock generator were not especially convincing - attempting to look like a piece of scientific equipment while also labeling its highest levels with “XXX” - not exactly a scientific notation! There is also evidence that the supposedly well-standardized prods to be given by the “experimenter” were not followed systematically. Collectively, these problems raise serious concerns about the validity of Milgram’s results.

These problems with the validity of Milgram’s results are compounded by many issues surrounding the ethics of the study. In Milgram’s study, participants were deceived about the purpose of the study, inadequately protected from the possibility of lasting harm, and in many cases not adequately debriefed about the purpose of the experiment and that they had not actually harmed the “learner”. In fact, the publication of Milgram’s book about his experiments was one of the factors that drove the American Psychological Association to reformulate their ethical guidelines in 1973.

Defenders of Milgram would likely respond to these criticisms by discussing a second strength of his work: that even if it isn’t perfect, an explanation for this type of behavior is necessary. At the time of Milgram’s original study, the world was attempting to grapple with the “banality of evil” of Nazi atrocities during WWII. Subsequently, there have been many more wars and genocides, and psychology requires an explanation of these evils, especially regarding the question of whether “good people” are capable of doing evil things. So even if Milgram’s experiment was unethical, it might have been necessary to explain behavior and avoid such atrocities in the future.

Overall I think the weaknesses of Milgram outweigh the strength by a substantial margin. Although I am sympathetic to the idea that understanding these issues might be “necessary”, I think the problems of validity in Milgram’s work render this argument moot. If we could trust the data obtained and that it was not only valid but could also be generalised to the real world we might be able to ignore the large ethical problems of the study. However with the number of problems with the actual data and its interpretation, it seems that Milgram unethically treated participants for no real gain in scientific understanding.

Paper 2

Question 1a-

# Row of the Paper 2 question table (QT2) that this section reports on.
QNumber <- 1

# Responses to this question only, with the grade recoded as a factor whose
# levels run from 0 up to Value[QNumber].
# NOTE(review): Value is resolved by mutate() - presumably a column of Paper2,
# otherwise an object in the calling environment; confirm which.
FocalData <- mutate(
  subset(Paper2, Qnum == QT2$Number[QNumber]),
  GradeF = factor(Grade, levels = 0:Value[QNumber])
)

# Heading: Class, Eval and the paper label joined with " - ".
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
# Sub-heading: question number, question text and the marks available.
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses per score; drop = FALSE keeps factor levels
# that received no responses on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • 1 mark for naming the sampling technique
    • “Volunteer/Self-selected”
  • 1 mark for explaining how this was done by Milgram
    • participants gained by asking through advertisements

Comments

Almost all students got that this was volunteer sampling

Many of you failed to link this explicitly to the named study, which is part of the question!

We’ll see this problem many more times on this test.

Exemplar Response (Student)

Volunteer sampling, it was advertised and participants can participate based on their willingness, and can get rewards of money after the experiment

Exemplar Response (Teacher)

Milgram recruited his experimental participants by placing advertisements in local newspapers offering $4.50 for participating in a psychological experiment. Because the participants were self-selected, this was an example of volunteer sampling.

Question 1b-

# Row of the Paper 2 question table (QT2) that this section reports on.
QNumber <- 2

# Responses to this question only, with the grade recoded as a factor whose
# levels run from 0 up to Value[QNumber].
# NOTE(review): Value is resolved by mutate() - presumably a column of Paper2,
# otherwise an object in the calling environment; confirm which.
FocalData <- mutate(
  subset(Paper2, Qnum == QT2$Number[QNumber]),
  GradeF = factor(Grade, levels = 0:Value[QNumber])
)

# Heading: Class, Eval and the paper label joined with " - ".
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
# Sub-heading: question number, question text and the marks available.
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses per score; drop = FALSE keeps factor levels
# that received no responses on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • 1 mark for naming an advantage
    • participants are more committed to the task/less likely to drop out
    • sample is more likely to be representative
  • 1 mark for linking this to Milgram
    • participants would have been prepared to participate in a study on punishment
    • participants represented a sample that was less narrow than one recruited from Milgram’s university

Comments

Almost all of you failed to make the explicit link to Milgram

Many of you discussed “exclusion criteria” - i.e. only allowing certain types of people to participate. These are separate from sampling technique (i.e. we can (and often do) apply exclusion criteria to opportunity samples!)

Exemplar Response (Student)

Since they were volunteered, the drop-out rate will decrease, which means that all participants would be likely to finish the orders, for example to hurt the learner.

Exemplar Response (Teacher)

One advantage of volunteer sampling is that the self-selected group of participants is highly motivated and less likely to withdraw from the experiment or to fail to attend their appointment. Because they know the (false) goal of the task in advance of signing up, they are more likely to be comfortable with administering punishment.

Question 2a-

# Row of the Paper 2 question table (QT2) that this section reports on.
QNumber <- 3

# Responses to this question only, with the grade recoded as a factor whose
# levels run from 0 up to Value[QNumber].
# NOTE(review): Value is resolved by mutate() - presumably a column of Paper2,
# otherwise an object in the calling environment; confirm which.
FocalData <- mutate(
  subset(Paper2, Qnum == QT2$Number[QNumber]),
  GradeF = factor(Grade, levels = 0:Value[QNumber])
)

# Heading: Class, Eval and the paper label joined with " - ".
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
# Sub-heading: question number, question text and the marks available.
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses per score; drop = FALSE keeps factor levels
# that received no responses on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • 1 mark for a definitive answer with a direction + a reason

Comments

Most of you got this - great work

Exemplar Response (Student)

Directional hypothesis. Because it state a one-sided result and its detailed, instead of only pointing out recall relates to time period.

Exemplar Response (Teacher)

This is an example of a one-tailed hypothesis because it suggests a direction of the association between its variables. A two-tailed hypothesis for this study might be “Recall performance will be affected by the length of delay after exposure”.

Question 2bi-

# Row of the Paper 2 question table (QT2) that this section reports on.
QNumber <- 4

# Responses to this question only, with the grade recoded as a factor whose
# levels run from 0 up to Value[QNumber].
# NOTE(review): Value is resolved by mutate() - presumably a column of Paper2,
# otherwise an object in the calling environment; confirm which.
FocalData <- mutate(
  subset(Paper2, Qnum == QT2$Number[QNumber]),
  GradeF = factor(Grade, levels = 0:Value[QNumber])
)

# Heading: Class, Eval and the paper label joined with " - ".
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
# Sub-heading: question number, question text and the marks available.
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses per score; drop = FALSE keeps factor levels
# that received no responses on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • 1 mark for operationalising recall
    • the number of things recalled
    • the time taken to perform the recall

Comments

Your operationalisation requires… operationalisation - how do we arrive at the specific number that is used for analysis?

Exemplar Response (Student)

The amount of figures in a phone number they can recall and write down after a period of time (the phone number is offered by researchers)

Exemplar Response (Teacher)

Recall in this study could be operationalised as the number of correctly recalled names from a list of names, minus the number of false alarms (as was done in Laney et al.)

Question 2bii-

# Row of the Paper 2 question table (QT2) that this section reports on.
QNumber <- 5

# Responses to this question only, with the grade recoded as a factor whose
# levels run from 0 up to Value[QNumber].
# NOTE(review): Value is resolved by mutate() - presumably a column of Paper2,
# otherwise an object in the calling environment; confirm which.
FocalData <- mutate(
  subset(Paper2, Qnum == QT2$Number[QNumber]),
  GradeF = factor(Grade, levels = 0:Value[QNumber])
)

# Heading: Class, Eval and the paper label joined with " - ".
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
# Sub-heading: question number, question text and the marks available.
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses per score; drop = FALSE keeps factor levels
# that received no responses on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • 1 mark for operationalising short and long delays
    • e.g. short = 30 seconds, long = 10 years

Comments

Your operationalisation requires… operationalisation - what do the levels of the IV represent?

Be explicit about which level of your IV is which…

Exemplar Response (Student)

Some participants complete the recall task after 3 days (short delay) and others complete the task after 30 days (long delay).

Exemplar Response (Teacher)

Researchers could base the length of time between exposure and recall on distinctions between basic memory systems. Thus, they could operationalise short delays as 30 seconds (part of short term memory) and long delays as 10 minutes (part of long term memory)

Question 3ai-

# Row of the Paper 2 question table (QT2) that this section reports on.
QNumber <- 6

# Responses to this question only, with the grade recoded as a factor whose
# levels run from 0 up to Value[QNumber].
# NOTE(review): Value is resolved by mutate() - presumably a column of Paper2,
# otherwise an object in the calling environment; confirm which.
FocalData <- mutate(
  subset(Paper2, Qnum == QT2$Number[QNumber]),
  GradeF = factor(Grade, levels = 0:Value[QNumber])
)

# Heading: Class, Eval and the paper label joined with " - ".
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
# Sub-heading: question number, question text and the marks available.
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses per score; drop = FALSE keeps factor levels
# that received no responses on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • 1 mark for definition
    • Features of the setting which indicate the aim/change the participants responses

Comments

Great Work here overall

Exemplar Response (Student)

Demand characteristics are the features of experiments or setting that lead the participants behavior give away from the aim.

Exemplar Response (Teacher)

Demand characteristics refer to features of an experiment’s design that change the way that participants respond to questions. One type of demand characteristics arises when participants believe they have figured out the aim of the study (whether correct or not) and intentionally act in a way that will either confirm or refute the hypotheses of the experimenter.

Question 3aii-

# Row of the Paper 2 question table (QT2) that this section reports on.
QNumber <- 7

# Responses to this question only, with the grade recoded as a factor whose
# levels run from 0 up to Value[QNumber].
# NOTE(review): Value is resolved by mutate() - presumably a column of Paper2,
# otherwise an object in the calling environment; confirm which.
FocalData <- mutate(
  subset(Paper2, Qnum == QT2$Number[QNumber]),
  GradeF = factor(Grade, levels = 0:Value[QNumber])
)

# Heading: Class, Eval and the paper label joined with " - ".
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
# Sub-heading: question number, question text and the marks available.
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses per score; drop = FALSE keeps factor levels
# that received no responses on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • 1 mark for explanation of problem
    • Changes in the participant responses mean that differences in the DV are not due to the IV
    • Because participants are not responding only to the IV the findings will lack validity
    • Because it means researchers cannot judge causation

Comments

Pretty good work here. Most students who failed to score a point did so because their answer was unclear, or because they talked about reliability

Reliability has nothing to do with demand characteristics - in fact a test with very strong demand characteristics is likely more reliable (but this isn’t a good thing, because it is reliably measuring the wrong thing!)

Some of you also confuse social desirability with demand characteristics - the two sometimes overlap but they are not the same thing!

Exemplar Response (Student)

As participants don’t show their real thoughts/reaction, the level of DV will be affected, so the relation between IV and DV may be wrongly judged

Exemplar Response (Teacher)

Demand characteristics are problematic because they obscure the relationship between the IV and the DV, which makes judging causation difficult. When participants choose to act in a certain way, instead of responding only to manipulation of the IV, their results cannot be applied to the aims of the study validly.

Question 3b-

# Row of the Paper 2 question table (QT2) that this section reports on.
QNumber <- 8

# Responses to this question only, with the grade recoded as a factor whose
# levels run from 0 up to Value[QNumber].
# NOTE(review): Value is resolved by mutate() - presumably a column of Paper2,
# otherwise an object in the calling environment; confirm which.
FocalData <- mutate(
  subset(Paper2, Qnum == QT2$Number[QNumber]),
  GradeF = factor(Grade, levels = 0:Value[QNumber])
)

# Heading: Class, Eval and the paper label joined with " - ".
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
# Sub-heading: question number, question text and the marks available.
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses per score; drop = FALSE keeps factor levels
# that received no responses on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • 1 mark for description of a way to avoid demand characteristics linked to the study
    • Ensure participants do not know that willingness to share is the aim of the study
    • Hide the questions about sharing among others (to distract participants/disguise the aim)

Comments

No comments of note

Exemplar Response (Student)

Do not use questionnaire, but use natural observation. In an observation, when participants are not aware of the observation, they behave naturally, and we can observe the real condition of sharing.

Exemplar Response (Teacher)

In the case of this study demand characteristics likely arise because the aim of the study is so transparent. They could be avoided by making the aim less clear, likely by adding some filler questions to the questionnaire to disguise that the goal is to study sharing.

Question 4a-

# Row of the Paper 2 question table (QT2) that this section reports on.
QNumber <- 9

# Responses to this question only, with the grade recoded as a factor whose
# levels run from 0 up to Value[QNumber].
# NOTE(review): Value is resolved by mutate() - presumably a column of Paper2,
# otherwise an object in the calling environment; confirm which.
FocalData <- mutate(
  subset(Paper2, Qnum == QT2$Number[QNumber]),
  GradeF = factor(Grade, levels = 0:Value[QNumber])
)

# Heading: Class, Eval and the paper label joined with " - ".
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
# Sub-heading: question number, question text and the marks available.
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses per score; drop = FALSE keeps factor levels
# that received no responses on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • 1 mark suggesting an advantage
    • It produces functional information
    • It produces very detailed information about the location of activity
  • 1 mark linking directly to emotions
    • You can see which parts of the brain are active for different emotions
    • So you can look at brains of emotional and non-emotional people for differences

Comments

Another question where many of you lost marks for not explicitly linking to emotions

We’re seeing a pattern - this is a requirement for almost all Paper 2 questions!

And it’s a good idea anyway - answer the question you were asked!

Exemplar Response (Student)

It collects functional information which is objective and easy to analyses. Since the brain activity can show people’s situation when experiencing different emotions.

Exemplar Response (Teacher)

fMRI is a good candidate for studying emotions because it can give us access to detailed information about not only what parts of the brain are involved in processing emotional stimuli (e.g. the amygdala), but also how active those parts of the brain are when experiencing different intensities and types of emotions.

Question 4b-

# Row of the Paper 2 question table (QT2) that this section reports on.
QNumber <- 10

# Responses to this question only, with the grade recoded as a factor whose
# levels run from 0 up to Value[QNumber].
# NOTE(review): Value is resolved by mutate() - presumably a column of Paper2,
# otherwise an object in the calling environment; confirm which.
FocalData <- mutate(
  subset(Paper2, Qnum == QT2$Number[QNumber]),
  GradeF = factor(Grade, levels = 0:Value[QNumber])
)

# Heading: Class, Eval and the paper label joined with " - ".
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
# Sub-heading: question number, question text and the marks available.
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses per score; drop = FALSE keeps factor levels
# that received no responses on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • 1 mark suggesting a disadvantage
    • The scanner is very small/claustrophobic
    • The scanner is very noisy
  • 1 mark linking directly to emotions
    • You cannot easily tell which emotions relate to the test vs fear of the situation
    • this could distract/frighten the participant, making the test of emotions less valid

Answers discussing the disadvantages of quantitative data are irrelevant - the question is about fMRI

Comments

Another question where many of you lost marks for not explicitly linking to emotions

In addition to that, many of you also didn’t explicitly link to fMRI!

Exemplar Response (Student)

The fMRI scanner is used movement of blood to investigate the emotion, however, movement of blood may also be affected by drugs, illness, or something else. So the results of the fMRI showed may not be caused by emotions

Exemplar Response (Teacher)

One disadvantage of using fMRI to study emotions comes from the fact that fMRI machines are large and stationary. This means that the measurements they produce of brain activity associated with emotions might not be natural - participants may not respond in the same way to emotional stimuli because of the artificial environment of the fMRI machine, including the fact that they might be afraid of the machine itself, which could activate emotional areas of the brain, obscuring the influence of the IV.

Question 5-

# Row of the Paper 2 question table (QT2) that this section reports on.
QNumber <- 11

# Responses to this question only, with the grade recoded as a factor whose
# levels run from 0 up to Value[QNumber].
# NOTE(review): Value is resolved by mutate() - presumably a column of Paper2,
# otherwise an object in the calling environment; confirm which.
FocalData <- mutate(
  subset(Paper2, Qnum == QT2$Number[QNumber]),
  GradeF = factor(Grade, levels = 0:Value[QNumber])
)

# Heading: Class, Eval and the paper label joined with " - ".
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
# Sub-heading: question number, question text and the marks available.
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses per score; drop = FALSE keeps factor levels
# that received no responses on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

Students may earn a maximum of 4 marks for any one definition.

  • Up to 4 marks for the definition of inter-rater reliability
    • It is a measure of whether different researchers are scoring consistently
    • Indicates whether two scorers are rating responses in the same way
    • If they are reliable, ratings should be positively correlated with each other
    • For example, if data from an interview are interpreted in the same way
    • For example, Bandura et al. checked the scoring of aggressiveness between their raters/observers
  • Up to 4 marks for a definition of test-retest reliability
    • A way to find out whether a measure is consistent over time/measures whether a test is consistent
    • calculated by using the same test on the same participants twice (close in time)
    • two results for each participant should be roughly the same
    • There should be a strong positive correlation between the two sets of scores
    • for example if an IQ test was done on the same people twice, each person should get the same score
    • For example if Baron-Cohen et al used the AQ in the same group twice, they should get the same AQ score each time

Answers discussing the disadvantages of quantitative data are irrelevant - the question is about fMRI

Comments

Pretty good overall here

However, many of you confused test-retest reliability with replication - these aren’t the same thing!

Exemplar Response (Student)

Inter-rater reliability is the extent to which different researcher interpreting same qualitative data will provide same result from same raw materials. If the inter-rater reliability is high, it means that the result could be more reliable and less biased. For example, in Bandura et al there are 2 observers record children’s aggression behaviors through a one-way mirror. It could avoid researcher misinterpreting the data (matter their own belief). These 2 observers have high inter-rater reliability.

Test-retest reliability means whether a participants respond to the same question is consistent over time. Same question to asked twice in a questionnaire early and later to see if the answer is the same. For example Baron-Cohen et al measure the AQ twice in a group to ensure the result.

Exemplar Response (Teacher)

Inter-rater reliability is a metric that is used to quantify the consistency with which two researchers categorize or rate behaviors. For example, in a structured observation in Bandura’s study of aggression, two independent raters scored children for how aggressively they behaved on the playground. Inter-rater reliability was calculated by comparing their aggression scores via correlation - and it was found that there was a strong positive correlation between the ratings of the two researchers (a high inter-rater reliability)

Test-retest reliability is a metric that is used to quantify the stability with which an instrument measures responses. Test-retest reliability is calculated by comparing the scores from two separate administrations of a test (usually relatively close in time) via correlation - if the scores are highly correlated with one another, we can consider the instrument to have good test-retest reliability. For example, if I take an IQ test one day and receive a score of 115, when I take another IQ test the next day I should receive a similar score (perhaps from 110 to 120).

Question 6a-

# Row of the Paper 2 question table (QT2) that this section reports on.
QNumber <- 12

# Responses to this question only, with the grade recoded as a factor whose
# levels run from 0 up to Value[QNumber].
# NOTE(review): Value is resolved by mutate() - presumably a column of Paper2,
# otherwise an object in the calling environment; confirm which.
FocalData <- mutate(
  subset(Paper2, Qnum == QT2$Number[QNumber]),
  GradeF = factor(Grade, levels = 0:Value[QNumber])
)

# Heading: Class, Eval and the paper label joined with " - ".
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
# Sub-heading: question number, question text and the marks available.
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses per score; drop = FALSE keeps factor levels
# that received no responses on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • 1 mark for a correct answer referring to both fixed and variable questions
    • Ask some fixed questions, then ones that are specific to the interviewee/differ between participants

Comments

Great work here overall

Exemplar Response (Student)

When some questions in interview are fixed, interviewer can add or change the order of the existed questions when asking to participants.

Exemplar Response (Teacher)

A semi-structured interview is midway between a structured interview, where all participants are asked the same questions in the same order (fixed questions) and an unstructured interview, where there are no fixed questions. In a semi-structured interview, the interviewer must ask a set of fixed questions, however they may also include variable questions or change the order they ask questions in depending on how participants respond.

Question 6b-

# ---- Score distribution for the section headed "Question 6b" ----
# Row index used to look this question up in the QT2 question table.
QNumber <- 13

# Keep only the Paper2 rows for this question, then recode Grade as a factor
# whose levels run from 0 up to Value[QNumber].
# NOTE(review): `Value` is resolved inside mutate(), whereas the subtitle below
# reads QT2$Value -- confirm both refer to the same maximum mark.
FocalData <- Paper2 %>%
  subset(Qnum == QT2$Number[QNumber]) %>%
  mutate(GradeF = factor(Grade, levels = 0:Value[QNumber]))

# Text for the plot title and subtitle.
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses at each score; drop = FALSE keeps score levels
# that nobody achieved on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • 1 mark for any open question that relates to sleep and/or seasons
    • Describe how well you sleep in the winter
    • Explain whether you believe you sleep better in the summer or winter
    • Why do you think you sleep better in the summer?

Comments

Everyone got this mark. Good work

Exemplar Response (Student)

Why do you sleep better in the winter?

Exemplar Response (Teacher)

Freya could ask participants why they sleep better in the summer.

Question 6c-

# ---- Score distribution for the section headed "Question 6c" ----
# Row index used to look this question up in the QT2 question table.
QNumber <- 14

# Keep only the Paper2 rows for this question, then recode Grade as a factor
# whose levels run from 0 up to Value[QNumber].
# NOTE(review): `Value` is resolved inside mutate(), whereas the subtitle below
# reads QT2$Value -- confirm both refer to the same maximum mark.
FocalData <- Paper2 %>%
  subset(Qnum == QT2$Number[QNumber]) %>%
  mutate(GradeF = factor(Grade, levels = 0:Value[QNumber]))

# Text for the plot title and subtitle.
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses at each score; drop = FALSE keeps score levels
# that nobody achieved on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • 1 mark for identifying an advantage
    • Different questions can be asked to different participants
    • There will be some standardisation between participants (making it more reliable)
  • 1 mark for linking to sleep or seasons
    • people may differ in their views about what ‘better sleep’ is, and so need different questions
    • so the opinions about winter versus summer can be compared more easily

Comments

On this question I was too nice on the linking part - i.e. many of you received a second mark without this explicit link!

Exemplar Response (Student)

Semi-structured interview makes a interview not only limited in the set of questions, but can also collect information from new ones, so the information will be rich and comprehensive. For example, Freya can ask the question about the reason of participants’ previous choice of season

Exemplar Response (Teacher)

The main advantage of semi-structured interviews is that they allow for the collection of easily comparable data due to fixed questions (e.g. we would have responses from every participant to the question “do you sleep better in winter or summer”) while also allowing us to capture rich descriptive data that captures the unique experiences and opinions of individual participants. We might, for example, find out that one participant sleeps badly in the summer because they are afraid of bees.

Question 7a-

# ---- Score distribution for the section headed "Question 7a" ----
# Row index used to look this question up in the QT2 question table.
QNumber <- 15

# Keep only the Paper2 rows for this question, then recode Grade as a factor
# whose levels run from 0 up to Value[QNumber].
# NOTE(review): `Value` is resolved inside mutate(), whereas the subtitle below
# reads QT2$Value -- confirm both refer to the same maximum mark.
FocalData <- Paper2 %>%
  subset(Qnum == QT2$Number[QNumber]) %>%
  mutate(GradeF = factor(Grade, levels = 0:Value[QNumber]))

# Text for the plot title and subtitle.
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses at each score; drop = FALSE keeps score levels
# that nobody achieved on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • 1 mark for identifying the IV
    • When participants eat
    • Before or after eating

Comments

Overall good

Exemplar Response (Student)

Before eating lunch vs. after eating lunch

Exemplar Response (Teacher)

The Independent Variable in Olivia’s experiment would be the time at which participants eat relative to when they attend class - either before or after class

Question 7b-

# ---- Score distribution for the section headed "Question 7b" ----
# Row index used to look this question up in the QT2 question table.
QNumber <- 16

# Keep only the Paper2 rows for this question, then recode Grade as a factor
# whose levels run from 0 up to Value[QNumber].
# NOTE(review): `Value` is resolved inside mutate(), whereas the subtitle below
# reads QT2$Value -- confirm both refer to the same maximum mark.
FocalData <- Paper2 %>%
  subset(Qnum == QT2$Number[QNumber]) %>%
  mutate(GradeF = factor(Grade, levels = 0:Value[QNumber]))

# Text for the plot title and subtitle.
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses at each score; drop = FALSE keeps score levels
# that nobody achieved on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • 1 mark for explanation of repeated measures design
    • the same group of participants in all levels of the IV/conditions
  • 1 mark for link to this experiment
    • all participants are tested both before and after eating

Comments

Overall good

Exemplar Response (Student)

This is when every participants in the study experience all levels of IV. This test with same participants should be conducted at both condition of before eating or after eating

Exemplar Response (Teacher)

A repeated-measures design refers to an experimental design where participants take part in multiple (usually all) conditions of an experiment. For this experiment, a repeated-measures design would involve all participants taking part in the eating before class and the eating after class condition.

Question 7ci-

# ---- Score distribution for the section headed "Question 7ci" ----
# Row index used to look this question up in the QT2 question table.
QNumber <- 17

# Keep only the Paper2 rows for this question, then recode Grade as a factor
# whose levels run from 0 up to Value[QNumber].
# NOTE(review): `Value` is resolved inside mutate(), whereas the subtitle below
# reads QT2$Value -- confirm both refer to the same maximum mark.
FocalData <- Paper2 %>%
  subset(Qnum == QT2$Number[QNumber]) %>%
  mutate(GradeF = factor(Grade, levels = 0:Value[QNumber]))

# Text for the plot title and subtitle.
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses at each score; drop = FALSE keeps score levels
# that nobody achieved on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • 1 mark for generic suggestion
    • By having different groups of participants do the conditions in different orders
  • 1 mark for link to this experiment
    • by having one group do the “before eating” one day and “after eating” the next day and the second group do the opposite.

Comments

Overall good

Exemplar Response (Student)

Half of the participants will have class before eating on the first day, then have class after eating on second day. Another half of the participants will have class after eating on the first day, then have class before eating on the second day.

Exemplar Response (Teacher)

Olivia could counterbalance her experiment using an ABBA design. This means that she could split her experimental participants into two groups, with one group participating in the “before eating” condition (A) on day 1 and the “after eating” condition (B) on day 2, and a second group participating in B then A.

Question 7cii-

# ---- Score distribution for the section headed "Question 7cii" ----
# Row index used to look this question up in the QT2 question table.
QNumber <- 18

# Keep only the Paper2 rows for this question, then recode Grade as a factor
# whose levels run from 0 up to Value[QNumber].
# NOTE(review): `Value` is resolved inside mutate(), whereas the subtitle below
# reads QT2$Value -- confirm both refer to the same maximum mark.
FocalData <- Paper2 %>%
  subset(Qnum == QT2$Number[QNumber]) %>%
  mutate(GradeF = factor(Grade, levels = 0:Value[QNumber]))

# Text for the plot title and subtitle.
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses at each score; drop = FALSE keeps score levels
# that nobody achieved on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • 1 mark for generic advantage
    • It controls for practice/fatigue effects
  • 1 mark for link to this experiment
    • if people were bored with the task the second time so they did not concentrate as much
    • if people were better at the task the second time so they looked like their concentration had improved

Comments

Missing Links

Exemplar Response (Student)

It could increase the validity and shows that the change in DV is due to the IV rather than other variables. For example, to eliminate order effects to make students feel fatigue and they may guess the aim of the study, thus, have higher score of concentration.

Exemplar Response (Teacher)

The primary advantage of counterbalancing is that it can avoid order effects, principally fatigue and practice effects. For example, if this experiment was not counterbalanced and all participants did the “before eating” condition on Day 1 and then the “after eating” condition on Day 2, they might perform better on Day 2 not because of the manipulation of the IV (they have eaten before class), but instead because of a practice effect where they are now more familiar with the task.

Question 8a-

# ---- Score distribution for the section headed "Question 8a" ----
# Row index used to look this question up in the QT2 question table.
QNumber <- 19

# Keep only the Paper2 rows for this question, then recode Grade as a factor
# whose levels run from 0 up to Value[QNumber].
# NOTE(review): `Value` is resolved inside mutate(), whereas the subtitle below
# reads QT2$Value -- confirm both refer to the same maximum mark.
FocalData <- Paper2 %>%
  subset(Qnum == QT2$Number[QNumber]) %>%
  mutate(GradeF = factor(Grade, levels = 0:Value[QNumber]))

# Text for the plot title and subtitle.
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses at each score; drop = FALSE keeps score levels
# that nobody achieved on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

  • Three Major Omissions for a Correlational Study
    • What: Variable 1 (details of operationalisation)
    • What: Variable 2 (details of operationalisation)
    • How: Technique for producing/collecting data, i.e. procedure (tests, observations, questionnaires)
  • The minor omissions are:
    • Where- location of participants when data is collected
    • Who- participants, sample size
    • Hypothesis
    • Description of how closed questions will be scored
    • Data analysis (e.g. scatterplot)
    • Ethics
Cambridge 10 Point Design Mark Scheme

Cambridge 10 Point Design Mark Scheme

Comments

All over the board here…

We obviously need more practice with these - some of you still aren’t operationalising variables!

This is a correlation… there is no IV/DV

You need two measured variables. These have to be measured in different ways - if you only have a single observation then of course the two things will be perfectly correlated!

Exemplar Response (Teacher)

  • Aim- To investigate the correlational relationship between kindness and obedience
  • Hypothesis- There will be a positive correlation between kindness and obedience
  • Covariable 1 (Kindness)- Scores on the kindness test, operationalised as the sum of scores (from 0-15) on each of five key questions about kindness
  • Covariable 2 (Obedience)- Teacher ratings of obedience, operationalised as the average rating given by three teachers from 1 (completely disobedient) to 10 (completely obedient)
  • Sample- 50 students (25 female) chosen via opportunity sampling from RDFZ Chaoyang, aged from 15-18, selected semi-randomly from a larger pool of 200 participants
  • Procedures-
  • 1- Prior to the study, the researchers had all teachers at RDFZ Chaoyang provide ‘obedience ratings’ on a scale from 1 to 10 for each of their students
  • 2- From this group, the researchers selected a total of 50 students based on the following criteria:
    • Each student must have been given at least three obedience ratings
    • The inter-rater reliability of their 3 closest ratings must be at least 0.9
    • The 50 students chosen must equally cover the range of obedience ratings from 1 to 10 (5 in each level)
  • 3- The 50 selected students were brought into a meeting room and told they were taking part in a study about student attitudes towards learning
  • 4- They were given a questionnaire consisting of 30 questions where they provided answers from 0 (not at all) to 3 (all of the time)
    • Included in these 30 questions were 5 focal questions about kindness, e.g. “I enjoy helping other students succeed”
  • 5- After participating, all participants were debriefed about the true goals of the study, and given the opportunity to retroactively withdraw their data/participation

Data Analysis- Data was analysed by calculating the correlation coefficient of the average obedience score for each participant with the sum of their scores on the kindness scale. Data was visualised using a scatterplot

Question 8b-

# ---- Score distribution for the section headed "Question 8b" ----
# Row index used to look this question up in the QT2 question table.
QNumber <- 20

# Keep only the Paper2 rows for this question, then recode Grade as a factor
# whose levels run from 0 up to Value[QNumber].
# NOTE(review): `Value` is resolved inside mutate(), whereas the subtitle below
# reads QT2$Value -- confirm both refer to the same maximum mark.
FocalData <- Paper2 %>%
  subset(Qnum == QT2$Number[QNumber]) %>%
  mutate(GradeF = factor(Grade, levels = 0:Value[QNumber]))

# Text for the plot title and subtitle.
FocalTitle <- paste(Class, Eval, "Paper 2 - ", sep = " - ")
FocalSubTitle <- paste0(
  "Question ", QT2$Number[QNumber],
  " - ", QT2$Question[QNumber],
  " [", QT2$Value[QNumber], " marks]"
)

# Bar chart counting responses at each score; drop = FALSE keeps score levels
# that nobody achieved on the x axis.
ggplot(FocalData, aes(x = GradeF)) +
  geom_bar(stat = "count", position = pd, width = 0.8, fill = RDFZRed3) +
  scale_x_discrete(drop = FALSE) +
  theme_alan() +
  labs(
    x = "Question Score",
    y = "Count",
    title = wrapper(FocalTitle, width = 50),
    subtitle = wrapper(FocalSubTitle, width = 100)
  )

Cambridge Mark Scheme

Comments

Too many of you talked about ethics/sampling/samples here

Many of you ran out of time to answer this (fair enough)

Exemplar Response (Student)

I use questionnaire to detect and define the degree of participant’s obedience. However, the 10 question on the questionnaire is all about the obedience of different situation, so participants may easily to figure out the aim of the study and give away the aim, so next time, I will add some question like “what’s your favorite candy”, “What’s your favorite shoe brand”, which is not related to the obedience to reduce the demand characteristics.

Exemplar Response (Teacher)

My experiment may include some problems of generalisability because of the way that obedience measures are calculated. To be specific, all obedience ratings were given by teachers, and whether a student is (perceived to be) obedient at school may not give an accurate picture of how obedient they are overall. For example a student might be exceptionally obedient at school but a terror at home or in their free time. This would limit my ability to make generalised claims about the relationship between obedience and kindness.