The class had completed a quiz of 10 questions. The result was analysed to better understand how the students performed in class and whether there was any question that students needed help with.
pacman::p_load(readxl,tidyverse, reshape, lubridate, ggthemes, tinytex)
2 data-sets were given - answer key (“key”) and the students’ answers (“response”).
response <- read_excel("D:/Documents/Family/TKC/20180818_MTEBA/Academic/2019Sem1/EBA5001/3/Lab/assignment/data/response.xlsx", sheet = "Responses")
key <- read_excel("D:/Documents/Family/TKC/20180818_MTEBA/Academic/2019Sem1/EBA5001/3/Lab/assignment/data/key.xlsx", sheet = "key")
#view the data structure of the students' responses
response.factor <- response %>%
mutate_all(as.factor)
summary(response.factor)
## Student ID Q-1 Q-2 Q-3 Q-4 Q-5
## AX1001 : 1 FALSE:18 Integer: 1 FALSE: 6 FALSE:68 NA:75
## AX1002 : 1 TRUE :57 Numeric:74 TRUE :69 TRUE : 7
## AX1003 : 1
## AX1004 : 1
## AX1005 : 1
## AX1006 : 1
## (Other):69
## Q-6 Q-7 Q-8 Q-9 Q-10
## Error:65 Error:75 0 : 1 str(df) : 2 data.frame:70
## Even : 1 Error:74 summary(df):73 Error : 2
## Odd : 9 list : 2
## matrix : 1
##
##
##
#view actual students' response data
knitr::kable(
response[1:5, ],
caption = "Students' Answers")
| Student ID | Q-1 | Q-2 | Q-3 | Q-4 | Q-5 | Q-6 | Q-7 | Q-8 | Q-9 | Q-10 |
|---|---|---|---|---|---|---|---|---|---|---|
| AX1001 | TRUE | Numeric | TRUE | TRUE | NA | Error | Error | Error | summary(df) | data.frame |
| AX1002 | TRUE | Numeric | TRUE | TRUE | NA | Error | Error | Error | summary(df) | data.frame |
| AX1003 | TRUE | Numeric | TRUE | FALSE | NA | Error | Error | Error | summary(df) | data.frame |
| AX1004 | TRUE | Numeric | TRUE | FALSE | NA | Error | Error | Error | summary(df) | data.frame |
| AX1005 | TRUE | Numeric | TRUE | FALSE | NA | Error | Error | Error | summary(df) | data.frame |
#view the data structure of the key
key.factor <- key %>%
mutate_all(as.factor)
summary(key.factor)
## Q-1 Q-2 Q-3 Q-4 Q-5 Q-6 Q-7
## TRUE:1 Numeric:1 TRUE:1 FALSE:1 NA:1 Error:1 Error:1
## Q-8 Q-9 Q-10
## Error:1 summary(df):1 data.frame:1
#view actual key data
knitr::kable(
key[1, ],
caption = "Answer Key")
| Q-1 | Q-2 | Q-3 | Q-4 | Q-5 | Q-6 | Q-7 | Q-8 | Q-9 | Q-10 |
|---|---|---|---|---|---|---|---|---|---|
| TRUE | Numeric | TRUE | FALSE | NA | Error | Error | Error | summary(df) | data.frame |
It was important to take note of any missing data as it might affect the analysis or interpretation
missing = response %>%
filter(!complete.cases(.))
nrow(missing)/nrow(response) #no missing data
## [1] 0
Reshaping made it into a long format suitable for visualisation
response.reshaped <- response %>%
gather(key="question", value="answer", -`Student ID`)
key.reshaped <- key %>%
gather(key="question", value="model.answer")
response.reshaped <- response.reshaped %>%
left_join(key.reshaped) %>%
mutate(correctness=ifelse(answer==model.answer,"Correct","Wrong")) %>%
select(-contains("answer"))
## Joining, by = "question"
str(response.reshaped) #check data structure before reshaping
## Classes 'tbl_df', 'tbl' and 'data.frame': 750 obs. of 3 variables:
## $ Student ID : chr "AX1001" "AX1002" "AX1003" "AX1004" ...
## $ question : chr "Q-1" "Q-1" "Q-1" "Q-1" ...
## $ correctness: chr "Correct" "Correct" "Correct" "Correct" ...
response.reshaped <- response.reshaped %>%
mutate(question=as.factor(question),
question=factor(question,levels=c("Q-1","Q-2","Q-3","Q-4","Q-5","Q-6","Q-7","Q-8","Q-9","Q-10"))) #order the questions
How did each student perform?
#individual scores showing questions and individual (grouped by their scoring)
agg.ind.score <- response.reshaped %>%
group_by(`Student ID`,correctness) %>%
summarise(n.count=n()) %>%
filter(!correctness=="Wrong") %>%
select(-correctness) %>%
arrange(desc(n.count)) %>%
ungroup()
#change the student id into a factor
agg.ind.score <- agg.ind.score %>%
mutate(`Student ID` = factor(`Student ID`, levels = .$`Student ID`)) %>%
arrange(desc(n.count))
#recode the factor so that it can be re-arranged according to the score
agg.ind.score$`Student ID` = with(agg.ind.score, factor(`Student ID`, levels = rev(levels(`Student ID`))))
# lollipop chart
ggplot(agg.ind.score, aes(n.count, reorder(`Student ID`,n.count))) +
geom_segment(aes(x = 0, y = `Student ID`, xend = n.count, yend = `Student ID`), color = "#adadad" , size=1) +
geom_point(color="#014d64", size=3, alpha=0.6) +
geom_point(data=agg.ind.score[67:75,], aes(x=n.count, y=`Student ID`), colour="red", alpha=0.5 , size=3) +
theme_economist() +
theme(panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_text(size=7)) +
labs(x= "Student Score", y= "Student ID",
title = "Individual Student Score", subtitle = "Identify students who may require help")
Insight: 9 students might need help with their work and they have been identified.
What were the categories that the students belonged to?
#create score band
agg.ind.score <- agg.ind.score %>%
mutate(score_band=case_when(n.count==10 ~ "10_points",
n.count==9 ~ "9_points",
TRUE ~ "8_&_below_points"))
#bar plot ======
agg.ind.score %>%
group_by(score_band) %>%
summarise(n.count=n()) %>%
mutate(percent=round(n.count/nrow(agg.ind.score)*100,1),
score_band=as.factor(score_band)) %>%
arrange(desc(percent)) %>%
ggplot(aes(x=reorder(score_band,percent), y=percent, fill=score_band)) +
geom_bar(stat="identity", alpha=0.8, width = 0.6)+
#geom_bar(stat="identity", alpha=0.8)+
scale_fill_manual(values=c("#adadad","#7c260b" ,"#adadad"))+
geom_text(aes(label=paste0(n.count," ","(",percent,"%",")")), hjust=1.2 , vjust=0.5, color="white", size=6)+
theme_economist() +
coord_flip() +
theme(panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_text(size=12,hjust=1),
axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
legend.position="none") +
labs(x= "Score Band", y= "Percentage of Students (%)",
title = "% of Students Banded by Scores", subtitle = "Identify the potential size of students who may need help")
Insight: Generally, the class performed well - 9 or 10 marks. There were 9 students who might need help.
What were the questions that students needed help?
#bar plot ======
#question level =====
agg.qn.score <- response.reshaped %>%
group_by(question,correctness) %>%
summarise(n.count=n()) %>%
filter(!correctness=="Wrong") %>%
ungroup() %>%
mutate(percent=round(n.count/75*100,1)) %>%
arrange(desc(percent)) %>%
select(-correctness)
#create grouping flag to highlight the bar chart accordingly
agg.qn.score <- agg.qn.score %>%
mutate(question = factor(question, levels = .$question),
red_flag=case_when(percent<97 ~ "bad",
percent==100 ~ "excellent",
TRUE ~ "good"))
ggplot(agg.qn.score, aes(x=reorder(question,percent), y=percent, fill=red_flag)) +
geom_bar(stat="identity", alpha=0.8, width = 0.6)+
#geom_bar(stat="identity", alpha=0.8)+
scale_fill_manual(values=c("#7c260b","#76c0c1","#adadad"))+
geom_text(aes(label=paste0(n.count," ","(",percent,"%",")")), hjust=1.2 , vjust=0.5, color="white", size=6)+
theme_economist() +
coord_flip() +
theme(panel.grid.major.y = element_blank(),
panel.border = element_blank(),
axis.ticks.y = element_blank(),
axis.text.y = element_text(size=12,hjust=1.2),
axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank(),
legend.position="none") +
labs(x= "Question", y= "Percentage of Students (%)",
title = "Percentage of Correct Answers by Questions", subtitle = "Identify questions that the students may need revision")
Insight: All students knew how to answer Questions 5 and 7. Q1, Q3, Q4, Q6 and Q10 were the questions the students might need revision in.