# Question - Create algorithm which will create a bunch of questions for user based on test results
# Assumption - When the user logs in to the app, we know what his weakest and strongest areas are.
# Install the relevant packages. The package below is useful for obtaining random 'stratified' samples. A stratified sample is a sample that is drawn from a number of separate strata of the population, rather than at random from the whole population.
#install.packages("splitstackshape")
library(splitstackshape) # Load the package into the R environment
## Loading required package: data.table
# Sample database of questions, the operation they are categorized into & their level of difficulty.
# Read the question bank from disk: one row per question, with the operation
# it is categorized into and its level of difficulty.
question_data <- read.csv(file = "ques_data.csv", header = TRUE)
# Preview the first rows of the question bank
head(question_data, n = 6L)
## QID Operation HardLevel
## 1 Q001 Subtraction Easy
## 2 Q002 Multiplication Hard
## 3 Q003 Subtraction Easy
## 4 Q004 Multiplication Hard
## 5 Q005 Addition Easy
## 6 Q006 Subtraction Medium
# Column names for reference
names(question_data)
## [1] "QID" "Operation" "HardLevel"
# Step 1: As the user logs in, create a set of questions to act as the "cache" repository of questions. The cache is built using two variables in this case: Operation and the level of difficulty. The code below draws 5 questions from each combination of (Addition, Subtraction, Multiplication) & (Easy, Medium, Hard).
# Build the cache: a stratified draw of 5 questions from every
# Operation x HardLevel combination of the question bank.
stratified_sample <- stratified(
  indt = question_data,
  group = c("Operation", "HardLevel"),
  size = 5
)
# What the sample looks like. This forms our "cache" repository
stratified_sample
## QID Operation HardLevel
## 1: Q098 Addition Easy
## 2: Q005 Addition Easy
## 3: Q025 Addition Easy
## 4: Q054 Addition Easy
## 5: Q007 Addition Easy
## 6: Q151 Addition Hard
## 7: Q148 Addition Hard
## 8: Q072 Addition Hard
## 9: Q084 Addition Hard
## 10: Q068 Addition Hard
## 11: Q052 Addition Medium
## 12: Q128 Addition Medium
## 13: Q141 Addition Medium
## 14: Q093 Addition Medium
## 15: Q185 Addition Medium
## 16: Q081 Multiplication Easy
## 17: Q029 Multiplication Easy
## 18: Q131 Multiplication Easy
## 19: Q088 Multiplication Easy
## 20: Q140 Multiplication Easy
## 21: Q083 Multiplication Hard
## 22: Q077 Multiplication Hard
## 23: Q037 Multiplication Hard
## 24: Q182 Multiplication Hard
## 25: Q129 Multiplication Hard
## 26: Q100 Multiplication Medium
## 27: Q030 Multiplication Medium
## 28: Q184 Multiplication Medium
## 29: Q164 Multiplication Medium
## 30: Q010 Multiplication Medium
## 31: Q163 Subtraction Easy
## 32: Q094 Subtraction Easy
## 33: Q046 Subtraction Easy
## 34: Q066 Subtraction Easy
## 35: Q087 Subtraction Easy
## 36: Q035 Subtraction Hard
## 37: Q137 Subtraction Hard
## 38: Q076 Subtraction Hard
## 39: Q150 Subtraction Hard
## 40: Q015 Subtraction Hard
## 41: Q062 Subtraction Medium
## 42: Q073 Subtraction Medium
## 43: Q034 Subtraction Medium
## 44: Q177 Subtraction Medium
## 45: Q092 Subtraction Medium
## QID Operation HardLevel
# One view of the child-level data could contain a score that we compute for each operation. (For the purpose of this example, I have randomly assigned the scores, keeping in mind only one variable: the percentage of correct answers for each operation.)
# Load the per-child results: one row per child, with a score for each
# operation (the share of correct answers, randomly assigned for this example).
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
child_data <- read.csv("ChildData.csv", header = TRUE)
# Top few rows of the child data
head(child_data)
## ChildID Addition Subtraction Multiplication
## 1 C001 0.7126775 0.782950039 0.4305377
## 2 C002 0.6342301 0.000936738 0.5894451
## 3 C003 0.5581110 0.081617649 0.6917496
## 4 C004 0.2702424 0.240686355 0.4661299
## 5 C005 0.9363213 0.421840191 0.5206721
## 6 C006 0.2814887 0.841525197 0.5335251
# Column names for reference
names(child_data)
## [1] "ChildID" "Addition" "Subtraction" "Multiplication"
# Identifying the weakest & the strongest operations from the child level data
# To find out the weakest operation for child ID C005 (the fifth row in the dataset), I'm going to pull out the column name (that is, the operation) where the child has lowest percentage of correct answers.
# Rank the operations for one child from weakest to strongest. The child is the
# fifth row of child_data (ChildID C005 in the sample file) and the
# per-operation scores are read from columns 2 to 4.
child_row <- 5L
score_cols <- 2:4
# unlist() turns the one-row data frame into a plain numeric vector named by
# operation, so the ranking does not depend on how sort()/apply() treat data
# frames.
child_scores <- unlist(child_data[child_row, score_cols])
# A missing or non-numeric score would make the three-way ranking meaningless.
stopifnot(is.numeric(child_scores), !anyNA(child_scores))
# order() sorts ascending and leaves ties in their original column order, so
# the three picks below are always three different operations.
ops_weak_to_strong <- names(child_scores)[order(child_scores)]
# Weakest operation: lowest percentage of correct answers.
weakest_op <- ops_weak_to_strong[1]
weakest_op
## [1] "Subtraction"
# Strongest operation: highest percentage of correct answers.
strongest_op <- ops_weak_to_strong[length(ops_weak_to_strong)]
strongest_op
## [1] "Addition"
# Second weakest operation: the next one up from the weakest in the ranking.
second_weakest_op <- ops_weak_to_strong[2]
second_weakest_op
## [1] "Multiplication"
# Assign weights for each operation. Here, I'm assuming that we should show the largest number of questions from the operation the child is weakest in, so that the child is able to practise that operation more. My current assumption is that 50% of the questions will come from the weakest operation, 30% from the second weakest operation and 20% from the strongest operation. I'm also assuming that we will show a total of 10 questions per child.
# Number of questions to draw per operation (10 in total).
# Weakest operation
wo_weight <- 5
# Second weakest operation
swo_weight <- 3
# Strongest operation
so_weight <- 2
# One of the other things I'm considering is that I'm not going to showcase any hard questions from the weakest operation. For the other more 'comfortable' operations, I do not have this constraint.
# Replace = FALSE ensures that none of the questions repeat
# Draw the final question set from the cache, one draw per operation. The
# weakest operation is restricted to Easy and Medium questions; the other two
# operations draw from every difficulty level. replace = FALSE samples without
# replacement within each draw.
qid_weakest <- stratified(
  stratified_sample, "Operation", wo_weight,
  select = list(Operation = weakest_op, HardLevel = c("Easy", "Medium")),
  replace = FALSE
)
qid_second_weakest <- stratified(
  stratified_sample, "Operation", swo_weight,
  select = list(Operation = second_weakest_op),
  replace = FALSE
)
qid_strongest <- stratified(
  stratified_sample, "Operation", so_weight,
  select = list(Operation = strongest_op),
  replace = FALSE
)
# Use the rbind() generic rather than calling rbind.data.frame() directly, so
# that the method matching the class of the pieces is chosen by dispatch.
qid_list_final <- rbind(qid_weakest, qid_second_weakest, qid_strongest)
# NOTE(review): stratified() appears to return fewer rows than requested when a
# pool is too small; flag a short list instead of silently serving fewer
# questions.
if (nrow(qid_list_final) < wo_weight + swo_weight + so_weight) {
  warning("Fewer questions were selected than requested.", call. = FALSE)
}
qid_list_final
## QID Operation HardLevel
## 1: Q062 Subtraction Medium
## 2: Q073 Subtraction Medium
## 3: Q163 Subtraction Easy
## 4: Q066 Subtraction Easy
## 5: Q092 Subtraction Medium
## 6: Q100 Multiplication Medium
## 7: Q131 Multiplication Easy
## 8: Q182 Multiplication Hard
## 9: Q084 Addition Hard
## 10: Q098 Addition Easy