library('RODBC')
library('digest')
library('tidyr')
library('dplyr')
knitr::opts_chunk$set(comment = NA)

Data

source("../ODBC Source/odbc.source.r")
table <- 'spr'

query <- paste("
select spr_code, spr.awd_code, spr.rou_code
  , replace(initcap(awd_snam),'.','') as awd_snam
  , eqa_code
  , rou_name
  , crs_code, crs_name

from intuit.ins_spr             spr -- Stu Programme Route a record of study
left outer join intuit.ins_awd  awd -- Award
  on spr.awd_code = awd.awd_code
left outer join intuit.ins_eqa  eqa -- External Qual Aim
  on awd.awd_eqac = eqa.eqa_code
left outer join intuit.ins_rou  rou -- Route subjects attached to an award
  on spr.rou_code = rou.rou_code
left outer join intuit.srs_scj  scj -- Stu Course Join a record of registration
  on spr_code = scj_sprc
left outer join intuit.srs_crs  crs -- Course
  on scj_crsc = crs_code

where spr_edate >= '30-JUN-2014'
  and rou_name = 'Social Anthropology'

order by rou_name
  ")
dfr <- sqlQuery(odbc, query, stringsAsFactors = FALSE)

Anonymise personal id:

# http://jangorecki.github.io/blog/2014-11-07/Data-Anonymization-in-R.html
fun_anon <- function(x, algo="crc32"){
  unq_hashes <- vapply(unique(x), function(object) digest(object, algo=algo)
                       , FUN.VALUE="", USE.NAMES=TRUE)
  unname(unq_hashes[x])
}
dfr <- dfr %>% 
  mutate(SPR_CODE = fun_anon(SPR_CODE))

Award title unites an Award & Route

Different routes may share the same name, but use different codes related to different awards, either a BA or BSc depending on options taken, and MA or MSc for a student at a higher level of study.

head(dfr)
  SPR_CODE AWD_CODE ROU_CODE AWD_SNAM EQA_CODE            ROU_NAME
1 a3d3e8dc       BA      SAN       BA        B Social Anthropology
2 6feab9e5       BS      SAN      BSc        B Social Anthropology
3 a567ee65       MA      SAN       MA        M Social Anthropology
4 7031af50       MS      SAN      MSc        M Social Anthropology
5 df74f859       MP      SAN    MPhil       MP Social Anthropology
6  618bfbd      PHD      SAN      PhD      PHD Social Anthropology
  CRS_CODE                         CRS_NAME
1    UBSAN Bachelors in Social Anthropology
2    UBSAN Bachelors in Social Anthropology
3    TMSAN   Masters in Social Anthropology
4    TMSAN   Masters in Social Anthropology
5    RPSAN MPhil/PhD in Social Anthropology
6    RPSAN MPhil/PhD in Social Anthropology

A student on an MPhil/PhD programme may start with a lower award aim, until upgraded from MPhil to PhD.

Unite Award Abbreviation & Route Name

So an award title (AWD_TITL) is the combination of an award’s short name and the route, seperated by ‘in’. Use unite to generate the award title:

dfr_unit <- dfr %>% 
  unite('AWD_TITL', c(AWD_SNAM, ROU_NAME), sep = ' in ')

dfr_unit %>% 
  head()
  SPR_CODE AWD_CODE ROU_CODE                     AWD_TITL EQA_CODE
1 a3d3e8dc       BA      SAN    BA in Social Anthropology        B
2 6feab9e5       BS      SAN   BSc in Social Anthropology        B
3 a567ee65       MA      SAN    MA in Social Anthropology        M
4 7031af50       MS      SAN   MSc in Social Anthropology        M
5 df74f859       MP      SAN MPhil in Social Anthropology       MP
6  618bfbd      PHD      SAN   PhD in Social Anthropology      PHD
  CRS_CODE                         CRS_NAME
1    UBSAN Bachelors in Social Anthropology
2    UBSAN Bachelors in Social Anthropology
3    TMSAN   Masters in Social Anthropology
4    TMSAN   Masters in Social Anthropology
5    RPSAN MPhil/PhD in Social Anthropology
6    RPSAN MPhil/PhD in Social Anthropology
dput(head(dfr))
structure(list(SPR_CODE = c("a3d3e8dc", "6feab9e5", "a567ee65", 
"7031af50", "df74f859", "618bfbd"), AWD_CODE = c("BA", "BS", 
"MA", "MS", "MP", "PHD"), ROU_CODE = c("SAN", "SAN", "SAN", "SAN", 
"SAN", "SAN"), AWD_SNAM = c("BA", "BSc", "MA", "MSc", "MPhil", 
"PhD"), EQA_CODE = c("B", "B", "M", "M", "MP", "PHD"), ROU_NAME = c("Social Anthropology", 
"Social Anthropology", "Social Anthropology", "Social Anthropology", 
"Social Anthropology", "Social Anthropology"), CRS_CODE = c("UBSAN", 
"UBSAN", "TMSAN", "TMSAN", "RPSAN", "RPSAN"), CRS_NAME = c("Bachelors in Social Anthropology", 
"Bachelors in Social Anthropology", "Masters in Social Anthropology", 
"Masters in Social Anthropology", "MPhil/PhD in Social Anthropology", 
"MPhil/PhD in Social Anthropology")), .Names = c("SPR_CODE", 
"AWD_CODE", "ROU_CODE", "AWD_SNAM", "EQA_CODE", "ROU_NAME", "CRS_CODE", 
"CRS_NAME"), row.names = c(NA, 6L), class = "data.frame")

A ‘Course’ and ‘Instance’ of study

This is how a ‘course’ is defined in UK HE statistics, as the combination of an award aim and subject(s) studied as the route towards it.

Since external statistics don’t distinguish BA from BSc, or MA from MSc, but do distinguish an MPhil from a PhD, the courses would unite CRS_CODE and EQA_CODE:

dfr_crse <- dfr %>% 
  unite('CourseID', c(CRS_CODE, EQA_CODE), sep = '/')

dfr_crse %>% 
  head() %>% 
  select(CourseID, SPR_CODE) %>% 
  count(CourseID)
Source: local data frame [4 x 2]

   CourseID n
1  RPSAN/MP 1
2 RPSAN/PHD 1
3   TMSAN/M 2
4   UBSAN/B 2

Similarly, an ‘instance’ of study is defined as a student “aiming towards the award of a qualification(s)”. They may transfer course, but they remain on the same instance if their qualification aim remains the same.

An instance is the combination of a unique student, HUSID and qualification aim derived from their course. Each unique combination requires a new student instance identifier, HUMHUS.

odbcCloseAll()