Analysis of Autonomy and Discretionary Learning in Graduate Apprenticeship Programme

Introduction

This analysis explores the relationship between autonomy and discretionary learning outcomes in the Graduate Apprenticeship Programme. The study examines both alumni and current students to understand how workplace autonomy and support influence learning outcomes.

Data Preparation

Loading Required Packages

Importing and Preparing Data

# Function to clean and prepare data
prepare_data <- function(file_path, dataset_type) {
  message(paste("Reading", dataset_type, "data from", file_path))
  
  # Try to read the CSV data with error handling
  tryCatch({
    data <- read_csv(file_path)
    message(paste("Successfully read", nrow(data), "rows and", ncol(data), "columns"))
    
    # Add dataset identifier
    data$dataset <- dataset_type
    
    return(data)
  }, error = function(e) {
    message(paste("Error reading file:", e$message))
    message("Trying alternative approach with base R...")
    
    # Try with base R as a fallback
    data <- read.csv(file_path, stringsAsFactors = FALSE)
    message(paste("Successfully read", nrow(data), "rows and", ncol(data), "columns with base R"))
    
    # Add dataset identifier
    data$dataset <- dataset_type
    
    return(data)
  })
}

# Import alumni data
alumni_data <- prepare_data("standardized_alumni_survey_complete.csv", "Alumni")

# Import current students data
student_data <- prepare_data("standardized_current_survey_complete.csv", "Current")

Exploring the Data

Alumni Data

# Convert all relevant columns to numeric to avoid issues
columns_to_check <- c(
  paste0("Q11_", 1:5),
  paste0("Q12_", 1:5),
  paste0("Q13_", 1:5),
  paste0("Q14_", 1:5),
  paste0("Q15_", 1:5)
)

for(col in columns_to_check) {
  if(col %in% names(alumni_data) && !is.numeric(alumni_data[[col]])) {
    alumni_data[[col]] <- as.numeric(alumni_data[[col]])
  }
}

# Create composite scores using rowMeans
alumni_data$employer_support_score <- rowMeans(
  alumni_data[, paste0("Q11_", 1:5)], 
  na.rm = TRUE
)

alumni_data$university_support_score <- rowMeans(
  alumni_data[, paste0("Q12_", 1:5)], 
  na.rm = TRUE
)

alumni_data$work_learning_score <- rowMeans(
  alumni_data[, paste0("Q13_", 1:5)], 
  na.rm = TRUE
)

alumni_data$academic_integration_score <- rowMeans(
  alumni_data[, paste0("Q14_", 1:5)], 
  na.rm = TRUE
)

alumni_data$learning_outcome_score <- rowMeans(
  alumni_data[, paste0("Q15_", 1:5)], 
  na.rm = TRUE
)

# Create binary outcomes for logistic regression
alumni_data$high_work_learning <- ifelse(alumni_data$work_learning_score >= 4, 1, 0)
alumni_data$high_academic_integration <- ifelse(alumni_data$academic_integration_score >= 4, 1, 0)
alumni_data$high_learning_outcome <- ifelse(alumni_data$learning_outcome_score >= 4, 1, 0)

Alumni Demographics

# Extract demographics
alumni_demo <- alumni_data %>%
  dplyr::select(Q1, Q2, Q3, Q4, Q5, Q6) %>%
  dplyr::rename(
    "Age Group" = Q1,
    "Gender" = Q2,
    "University" = Q3,
    "Programme" = Q4,
    "Year of Completion" = Q5,
    "Level Achieved" = Q6
  )

# Summary table
alumni_demo %>%
  group_by(`Gender`) %>%
  summarise(Count = n()) %>%
  mutate(Percentage = round(Count / sum(Count) * 100, 1)) %>%
  kable(caption = "Gender Distribution of Alumni") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)

Gender Distribution of Alumni
Gender	Count	Percentage
Female	10	66.7
Male	4	26.7
NA	1	6.7

alumni_demo %>%
  group_by(`Age Group`) %>%
  summarise(Count = n()) %>%
  mutate(Percentage = round(Count / sum(Count) * 100, 1)) %>%
  kable(caption = "Age Distribution of Alumni") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)

Age Distribution of Alumni
Age Group	Count	Percentage
25-34	4	26.7
35-44	4	26.7
45-54	6	40.0
NA	1	6.7

Alumni Support and Learning Scores

# Create summary of alumni scores
alumni_scores <- alumni_data %>%
  dplyr::select(employer_support_score, university_support_score, 
         work_learning_score, academic_integration_score, learning_outcome_score) %>%
  dplyr::rename(
    "Employer Support" = employer_support_score,
    "University Support" = university_support_score,
    "Work-based Learning" = work_learning_score,
    "Academic Integration" = academic_integration_score,
    "Learning Outcomes" = learning_outcome_score
  )

# Summary statistics
alumni_scores_summary <- alumni_scores %>%
  summarise(across(everything(), 
                  list(Mean = ~mean(., na.rm = TRUE),
                       SD = ~sd(., na.rm = TRUE),
                       Min = ~min(., na.rm = TRUE),
                       Max = ~max(., na.rm = TRUE)))) %>%
  pivot_longer(cols = everything(), 
               names_to = c("Variable", "Statistic"), 
               names_pattern = "(.*)_(.*)") %>%
  pivot_wider(names_from = Statistic, values_from = value)

alumni_scores_summary %>%
  kable(caption = "Summary Statistics for Alumni Scores", digits = 2) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)

Summary Statistics for Alumni Scores
Variable	Mean	SD	Min	Max
Employer Support	2.93	1.20	1.0	5
University Support	4.04	0.83	1.6	5
Work-based Learning	4.09	0.94	1.6	5
Academic Integration	4.17	0.95	2.0	5
Learning Outcomes	4.33	0.67	2.6	5

Current Students Data

# Current students have direct autonomy measure (Q9)
# Transform autonomy variable for analysis
if("Q9" %in% names(student_data)) {
  # Create function to map autonomy levels
  map_autonomy <- function(x) {
    ifelse(x == "No autonomy", 1,
           ifelse(x == "Little autonomy", 2,
                  ifelse(x == "Moderate autonomy", 3,
                         ifelse(x == "Significant autonomy", 4,
                                ifelse(x == "Complete autonomy", 5, NA)))))
  }
  
  # Apply the mapping function
  student_data$autonomy_level <- map_autonomy(student_data$Q9)
} else {
  # Create a placeholder autonomy_level if Q9 doesn't exist
  student_data$autonomy_level <- NA
}

# Convert all relevant columns to numeric to avoid issues
for(col in columns_to_check) {
  if(col %in% names(student_data) && !is.numeric(student_data[[col]])) {
    student_data[[col]] <- as.numeric(student_data[[col]])
  }
}

# Create composite scores for students
student_data$employer_support_score <- rowMeans(
  student_data[, paste0("Q11_", 1:5)], 
  na.rm = TRUE
)

student_data$university_support_score <- rowMeans(
  student_data[, paste0("Q12_", 1:5)], 
  na.rm = TRUE
)

student_data$self_directed_learning_score <- rowMeans(
  student_data[, paste0("Q13_", 1:5)], 
  na.rm = TRUE
)

student_data$skill_application_score <- rowMeans(
  student_data[, paste0("Q14_", 1:5)], 
  na.rm = TRUE
)

student_data$learning_integration_score <- rowMeans(
  student_data[, paste0("Q15_", 1:5)], 
  na.rm = TRUE
)

# Create binary outcomes for logistic regression
student_data$high_self_directed <- ifelse(student_data$self_directed_learning_score >= 4, 1, 0)
student_data$high_skill_application <- ifelse(student_data$skill_application_score >= 4, 1, 0)
student_data$high_learning_integration <- ifelse(student_data$learning_integration_score >= 4, 1, 0)

Current Students Demographics

# Extract demographics
student_demo <- student_data %>%
  dplyr::select(Q1, Q2, Q3, Q4, Q5, Q9) %>%
  dplyr::rename(
    "Age Group" = Q1,
    "Gender" = Q2,
    "University" = Q3,
    "Programme" = Q4,
    "Year of Study" = Q5,
    "Autonomy Level" = Q9
  )

# Summary tables
student_demo %>%
  group_by(`Gender`) %>%
  summarise(Count = n()) %>%
  mutate(Percentage = round(Count / sum(Count) * 100, 1)) %>%
  kable(caption = "Gender Distribution of Current Students") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)

Gender Distribution of Current Students
Gender	Count	Percentage
Female	9	64.3
Male	3	21.4
NA	2	14.3

student_demo %>%
  group_by(`Autonomy Level`) %>%
  summarise(Count = n()) %>%
  mutate(Percentage = round(Count / sum(Count) * 100, 1)) %>%
  kable(caption = "Autonomy Level Distribution of Current Students") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)

Autonomy Level Distribution of Current Students
Autonomy Level	Count	Percentage
Complete autonomy	2	14.3
Little autonomy	1	7.1
No autonomy	1	7.1
Significant autonomy	8	57.1
NA	2	14.3

Current Students Autonomy and Learning Scores

# Create summary of student scores
student_scores <- student_data %>%
  dplyr::select(autonomy_level, employer_support_score, university_support_score, 
         self_directed_learning_score, skill_application_score, learning_integration_score) %>%
  dplyr::rename(
    "Autonomy Level" = autonomy_level,
    "Employer Support" = employer_support_score,
    "University Support" = university_support_score,
    "Self-directed Learning" = self_directed_learning_score,
    "Skill Application" = skill_application_score,
    "Learning Integration" = learning_integration_score
  )

# Summary statistics
student_scores_summary <- student_scores %>%
  summarise(across(everything(), 
                  list(Mean = ~mean(., na.rm = TRUE),
                       SD = ~sd(., na.rm = TRUE),
                       Min = ~min(., na.rm = TRUE),
                       Max = ~max(., na.rm = TRUE)))) %>%
  pivot_longer(cols = everything(), 
               names_to = c("Variable", "Statistic"), 
               names_pattern = "(.*)_(.*)") %>%
  pivot_wider(names_from = Statistic, values_from = value)

student_scores_summary %>%
  kable(caption = "Summary Statistics for Current Students Scores", digits = 2) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)

Summary Statistics for Current Students Scores
Variable	Mean	SD	Min	Max
Autonomy Level	3.75	1.14	1	5.0
Employer Support	2.17	0.82	1	3.8
University Support	2.07	1.11	1	5.0
Self-directed Learning	2.80	1.26	1	5.0
Skill Application	2.20	0.92	1	3.4
Learning Integration	1.96	0.70	1	2.8

Preparing Analysis Datasets

# Create analysis subset for alumni - INCLUDE ALL NEEDED VARIABLES
alumni_complete <- subset(alumni_data, 
                         Finished == "True" | Finished == TRUE,
                         select = c(employer_support_score, university_support_score, 
                                   work_learning_score, academic_integration_score, learning_outcome_score,
                                   high_work_learning, high_academic_integration, high_learning_outcome))

# Remove rows with NA in key variables
alumni_complete <- na.omit(alumni_complete)

# Create analysis subset for students - INCLUDE ALL NEEDED VARIABLES
student_complete <- subset(student_data,
                          Finished == "True" | Finished == TRUE,
                          select = c(autonomy_level, employer_support_score, university_support_score,
                                   self_directed_learning_score, skill_application_score, learning_integration_score,
                                   high_self_directed, high_skill_application, high_learning_integration))

# Remove rows with NA in key variables
student_complete <- na.omit(student_complete)

# Display data sizes
data_sizes <- data.frame(
  Dataset = c("Alumni (Complete Records)", "Current Students (Complete Records)"),
  Records = c(nrow(alumni_complete), nrow(student_complete))
)

data_sizes %>%
  kable(caption = "Number of Complete Records for Analysis") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)

Number of Complete Records for Analysis
Dataset	Records
Alumni (Complete Records)	11
Current Students (Complete Records)	9

Correlation Analysis

Alumni Correlations

# Install corrplot if not already installed
if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot")
}
# Load the corrplot package
library(corrplot)

# Create correlation matrix for key variables in alumni data
alumni_cor_vars <- alumni_complete[, c("employer_support_score", "university_support_score", 
                                       "work_learning_score", "academic_integration_score", 
                                       "learning_outcome_score")]

alumni_cor <- cor(alumni_cor_vars, use = "pairwise.complete.obs")

# Create a nicer correlation plot
corrplot::corrplot(alumni_cor, 
                  method = "circle", 
                  type = "upper", 
                  tl.col = "black",
                  tl.srt = 45,
                  tl.cex = 0.8,
                  diag = FALSE,
                  title = "Correlation Matrix for Alumni Variables",
                  mar = c(0, 0, 2, 0))

# Also display as a table with rounded values
round(alumni_cor, 2) %>%
  as.data.frame() %>%
  rownames_to_column("Variable") %>%
  kable(caption = "Correlation Matrix for Alumni Variables") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(1, bold = TRUE)

Correlation Matrix for Alumni Variables
Variable	employer_support_score	university_support_score	work_learning_score	academic_integration_score	learning_outcome_score
employer_support_score	1.00	0.25	0.08	-0.27	0.31
university_support_score	0.25	1.00	0.95	0.73	0.92
work_learning_score	0.08	0.95	1.00	0.87	0.87
academic_integration_score	-0.27	0.73	0.87	1.00	0.59
learning_outcome_score	0.31	0.92	0.87	0.59	1.00

Current Students Correlations

# Create correlation matrix for key variables in student data
student_cor_vars <- student_complete[, c("autonomy_level", "employer_support_score", 
                                         "university_support_score", "self_directed_learning_score", 
                                         "skill_application_score", "learning_integration_score")]

student_cor <- cor(student_cor_vars, use = "pairwise.complete.obs")

# Create a nicer correlation plot
corrplot::corrplot(student_cor, 
                  method = "circle", 
                  type = "upper", 
                  tl.col = "black",
                  tl.srt = 45,
                  tl.cex = 0.8,
                  diag = FALSE,
                  title = "Correlation Matrix for Current Students Variables",
                  mar = c(0, 0, 2, 0))

# Also display as a table with rounded values
round(student_cor, 2) %>%
  as.data.frame() %>%
  rownames_to_column("Variable") %>%
  kable(caption = "Correlation Matrix for Current Students Variables") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(1, bold = TRUE)

Correlation Matrix for Current Students Variables
Variable	autonomy_level	employer_support_score	university_support_score	self_directed_learning_score	skill_application_score	learning_integration_score
autonomy_level	1.00	-0.09	-0.17	0.03	0.01	0.24
employer_support_score	-0.09	1.00	0.88	0.74	0.52	0.66
university_support_score	-0.17	0.88	1.00	0.73	0.23	0.33
self_directed_learning_score	0.03	0.74	0.73	1.00	0.40	0.56
skill_application_score	0.01	0.52	0.23	0.40	1.00	0.83
learning_integration_score	0.24	0.66	0.33	0.56	0.83	1.00

Visualization of Key Relationships

Alumni Visualizations

# Create plots with consistent styling
theme_set(theme_minimal() +
          theme(
            plot.title = element_text(face = "bold", size = 14),
            axis.title = element_text(size = 12),
            legend.title = element_text(face = "bold"),
            panel.grid.minor = element_blank()
          ))

# Plot 1: Employer support vs. work learning for alumni
plot_alumni_1 <- ggplot(alumni_complete, aes(x = employer_support_score, y = work_learning_score)) +
  geom_point(alpha = 0.7, size = 3, color = "steelblue") +
  geom_smooth(method = "lm", se = TRUE, color = "darkred") +
  labs(title = "Employer Support and Work-based Learning",
       subtitle = "Alumni Respondents",
       x = "Employer Support Score",
       y = "Work-based Learning Score") +
  scale_x_continuous(breaks = 1:5) +
  scale_y_continuous(breaks = 1:5)

# Plot 2: University support vs. academic integration for alumni
plot_alumni_2 <- ggplot(alumni_complete, aes(x = university_support_score, y = academic_integration_score)) +
  geom_point(alpha = 0.7, size = 3, color = "steelblue") +
  geom_smooth(method = "lm", se = TRUE, color = "darkred") +
  labs(title = "University Support and Academic Integration",
       subtitle = "Alumni Respondents",
       x = "University Support Score",
       y = "Academic Integration Score") +
  scale_x_continuous(breaks = 1:5) +
  scale_y_continuous(breaks = 1:5)

# Plot 3: University support vs. learning outcomes for alumni
plot_alumni_3 <- ggplot(alumni_complete, aes(x = university_support_score, y = learning_outcome_score)) +
  geom_point(alpha = 0.7, size = 3, color = "steelblue") +
  geom_smooth(method = "lm", se = TRUE, color = "darkred") +
  labs(title = "University Support and Learning Outcomes",
       subtitle = "Alumni Respondents",
       x = "University Support Score",
       y = "Learning Outcome Score") +
  scale_x_continuous(breaks = 1:5) +
  scale_y_continuous(breaks = 1:5)

# Plot 4: Employer support vs. learning outcomes for alumni
plot_alumni_4 <- ggplot(alumni_complete, aes(x = employer_support_score, y = learning_outcome_score)) +
  geom_point(alpha = 0.7, size = 3, color = "steelblue") +
  geom_smooth(method = "lm", se = TRUE, color = "darkred") +
  labs(title = "Employer Support and Learning Outcomes",
       subtitle = "Alumni Respondents",
       x = "Employer Support Score",
       y = "Learning Outcome Score") +
  scale_x_continuous(breaks = 1:5) +
  scale_y_continuous(breaks = 1:5)

# Combine plots in a grid
(plot_alumni_1 + plot_alumni_2) / (plot_alumni_3 + plot_alumni_4) +
  plot_annotation(
    title = "Relationships Between Support and Learning Outcomes for Alumni",
    theme = theme(plot.title = element_text(face = "bold", size = 16))
  )

Current Students Visualizations

# Plot 1: Autonomy vs. self-directed learning for students
plot_student_1 <- ggplot(student_complete, 
                         aes(x = factor(autonomy_level), y = self_directed_learning_score)) +
  geom_boxplot(fill = "lightblue", alpha = 0.7) +
  geom_jitter(width = 0.2, alpha = 0.7, color = "steelblue") +
  labs(title = "Autonomy and Self-directed Learning",
       subtitle = "Current Students",
       x = "Autonomy Level (1=None, 5=Complete)",
       y = "Self-directed Learning Score") +
  scale_y_continuous(breaks = 1:5)

# Plot 2: Autonomy vs. skill application for students
plot_student_2 <- ggplot(student_complete, 
                         aes(x = factor(autonomy_level), y = skill_application_score)) +
  geom_boxplot(fill = "lightgreen", alpha = 0.7) +
  geom_jitter(width = 0.2, alpha = 0.7, color = "darkgreen") +
  labs(title = "Autonomy and Skill Application",
       subtitle = "Current Students",
       x = "Autonomy Level (1=None, 5=Complete)",
       y = "Skill Application Score") +
  scale_y_continuous(breaks = 1:5)

# Plot 3: Employer support vs. self-directed learning for students
plot_student_3 <- ggplot(student_complete, 
                         aes(x = employer_support_score, y = self_directed_learning_score)) +
  geom_point(alpha = 0.7, size = 3, color = "steelblue") +
  geom_smooth(method = "lm", se = TRUE, color = "darkred") +
  labs(title = "Employer Support and Self-directed Learning",
       subtitle = "Current Students",
       x = "Employer Support Score",
       y = "Self-directed Learning Score") +
  scale_x_continuous(breaks = 1:5) +
  scale_y_continuous(breaks = 1:5)

# Plot 4: University support vs. learning integration for students
plot_student_4 <- ggplot(student_complete, 
                         aes(x = university_support_score, y = learning_integration_score)) +
  geom_point(alpha = 0.7, size = 3, color = "steelblue") +
  geom_smooth(method = "lm", se = TRUE, color = "darkred") +
  labs(title = "University Support and Learning Integration",
       subtitle = "Current Students",
       x = "University Support Score",
       y = "Learning Integration Score") +
  scale_x_continuous(breaks = 1:5) +
  scale_y_continuous(breaks = 1:5)

# Combine plots in a grid
(plot_student_1 + plot_student_2) / (plot_student_3 + plot_student_4) +
  plot_annotation(
    title = "Relationships Between Autonomy, Support, and Learning Outcomes for Current Students",
    theme = theme(plot.title = element_text(face = "bold", size = 16))
  )

Logistic Regression Analysis

Alumni Logistic Regression Models

# Model 1: Effect of employer support on work-based learning
model_alumni_1 <- glm(high_work_learning ~ employer_support_score, 
                     data = alumni_complete, 
                     family = binomial(link = "logit"))

# Model 2: Effect of university support on academic integration
# Use increased iterations to address potential convergence issues
model_alumni_2 <- glm(high_academic_integration ~ university_support_score, 
                     data = alumni_complete, 
                     family = binomial(link = "logit"),
                     control = list(maxit = 50))

# Model 3: Combined model for learning outcomes
# Use increased iterations to address potential convergence issues
model_alumni_3 <- glm(high_learning_outcome ~ employer_support_score + university_support_score, 
                     data = alumni_complete, 
                     family = binomial(link = "logit"),
                     control = list(maxit = 50))

# Calculate odds ratios
odds_ratio_alumni_1 <- exp(coef(model_alumni_1))
odds_ratio_alumni_2 <- exp(coef(model_alumni_2))
odds_ratio_alumni_3 <- exp(coef(model_alumni_3))

# Function to extract key model results
extract_model_results <- function(model, model_name) {
  coefs <- summary(model)$coefficients
  
  # Create a data frame with model results
  results <- data.frame(
    Model = model_name,
    Variable = rownames(coefs),
    Coefficient = coefs[, "Estimate"],
    SE = coefs[, "Std. Error"],
    z = coefs[, "z value"],
    p = coefs[, "Pr(>|z|)"],
    OR = exp(coefs[, "Estimate"])
  )
  
  return(results)
}

# Extract results from all alumni models
alumni_results <- rbind(
  extract_model_results(model_alumni_1, "Model 1: Work Learning"),
  extract_model_results(model_alumni_2, "Model 2: Academic Integration"),
  extract_model_results(model_alumni_3, "Model 3: Learning Outcomes")
)

# Display results table
alumni_results %>%
  kable(caption = "Logistic Regression Results for Alumni",
        digits = c(0, 0, 3, 3, 2, 3, 2),
        col.names = c("Model", "Variable", "Coefficient", "Std. Error", 
                      "z value", "p-value", "Odds Ratio")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(0, bold = TRUE) %>%
  pack_rows("Model 1: Effect of Employer Support on Work Learning", 1, 2) %>%
  pack_rows("Model 2: Effect of University Support on Academic Integration", 3, 4) %>%
  pack_rows("Model 3: Combined Effects on Learning Outcomes", 5, 7)

Logistic Regression Results for Alumni
	Model	Variable	Coefficient	Std. Error	z value	p-value	Odds Ratio
Model 1: Effect of Employer Support on Work Learning
(Intercept)	Model 1: Work Learning	(Intercept)	1.117	1.771	0.63	0.528	3.060000e+00
employer_support_score	Model 1: Work Learning	employer_support_score	-0.048	0.575	-0.08	0.933	9.500000e-01
Model 2: Effect of University Support on Academic Integration
(Intercept)1	Model 2: Academic Integration	(Intercept)	-896.904	1394728.004	0.00	0.999	0.000000e+00
university_support_score	Model 2: Academic Integration	university_support_score	230.053	357119.233	0.00	0.999	8.144807e+99
Model 3: Combined Effects on Learning Outcomes
(Intercept)2	Model 3: Learning Outcomes	(Intercept)	-829.016	1501957.066	0.00	1.000	0.000000e+00
employer_support_score1	Model 3: Learning Outcomes	employer_support_score	42.733	407993.877	0.00	1.000	3.618969e+18
university_support_score1	Model 3: Learning Outcomes	university_support_score	185.103	495648.036	0.00	1.000	2.449845e+80

Current Students Logistic Regression Models

# Model 1: Direct effect of autonomy on self-directed learning
model_student_1 <- glm(high_self_directed ~ autonomy_level, 
                      data = student_complete, 
                      family = binomial(link = "logit"))

# Model 2: Effect of autonomy and employer support on skill application
model_student_2 <- glm(high_skill_application ~ autonomy_level + employer_support_score, 
                      data = student_complete, 
                      family = binomial(link = "logit"))

# Model 3: Combined model for learning integration
model_student_3 <- glm(high_learning_integration ~ autonomy_level + employer_support_score + university_support_score, 
                      data = student_complete, 
                      family = binomial(link = "logit"))

# Calculate odds ratios
odds_ratio_student_1 <- exp(coef(model_student_1))
odds_ratio_student_2 <- exp(coef(model_student_2))
odds_ratio_student_3 <- exp(coef(model_student_3))

# Extract results from all student models
student_results <- rbind(
  extract_model_results(model_student_1, "Model 1: Self-directed Learning"),
  extract_model_results(model_student_2, "Model 2: Skill Application"),
  extract_model_results(model_student_3, "Model 3: Learning Integration")
)

# Display results table
student_results %>%
  kable(caption = "Logistic Regression Results for Current Students",
        digits = c(0, 0, 3, 3, 2, 3, 2),
        col.names = c("Model", "Variable", "Coefficient", "Std. Error", 
                      "z value", "p-value", "Odds Ratio")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(0, bold = TRUE) %>%
  pack_rows("Model 1: Effect of Autonomy on Self-directed Learning", 1, 2) %>%
  pack_rows("Model 2: Effect of Autonomy & Employer Support on Skill Application", 3, 5) %>%
  pack_rows("Model 3: Combined Effects on Learning Integration", 6, 9)

Logistic Regression Results for Current Students
	Model	Variable	Coefficient	Std. Error	z value	p-value	Odds Ratio
Model 1: Effect of Autonomy on Self-directed Learning
(Intercept)	Model 1: Self-directed Learning	(Intercept)	0.426	2.108	0.20	0.840	1.53
autonomy_level	Model 1: Self-directed Learning	autonomy_level	-0.311	0.558	-0.56	0.578	0.73
Model 2: Effect of Autonomy & Employer Support on Skill Application
(Intercept)1	Model 2: Skill Application	(Intercept)	-24.566	181476.794	0.00	1.000	0.00
autonomy_level1	Model 2: Skill Application	autonomy_level	0.000	35144.732	0.00	1.000	1.00
employer_support_score	Model 2: Skill Application	employer_support_score	0.000	52982.677	0.00	1.000	1.00
Model 3: Combined Effects on Learning Integration
(Intercept)2	Model 3: Learning Integration	(Intercept)	-24.566	181648.613	0.00	1.000	0.00
autonomy_level2	Model 3: Learning Integration	autonomy_level	0.000	35941.003	0.00	1.000	1.00
employer_support_score1	Model 3: Learning Integration	employer_support_score	0.000	114125.475	0.00	1.000	1.00
university_support_score	Model 3: Learning Integration	university_support_score	0.000	82144.674	0.00	1.000	1.00

Ordinal Regression Analysis

Alumni Ordinal Regression

# Create ordinal factors for alumni
alumni_complete$work_learning_level <- cut(alumni_complete$work_learning_score, 
                                         breaks = c(0, 2, 3, 4, 5), 
                                         labels = c("Low", "Medium", "High", "Very High"),
                                         include.lowest = TRUE)

# Make it ordered
alumni_complete$work_learning_level <- ordered(alumni_complete$work_learning_level, 
                                             levels = c("Low", "Medium", "High", "Very High"))

# Create employer support level
alumni_complete$employer_support_level <- cut(alumni_complete$employer_support_score, 
                                            breaks = c(0, 2, 3, 4, 5), 
                                            labels = c("Low", "Medium", "High", "Very High"),
                                            include.lowest = TRUE)

# Make it ordered
alumni_complete$employer_support_level <- ordered(alumni_complete$employer_support_level, 
                                                levels = c("Low", "Medium", "High", "Very High"))

# Check distributions of factors
wl_table <- table(alumni_complete$work_learning_level)
es_table <- table(alumni_complete$employer_support_level)

# Display factor distributions
par(mfrow = c(1, 2))
barplot(wl_table, main = "Work Learning Level Distribution",
        col = "steelblue", border = "white", las = 1)
barplot(es_table, main = "Employer Support Level Distribution", 
        col = "darkgreen", border = "white", las = 1)

par(mfrow = c(1, 1))

# Ordinal logistic regression for alumni - with error handling
ordinal_model_alumni <- tryCatch({
  # Only proceed if we have at least one observation in multiple categories
  if(sum(wl_table > 0) > 1 && sum(es_table > 0) > 1) {
    model <- polr(work_learning_level ~ employer_support_level, 
                data = alumni_complete, 
                Hess = TRUE)
    model
  } else {
    NULL
  }
}, error = function(e) {
  message(paste("Error in ordinal regression for alumni:", e$message))
  NULL
})

# Display ordinal model results if created successfully
if(!is.null(ordinal_model_alumni)) {
  ordinal_summary <- summary(ordinal_model_alumni)
  
  # Display coefficients
  ordinal_coef <- data.frame(
    Variable = names(coef(ordinal_model_alumni)),
    Coefficient = coef(ordinal_model_alumni),
    OddsRatio = exp(coef(ordinal_model_alumni))
  )
  
  ordinal_coef %>%
    kable(caption = "Ordinal Regression Coefficients (Alumni)",
          digits = 3) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
  
  # Display thresholds
  thresholds <- data.frame(
    Threshold = names(ordinal_model_alumni$zeta),
    Value = ordinal_model_alumni$zeta
  )
  
  thresholds %>%
    kable(caption = "Ordinal Regression Thresholds (Alumni)",
          digits = 3) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
} else {
  cat("Ordinal regression model for alumni could not be created due to data limitations.")
}

Ordinal Regression Thresholds (Alumni)
	Threshold	Value
Low\|Medium	Low\|Medium	-5.847
Medium\|High	Medium\|High	-4.959
High\|Very High	High\|Very High	-3.001

Current Students Ordinal Regression

# Create ordinal factors for students
student_complete$self_directed_level <- cut(student_complete$self_directed_learning_score, 
                                          breaks = c(0, 2, 3, 4, 5), 
                                          labels = c("Low", "Medium", "High", "Very High"),
                                          include.lowest = TRUE)

# Make it ordered
student_complete$self_directed_level <- ordered(student_complete$self_directed_level, 
                                              levels = c("Low", "Medium", "High", "Very High"))

# Create autonomy_factor
student_complete$autonomy_factor <- factor(student_complete$autonomy_level, 
                                         levels = 1:5, 
                                         labels = c("None", "Little", "Moderate", "Significant", "Complete"),
                                         ordered = TRUE)

# Check distributions of factors
sdl_table <- table(student_complete$self_directed_level)
af_table <- table(student_complete$autonomy_factor)

# Display factor distributions
par(mfrow = c(1, 2))
barplot(sdl_table, main = "Self-directed Learning Level Distribution",
        col = "steelblue", border = "white", las = 1)
barplot(af_table, main = "Autonomy Factor Distribution", 
        col = "darkgreen", border = "white", las = 1)

par(mfrow = c(1, 1))

# Ordinal logistic regression for students - with error handling
ordinal_model_student <- tryCatch({
  # Only proceed if we have at least one observation in multiple categories
  if(sum(sdl_table > 0) > 1 && sum(af_table > 0) > 1) {
    model <- polr(self_directed_level ~ autonomy_factor, 
                data = student_complete, 
                Hess = TRUE)
    model
  } else {
    NULL
  }
}, error = function(e) {
  message(paste("Error in ordinal regression for students:", e$message))
  NULL
})

# Display ordinal model results if created successfully
if(!is.null(ordinal_model_student)) {
  ordinal_summary <- summary(ordinal_model_student)
  
  # Display coefficients
  ordinal_coef <- data.frame(
    Variable = names(coef(ordinal_model_student)),
    Coefficient = coef(ordinal_model_student),
    OddsRatio = exp(coef(ordinal_model_student))
  )
  
  ordinal_coef %>%
    kable(caption = "Ordinal Regression Coefficients (Students)",
          digits = 3) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
  
  # Display thresholds
  thresholds <- data.frame(
    Threshold = names(ordinal_model_student$zeta),
    Value = ordinal_model_student$zeta
  )
  
  thresholds %>%
    kable(caption = "Ordinal Regression Thresholds (Students)",
          digits = 3) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
} else {
  cat("Ordinal regression model for students could not be created due to data limitations.")
}

Ordinal Regression Thresholds (Students)
	Threshold	Value
Low\|Medium	Low\|Medium	-4.001
Medium\|High	Medium\|High	-2.791
High\|Very High	High\|Very High	-1.292

Key Findings and Interpretation

Effect of Employer Support and Autonomy

# Function to interpret odds ratios from logistic regression
interpret_model <- function(model, predictor, outcome, data_group) {
  if(predictor %in% names(coef(model))) {
    coef_val <- coef(model)[predictor]
    odds <- exp(coef_val)
    p_val <- summary(model)$coefficients[predictor, "Pr(>|z|)"]
    
    result <- data.frame(
      Group = data_group,
      Predictor = predictor,
      Outcome = outcome,
      Coefficient = round(coef_val, 3),
      OddsRatio = round(odds, 2),
      p.value = round(p_val, 3),
      Significant = p_val < 0.05,
      Interpretation = ifelse(p_val < 0.05,
                             ifelse(odds > 1,
                                    paste0("+", round((odds-1)*100, 1), "% increase in odds per unit"),
                                    paste0("-", round((1-odds)*100, 1), "% decrease in odds per unit")),
                             "Not statistically significant")
    )
    return(result)
  } else {
    return(NULL)
  }
}

# Extract key findings from alumni models
alumni_findings <- rbind(
  interpret_model(model_alumni_1, "employer_support_score", "High Work-based Learning", "Alumni"),
  interpret_model(model_alumni_2, "university_support_score", "High Academic Integration", "Alumni"),
  interpret_model(model_alumni_3, "employer_support_score", "High Learning Outcomes", "Alumni"),
  interpret_model(model_alumni_3, "university_support_score", "High Learning Outcomes", "Alumni")
)

# Extract key findings from student models
student_findings <- rbind(
  interpret_model(model_student_1, "autonomy_level", "High Self-directed Learning", "Students"),
  interpret_model(model_student_2, "autonomy_level", "High Skill Application", "Students"),
  interpret_model(model_student_2, "employer_support_score", "High Skill Application", "Students"),
  interpret_model(model_student_3, "autonomy_level", "High Learning Integration", "Students"),
  interpret_model(model_student_3, "employer_support_score", "High Learning Integration", "Students"),
  interpret_model(model_student_3, "university_support_score", "High Learning Integration", "Students")
)

# Combine all findings
all_findings <- rbind(alumni_findings, student_findings)

# Create a nice table of findings
all_findings %>%
  kable(caption = "Key Findings from Logistic Regression Models",
        col.names = c("Group", "Predictor", "Outcome", "Coefficient", 
                     "Odds Ratio", "p-value", "Significant", "Interpretation")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(which(all_findings$Significant), background = "#e6ffe6") %>%
  row_spec(which(!all_findings$Significant), background = "#ffe6e6") %>%
  column_spec(8, width = "20em")

Key Findings from Logistic Regression Models
	Group	Predictor	Outcome	Coefficient	Odds Ratio	p-value	Significant	Interpretation
employer_support_score	Alumni	employer_support_score	High Work-based Learning	-0.048	9.500000e-01	0.933	FALSE	Not statistically significant
university_support_score	Alumni	university_support_score	High Academic Integration	230.053	8.144807e+99	0.999	FALSE	Not statistically significant
employer_support_score1	Alumni	employer_support_score	High Learning Outcomes	42.733	3.618969e+18	1.000	FALSE	Not statistically significant
university_support_score1	Alumni	university_support_score	High Learning Outcomes	185.103	2.449845e+80	1.000	FALSE	Not statistically significant
autonomy_level	Students	autonomy_level	High Self-directed Learning	-0.311	7.300000e-01	0.578	FALSE	Not statistically significant
autonomy_level1	Students	autonomy_level	High Skill Application	0.000	1.000000e+00	1.000	FALSE	Not statistically significant
employer_support_score2	Students	employer_support_score	High Skill Application	0.000	1.000000e+00	1.000	FALSE	Not statistically significant
autonomy_level2	Students	autonomy_level	High Learning Integration	0.000	1.000000e+00	1.000	FALSE	Not statistically significant
employer_support_score11	Students	employer_support_score	High Learning Integration	0.000	1.000000e+00	1.000	FALSE	Not statistically significant
university_support_score2	Students	university_support_score	High Learning Integration	0.000	1.000000e+00	1.000	FALSE	Not statistically significant

Comparison of Alumni and Current Students

# Compare the impact of employer support between alumni and current students
employer_impact <- all_findings %>%
  dplyr::filter(Predictor %in% c("employer_support_score")) %>%
  dplyr::select(Group, Outcome, OddsRatio, Significant, Interpretation)

# Compare the impact of university support between alumni and current students
university_impact <- all_findings %>%
  dplyr::filter(Predictor %in% c("university_support_score")) %>%
  dplyr::select(Group, Outcome, OddsRatio, Significant, Interpretation)

# Display comparative findings
employer_impact %>%
  kable(caption = "Impact of Employer Support Across Groups") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(which(employer_impact$Significant), background = "#e6ffe6") %>%
  row_spec(which(!employer_impact$Significant), background = "#ffe6e6")

Impact of Employer Support Across Groups
	Group	Outcome	OddsRatio	Significant	Interpretation
employer_support_score	Alumni	High Work-based Learning	9.500000e-01	FALSE	Not statistically significant
employer_support_score1	Alumni	High Learning Outcomes	3.618969e+18	FALSE	Not statistically significant
employer_support_score2	Students	High Skill Application	1.000000e+00	FALSE	Not statistically significant
employer_support_score11	Students	High Learning Integration	1.000000e+00	FALSE	Not statistically significant

university_impact %>%
  kable(caption = "Impact of University Support Across Groups") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(which(university_impact$Significant), background = "#e6ffe6") %>%
  row_spec(which(!university_impact$Significant), background = "#ffe6e6")

Impact of University Support Across Groups
	Group	Outcome	OddsRatio	Significant	Interpretation
university_support_score	Alumni	High Academic Integration	8.144807e+99	FALSE	Not statistically significant
university_support_score1	Alumni	High Learning Outcomes	2.449845e+80	FALSE	Not statistically significant
university_support_score2	Students	High Learning Integration	1.000000e+00	FALSE	Not statistically significant

Conclusion

This analysis examined the relationship between autonomy and discretionary learning in the Graduate Apprenticeship Programme, comparing both current students and alumni. Our key findings include:

No statistically significant relationships were found, possibly due to the limited sample size. ### Key Correlations

For Alumni:

university_support and work_learning have a strong positive correlation (r = 0.95).
university_support and academic_integration have a strong positive correlation (r = 0.73).
work_learning and academic_integration have a strong positive correlation (r = 0.87).
university_support and learning_outcome have a strong positive correlation (r = 0.92).
work_learning and learning_outcome have a strong positive correlation (r = 0.87).
academic_integration and learning_outcome have a moderate positive correlation (r = 0.59).

For Current Students:

employer_support and university_support have a strong positive correlation (r = 0.88).
employer_support and self_directed_learning have a strong positive correlation (r = 0.74).
university_support and self_directed_learning have a strong positive correlation (r = 0.73).
employer_support and skill_application have a moderate positive correlation (r = 0.52).
employer_support and learning_integration have a moderate positive correlation (r = 0.66).
self_directed_learning and learning_integration have a moderate positive correlation (r = 0.56).
skill_application and learning_integration have a strong positive correlation (r = 0.83).

Implications for Practice

Based on our findings, we can make the following recommendations:

For Employers:
- Provide structured support systems for apprentices
- Foster autonomy in work-based learning projects
- Integrate academic learning with workplace practice
For Universities:
- Strengthen academic support structures
- Design curricula that complement workplace learning
- Develop clear frameworks for workplace mentors
For Apprentices:
- Actively seek autonomy in learning projects
- Develop self-directed learning skills
- Integrate academic knowledge with workplace practice

Limitations and Future Research

This study has several limitations:

Small sample size, particularly for current students
Potential selection bias in survey respondents
Cross-sectional design limits causal inferences

Future research should:

Collect longitudinal data to track changes over time
Increase sample size for better statistical power
Include qualitative components to deepen understanding
Explore how autonomy interacts with different learning styles

References

Billett, S. (2001). Learning through work: Workplace affordances and individual engagement. Journal of Workplace Learning, 13(5), 209-214.

Ericsson, K. A. (2006). The influence of experience and deliberate practice on the development of superior expert performance. The Cambridge handbook of expertise and expert performance, 38, 685-705.

Tynjälä, P. (2008). Perspectives into learning at the workplace. Educational Research Review, 3(2), 130-154.

	Threshold	Value
Low\|Medium	Low\|Medium	-5.847
Medium\|High	Medium\|High	-4.959
High\|Very High	High\|Very High	-3.001

	Threshold	Value
Low\|Medium	Low\|Medium	-4.001
Medium\|High	Medium\|High	-2.791
High\|Very High	High\|Very High	-1.292