Introduction

This analysis explores the relationship between autonomy and discretionary learning outcomes in the Graduate Apprenticeship Programme. The study examines both alumni and current students to understand how workplace autonomy and support influence learning outcomes.

Data Preparation

Loading Required Packages

Importing and Preparing Data

# Function to clean and prepare data
prepare_data <- function(file_path, dataset_type) {
  message(paste("Reading", dataset_type, "data from", file_path))
  
  # Try to read the CSV data with error handling
  tryCatch({
    data <- read_csv(file_path)
    message(paste("Successfully read", nrow(data), "rows and", ncol(data), "columns"))
    
    # Add dataset identifier
    data$dataset <- dataset_type
    
    return(data)
  }, error = function(e) {
    message(paste("Error reading file:", e$message))
    message("Trying alternative approach with base R...")
    
    # Try with base R as a fallback
    data <- read.csv(file_path, stringsAsFactors = FALSE)
    message(paste("Successfully read", nrow(data), "rows and", ncol(data), "columns with base R"))
    
    # Add dataset identifier
    data$dataset <- dataset_type
    
    return(data)
  })
}

# Import alumni data
alumni_data <- prepare_data("standardized_alumni_survey_complete.csv", "Alumni")

# Import current students data
student_data <- prepare_data("standardized_current_survey_complete.csv", "Current")

Exploring the Data

Alumni Data

# Convert all relevant columns to numeric to avoid issues
columns_to_check <- c(
  paste0("Q11_", 1:5),
  paste0("Q12_", 1:5),
  paste0("Q13_", 1:5),
  paste0("Q14_", 1:5),
  paste0("Q15_", 1:5)
)

for(col in columns_to_check) {
  if(col %in% names(alumni_data) && !is.numeric(alumni_data[[col]])) {
    alumni_data[[col]] <- as.numeric(alumni_data[[col]])
  }
}

# Create composite scores using rowMeans
alumni_data$employer_support_score <- rowMeans(
  alumni_data[, paste0("Q11_", 1:5)], 
  na.rm = TRUE
)

alumni_data$university_support_score <- rowMeans(
  alumni_data[, paste0("Q12_", 1:5)], 
  na.rm = TRUE
)

alumni_data$work_learning_score <- rowMeans(
  alumni_data[, paste0("Q13_", 1:5)], 
  na.rm = TRUE
)

alumni_data$academic_integration_score <- rowMeans(
  alumni_data[, paste0("Q14_", 1:5)], 
  na.rm = TRUE
)

alumni_data$learning_outcome_score <- rowMeans(
  alumni_data[, paste0("Q15_", 1:5)], 
  na.rm = TRUE
)

# Create binary outcomes for logistic regression
alumni_data$high_work_learning <- ifelse(alumni_data$work_learning_score >= 4, 1, 0)
alumni_data$high_academic_integration <- ifelse(alumni_data$academic_integration_score >= 4, 1, 0)
alumni_data$high_learning_outcome <- ifelse(alumni_data$learning_outcome_score >= 4, 1, 0)

Alumni Demographics

# Extract demographics
alumni_demo <- alumni_data %>%
  dplyr::select(Q1, Q2, Q3, Q4, Q5, Q6) %>%
  dplyr::rename(
    "Age Group" = Q1,
    "Gender" = Q2,
    "University" = Q3,
    "Programme" = Q4,
    "Year of Completion" = Q5,
    "Level Achieved" = Q6
  )

# Summary table
alumni_demo %>%
  group_by(`Gender`) %>%
  summarise(Count = n()) %>%
  mutate(Percentage = round(Count / sum(Count) * 100, 1)) %>%
  kable(caption = "Gender Distribution of Alumni") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Gender Distribution of Alumni
Gender Count Percentage
Female 10 66.7
Male 4 26.7
NA 1 6.7
alumni_demo %>%
  group_by(`Age Group`) %>%
  summarise(Count = n()) %>%
  mutate(Percentage = round(Count / sum(Count) * 100, 1)) %>%
  kable(caption = "Age Distribution of Alumni") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Age Distribution of Alumni
Age Group Count Percentage
25-34 4 26.7
35-44 4 26.7
45-54 6 40.0
NA 1 6.7

Alumni Support and Learning Scores

# Create summary of alumni scores
alumni_scores <- alumni_data %>%
  dplyr::select(employer_support_score, university_support_score, 
         work_learning_score, academic_integration_score, learning_outcome_score) %>%
  dplyr::rename(
    "Employer Support" = employer_support_score,
    "University Support" = university_support_score,
    "Work-based Learning" = work_learning_score,
    "Academic Integration" = academic_integration_score,
    "Learning Outcomes" = learning_outcome_score
  )

# Summary statistics
alumni_scores_summary <- alumni_scores %>%
  summarise(across(everything(), 
                  list(Mean = ~mean(., na.rm = TRUE),
                       SD = ~sd(., na.rm = TRUE),
                       Min = ~min(., na.rm = TRUE),
                       Max = ~max(., na.rm = TRUE)))) %>%
  pivot_longer(cols = everything(), 
               names_to = c("Variable", "Statistic"), 
               names_pattern = "(.*)_(.*)") %>%
  pivot_wider(names_from = Statistic, values_from = value)

alumni_scores_summary %>%
  kable(caption = "Summary Statistics for Alumni Scores", digits = 2) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Summary Statistics for Alumni Scores
Variable Mean SD Min Max
Employer Support 2.93 1.20 1.0 5
University Support 4.04 0.83 1.6 5
Work-based Learning 4.09 0.94 1.6 5
Academic Integration 4.17 0.95 2.0 5
Learning Outcomes 4.33 0.67 2.6 5

Current Students Data

# Current students have direct autonomy measure (Q9)
# Transform autonomy variable for analysis
if("Q9" %in% names(student_data)) {
  # Create function to map autonomy levels
  map_autonomy <- function(x) {
    ifelse(x == "No autonomy", 1,
           ifelse(x == "Little autonomy", 2,
                  ifelse(x == "Moderate autonomy", 3,
                         ifelse(x == "Significant autonomy", 4,
                                ifelse(x == "Complete autonomy", 5, NA)))))
  }
  
  # Apply the mapping function
  student_data$autonomy_level <- map_autonomy(student_data$Q9)
} else {
  # Create a placeholder autonomy_level if Q9 doesn't exist
  student_data$autonomy_level <- NA
}

# Convert all relevant columns to numeric to avoid issues
for(col in columns_to_check) {
  if(col %in% names(student_data) && !is.numeric(student_data[[col]])) {
    student_data[[col]] <- as.numeric(student_data[[col]])
  }
}

# Create composite scores for students
student_data$employer_support_score <- rowMeans(
  student_data[, paste0("Q11_", 1:5)], 
  na.rm = TRUE
)

student_data$university_support_score <- rowMeans(
  student_data[, paste0("Q12_", 1:5)], 
  na.rm = TRUE
)

student_data$self_directed_learning_score <- rowMeans(
  student_data[, paste0("Q13_", 1:5)], 
  na.rm = TRUE
)

student_data$skill_application_score <- rowMeans(
  student_data[, paste0("Q14_", 1:5)], 
  na.rm = TRUE
)

student_data$learning_integration_score <- rowMeans(
  student_data[, paste0("Q15_", 1:5)], 
  na.rm = TRUE
)

# Create binary outcomes for logistic regression
student_data$high_self_directed <- ifelse(student_data$self_directed_learning_score >= 4, 1, 0)
student_data$high_skill_application <- ifelse(student_data$skill_application_score >= 4, 1, 0)
student_data$high_learning_integration <- ifelse(student_data$learning_integration_score >= 4, 1, 0)

Current Students Demographics

# Extract demographics
student_demo <- student_data %>%
  dplyr::select(Q1, Q2, Q3, Q4, Q5, Q9) %>%
  dplyr::rename(
    "Age Group" = Q1,
    "Gender" = Q2,
    "University" = Q3,
    "Programme" = Q4,
    "Year of Study" = Q5,
    "Autonomy Level" = Q9
  )

# Summary tables
student_demo %>%
  group_by(`Gender`) %>%
  summarise(Count = n()) %>%
  mutate(Percentage = round(Count / sum(Count) * 100, 1)) %>%
  kable(caption = "Gender Distribution of Current Students") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Gender Distribution of Current Students
Gender Count Percentage
Female 9 64.3
Male 3 21.4
NA 2 14.3
student_demo %>%
  group_by(`Autonomy Level`) %>%
  summarise(Count = n()) %>%
  mutate(Percentage = round(Count / sum(Count) * 100, 1)) %>%
  kable(caption = "Autonomy Level Distribution of Current Students") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Autonomy Level Distribution of Current Students
Autonomy Level Count Percentage
Complete autonomy 2 14.3
Little autonomy 1 7.1
No autonomy 1 7.1
Significant autonomy 8 57.1
NA 2 14.3

Current Students Autonomy and Learning Scores

# Create summary of student scores
student_scores <- student_data %>%
  dplyr::select(autonomy_level, employer_support_score, university_support_score, 
         self_directed_learning_score, skill_application_score, learning_integration_score) %>%
  dplyr::rename(
    "Autonomy Level" = autonomy_level,
    "Employer Support" = employer_support_score,
    "University Support" = university_support_score,
    "Self-directed Learning" = self_directed_learning_score,
    "Skill Application" = skill_application_score,
    "Learning Integration" = learning_integration_score
  )

# Summary statistics
student_scores_summary <- student_scores %>%
  summarise(across(everything(), 
                  list(Mean = ~mean(., na.rm = TRUE),
                       SD = ~sd(., na.rm = TRUE),
                       Min = ~min(., na.rm = TRUE),
                       Max = ~max(., na.rm = TRUE)))) %>%
  pivot_longer(cols = everything(), 
               names_to = c("Variable", "Statistic"), 
               names_pattern = "(.*)_(.*)") %>%
  pivot_wider(names_from = Statistic, values_from = value)

student_scores_summary %>%
  kable(caption = "Summary Statistics for Current Students Scores", digits = 2) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Summary Statistics for Current Students Scores
Variable Mean SD Min Max
Autonomy Level 3.75 1.14 1 5.0
Employer Support 2.17 0.82 1 3.8
University Support 2.07 1.11 1 5.0
Self-directed Learning 2.80 1.26 1 5.0
Skill Application 2.20 0.92 1 3.4
Learning Integration 1.96 0.70 1 2.8

Preparing Analysis Datasets

# Create analysis subset for alumni - INCLUDE ALL NEEDED VARIABLES
alumni_complete <- subset(alumni_data, 
                         Finished == "True" | Finished == TRUE,
                         select = c(employer_support_score, university_support_score, 
                                   work_learning_score, academic_integration_score, learning_outcome_score,
                                   high_work_learning, high_academic_integration, high_learning_outcome))

# Remove rows with NA in key variables
alumni_complete <- na.omit(alumni_complete)

# Create analysis subset for students - INCLUDE ALL NEEDED VARIABLES
student_complete <- subset(student_data,
                          Finished == "True" | Finished == TRUE,
                          select = c(autonomy_level, employer_support_score, university_support_score,
                                   self_directed_learning_score, skill_application_score, learning_integration_score,
                                   high_self_directed, high_skill_application, high_learning_integration))

# Remove rows with NA in key variables
student_complete <- na.omit(student_complete)

# Display data sizes
data_sizes <- data.frame(
  Dataset = c("Alumni (Complete Records)", "Current Students (Complete Records)"),
  Records = c(nrow(alumni_complete), nrow(student_complete))
)

data_sizes %>%
  kable(caption = "Number of Complete Records for Analysis") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Number of Complete Records for Analysis
Dataset Records
Alumni (Complete Records) 11
Current Students (Complete Records) 9

Correlation Analysis

Alumni Correlations

# Install corrplot if not already installed
if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot")
}
# Load the corrplot package
library(corrplot)

# Create correlation matrix for key variables in alumni data
alumni_cor_vars <- alumni_complete[, c("employer_support_score", "university_support_score", 
                                       "work_learning_score", "academic_integration_score", 
                                       "learning_outcome_score")]

alumni_cor <- cor(alumni_cor_vars, use = "pairwise.complete.obs")

# Create a nicer correlation plot
corrplot::corrplot(alumni_cor, 
                  method = "circle", 
                  type = "upper", 
                  tl.col = "black",
                  tl.srt = 45,
                  tl.cex = 0.8,
                  diag = FALSE,
                  title = "Correlation Matrix for Alumni Variables",
                  mar = c(0, 0, 2, 0))

# Also display as a table with rounded values
round(alumni_cor, 2) %>%
  as.data.frame() %>%
  rownames_to_column("Variable") %>%
  kable(caption = "Correlation Matrix for Alumni Variables") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(1, bold = TRUE)
Correlation Matrix for Alumni Variables
Variable employer_support_score university_support_score work_learning_score academic_integration_score learning_outcome_score
employer_support_score 1.00 0.25 0.08 -0.27 0.31
university_support_score 0.25 1.00 0.95 0.73 0.92
work_learning_score 0.08 0.95 1.00 0.87 0.87
academic_integration_score -0.27 0.73 0.87 1.00 0.59
learning_outcome_score 0.31 0.92 0.87 0.59 1.00

Current Students Correlations

# Create correlation matrix for key variables in student data
student_cor_vars <- student_complete[, c("autonomy_level", "employer_support_score", 
                                         "university_support_score", "self_directed_learning_score", 
                                         "skill_application_score", "learning_integration_score")]

student_cor <- cor(student_cor_vars, use = "pairwise.complete.obs")

# Create a nicer correlation plot
corrplot::corrplot(student_cor, 
                  method = "circle", 
                  type = "upper", 
                  tl.col = "black",
                  tl.srt = 45,
                  tl.cex = 0.8,
                  diag = FALSE,
                  title = "Correlation Matrix for Current Students Variables",
                  mar = c(0, 0, 2, 0))

# Also display as a table with rounded values
round(student_cor, 2) %>%
  as.data.frame() %>%
  rownames_to_column("Variable") %>%
  kable(caption = "Correlation Matrix for Current Students Variables") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  column_spec(1, bold = TRUE)
Correlation Matrix for Current Students Variables
Variable autonomy_level employer_support_score university_support_score self_directed_learning_score skill_application_score learning_integration_score
autonomy_level 1.00 -0.09 -0.17 0.03 0.01 0.24
employer_support_score -0.09 1.00 0.88 0.74 0.52 0.66
university_support_score -0.17 0.88 1.00 0.73 0.23 0.33
self_directed_learning_score 0.03 0.74 0.73 1.00 0.40 0.56
skill_application_score 0.01 0.52 0.23 0.40 1.00 0.83
learning_integration_score 0.24 0.66 0.33 0.56 0.83 1.00

Visualization of Key Relationships

Alumni Visualizations

# Create plots with consistent styling
theme_set(theme_minimal() +
          theme(
            plot.title = element_text(face = "bold", size = 14),
            axis.title = element_text(size = 12),
            legend.title = element_text(face = "bold"),
            panel.grid.minor = element_blank()
          ))

# Plot 1: Employer support vs. work learning for alumni
plot_alumni_1 <- ggplot(alumni_complete, aes(x = employer_support_score, y = work_learning_score)) +
  geom_point(alpha = 0.7, size = 3, color = "steelblue") +
  geom_smooth(method = "lm", se = TRUE, color = "darkred") +
  labs(title = "Employer Support and Work-based Learning",
       subtitle = "Alumni Respondents",
       x = "Employer Support Score",
       y = "Work-based Learning Score") +
  scale_x_continuous(breaks = 1:5) +
  scale_y_continuous(breaks = 1:5)

# Plot 2: University support vs. academic integration for alumni
plot_alumni_2 <- ggplot(alumni_complete, aes(x = university_support_score, y = academic_integration_score)) +
  geom_point(alpha = 0.7, size = 3, color = "steelblue") +
  geom_smooth(method = "lm", se = TRUE, color = "darkred") +
  labs(title = "University Support and Academic Integration",
       subtitle = "Alumni Respondents",
       x = "University Support Score",
       y = "Academic Integration Score") +
  scale_x_continuous(breaks = 1:5) +
  scale_y_continuous(breaks = 1:5)

# Plot 3: University support vs. learning outcomes for alumni
plot_alumni_3 <- ggplot(alumni_complete, aes(x = university_support_score, y = learning_outcome_score)) +
  geom_point(alpha = 0.7, size = 3, color = "steelblue") +
  geom_smooth(method = "lm", se = TRUE, color = "darkred") +
  labs(title = "University Support and Learning Outcomes",
       subtitle = "Alumni Respondents",
       x = "University Support Score",
       y = "Learning Outcome Score") +
  scale_x_continuous(breaks = 1:5) +
  scale_y_continuous(breaks = 1:5)

# Plot 4: Employer support vs. learning outcomes for alumni
plot_alumni_4 <- ggplot(alumni_complete, aes(x = employer_support_score, y = learning_outcome_score)) +
  geom_point(alpha = 0.7, size = 3, color = "steelblue") +
  geom_smooth(method = "lm", se = TRUE, color = "darkred") +
  labs(title = "Employer Support and Learning Outcomes",
       subtitle = "Alumni Respondents",
       x = "Employer Support Score",
       y = "Learning Outcome Score") +
  scale_x_continuous(breaks = 1:5) +
  scale_y_continuous(breaks = 1:5)

# Combine plots in a grid
(plot_alumni_1 + plot_alumni_2) / (plot_alumni_3 + plot_alumni_4) +
  plot_annotation(
    title = "Relationships Between Support and Learning Outcomes for Alumni",
    theme = theme(plot.title = element_text(face = "bold", size = 16))
  )

Current Students Visualizations

# Plot 1: Autonomy vs. self-directed learning for students
plot_student_1 <- ggplot(student_complete, 
                         aes(x = factor(autonomy_level), y = self_directed_learning_score)) +
  geom_boxplot(fill = "lightblue", alpha = 0.7) +
  geom_jitter(width = 0.2, alpha = 0.7, color = "steelblue") +
  labs(title = "Autonomy and Self-directed Learning",
       subtitle = "Current Students",
       x = "Autonomy Level (1=None, 5=Complete)",
       y = "Self-directed Learning Score") +
  scale_y_continuous(breaks = 1:5)

# Plot 2: Autonomy vs. skill application for students
plot_student_2 <- ggplot(student_complete, 
                         aes(x = factor(autonomy_level), y = skill_application_score)) +
  geom_boxplot(fill = "lightgreen", alpha = 0.7) +
  geom_jitter(width = 0.2, alpha = 0.7, color = "darkgreen") +
  labs(title = "Autonomy and Skill Application",
       subtitle = "Current Students",
       x = "Autonomy Level (1=None, 5=Complete)",
       y = "Skill Application Score") +
  scale_y_continuous(breaks = 1:5)

# Plot 3: Employer support vs. self-directed learning for students
plot_student_3 <- ggplot(student_complete, 
                         aes(x = employer_support_score, y = self_directed_learning_score)) +
  geom_point(alpha = 0.7, size = 3, color = "steelblue") +
  geom_smooth(method = "lm", se = TRUE, color = "darkred") +
  labs(title = "Employer Support and Self-directed Learning",
       subtitle = "Current Students",
       x = "Employer Support Score",
       y = "Self-directed Learning Score") +
  scale_x_continuous(breaks = 1:5) +
  scale_y_continuous(breaks = 1:5)

# Plot 4: University support vs. learning integration for students
plot_student_4 <- ggplot(student_complete, 
                         aes(x = university_support_score, y = learning_integration_score)) +
  geom_point(alpha = 0.7, size = 3, color = "steelblue") +
  geom_smooth(method = "lm", se = TRUE, color = "darkred") +
  labs(title = "University Support and Learning Integration",
       subtitle = "Current Students",
       x = "University Support Score",
       y = "Learning Integration Score") +
  scale_x_continuous(breaks = 1:5) +
  scale_y_continuous(breaks = 1:5)

# Combine plots in a grid
(plot_student_1 + plot_student_2) / (plot_student_3 + plot_student_4) +
  plot_annotation(
    title = "Relationships Between Autonomy, Support, and Learning Outcomes for Current Students",
    theme = theme(plot.title = element_text(face = "bold", size = 16))
  )

Logistic Regression Analysis

Alumni Logistic Regression Models

# Model 1: Effect of employer support on work-based learning
model_alumni_1 <- glm(high_work_learning ~ employer_support_score, 
                     data = alumni_complete, 
                     family = binomial(link = "logit"))

# Model 2: Effect of university support on academic integration
# Use increased iterations to address potential convergence issues
model_alumni_2 <- glm(high_academic_integration ~ university_support_score, 
                     data = alumni_complete, 
                     family = binomial(link = "logit"),
                     control = list(maxit = 50))

# Model 3: Combined model for learning outcomes
# Use increased iterations to address potential convergence issues
model_alumni_3 <- glm(high_learning_outcome ~ employer_support_score + university_support_score, 
                     data = alumni_complete, 
                     family = binomial(link = "logit"),
                     control = list(maxit = 50))

# Calculate odds ratios
odds_ratio_alumni_1 <- exp(coef(model_alumni_1))
odds_ratio_alumni_2 <- exp(coef(model_alumni_2))
odds_ratio_alumni_3 <- exp(coef(model_alumni_3))

# Function to extract key model results
extract_model_results <- function(model, model_name) {
  coefs <- summary(model)$coefficients
  
  # Create a data frame with model results
  results <- data.frame(
    Model = model_name,
    Variable = rownames(coefs),
    Coefficient = coefs[, "Estimate"],
    SE = coefs[, "Std. Error"],
    z = coefs[, "z value"],
    p = coefs[, "Pr(>|z|)"],
    OR = exp(coefs[, "Estimate"])
  )
  
  return(results)
}

# Extract results from all alumni models
alumni_results <- rbind(
  extract_model_results(model_alumni_1, "Model 1: Work Learning"),
  extract_model_results(model_alumni_2, "Model 2: Academic Integration"),
  extract_model_results(model_alumni_3, "Model 3: Learning Outcomes")
)

# Display results table
alumni_results %>%
  kable(caption = "Logistic Regression Results for Alumni",
        digits = c(0, 0, 3, 3, 2, 3, 2),
        col.names = c("Model", "Variable", "Coefficient", "Std. Error", 
                      "z value", "p-value", "Odds Ratio")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(0, bold = TRUE) %>%
  pack_rows("Model 1: Effect of Employer Support on Work Learning", 1, 2) %>%
  pack_rows("Model 2: Effect of University Support on Academic Integration", 3, 4) %>%
  pack_rows("Model 3: Combined Effects on Learning Outcomes", 5, 7)
Logistic Regression Results for Alumni
Model Variable Coefficient Std. Error z value p-value Odds Ratio
Model 1: Effect of Employer Support on Work Learning
(Intercept) Model 1: Work Learning (Intercept) 1.117 1.771 0.63 0.528 3.060000e+00
employer_support_score Model 1: Work Learning employer_support_score -0.048 0.575 -0.08 0.933 9.500000e-01
Model 2: Effect of University Support on Academic Integration
(Intercept)1 Model 2: Academic Integration (Intercept) -896.904 1394728.004 0.00 0.999 0.000000e+00
university_support_score Model 2: Academic Integration university_support_score 230.053 357119.233 0.00 0.999 8.144807e+99
Model 3: Combined Effects on Learning Outcomes
(Intercept)2 Model 3: Learning Outcomes (Intercept) -829.016 1501957.066 0.00 1.000 0.000000e+00
employer_support_score1 Model 3: Learning Outcomes employer_support_score 42.733 407993.877 0.00 1.000 3.618969e+18
university_support_score1 Model 3: Learning Outcomes university_support_score 185.103 495648.036 0.00 1.000 2.449845e+80

Current Students Logistic Regression Models

# Model 1: Direct effect of autonomy on self-directed learning
model_student_1 <- glm(high_self_directed ~ autonomy_level, 
                      data = student_complete, 
                      family = binomial(link = "logit"))

# Model 2: Effect of autonomy and employer support on skill application
model_student_2 <- glm(high_skill_application ~ autonomy_level + employer_support_score, 
                      data = student_complete, 
                      family = binomial(link = "logit"))

# Model 3: Combined model for learning integration
model_student_3 <- glm(high_learning_integration ~ autonomy_level + employer_support_score + university_support_score, 
                      data = student_complete, 
                      family = binomial(link = "logit"))

# Calculate odds ratios
odds_ratio_student_1 <- exp(coef(model_student_1))
odds_ratio_student_2 <- exp(coef(model_student_2))
odds_ratio_student_3 <- exp(coef(model_student_3))

# Extract results from all student models
student_results <- rbind(
  extract_model_results(model_student_1, "Model 1: Self-directed Learning"),
  extract_model_results(model_student_2, "Model 2: Skill Application"),
  extract_model_results(model_student_3, "Model 3: Learning Integration")
)

# Display results table
student_results %>%
  kable(caption = "Logistic Regression Results for Current Students",
        digits = c(0, 0, 3, 3, 2, 3, 2),
        col.names = c("Model", "Variable", "Coefficient", "Std. Error", 
                      "z value", "p-value", "Odds Ratio")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(0, bold = TRUE) %>%
  pack_rows("Model 1: Effect of Autonomy on Self-directed Learning", 1, 2) %>%
  pack_rows("Model 2: Effect of Autonomy & Employer Support on Skill Application", 3, 5) %>%
  pack_rows("Model 3: Combined Effects on Learning Integration", 6, 9)
Logistic Regression Results for Current Students
Model Variable Coefficient Std. Error z value p-value Odds Ratio
Model 1: Effect of Autonomy on Self-directed Learning
(Intercept) Model 1: Self-directed Learning (Intercept) 0.426 2.108 0.20 0.840 1.53
autonomy_level Model 1: Self-directed Learning autonomy_level -0.311 0.558 -0.56 0.578 0.73
Model 2: Effect of Autonomy & Employer Support on Skill Application
(Intercept)1 Model 2: Skill Application (Intercept) -24.566 181476.794 0.00 1.000 0.00
autonomy_level1 Model 2: Skill Application autonomy_level 0.000 35144.732 0.00 1.000 1.00
employer_support_score Model 2: Skill Application employer_support_score 0.000 52982.677 0.00 1.000 1.00
Model 3: Combined Effects on Learning Integration
(Intercept)2 Model 3: Learning Integration (Intercept) -24.566 181648.613 0.00 1.000 0.00
autonomy_level2 Model 3: Learning Integration autonomy_level 0.000 35941.003 0.00 1.000 1.00
employer_support_score1 Model 3: Learning Integration employer_support_score 0.000 114125.475 0.00 1.000 1.00
university_support_score Model 3: Learning Integration university_support_score 0.000 82144.674 0.00 1.000 1.00

Ordinal Regression Analysis

Alumni Ordinal Regression

# Create ordinal factors for alumni
alumni_complete$work_learning_level <- cut(alumni_complete$work_learning_score, 
                                         breaks = c(0, 2, 3, 4, 5), 
                                         labels = c("Low", "Medium", "High", "Very High"),
                                         include.lowest = TRUE)

# Make it ordered
alumni_complete$work_learning_level <- ordered(alumni_complete$work_learning_level, 
                                             levels = c("Low", "Medium", "High", "Very High"))

# Create employer support level
alumni_complete$employer_support_level <- cut(alumni_complete$employer_support_score, 
                                            breaks = c(0, 2, 3, 4, 5), 
                                            labels = c("Low", "Medium", "High", "Very High"),
                                            include.lowest = TRUE)

# Make it ordered
alumni_complete$employer_support_level <- ordered(alumni_complete$employer_support_level, 
                                                levels = c("Low", "Medium", "High", "Very High"))

# Check distributions of factors
wl_table <- table(alumni_complete$work_learning_level)
es_table <- table(alumni_complete$employer_support_level)

# Display factor distributions
par(mfrow = c(1, 2))
barplot(wl_table, main = "Work Learning Level Distribution",
        col = "steelblue", border = "white", las = 1)
barplot(es_table, main = "Employer Support Level Distribution", 
        col = "darkgreen", border = "white", las = 1)

par(mfrow = c(1, 1))

# Ordinal logistic regression for alumni - with error handling
ordinal_model_alumni <- tryCatch({
  # Only proceed if we have at least one observation in multiple categories
  if(sum(wl_table > 0) > 1 && sum(es_table > 0) > 1) {
    model <- polr(work_learning_level ~ employer_support_level, 
                data = alumni_complete, 
                Hess = TRUE)
    model
  } else {
    NULL
  }
}, error = function(e) {
  message(paste("Error in ordinal regression for alumni:", e$message))
  NULL
})

# Display ordinal model results if created successfully
if(!is.null(ordinal_model_alumni)) {
  ordinal_summary <- summary(ordinal_model_alumni)
  
  # Display coefficients
  ordinal_coef <- data.frame(
    Variable = names(coef(ordinal_model_alumni)),
    Coefficient = coef(ordinal_model_alumni),
    OddsRatio = exp(coef(ordinal_model_alumni))
  )
  
  ordinal_coef %>%
    kable(caption = "Ordinal Regression Coefficients (Alumni)",
          digits = 3) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
  
  # Display thresholds
  thresholds <- data.frame(
    Threshold = names(ordinal_model_alumni$zeta),
    Value = ordinal_model_alumni$zeta
  )
  
  thresholds %>%
    kable(caption = "Ordinal Regression Thresholds (Alumni)",
          digits = 3) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
} else {
  cat("Ordinal regression model for alumni could not be created due to data limitations.")
}
Ordinal Regression Thresholds (Alumni)
Threshold Value
Low&#124;Medium Low&#124;Medium -5.847
Medium&#124;High Medium&#124;High -4.959
High&#124;Very High High&#124;Very High -3.001

Current Students Ordinal Regression

# Create ordinal factors for students
student_complete$self_directed_level <- cut(student_complete$self_directed_learning_score, 
                                          breaks = c(0, 2, 3, 4, 5), 
                                          labels = c("Low", "Medium", "High", "Very High"),
                                          include.lowest = TRUE)

# Make it ordered
student_complete$self_directed_level <- ordered(student_complete$self_directed_level, 
                                              levels = c("Low", "Medium", "High", "Very High"))

# Create autonomy_factor
student_complete$autonomy_factor <- factor(student_complete$autonomy_level, 
                                         levels = 1:5, 
                                         labels = c("None", "Little", "Moderate", "Significant", "Complete"),
                                         ordered = TRUE)

# Check distributions of factors
sdl_table <- table(student_complete$self_directed_level)
af_table <- table(student_complete$autonomy_factor)

# Display factor distributions
par(mfrow = c(1, 2))
barplot(sdl_table, main = "Self-directed Learning Level Distribution",
        col = "steelblue", border = "white", las = 1)
barplot(af_table, main = "Autonomy Factor Distribution", 
        col = "darkgreen", border = "white", las = 1)

par(mfrow = c(1, 1))

# Ordinal logistic regression for students - with error handling
ordinal_model_student <- tryCatch({
  # Only proceed if we have at least one observation in multiple categories
  if(sum(sdl_table > 0) > 1 && sum(af_table > 0) > 1) {
    model <- polr(self_directed_level ~ autonomy_factor, 
                data = student_complete, 
                Hess = TRUE)
    model
  } else {
    NULL
  }
}, error = function(e) {
  message(paste("Error in ordinal regression for students:", e$message))
  NULL
})

# Display ordinal model results if created successfully
if(!is.null(ordinal_model_student)) {
  ordinal_summary <- summary(ordinal_model_student)
  
  # Display coefficients
  ordinal_coef <- data.frame(
    Variable = names(coef(ordinal_model_student)),
    Coefficient = coef(ordinal_model_student),
    OddsRatio = exp(coef(ordinal_model_student))
  )
  
  ordinal_coef %>%
    kable(caption = "Ordinal Regression Coefficients (Students)",
          digits = 3) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
  
  # Display thresholds
  thresholds <- data.frame(
    Threshold = names(ordinal_model_student$zeta),
    Value = ordinal_model_student$zeta
  )
  
  thresholds %>%
    kable(caption = "Ordinal Regression Thresholds (Students)",
          digits = 3) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
} else {
  cat("Ordinal regression model for students could not be created due to data limitations.")
}
Ordinal Regression Thresholds (Students)
Threshold Value
Low&#124;Medium Low&#124;Medium -4.001
Medium&#124;High Medium&#124;High -2.791
High&#124;Very High High&#124;Very High -1.292

Key Findings and Interpretation

Effect of Employer Support and Autonomy

# Function to interpret odds ratios from logistic regression
interpret_model <- function(model, predictor, outcome, data_group) {
  if(predictor %in% names(coef(model))) {
    coef_val <- coef(model)[predictor]
    odds <- exp(coef_val)
    p_val <- summary(model)$coefficients[predictor, "Pr(>|z|)"]
    
    result <- data.frame(
      Group = data_group,
      Predictor = predictor,
      Outcome = outcome,
      Coefficient = round(coef_val, 3),
      OddsRatio = round(odds, 2),
      p.value = round(p_val, 3),
      Significant = p_val < 0.05,
      Interpretation = ifelse(p_val < 0.05,
                             ifelse(odds > 1,
                                    paste0("+", round((odds-1)*100, 1), "% increase in odds per unit"),
                                    paste0("-", round((1-odds)*100, 1), "% decrease in odds per unit")),
                             "Not statistically significant")
    )
    return(result)
  } else {
    return(NULL)
  }
}

# Extract key findings from alumni models
alumni_findings <- rbind(
  interpret_model(model_alumni_1, "employer_support_score", "High Work-based Learning", "Alumni"),
  interpret_model(model_alumni_2, "university_support_score", "High Academic Integration", "Alumni"),
  interpret_model(model_alumni_3, "employer_support_score", "High Learning Outcomes", "Alumni"),
  interpret_model(model_alumni_3, "university_support_score", "High Learning Outcomes", "Alumni")
)

# Extract key findings from student models
student_findings <- rbind(
  interpret_model(model_student_1, "autonomy_level", "High Self-directed Learning", "Students"),
  interpret_model(model_student_2, "autonomy_level", "High Skill Application", "Students"),
  interpret_model(model_student_2, "employer_support_score", "High Skill Application", "Students"),
  interpret_model(model_student_3, "autonomy_level", "High Learning Integration", "Students"),
  interpret_model(model_student_3, "employer_support_score", "High Learning Integration", "Students"),
  interpret_model(model_student_3, "university_support_score", "High Learning Integration", "Students")
)

# Combine all findings
all_findings <- rbind(alumni_findings, student_findings)

# Create a nice table of findings
all_findings %>%
  kable(caption = "Key Findings from Logistic Regression Models",
        col.names = c("Group", "Predictor", "Outcome", "Coefficient", 
                     "Odds Ratio", "p-value", "Significant", "Interpretation")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(which(all_findings$Significant), background = "#e6ffe6") %>%
  row_spec(which(!all_findings$Significant), background = "#ffe6e6") %>%
  column_spec(8, width = "20em")
Key Findings from Logistic Regression Models
Group Predictor Outcome Coefficient Odds Ratio p-value Significant Interpretation
employer_support_score Alumni employer_support_score High Work-based Learning -0.048 9.500000e-01 0.933 FALSE Not statistically significant
university_support_score Alumni university_support_score High Academic Integration 230.053 8.144807e+99 0.999 FALSE Not statistically significant
employer_support_score1 Alumni employer_support_score High Learning Outcomes 42.733 3.618969e+18 1.000 FALSE Not statistically significant
university_support_score1 Alumni university_support_score High Learning Outcomes 185.103 2.449845e+80 1.000 FALSE Not statistically significant
autonomy_level Students autonomy_level High Self-directed Learning -0.311 7.300000e-01 0.578 FALSE Not statistically significant
autonomy_level1 Students autonomy_level High Skill Application 0.000 1.000000e+00 1.000 FALSE Not statistically significant
employer_support_score2 Students employer_support_score High Skill Application 0.000 1.000000e+00 1.000 FALSE Not statistically significant
autonomy_level2 Students autonomy_level High Learning Integration 0.000 1.000000e+00 1.000 FALSE Not statistically significant
employer_support_score11 Students employer_support_score High Learning Integration 0.000 1.000000e+00 1.000 FALSE Not statistically significant
university_support_score2 Students university_support_score High Learning Integration 0.000 1.000000e+00 1.000 FALSE Not statistically significant

Comparison of Alumni and Current Students

# Compare the impact of employer support between alumni and current students
employer_impact <- all_findings %>%
  dplyr::filter(Predictor %in% c("employer_support_score")) %>%
  dplyr::select(Group, Outcome, OddsRatio, Significant, Interpretation)

# Compare the impact of university support between alumni and current students
university_impact <- all_findings %>%
  dplyr::filter(Predictor %in% c("university_support_score")) %>%
  dplyr::select(Group, Outcome, OddsRatio, Significant, Interpretation)

# Display comparative findings
employer_impact %>%
  kable(caption = "Impact of Employer Support Across Groups") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(which(employer_impact$Significant), background = "#e6ffe6") %>%
  row_spec(which(!employer_impact$Significant), background = "#ffe6e6")
Impact of Employer Support Across Groups
Group Outcome OddsRatio Significant Interpretation
employer_support_score Alumni High Work-based Learning 9.500000e-01 FALSE Not statistically significant
employer_support_score1 Alumni High Learning Outcomes 3.618969e+18 FALSE Not statistically significant
employer_support_score2 Students High Skill Application 1.000000e+00 FALSE Not statistically significant
employer_support_score11 Students High Learning Integration 1.000000e+00 FALSE Not statistically significant
university_impact %>%
  kable(caption = "Impact of University Support Across Groups") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
  row_spec(which(university_impact$Significant), background = "#e6ffe6") %>%
  row_spec(which(!university_impact$Significant), background = "#ffe6e6")
Impact of University Support Across Groups
Group Outcome OddsRatio Significant Interpretation
university_support_score Alumni High Academic Integration 8.144807e+99 FALSE Not statistically significant
university_support_score1 Alumni High Learning Outcomes 2.449845e+80 FALSE Not statistically significant
university_support_score2 Students High Learning Integration 1.000000e+00 FALSE Not statistically significant

Conclusion

This analysis examined the relationship between autonomy and discretionary learning in the Graduate Apprenticeship Programme, comparing both current students and alumni. Our key findings include:

No statistically significant relationships were found, possibly due to the limited sample size. ### Key Correlations

For Alumni:

  • university_support and work_learning have a strong positive correlation (r = 0.95).
  • university_support and academic_integration have a strong positive correlation (r = 0.73).
  • work_learning and academic_integration have a strong positive correlation (r = 0.87).
  • university_support and learning_outcome have a strong positive correlation (r = 0.92).
  • work_learning and learning_outcome have a strong positive correlation (r = 0.87).
  • academic_integration and learning_outcome have a moderate positive correlation (r = 0.59).

For Current Students:

  • employer_support and university_support have a strong positive correlation (r = 0.88).
  • employer_support and self_directed_learning have a strong positive correlation (r = 0.74).
  • university_support and self_directed_learning have a strong positive correlation (r = 0.73).
  • employer_support and skill_application have a moderate positive correlation (r = 0.52).
  • employer_support and learning_integration have a moderate positive correlation (r = 0.66).
  • self_directed_learning and learning_integration have a moderate positive correlation (r = 0.56).
  • skill_application and learning_integration have a strong positive correlation (r = 0.83).

Implications for Practice

Based on our findings, we can make the following recommendations:

  1. For Employers:
    • Provide structured support systems for apprentices
    • Foster autonomy in work-based learning projects
    • Integrate academic learning with workplace practice
  2. For Universities:
    • Strengthen academic support structures
    • Design curricula that complement workplace learning
    • Develop clear frameworks for workplace mentors
  3. For Apprentices:
    • Actively seek autonomy in learning projects
    • Develop self-directed learning skills
    • Integrate academic knowledge with workplace practice

Limitations and Future Research

This study has several limitations:

  • Small sample size, particularly for current students
  • Potential selection bias in survey respondents
  • Cross-sectional design limits causal inferences

Future research should:

  • Collect longitudinal data to track changes over time
  • Increase sample size for better statistical power
  • Include qualitative components to deepen understanding
  • Explore how autonomy interacts with different learning styles

References

Billett, S. (2001). Learning through work: Workplace affordances and individual engagement. Journal of Workplace Learning, 13(5), 209-214.

Ericsson, K. A. (2006). The influence of experience and deliberate practice on the development of superior expert performance. The Cambridge handbook of expertise and expert performance, 38, 685-705.

Tynjälä, P. (2008). Perspectives into learning at the workplace. Educational Research Review, 3(2), 130-154.