This analysis explores the relationship between autonomy and discretionary learning outcomes in the Graduate Apprenticeship Programme. The study examines both alumni and current students to understand how workplace autonomy and support influence learning outcomes.
# Function to clean and prepare data
prepare_data <- function(file_path, dataset_type) {
message(paste("Reading", dataset_type, "data from", file_path))
# Try to read the CSV data with error handling
tryCatch({
data <- read_csv(file_path)
message(paste("Successfully read", nrow(data), "rows and", ncol(data), "columns"))
# Add dataset identifier
data$dataset <- dataset_type
return(data)
}, error = function(e) {
message(paste("Error reading file:", e$message))
message("Trying alternative approach with base R...")
# Try with base R as a fallback
data <- read.csv(file_path, stringsAsFactors = FALSE)
message(paste("Successfully read", nrow(data), "rows and", ncol(data), "columns with base R"))
# Add dataset identifier
data$dataset <- dataset_type
return(data)
})
}
# Import alumni data
alumni_data <- prepare_data("standardized_alumni_survey_complete.csv", "Alumni")
# Import current students data
student_data <- prepare_data("standardized_current_survey_complete.csv", "Current")# Convert all relevant columns to numeric to avoid issues
columns_to_check <- c(
paste0("Q11_", 1:5),
paste0("Q12_", 1:5),
paste0("Q13_", 1:5),
paste0("Q14_", 1:5),
paste0("Q15_", 1:5)
)
for(col in columns_to_check) {
if(col %in% names(alumni_data) && !is.numeric(alumni_data[[col]])) {
alumni_data[[col]] <- as.numeric(alumni_data[[col]])
}
}
# Create composite scores using rowMeans
alumni_data$employer_support_score <- rowMeans(
alumni_data[, paste0("Q11_", 1:5)],
na.rm = TRUE
)
alumni_data$university_support_score <- rowMeans(
alumni_data[, paste0("Q12_", 1:5)],
na.rm = TRUE
)
alumni_data$work_learning_score <- rowMeans(
alumni_data[, paste0("Q13_", 1:5)],
na.rm = TRUE
)
alumni_data$academic_integration_score <- rowMeans(
alumni_data[, paste0("Q14_", 1:5)],
na.rm = TRUE
)
alumni_data$learning_outcome_score <- rowMeans(
alumni_data[, paste0("Q15_", 1:5)],
na.rm = TRUE
)
# Create binary outcomes for logistic regression
alumni_data$high_work_learning <- ifelse(alumni_data$work_learning_score >= 4, 1, 0)
alumni_data$high_academic_integration <- ifelse(alumni_data$academic_integration_score >= 4, 1, 0)
alumni_data$high_learning_outcome <- ifelse(alumni_data$learning_outcome_score >= 4, 1, 0)# Extract demographics
alumni_demo <- alumni_data %>%
dplyr::select(Q1, Q2, Q3, Q4, Q5, Q6) %>%
dplyr::rename(
"Age Group" = Q1,
"Gender" = Q2,
"University" = Q3,
"Programme" = Q4,
"Year of Completion" = Q5,
"Level Achieved" = Q6
)
# Summary table
alumni_demo %>%
group_by(`Gender`) %>%
summarise(Count = n()) %>%
mutate(Percentage = round(Count / sum(Count) * 100, 1)) %>%
kable(caption = "Gender Distribution of Alumni") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| Gender | Count | Percentage |
|---|---|---|
| Female | 10 | 66.7 |
| Male | 4 | 26.7 |
| NA | 1 | 6.7 |
alumni_demo %>%
group_by(`Age Group`) %>%
summarise(Count = n()) %>%
mutate(Percentage = round(Count / sum(Count) * 100, 1)) %>%
kable(caption = "Age Distribution of Alumni") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| Age Group | Count | Percentage |
|---|---|---|
| 25-34 | 4 | 26.7 |
| 35-44 | 4 | 26.7 |
| 45-54 | 6 | 40.0 |
| NA | 1 | 6.7 |
# Create summary of alumni scores
alumni_scores <- alumni_data %>%
dplyr::select(employer_support_score, university_support_score,
work_learning_score, academic_integration_score, learning_outcome_score) %>%
dplyr::rename(
"Employer Support" = employer_support_score,
"University Support" = university_support_score,
"Work-based Learning" = work_learning_score,
"Academic Integration" = academic_integration_score,
"Learning Outcomes" = learning_outcome_score
)
# Summary statistics
alumni_scores_summary <- alumni_scores %>%
summarise(across(everything(),
list(Mean = ~mean(., na.rm = TRUE),
SD = ~sd(., na.rm = TRUE),
Min = ~min(., na.rm = TRUE),
Max = ~max(., na.rm = TRUE)))) %>%
pivot_longer(cols = everything(),
names_to = c("Variable", "Statistic"),
names_pattern = "(.*)_(.*)") %>%
pivot_wider(names_from = Statistic, values_from = value)
alumni_scores_summary %>%
kable(caption = "Summary Statistics for Alumni Scores", digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| Variable | Mean | SD | Min | Max |
|---|---|---|---|---|
| Employer Support | 2.93 | 1.20 | 1.0 | 5 |
| University Support | 4.04 | 0.83 | 1.6 | 5 |
| Work-based Learning | 4.09 | 0.94 | 1.6 | 5 |
| Academic Integration | 4.17 | 0.95 | 2.0 | 5 |
| Learning Outcomes | 4.33 | 0.67 | 2.6 | 5 |
# Current students have direct autonomy measure (Q9)
# Transform autonomy variable for analysis
if("Q9" %in% names(student_data)) {
# Create function to map autonomy levels
map_autonomy <- function(x) {
ifelse(x == "No autonomy", 1,
ifelse(x == "Little autonomy", 2,
ifelse(x == "Moderate autonomy", 3,
ifelse(x == "Significant autonomy", 4,
ifelse(x == "Complete autonomy", 5, NA)))))
}
# Apply the mapping function
student_data$autonomy_level <- map_autonomy(student_data$Q9)
} else {
# Create a placeholder autonomy_level if Q9 doesn't exist
student_data$autonomy_level <- NA
}
# Convert all relevant columns to numeric to avoid issues
for(col in columns_to_check) {
if(col %in% names(student_data) && !is.numeric(student_data[[col]])) {
student_data[[col]] <- as.numeric(student_data[[col]])
}
}
# Create composite scores for students
student_data$employer_support_score <- rowMeans(
student_data[, paste0("Q11_", 1:5)],
na.rm = TRUE
)
student_data$university_support_score <- rowMeans(
student_data[, paste0("Q12_", 1:5)],
na.rm = TRUE
)
student_data$self_directed_learning_score <- rowMeans(
student_data[, paste0("Q13_", 1:5)],
na.rm = TRUE
)
student_data$skill_application_score <- rowMeans(
student_data[, paste0("Q14_", 1:5)],
na.rm = TRUE
)
student_data$learning_integration_score <- rowMeans(
student_data[, paste0("Q15_", 1:5)],
na.rm = TRUE
)
# Create binary outcomes for logistic regression
student_data$high_self_directed <- ifelse(student_data$self_directed_learning_score >= 4, 1, 0)
student_data$high_skill_application <- ifelse(student_data$skill_application_score >= 4, 1, 0)
student_data$high_learning_integration <- ifelse(student_data$learning_integration_score >= 4, 1, 0)# Extract demographics
student_demo <- student_data %>%
dplyr::select(Q1, Q2, Q3, Q4, Q5, Q9) %>%
dplyr::rename(
"Age Group" = Q1,
"Gender" = Q2,
"University" = Q3,
"Programme" = Q4,
"Year of Study" = Q5,
"Autonomy Level" = Q9
)
# Summary tables
student_demo %>%
group_by(`Gender`) %>%
summarise(Count = n()) %>%
mutate(Percentage = round(Count / sum(Count) * 100, 1)) %>%
kable(caption = "Gender Distribution of Current Students") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| Gender | Count | Percentage |
|---|---|---|
| Female | 9 | 64.3 |
| Male | 3 | 21.4 |
| NA | 2 | 14.3 |
student_demo %>%
group_by(`Autonomy Level`) %>%
summarise(Count = n()) %>%
mutate(Percentage = round(Count / sum(Count) * 100, 1)) %>%
kable(caption = "Autonomy Level Distribution of Current Students") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| Autonomy Level | Count | Percentage |
|---|---|---|
| Complete autonomy | 2 | 14.3 |
| Little autonomy | 1 | 7.1 |
| No autonomy | 1 | 7.1 |
| Significant autonomy | 8 | 57.1 |
| NA | 2 | 14.3 |
# Create summary of student scores
student_scores <- student_data %>%
dplyr::select(autonomy_level, employer_support_score, university_support_score,
self_directed_learning_score, skill_application_score, learning_integration_score) %>%
dplyr::rename(
"Autonomy Level" = autonomy_level,
"Employer Support" = employer_support_score,
"University Support" = university_support_score,
"Self-directed Learning" = self_directed_learning_score,
"Skill Application" = skill_application_score,
"Learning Integration" = learning_integration_score
)
# Summary statistics
student_scores_summary <- student_scores %>%
summarise(across(everything(),
list(Mean = ~mean(., na.rm = TRUE),
SD = ~sd(., na.rm = TRUE),
Min = ~min(., na.rm = TRUE),
Max = ~max(., na.rm = TRUE)))) %>%
pivot_longer(cols = everything(),
names_to = c("Variable", "Statistic"),
names_pattern = "(.*)_(.*)") %>%
pivot_wider(names_from = Statistic, values_from = value)
student_scores_summary %>%
kable(caption = "Summary Statistics for Current Students Scores", digits = 2) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| Variable | Mean | SD | Min | Max |
|---|---|---|---|---|
| Autonomy Level | 3.75 | 1.14 | 1 | 5.0 |
| Employer Support | 2.17 | 0.82 | 1 | 3.8 |
| University Support | 2.07 | 1.11 | 1 | 5.0 |
| Self-directed Learning | 2.80 | 1.26 | 1 | 5.0 |
| Skill Application | 2.20 | 0.92 | 1 | 3.4 |
| Learning Integration | 1.96 | 0.70 | 1 | 2.8 |
# Create analysis subset for alumni - INCLUDE ALL NEEDED VARIABLES
alumni_complete <- subset(alumni_data,
Finished == "True" | Finished == TRUE,
select = c(employer_support_score, university_support_score,
work_learning_score, academic_integration_score, learning_outcome_score,
high_work_learning, high_academic_integration, high_learning_outcome))
# Remove rows with NA in key variables
alumni_complete <- na.omit(alumni_complete)
# Create analysis subset for students - INCLUDE ALL NEEDED VARIABLES
student_complete <- subset(student_data,
Finished == "True" | Finished == TRUE,
select = c(autonomy_level, employer_support_score, university_support_score,
self_directed_learning_score, skill_application_score, learning_integration_score,
high_self_directed, high_skill_application, high_learning_integration))
# Remove rows with NA in key variables
student_complete <- na.omit(student_complete)
# Display data sizes
data_sizes <- data.frame(
Dataset = c("Alumni (Complete Records)", "Current Students (Complete Records)"),
Records = c(nrow(alumni_complete), nrow(student_complete))
)
data_sizes %>%
kable(caption = "Number of Complete Records for Analysis") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)| Dataset | Records |
|---|---|
| Alumni (Complete Records) | 11 |
| Current Students (Complete Records) | 9 |
# Install corrplot if not already installed
if (!requireNamespace("corrplot", quietly = TRUE)) {
install.packages("corrplot")
}
# Load the corrplot package
library(corrplot)
# Create correlation matrix for key variables in alumni data
alumni_cor_vars <- alumni_complete[, c("employer_support_score", "university_support_score",
"work_learning_score", "academic_integration_score",
"learning_outcome_score")]
alumni_cor <- cor(alumni_cor_vars, use = "pairwise.complete.obs")
# Create a nicer correlation plot
corrplot::corrplot(alumni_cor,
method = "circle",
type = "upper",
tl.col = "black",
tl.srt = 45,
tl.cex = 0.8,
diag = FALSE,
title = "Correlation Matrix for Alumni Variables",
mar = c(0, 0, 2, 0))# Also display as a table with rounded values
round(alumni_cor, 2) %>%
as.data.frame() %>%
rownames_to_column("Variable") %>%
kable(caption = "Correlation Matrix for Alumni Variables") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
column_spec(1, bold = TRUE)| Variable | employer_support_score | university_support_score | work_learning_score | academic_integration_score | learning_outcome_score |
|---|---|---|---|---|---|
| employer_support_score | 1.00 | 0.25 | 0.08 | -0.27 | 0.31 |
| university_support_score | 0.25 | 1.00 | 0.95 | 0.73 | 0.92 |
| work_learning_score | 0.08 | 0.95 | 1.00 | 0.87 | 0.87 |
| academic_integration_score | -0.27 | 0.73 | 0.87 | 1.00 | 0.59 |
| learning_outcome_score | 0.31 | 0.92 | 0.87 | 0.59 | 1.00 |
# Create correlation matrix for key variables in student data
student_cor_vars <- student_complete[, c("autonomy_level", "employer_support_score",
"university_support_score", "self_directed_learning_score",
"skill_application_score", "learning_integration_score")]
student_cor <- cor(student_cor_vars, use = "pairwise.complete.obs")
# Create a nicer correlation plot
corrplot::corrplot(student_cor,
method = "circle",
type = "upper",
tl.col = "black",
tl.srt = 45,
tl.cex = 0.8,
diag = FALSE,
title = "Correlation Matrix for Current Students Variables",
mar = c(0, 0, 2, 0))# Also display as a table with rounded values
round(student_cor, 2) %>%
as.data.frame() %>%
rownames_to_column("Variable") %>%
kable(caption = "Correlation Matrix for Current Students Variables") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
column_spec(1, bold = TRUE)| Variable | autonomy_level | employer_support_score | university_support_score | self_directed_learning_score | skill_application_score | learning_integration_score |
|---|---|---|---|---|---|---|
| autonomy_level | 1.00 | -0.09 | -0.17 | 0.03 | 0.01 | 0.24 |
| employer_support_score | -0.09 | 1.00 | 0.88 | 0.74 | 0.52 | 0.66 |
| university_support_score | -0.17 | 0.88 | 1.00 | 0.73 | 0.23 | 0.33 |
| self_directed_learning_score | 0.03 | 0.74 | 0.73 | 1.00 | 0.40 | 0.56 |
| skill_application_score | 0.01 | 0.52 | 0.23 | 0.40 | 1.00 | 0.83 |
| learning_integration_score | 0.24 | 0.66 | 0.33 | 0.56 | 0.83 | 1.00 |
# Create plots with consistent styling
theme_set(theme_minimal() +
theme(
plot.title = element_text(face = "bold", size = 14),
axis.title = element_text(size = 12),
legend.title = element_text(face = "bold"),
panel.grid.minor = element_blank()
))
# Plot 1: Employer support vs. work learning for alumni
plot_alumni_1 <- ggplot(alumni_complete, aes(x = employer_support_score, y = work_learning_score)) +
geom_point(alpha = 0.7, size = 3, color = "steelblue") +
geom_smooth(method = "lm", se = TRUE, color = "darkred") +
labs(title = "Employer Support and Work-based Learning",
subtitle = "Alumni Respondents",
x = "Employer Support Score",
y = "Work-based Learning Score") +
scale_x_continuous(breaks = 1:5) +
scale_y_continuous(breaks = 1:5)
# Plot 2: University support vs. academic integration for alumni
plot_alumni_2 <- ggplot(alumni_complete, aes(x = university_support_score, y = academic_integration_score)) +
geom_point(alpha = 0.7, size = 3, color = "steelblue") +
geom_smooth(method = "lm", se = TRUE, color = "darkred") +
labs(title = "University Support and Academic Integration",
subtitle = "Alumni Respondents",
x = "University Support Score",
y = "Academic Integration Score") +
scale_x_continuous(breaks = 1:5) +
scale_y_continuous(breaks = 1:5)
# Plot 3: University support vs. learning outcomes for alumni
plot_alumni_3 <- ggplot(alumni_complete, aes(x = university_support_score, y = learning_outcome_score)) +
geom_point(alpha = 0.7, size = 3, color = "steelblue") +
geom_smooth(method = "lm", se = TRUE, color = "darkred") +
labs(title = "University Support and Learning Outcomes",
subtitle = "Alumni Respondents",
x = "University Support Score",
y = "Learning Outcome Score") +
scale_x_continuous(breaks = 1:5) +
scale_y_continuous(breaks = 1:5)
# Plot 4: Employer support vs. learning outcomes for alumni
plot_alumni_4 <- ggplot(alumni_complete, aes(x = employer_support_score, y = learning_outcome_score)) +
geom_point(alpha = 0.7, size = 3, color = "steelblue") +
geom_smooth(method = "lm", se = TRUE, color = "darkred") +
labs(title = "Employer Support and Learning Outcomes",
subtitle = "Alumni Respondents",
x = "Employer Support Score",
y = "Learning Outcome Score") +
scale_x_continuous(breaks = 1:5) +
scale_y_continuous(breaks = 1:5)
# Combine plots in a grid
(plot_alumni_1 + plot_alumni_2) / (plot_alumni_3 + plot_alumni_4) +
plot_annotation(
title = "Relationships Between Support and Learning Outcomes for Alumni",
theme = theme(plot.title = element_text(face = "bold", size = 16))
)# Plot 1: Autonomy vs. self-directed learning for students
plot_student_1 <- ggplot(student_complete,
aes(x = factor(autonomy_level), y = self_directed_learning_score)) +
geom_boxplot(fill = "lightblue", alpha = 0.7) +
geom_jitter(width = 0.2, alpha = 0.7, color = "steelblue") +
labs(title = "Autonomy and Self-directed Learning",
subtitle = "Current Students",
x = "Autonomy Level (1=None, 5=Complete)",
y = "Self-directed Learning Score") +
scale_y_continuous(breaks = 1:5)
# Plot 2: Autonomy vs. skill application for students
plot_student_2 <- ggplot(student_complete,
aes(x = factor(autonomy_level), y = skill_application_score)) +
geom_boxplot(fill = "lightgreen", alpha = 0.7) +
geom_jitter(width = 0.2, alpha = 0.7, color = "darkgreen") +
labs(title = "Autonomy and Skill Application",
subtitle = "Current Students",
x = "Autonomy Level (1=None, 5=Complete)",
y = "Skill Application Score") +
scale_y_continuous(breaks = 1:5)
# Plot 3: Employer support vs. self-directed learning for students
plot_student_3 <- ggplot(student_complete,
aes(x = employer_support_score, y = self_directed_learning_score)) +
geom_point(alpha = 0.7, size = 3, color = "steelblue") +
geom_smooth(method = "lm", se = TRUE, color = "darkred") +
labs(title = "Employer Support and Self-directed Learning",
subtitle = "Current Students",
x = "Employer Support Score",
y = "Self-directed Learning Score") +
scale_x_continuous(breaks = 1:5) +
scale_y_continuous(breaks = 1:5)
# Plot 4: University support vs. learning integration for students
plot_student_4 <- ggplot(student_complete,
aes(x = university_support_score, y = learning_integration_score)) +
geom_point(alpha = 0.7, size = 3, color = "steelblue") +
geom_smooth(method = "lm", se = TRUE, color = "darkred") +
labs(title = "University Support and Learning Integration",
subtitle = "Current Students",
x = "University Support Score",
y = "Learning Integration Score") +
scale_x_continuous(breaks = 1:5) +
scale_y_continuous(breaks = 1:5)
# Combine plots in a grid
(plot_student_1 + plot_student_2) / (plot_student_3 + plot_student_4) +
plot_annotation(
title = "Relationships Between Autonomy, Support, and Learning Outcomes for Current Students",
theme = theme(plot.title = element_text(face = "bold", size = 16))
)# Model 1: Effect of employer support on work-based learning
model_alumni_1 <- glm(high_work_learning ~ employer_support_score,
data = alumni_complete,
family = binomial(link = "logit"))
# Model 2: Effect of university support on academic integration
# Use increased iterations to address potential convergence issues
model_alumni_2 <- glm(high_academic_integration ~ university_support_score,
data = alumni_complete,
family = binomial(link = "logit"),
control = list(maxit = 50))
# Model 3: Combined model for learning outcomes
# Use increased iterations to address potential convergence issues
model_alumni_3 <- glm(high_learning_outcome ~ employer_support_score + university_support_score,
data = alumni_complete,
family = binomial(link = "logit"),
control = list(maxit = 50))
# Calculate odds ratios
odds_ratio_alumni_1 <- exp(coef(model_alumni_1))
odds_ratio_alumni_2 <- exp(coef(model_alumni_2))
odds_ratio_alumni_3 <- exp(coef(model_alumni_3))
# Function to extract key model results
extract_model_results <- function(model, model_name) {
coefs <- summary(model)$coefficients
# Create a data frame with model results
results <- data.frame(
Model = model_name,
Variable = rownames(coefs),
Coefficient = coefs[, "Estimate"],
SE = coefs[, "Std. Error"],
z = coefs[, "z value"],
p = coefs[, "Pr(>|z|)"],
OR = exp(coefs[, "Estimate"])
)
return(results)
}
# Extract results from all alumni models
alumni_results <- rbind(
extract_model_results(model_alumni_1, "Model 1: Work Learning"),
extract_model_results(model_alumni_2, "Model 2: Academic Integration"),
extract_model_results(model_alumni_3, "Model 3: Learning Outcomes")
)
# Display results table
alumni_results %>%
kable(caption = "Logistic Regression Results for Alumni",
digits = c(0, 0, 3, 3, 2, 3, 2),
col.names = c("Model", "Variable", "Coefficient", "Std. Error",
"z value", "p-value", "Odds Ratio")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
row_spec(0, bold = TRUE) %>%
pack_rows("Model 1: Effect of Employer Support on Work Learning", 1, 2) %>%
pack_rows("Model 2: Effect of University Support on Academic Integration", 3, 4) %>%
pack_rows("Model 3: Combined Effects on Learning Outcomes", 5, 7)| Model | Variable | Coefficient | Std. Error | z value | p-value | Odds Ratio | |
|---|---|---|---|---|---|---|---|
| Model 1: Effect of Employer Support on Work Learning | |||||||
| (Intercept) | Model 1: Work Learning | (Intercept) | 1.117 | 1.771 | 0.63 | 0.528 | 3.060000e+00 |
| employer_support_score | Model 1: Work Learning | employer_support_score | -0.048 | 0.575 | -0.08 | 0.933 | 9.500000e-01 |
| Model 2: Effect of University Support on Academic Integration | |||||||
| (Intercept)1 | Model 2: Academic Integration | (Intercept) | -896.904 | 1394728.004 | 0.00 | 0.999 | 0.000000e+00 |
| university_support_score | Model 2: Academic Integration | university_support_score | 230.053 | 357119.233 | 0.00 | 0.999 | 8.144807e+99 |
| Model 3: Combined Effects on Learning Outcomes | |||||||
| (Intercept)2 | Model 3: Learning Outcomes | (Intercept) | -829.016 | 1501957.066 | 0.00 | 1.000 | 0.000000e+00 |
| employer_support_score1 | Model 3: Learning Outcomes | employer_support_score | 42.733 | 407993.877 | 0.00 | 1.000 | 3.618969e+18 |
| university_support_score1 | Model 3: Learning Outcomes | university_support_score | 185.103 | 495648.036 | 0.00 | 1.000 | 2.449845e+80 |
# Model 1: Direct effect of autonomy on self-directed learning
model_student_1 <- glm(high_self_directed ~ autonomy_level,
data = student_complete,
family = binomial(link = "logit"))
# Model 2: Effect of autonomy and employer support on skill application
model_student_2 <- glm(high_skill_application ~ autonomy_level + employer_support_score,
data = student_complete,
family = binomial(link = "logit"))
# Model 3: Combined model for learning integration
model_student_3 <- glm(high_learning_integration ~ autonomy_level + employer_support_score + university_support_score,
data = student_complete,
family = binomial(link = "logit"))
# Calculate odds ratios
odds_ratio_student_1 <- exp(coef(model_student_1))
odds_ratio_student_2 <- exp(coef(model_student_2))
odds_ratio_student_3 <- exp(coef(model_student_3))
# Extract results from all student models
student_results <- rbind(
extract_model_results(model_student_1, "Model 1: Self-directed Learning"),
extract_model_results(model_student_2, "Model 2: Skill Application"),
extract_model_results(model_student_3, "Model 3: Learning Integration")
)
# Display results table
student_results %>%
kable(caption = "Logistic Regression Results for Current Students",
digits = c(0, 0, 3, 3, 2, 3, 2),
col.names = c("Model", "Variable", "Coefficient", "Std. Error",
"z value", "p-value", "Odds Ratio")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
row_spec(0, bold = TRUE) %>%
pack_rows("Model 1: Effect of Autonomy on Self-directed Learning", 1, 2) %>%
pack_rows("Model 2: Effect of Autonomy & Employer Support on Skill Application", 3, 5) %>%
pack_rows("Model 3: Combined Effects on Learning Integration", 6, 9)| Model | Variable | Coefficient | Std. Error | z value | p-value | Odds Ratio | |
|---|---|---|---|---|---|---|---|
| Model 1: Effect of Autonomy on Self-directed Learning | |||||||
| (Intercept) | Model 1: Self-directed Learning | (Intercept) | 0.426 | 2.108 | 0.20 | 0.840 | 1.53 |
| autonomy_level | Model 1: Self-directed Learning | autonomy_level | -0.311 | 0.558 | -0.56 | 0.578 | 0.73 |
| Model 2: Effect of Autonomy & Employer Support on Skill Application | |||||||
| (Intercept)1 | Model 2: Skill Application | (Intercept) | -24.566 | 181476.794 | 0.00 | 1.000 | 0.00 |
| autonomy_level1 | Model 2: Skill Application | autonomy_level | 0.000 | 35144.732 | 0.00 | 1.000 | 1.00 |
| employer_support_score | Model 2: Skill Application | employer_support_score | 0.000 | 52982.677 | 0.00 | 1.000 | 1.00 |
| Model 3: Combined Effects on Learning Integration | |||||||
| (Intercept)2 | Model 3: Learning Integration | (Intercept) | -24.566 | 181648.613 | 0.00 | 1.000 | 0.00 |
| autonomy_level2 | Model 3: Learning Integration | autonomy_level | 0.000 | 35941.003 | 0.00 | 1.000 | 1.00 |
| employer_support_score1 | Model 3: Learning Integration | employer_support_score | 0.000 | 114125.475 | 0.00 | 1.000 | 1.00 |
| university_support_score | Model 3: Learning Integration | university_support_score | 0.000 | 82144.674 | 0.00 | 1.000 | 1.00 |
# Create ordinal factors for alumni
alumni_complete$work_learning_level <- cut(alumni_complete$work_learning_score,
breaks = c(0, 2, 3, 4, 5),
labels = c("Low", "Medium", "High", "Very High"),
include.lowest = TRUE)
# Make it ordered
alumni_complete$work_learning_level <- ordered(alumni_complete$work_learning_level,
levels = c("Low", "Medium", "High", "Very High"))
# Create employer support level
alumni_complete$employer_support_level <- cut(alumni_complete$employer_support_score,
breaks = c(0, 2, 3, 4, 5),
labels = c("Low", "Medium", "High", "Very High"),
include.lowest = TRUE)
# Make it ordered
alumni_complete$employer_support_level <- ordered(alumni_complete$employer_support_level,
levels = c("Low", "Medium", "High", "Very High"))
# Check distributions of factors
wl_table <- table(alumni_complete$work_learning_level)
es_table <- table(alumni_complete$employer_support_level)
# Display factor distributions
par(mfrow = c(1, 2))
barplot(wl_table, main = "Work Learning Level Distribution",
col = "steelblue", border = "white", las = 1)
barplot(es_table, main = "Employer Support Level Distribution",
col = "darkgreen", border = "white", las = 1)par(mfrow = c(1, 1))
# Ordinal logistic regression for alumni - with error handling
ordinal_model_alumni <- tryCatch({
# Only proceed if we have at least one observation in multiple categories
if(sum(wl_table > 0) > 1 && sum(es_table > 0) > 1) {
model <- polr(work_learning_level ~ employer_support_level,
data = alumni_complete,
Hess = TRUE)
model
} else {
NULL
}
}, error = function(e) {
message(paste("Error in ordinal regression for alumni:", e$message))
NULL
})
# Display ordinal model results if created successfully
if(!is.null(ordinal_model_alumni)) {
ordinal_summary <- summary(ordinal_model_alumni)
# Display coefficients
ordinal_coef <- data.frame(
Variable = names(coef(ordinal_model_alumni)),
Coefficient = coef(ordinal_model_alumni),
OddsRatio = exp(coef(ordinal_model_alumni))
)
ordinal_coef %>%
kable(caption = "Ordinal Regression Coefficients (Alumni)",
digits = 3) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
# Display thresholds
thresholds <- data.frame(
Threshold = names(ordinal_model_alumni$zeta),
Value = ordinal_model_alumni$zeta
)
thresholds %>%
kable(caption = "Ordinal Regression Thresholds (Alumni)",
digits = 3) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
} else {
cat("Ordinal regression model for alumni could not be created due to data limitations.")
}| Threshold | Value | |
|---|---|---|
| Low|Medium | Low|Medium | -5.847 |
| Medium|High | Medium|High | -4.959 |
| High|Very High | High|Very High | -3.001 |
# Create ordinal factors for students
student_complete$self_directed_level <- cut(student_complete$self_directed_learning_score,
breaks = c(0, 2, 3, 4, 5),
labels = c("Low", "Medium", "High", "Very High"),
include.lowest = TRUE)
# Make it ordered
student_complete$self_directed_level <- ordered(student_complete$self_directed_level,
levels = c("Low", "Medium", "High", "Very High"))
# Create autonomy_factor
student_complete$autonomy_factor <- factor(student_complete$autonomy_level,
levels = 1:5,
labels = c("None", "Little", "Moderate", "Significant", "Complete"),
ordered = TRUE)
# Check distributions of factors
sdl_table <- table(student_complete$self_directed_level)
af_table <- table(student_complete$autonomy_factor)
# Display factor distributions
par(mfrow = c(1, 2))
barplot(sdl_table, main = "Self-directed Learning Level Distribution",
col = "steelblue", border = "white", las = 1)
barplot(af_table, main = "Autonomy Factor Distribution",
col = "darkgreen", border = "white", las = 1)par(mfrow = c(1, 1))
# Ordinal logistic regression for students - with error handling
ordinal_model_student <- tryCatch({
# Only proceed if we have at least one observation in multiple categories
if(sum(sdl_table > 0) > 1 && sum(af_table > 0) > 1) {
model <- polr(self_directed_level ~ autonomy_factor,
data = student_complete,
Hess = TRUE)
model
} else {
NULL
}
}, error = function(e) {
message(paste("Error in ordinal regression for students:", e$message))
NULL
})
# Display ordinal model results if created successfully
if(!is.null(ordinal_model_student)) {
ordinal_summary <- summary(ordinal_model_student)
# Display coefficients
ordinal_coef <- data.frame(
Variable = names(coef(ordinal_model_student)),
Coefficient = coef(ordinal_model_student),
OddsRatio = exp(coef(ordinal_model_student))
)
ordinal_coef %>%
kable(caption = "Ordinal Regression Coefficients (Students)",
digits = 3) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
# Display thresholds
thresholds <- data.frame(
Threshold = names(ordinal_model_student$zeta),
Value = ordinal_model_student$zeta
)
thresholds %>%
kable(caption = "Ordinal Regression Thresholds (Students)",
digits = 3) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
} else {
cat("Ordinal regression model for students could not be created due to data limitations.")
}| Threshold | Value | |
|---|---|---|
| Low|Medium | Low|Medium | -4.001 |
| Medium|High | Medium|High | -2.791 |
| High|Very High | High|Very High | -1.292 |
# Function to interpret odds ratios from logistic regression
interpret_model <- function(model, predictor, outcome, data_group) {
if(predictor %in% names(coef(model))) {
coef_val <- coef(model)[predictor]
odds <- exp(coef_val)
p_val <- summary(model)$coefficients[predictor, "Pr(>|z|)"]
result <- data.frame(
Group = data_group,
Predictor = predictor,
Outcome = outcome,
Coefficient = round(coef_val, 3),
OddsRatio = round(odds, 2),
p.value = round(p_val, 3),
Significant = p_val < 0.05,
Interpretation = ifelse(p_val < 0.05,
ifelse(odds > 1,
paste0("+", round((odds-1)*100, 1), "% increase in odds per unit"),
paste0("-", round((1-odds)*100, 1), "% decrease in odds per unit")),
"Not statistically significant")
)
return(result)
} else {
return(NULL)
}
}
# Extract key findings from alumni models
alumni_findings <- rbind(
interpret_model(model_alumni_1, "employer_support_score", "High Work-based Learning", "Alumni"),
interpret_model(model_alumni_2, "university_support_score", "High Academic Integration", "Alumni"),
interpret_model(model_alumni_3, "employer_support_score", "High Learning Outcomes", "Alumni"),
interpret_model(model_alumni_3, "university_support_score", "High Learning Outcomes", "Alumni")
)
# Extract key findings from student models
student_findings <- rbind(
interpret_model(model_student_1, "autonomy_level", "High Self-directed Learning", "Students"),
interpret_model(model_student_2, "autonomy_level", "High Skill Application", "Students"),
interpret_model(model_student_2, "employer_support_score", "High Skill Application", "Students"),
interpret_model(model_student_3, "autonomy_level", "High Learning Integration", "Students"),
interpret_model(model_student_3, "employer_support_score", "High Learning Integration", "Students"),
interpret_model(model_student_3, "university_support_score", "High Learning Integration", "Students")
)
# Combine all findings
all_findings <- rbind(alumni_findings, student_findings)
# Create a nice table of findings
all_findings %>%
kable(caption = "Key Findings from Logistic Regression Models",
col.names = c("Group", "Predictor", "Outcome", "Coefficient",
"Odds Ratio", "p-value", "Significant", "Interpretation")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
row_spec(which(all_findings$Significant), background = "#e6ffe6") %>%
row_spec(which(!all_findings$Significant), background = "#ffe6e6") %>%
column_spec(8, width = "20em")| Group | Predictor | Outcome | Coefficient | Odds Ratio | p-value | Significant | Interpretation | |
|---|---|---|---|---|---|---|---|---|
| employer_support_score | Alumni | employer_support_score | High Work-based Learning | -0.048 | 9.500000e-01 | 0.933 | FALSE | Not statistically significant |
| university_support_score | Alumni | university_support_score | High Academic Integration | 230.053 | 8.144807e+99 | 0.999 | FALSE | Not statistically significant |
| employer_support_score1 | Alumni | employer_support_score | High Learning Outcomes | 42.733 | 3.618969e+18 | 1.000 | FALSE | Not statistically significant |
| university_support_score1 | Alumni | university_support_score | High Learning Outcomes | 185.103 | 2.449845e+80 | 1.000 | FALSE | Not statistically significant |
| autonomy_level | Students | autonomy_level | High Self-directed Learning | -0.311 | 7.300000e-01 | 0.578 | FALSE | Not statistically significant |
| autonomy_level1 | Students | autonomy_level | High Skill Application | 0.000 | 1.000000e+00 | 1.000 | FALSE | Not statistically significant |
| employer_support_score2 | Students | employer_support_score | High Skill Application | 0.000 | 1.000000e+00 | 1.000 | FALSE | Not statistically significant |
| autonomy_level2 | Students | autonomy_level | High Learning Integration | 0.000 | 1.000000e+00 | 1.000 | FALSE | Not statistically significant |
| employer_support_score11 | Students | employer_support_score | High Learning Integration | 0.000 | 1.000000e+00 | 1.000 | FALSE | Not statistically significant |
| university_support_score2 | Students | university_support_score | High Learning Integration | 0.000 | 1.000000e+00 | 1.000 | FALSE | Not statistically significant |
# Compare the impact of employer support between alumni and current students
employer_impact <- all_findings %>%
dplyr::filter(Predictor %in% c("employer_support_score")) %>%
dplyr::select(Group, Outcome, OddsRatio, Significant, Interpretation)
# Compare the impact of university support between alumni and current students
university_impact <- all_findings %>%
dplyr::filter(Predictor %in% c("university_support_score")) %>%
dplyr::select(Group, Outcome, OddsRatio, Significant, Interpretation)
# Display comparative findings
employer_impact %>%
kable(caption = "Impact of Employer Support Across Groups") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
row_spec(which(employer_impact$Significant), background = "#e6ffe6") %>%
row_spec(which(!employer_impact$Significant), background = "#ffe6e6")| Group | Outcome | OddsRatio | Significant | Interpretation | |
|---|---|---|---|---|---|
| employer_support_score | Alumni | High Work-based Learning | 9.500000e-01 | FALSE | Not statistically significant |
| employer_support_score1 | Alumni | High Learning Outcomes | 3.618969e+18 | FALSE | Not statistically significant |
| employer_support_score2 | Students | High Skill Application | 1.000000e+00 | FALSE | Not statistically significant |
| employer_support_score11 | Students | High Learning Integration | 1.000000e+00 | FALSE | Not statistically significant |
university_impact %>%
kable(caption = "Impact of University Support Across Groups") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed")) %>%
row_spec(which(university_impact$Significant), background = "#e6ffe6") %>%
row_spec(which(!university_impact$Significant), background = "#ffe6e6")| Group | Outcome | OddsRatio | Significant | Interpretation | |
|---|---|---|---|---|---|
| university_support_score | Alumni | High Academic Integration | 8.144807e+99 | FALSE | Not statistically significant |
| university_support_score1 | Alumni | High Learning Outcomes | 2.449845e+80 | FALSE | Not statistically significant |
| university_support_score2 | Students | High Learning Integration | 1.000000e+00 | FALSE | Not statistically significant |
This analysis examined the relationship between autonomy and discretionary learning in the Graduate Apprenticeship Programme, comparing both current students and alumni. Our key findings include:
No statistically significant relationships were found, possibly due to the limited sample size. ### Key Correlations
For Alumni:
For Current Students:
Based on our findings, we can make the following recommendations:
This study has several limitations:
Future research should:
Billett, S. (2001). Learning through work: Workplace affordances and individual engagement. Journal of Workplace Learning, 13(5), 209-214.
Ericsson, K. A. (2006). The influence of experience and deliberate practice on the development of superior expert performance. The Cambridge handbook of expertise and expert performance, 38, 685-705.
Tynjälä, P. (2008). Perspectives into learning at the workplace. Educational Research Review, 3(2), 130-154.