# Simulate student features
#
# Builds a data frame with one row per simulated student.
#
# Arguments:
#   n    Number of students to simulate (single non-negative number,
#        default 100).
#   seed Value passed to set.seed() before sampling (default 260923), so
#        repeated calls with the same arguments return identical data.
#        Pass NULL to leave the RNG untouched.
#
# Returns:
#   A data frame with columns `student_id` (1..n), `student_engagement`
#   (normal draws, mean 50, sd 10) and `student_performance` (normal draws,
#   mean 60, sd 15).
#
# Note: when `seed` is not NULL, the call resets the global RNG state.
simulate_student_features <- function(n = 100, seed = 260923) {
  stopifnot(is.numeric(n), length(n) == 1L, !is.na(n), n >= 0)

  # Set the random seed
  if (!is.null(seed)) {
    set.seed(seed)
  }

  # Generate unique student IDs; seq_len() yields an empty vector for n = 0,
  # whereas seq(1, 0) would count down and produce c(1, 0).
  student_ids <- seq_len(n)

  # Simulate student engagement, then performance. The order of these two
  # draws matters for reproducibility under a fixed seed.
  student_engagement <- rnorm(n, mean = 50, sd = 10)
  student_performance <- rnorm(n, mean = 60, sd = 15)

  # Combine the data into a data frame
  data.frame(
    student_id = student_ids,
    student_engagement = student_engagement,
    student_performance = student_performance
  )
}

# Generate the dataset
student_data <- simulate_student_features()
# Preview the simulated dataset
head(student_data)

# Generate the feature table used for PCA. The cluster labels computed from
# it are attached to `student_data` further down, so both objects must hold
# the same rows in the same order (the simulator uses a fixed seed).
student_features <- simulate_student_features(n = 100)

# Principal component analysis on every column except the first (the student
# ID); `scale. = TRUE` scales the variables to unit variance beforehand.
pca_result <- prcomp(student_features[, -1], scale. = TRUE)
pca_components <- pca_result$x
summary(pca_components)

# Apply KMeans clustering
# Seed the RNG first: kmeans() picks its starting centers at random.
set.seed(123)
kmeans_result <- kmeans(pca_components, centers = 3)
cluster_assignments <- kmeans_result$cluster

# Attach the cluster label of each student to the dataset
student_data$cluster <- cluster_assignments
summary(cluster_assignments)
head(student_data)

# Scatter plot of engagement vs. performance, coloured by cluster
library(ggplot2)
ggplot(
  student_data,
  aes(
    x = student_engagement,
    y = student_performance,
    color = factor(cluster)
  )
) +
  geom_point() +
  scale_color_manual(values = c("blue", "red", "green")) +
  labs(
    title = "Student Clusters based on Engagement and Performance",
    x = "Student Engagement",
    y = "Student Performance",
    color = "Cluster"
  ) +
  theme_minimal()