Try with 100 applications.
# Load necessary libraries
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(stringdist)
# Generate fake data
set.seed(123)
n <- 100
grades <- c("A", "B", "C")
# Generate all possible three-grade combinations
generate_combinations <- function(grades) {
combinations <- expand.grid(grades, grades, grades)
apply(combinations, 1, paste, collapse = "")
}
grade_combinations <- generate_combinations(grades)
# Function to check if a grade passes the cutoff
passes_cutoff <- function(grade, cutoff) {
grade_split <- sort(unlist(strsplit(grade, "")))
cutoff_split <- sort(unlist(strsplit(cutoff, "")))
grade_counts <- table(factor(grade_split, levels = grades))
cutoff_counts <- table(factor(cutoff_split, levels = grades))
all(grade_counts >= cutoff_counts)
}
# Function to compute the distance based on the number of adjustments needed
compute_distance <- function(grade, cutoff) {
stringdist(grade, cutoff, method = "lv")
}
# Function to estimate treatment effect at a given cutoff
estimate_treatment_effect <- function(cutoff, x, y) {
left_indices <- sapply(x, function(g) !passes_cutoff(g, cutoff))
right_indices <- sapply(x, function(g) passes_cutoff(g, cutoff))
if (sum(left_indices) == 0 || sum(right_indices) == 0) {
return(NA) # Not enough data points to calculate the treatment effect
}
mean_right <- mean(y[right_indices])
mean_left <- mean(y[left_indices])
treatment_effect <- mean_right - mean_left
return(treatment_effect)
}
# Simulate multiple runs
num_simulations <- 100
results <- data.frame(Simulation = integer(), TrueCutoff = character(), BestCutoff = character(), TreatmentEffect = numeric(), Match = logical())
for (sim in 1:num_simulations) {
# Generate grades for applications
x <- sample(grade_combinations, n, replace = TRUE)
# Randomly select a true cutoff from the grade combinations
true_cutoff <- sample(grade_combinations, 1)
# Admission probabilities based on distance from the cutoff
admission_prob <- sapply(x, function(g) {
if (passes_cutoff(g, true_cutoff)) {
return(1)
} else {
distance <- compute_distance(g, true_cutoff)
return(max(0.2, 1 - 0.4 * distance))
}
})
# Generate admission results based on probabilities
y <- rbinom(n, 1, admission_prob)
# Estimate treatment effects for all possible cutoffs
treatment_effects <- sapply(grade_combinations, function(cutoff) estimate_treatment_effect(cutoff, x, y))
# Remove NAs from treatment effects
valid_cutoffs <- grade_combinations[!is.na(treatment_effects)]
valid_treatment_effects <- treatment_effects[!is.na(treatment_effects)]
# Find the cutoff with the largest treatment effect
best_cutoff <- valid_cutoffs[which.max(valid_treatment_effects)]
best_treatment_effect <- max(valid_treatment_effects)
# Reorder best_cutoff and true_cutoff
reorder_cutoff <- function(cutoff) {
paste(sort(unlist(strsplit(cutoff, ""))), collapse = "")
}
best_cutoff <- reorder_cutoff(best_cutoff)
true_cutoff <- reorder_cutoff(true_cutoff)
# Document if the true and best cutoff match
match <- best_cutoff == true_cutoff
results <- rbind(results, data.frame(Simulation = sim, TrueCutoff = true_cutoff, BestCutoff = best_cutoff, TreatmentEffect = best_treatment_effect, Match = match))
}
# Summarize results
summary(results$Match)
## Mode TRUE
## logical 100
# Plot the distribution of treatment effects
hist(as.numeric(factor(results$BestCutoff, levels = grade_combinations)), breaks = length(grade_combinations), main = "Distribution of Best Cutoff Grades", xlab = "Best Cutoff Grade", col = "skyblue", xaxt='n')
axis(1, at=1:length(grade_combinations), labels=grade_combinations)
# Print results
print(results)
## Simulation TrueCutoff BestCutoff TreatmentEffect Match
## 1 1 AAC AAC 0.7126437 TRUE
## 2 2 AAA AAA 0.7525773 TRUE
## 3 3 ABC ABC 0.5540541 TRUE
## 4 4 ABB ABB 0.7282609 TRUE
## 5 5 AAB AAB 0.7108434 TRUE
## 6 6 AAA AAA 0.6907216 TRUE
## 7 7 BCC BCC 0.6666667 TRUE
## 8 8 ACC ACC 0.7368421 TRUE
## 9 9 AAB AAB 0.6404494 TRUE
## 10 10 ABB ABB 0.7127660 TRUE
## 11 11 ABC ABC 0.6794872 TRUE
## 12 12 BBC BBC 0.7701149 TRUE
## 13 13 ABC ABC 0.7532468 TRUE
## 14 14 ABC ABC 0.7160494 TRUE
## 15 15 CCC CCC 0.6979167 TRUE
## 16 16 ABC ABC 0.7027027 TRUE
## 17 17 ABB ABB 0.7765957 TRUE
## 18 18 BBB BBB 0.7010309 TRUE
## 19 19 BCC BCC 0.7727273 TRUE
## 20 20 BBC BBC 0.6444444 TRUE
## 21 21 AAB AAB 0.6625000 TRUE
## 22 22 BBB BBB 0.8191489 TRUE
## 23 23 AAC AAC 0.7356322 TRUE
## 24 24 ABC ABC 0.6666667 TRUE
## 25 25 AAC AAC 0.6853933 TRUE
## 26 26 BCC BCC 0.6516854 TRUE
## 27 27 ABC ABC 0.5875000 TRUE
## 28 28 ABC ABC 0.6794872 TRUE
## 29 29 AAC AAC 0.6630435 TRUE
## 30 30 AAA AAA 0.7021277 TRUE
## 31 31 BCC BCC 0.7209302 TRUE
## 32 32 ABB ABB 0.6666667 TRUE
## 33 33 ABC ABC 0.7333333 TRUE
## 34 34 AAB AAB 0.7368421 TRUE
## 35 35 ABC ABC 0.6987952 TRUE
## 36 36 AAA AAA 0.7113402 TRUE
## 37 37 AAC AAC 0.7752809 TRUE
## 38 38 ABC ABC 0.6835443 TRUE
## 39 39 BBC BBC 0.7790698 TRUE
## 40 40 ABB ABB 0.6477273 TRUE
## 41 41 AAC AAC 0.7840909 TRUE
## 42 42 ABC ABC 0.6049383 TRUE
## 43 43 ABC ABC 0.6962025 TRUE
## 44 44 ACC ACC 0.6559140 TRUE
## 45 45 ABC ABC 0.7297297 TRUE
## 46 46 AAC AAC 0.6896552 TRUE
## 47 47 ABC ABC 0.7402597 TRUE
## 48 48 BBC BBC 0.6823529 TRUE
## 49 49 BCC BCC 0.7840909 TRUE
## 50 50 AAC AAC 0.6847826 TRUE
## 51 51 ABC ABC 0.7594937 TRUE
## 52 52 CCC CCC 0.6875000 TRUE
## 53 53 BBB BBB 0.6489362 TRUE
## 54 54 ACC ACC 0.7752809 TRUE
## 55 55 AAB AAB 0.7282609 TRUE
## 56 56 BCC BCC 0.7325581 TRUE
## 57 57 BCC BCC 0.7727273 TRUE
## 58 58 CCC CCC 0.7473684 TRUE
## 59 59 ABB ABB 0.7415730 TRUE
## 60 60 ABC ABC 0.5975610 TRUE
## 61 61 AAA AAA 0.5957447 TRUE
## 62 62 BBC BBC 0.6739130 TRUE
## 63 63 ACC ACC 0.6853933 TRUE
## 64 64 ABC ABC 0.6027397 TRUE
## 65 65 AAA AAA 0.7010309 TRUE
## 66 66 ABB ABB 0.7555556 TRUE
## 67 67 ABC ABC 0.6904762 TRUE
## 68 68 ABC ABC 0.7058824 TRUE
## 69 69 BCC BCC 0.6703297 TRUE
## 70 70 BCC BCC 0.7111111 TRUE
## 71 71 ABC ABC 0.6575342 TRUE
## 72 72 ABC ABC 0.6164384 TRUE
## 73 73 AAB AAB 0.6451613 TRUE
## 74 74 BBC BBC 0.7000000 TRUE
## 75 75 BCC BCC 0.6860465 TRUE
## 76 76 ABB ABB 0.6144578 TRUE
## 77 77 ACC ACC 0.8000000 TRUE
## 78 78 BCC BCC 0.7619048 TRUE
## 79 79 ABC ABC 0.6629213 TRUE
## 80 80 AAA AAA 0.7234043 TRUE
## 81 81 ABC ABC 0.6666667 TRUE
## 82 82 BBC BBC 0.6739130 TRUE
## 83 83 ABB ABB 0.7586207 TRUE
## 84 84 BCC BCC 0.8191489 TRUE
## 85 85 ABB ABB 0.6235294 TRUE
## 86 86 BBC BBC 0.7252747 TRUE
## 87 87 ABC ABC 0.7260274 TRUE
## 88 88 AAB AAB 0.6976744 TRUE
## 89 89 BCC BCC 0.7303371 TRUE
## 90 90 ABC ABC 0.6785714 TRUE
## 91 91 BBB BBB 0.7187500 TRUE
## 92 92 ABC ABC 0.7402597 TRUE
## 93 93 ABB ABB 0.7582418 TRUE
## 94 94 ACC ACC 0.7142857 TRUE
## 95 95 BCC BCC 0.6888889 TRUE
## 96 96 ABC ABC 0.7500000 TRUE
## 97 97 CCC CCC 0.7634409 TRUE
## 98 98 ACC ACC 0.7176471 TRUE
## 99 99 ABC ABC 0.6623377 TRUE
## 100 100 BBB BBB 0.7010309 TRUE
Try with 20 applications per universities.
# Load necessary libraries
library(ggplot2)
library(stringdist)
# Generate fake data
set.seed(123)
n <- 20
grades <- c("A", "B", "C")
# Generate all possible three-grade combinations
generate_combinations <- function(grades) {
combinations <- expand.grid(grades, grades, grades)
apply(combinations, 1, paste, collapse = "")
}
grade_combinations <- generate_combinations(grades)
# Function to check if a grade passes the cutoff
passes_cutoff <- function(grade, cutoff) {
grade_split <- sort(unlist(strsplit(grade, "")))
cutoff_split <- sort(unlist(strsplit(cutoff, "")))
grade_counts <- table(factor(grade_split, levels = grades))
cutoff_counts <- table(factor(cutoff_split, levels = grades))
all(grade_counts >= cutoff_counts)
}
# Function to compute the distance based on the number of adjustments needed
compute_distance <- function(grade, cutoff) {
stringdist(grade, cutoff, method = "lv")
}
# Function to estimate treatment effect at a given cutoff
estimate_treatment_effect <- function(cutoff, x, y) {
left_indices <- sapply(x, function(g) !passes_cutoff(g, cutoff))
right_indices <- sapply(x, function(g) passes_cutoff(g, cutoff))
if (sum(left_indices) == 0 || sum(right_indices) == 0) {
return(NA) # Not enough data points to calculate the treatment effect
}
mean_right <- mean(y[right_indices])
mean_left <- mean(y[left_indices])
treatment_effect <- mean_right - mean_left
return(treatment_effect)
}
# Simulate multiple runs
num_simulations <- 100
results <- data.frame(Simulation = integer(), TrueCutoff = character(), BestCutoff = character(), TreatmentEffect = numeric(), Match = logical())
for (sim in 1:num_simulations) {
# Generate grades for applications
x <- sample(grade_combinations, n, replace = TRUE)
# Randomly select a true cutoff from the grade combinations
true_cutoff <- sample(grade_combinations, 1)
# Admission probabilities based on distance from the cutoff
admission_prob <- sapply(x, function(g) {
if (passes_cutoff(g, true_cutoff)) {
return(1)
} else {
distance <- compute_distance(g, true_cutoff)
return(max(0.2, 1 - 0.4 * distance))
}
})
# Generate admission results based on probabilities
y <- rbinom(n, 1, admission_prob)
# Estimate treatment effects for all possible cutoffs
treatment_effects <- sapply(grade_combinations, function(cutoff) estimate_treatment_effect(cutoff, x, y))
# Remove NAs from treatment effects
valid_cutoffs <- grade_combinations[!is.na(treatment_effects)]
valid_treatment_effects <- treatment_effects[!is.na(treatment_effects)]
# Find the cutoff with the largest treatment effect
best_cutoff <- valid_cutoffs[which.max(valid_treatment_effects)]
best_treatment_effect <- max(valid_treatment_effects)
# Reorder best_cutoff and true_cutoff
reorder_cutoff <- function(cutoff) {
paste(sort(unlist(strsplit(cutoff, ""))), collapse = "")
}
best_cutoff <- reorder_cutoff(best_cutoff)
true_cutoff <- reorder_cutoff(true_cutoff)
# Document if the true and best cutoff match
match <- best_cutoff == true_cutoff
results <- rbind(results, data.frame(Simulation = sim, TrueCutoff = true_cutoff, BestCutoff = best_cutoff, TreatmentEffect = best_treatment_effect, Match = match))
}
# Summarize results
summary(results$Match)
## Mode FALSE TRUE
## logical 23 77
# Plot the distribution of treatment effects
hist(as.numeric(factor(results$BestCutoff, levels = grade_combinations)), breaks = length(grade_combinations), main = "Distribution of Best Cutoff Grades", xlab = "Best Cutoff Grade", col = "skyblue", xaxt='n')
axis(1, at=1:length(grade_combinations), labels=grade_combinations)
# Print results
print(results)
## Simulation TrueCutoff BestCutoff TreatmentEffect Match
## 1 1 ACC ACC 0.6470588 TRUE
## 2 2 AAA AAC 0.9444444 FALSE
## 3 3 ABC ABC 0.5714286 TRUE
## 4 4 AAC AAC 0.7368421 TRUE
## 5 5 ABC ABC 0.9375000 TRUE
## 6 6 AAB AAB 0.6666667 TRUE
## 7 7 AAB AAB 0.8333333 TRUE
## 8 8 ABC ABB 0.4210526 FALSE
## 9 9 ABC ABC 0.5625000 TRUE
## 10 10 BBC BBC 0.8333333 TRUE
## 11 11 BBC BBC 0.7222222 TRUE
## 12 12 AAA AAB 0.6875000 FALSE
## 13 13 ABB ABB 0.5555556 TRUE
## 14 14 AAB AAB 0.5625000 TRUE
## 15 15 ABC ABC 0.3571429 TRUE
## 16 16 AAB ABB 0.4705882 FALSE
## 17 17 BCC BCC 0.6250000 TRUE
## 18 18 CCC BCC 0.6250000 FALSE
## 19 19 BBC BBB 0.7368421 FALSE
## 20 20 AAC AAC 0.8571429 TRUE
## 21 21 BCC BCC 0.7777778 TRUE
## 22 22 ABB ABB 0.7777778 TRUE
## 23 23 AAB AAB 0.5333333 TRUE
## 24 24 AAA AAB 0.7777778 FALSE
## 25 25 BBC BBC 0.7333333 TRUE
## 26 26 AAC AAC 0.7894737 TRUE
## 27 27 AAB AAB 0.6666667 TRUE
## 28 28 AAA AAA 0.7894737 TRUE
## 29 29 BBC BBC 0.6315789 TRUE
## 30 30 AAB AAB 0.6666667 TRUE
## 31 31 ABC AAC 0.5294118 FALSE
## 32 32 ABC ABC 0.5882353 TRUE
## 33 33 ABC ABC 0.6111111 TRUE
## 34 34 ACC ACC 0.8235294 TRUE
## 35 35 BBB ABB 0.5294118 FALSE
## 36 36 BBB BBB 0.5882353 TRUE
## 37 37 ABC ABC 0.6875000 TRUE
## 38 38 ABC ABC 0.8333333 TRUE
## 39 39 BCC BCC 0.5882353 TRUE
## 40 40 ABB ABB 0.8888889 TRUE
## 41 41 AAC AAC 0.5555556 TRUE
## 42 42 AAC AAC 0.7368421 TRUE
## 43 43 ABC ABC 0.8235294 TRUE
## 44 44 BCC BBB 0.7894737 FALSE
## 45 45 AAB AAB 0.8235294 TRUE
## 46 46 BCC BCC 0.7894737 TRUE
## 47 47 BCC BCC 0.9444444 TRUE
## 48 48 BBB ABB 0.8333333 FALSE
## 49 49 AAA AAB 0.6470588 FALSE
## 50 50 AAB AAB 0.6666667 TRUE
## 51 51 AAC AAC 0.7647059 TRUE
## 52 52 AAB ABC 0.2666667 FALSE
## 53 53 ABC ABC 0.4285714 TRUE
## 54 54 BCC BCC 0.7500000 TRUE
## 55 55 BBC BBC 0.7222222 TRUE
## 56 56 ABC AAB 0.4705882 FALSE
## 57 57 ACC ACC 0.6842105 TRUE
## 58 58 ACC AAB 0.7777778 FALSE
## 59 59 BCC CCC 0.6666667 FALSE
## 60 60 ABC ABC 0.5882353 TRUE
## 61 61 BCC BCC 0.7222222 TRUE
## 62 62 ABC ABC 0.7058824 TRUE
## 63 63 AAB AAB 0.7647059 TRUE
## 64 64 ABB ABB 0.6470588 TRUE
## 65 65 ABB ABB 0.8421053 TRUE
## 66 66 ACC ACC 0.5000000 TRUE
## 67 67 AAC AAC 0.9473684 TRUE
## 68 68 BCC AAC 0.7894737 FALSE
## 69 69 ABC ABC 0.8571429 TRUE
## 70 70 AAA AAB 0.8333333 FALSE
## 71 71 BCC BCC 0.6111111 TRUE
## 72 72 AAC AAC 0.7058824 TRUE
## 73 73 AAA AAA 0.5789474 TRUE
## 74 74 AAC AAC 0.8947368 TRUE
## 75 75 AAB AAA 0.6842105 FALSE
## 76 76 BBC BCC 0.5789474 FALSE
## 77 77 ABC ABC 0.6923077 TRUE
## 78 78 AAB AAB 0.5882353 TRUE
## 79 79 AAC AAC 0.6666667 TRUE
## 80 80 ABC ABC 0.5384615 TRUE
## 81 81 ABC ABC 0.6875000 TRUE
## 82 82 ACC ACC 0.5882353 TRUE
## 83 83 AAB AAB 0.5882353 TRUE
## 84 84 ABC ABC 0.7142857 TRUE
## 85 85 BBB BBC 0.7894737 FALSE
## 86 86 BCC BCC 0.5625000 TRUE
## 87 87 BCC BCC 0.6111111 TRUE
## 88 88 AAB AAB 0.6470588 TRUE
## 89 89 ABC ABC 0.6666667 TRUE
## 90 90 ABC ABC 0.6000000 TRUE
## 91 91 ABB ABB 0.6250000 TRUE
## 92 92 ABC ABC 0.5294118 TRUE
## 93 93 ABB ABB 0.7222222 TRUE
## 94 94 BBC BBC 0.5625000 TRUE
## 95 95 AAB AAA 0.7368421 FALSE
## 96 96 AAB AAB 0.7368421 TRUE
## 97 97 ACC ACC 0.5555556 TRUE
## 98 98 BBB BBC 0.6875000 FALSE
## 99 99 AAB AAB 0.7333333 TRUE
## 100 100 AAC AAC 0.5000000 TRUE