Keitaro_Cutoff_Estimation

R Markdown

Try with 100 applications.

# Load necessary libraries
library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.3.3

library(stringdist)

# Generate fake data
set.seed(123)
n <- 100
grades <- c("A", "B", "C")

# Generate all possible three-grade combinations
generate_combinations <- function(grades) {
  combinations <- expand.grid(grades, grades, grades)
  apply(combinations, 1, paste, collapse = "")
}

grade_combinations <- generate_combinations(grades)

# Function to check if a grade passes the cutoff
passes_cutoff <- function(grade, cutoff) {
  grade_split <- sort(unlist(strsplit(grade, "")))
  cutoff_split <- sort(unlist(strsplit(cutoff, "")))
  
  grade_counts <- table(factor(grade_split, levels = grades))
  cutoff_counts <- table(factor(cutoff_split, levels = grades))
  
  all(grade_counts >= cutoff_counts)
}

# Function to compute the distance based on the number of adjustments needed
compute_distance <- function(grade, cutoff) {
  stringdist(grade, cutoff, method = "lv")
}

# Function to estimate treatment effect at a given cutoff
estimate_treatment_effect <- function(cutoff, x, y) {
  left_indices <- sapply(x, function(g) !passes_cutoff(g, cutoff))
  right_indices <- sapply(x, function(g) passes_cutoff(g, cutoff))
  
  if (sum(left_indices) == 0 || sum(right_indices) == 0) {
    return(NA)  # Not enough data points to calculate the treatment effect
  }
  
  mean_right <- mean(y[right_indices])
  mean_left <- mean(y[left_indices])
  treatment_effect <- mean_right - mean_left
  return(treatment_effect)
}

# Simulate multiple runs
num_simulations <- 100
results <- data.frame(Simulation = integer(), TrueCutoff = character(), BestCutoff = character(), TreatmentEffect = numeric(), Match = logical())

for (sim in 1:num_simulations) {
  # Generate grades for applications
  x <- sample(grade_combinations, n, replace = TRUE)
  
  # Randomly select a true cutoff from the grade combinations
  true_cutoff <- sample(grade_combinations, 1)
  
  # Admission probabilities based on distance from the cutoff
  admission_prob <- sapply(x, function(g) {
    if (passes_cutoff(g, true_cutoff)) {
      return(1)
    } else {
      distance <- compute_distance(g, true_cutoff)
      return(max(0.2, 1 - 0.4 * distance))
    }
  })
  
  # Generate admission results based on probabilities
  y <- rbinom(n, 1, admission_prob)
  
  # Estimate treatment effects for all possible cutoffs
  treatment_effects <- sapply(grade_combinations, function(cutoff) estimate_treatment_effect(cutoff, x, y))
  
  # Remove NAs from treatment effects
  valid_cutoffs <- grade_combinations[!is.na(treatment_effects)]
  valid_treatment_effects <- treatment_effects[!is.na(treatment_effects)]
  
  # Find the cutoff with the largest treatment effect
  best_cutoff <- valid_cutoffs[which.max(valid_treatment_effects)]
  best_treatment_effect <- max(valid_treatment_effects)
  
  # Reorder best_cutoff and true_cutoff
  reorder_cutoff <- function(cutoff) {
    paste(sort(unlist(strsplit(cutoff, ""))), collapse = "")
  }
  
  best_cutoff <- reorder_cutoff(best_cutoff)
  true_cutoff <- reorder_cutoff(true_cutoff)
  
  # Document if the true and best cutoff match
  match <- best_cutoff == true_cutoff
  
  results <- rbind(results, data.frame(Simulation = sim, TrueCutoff = true_cutoff, BestCutoff = best_cutoff, TreatmentEffect = best_treatment_effect, Match = match))
}

# Summarize results
summary(results$Match)

##    Mode    TRUE 
## logical     100

# Plot the distribution of treatment effects
hist(as.numeric(factor(results$BestCutoff, levels = grade_combinations)), breaks = length(grade_combinations), main = "Distribution of Best Cutoff Grades", xlab = "Best Cutoff Grade", col = "skyblue", xaxt='n')
axis(1, at=1:length(grade_combinations), labels=grade_combinations)

# Print results
print(results)

##     Simulation TrueCutoff BestCutoff TreatmentEffect Match
## 1            1        AAC        AAC       0.7126437  TRUE
## 2            2        AAA        AAA       0.7525773  TRUE
## 3            3        ABC        ABC       0.5540541  TRUE
## 4            4        ABB        ABB       0.7282609  TRUE
## 5            5        AAB        AAB       0.7108434  TRUE
## 6            6        AAA        AAA       0.6907216  TRUE
## 7            7        BCC        BCC       0.6666667  TRUE
## 8            8        ACC        ACC       0.7368421  TRUE
## 9            9        AAB        AAB       0.6404494  TRUE
## 10          10        ABB        ABB       0.7127660  TRUE
## 11          11        ABC        ABC       0.6794872  TRUE
## 12          12        BBC        BBC       0.7701149  TRUE
## 13          13        ABC        ABC       0.7532468  TRUE
## 14          14        ABC        ABC       0.7160494  TRUE
## 15          15        CCC        CCC       0.6979167  TRUE
## 16          16        ABC        ABC       0.7027027  TRUE
## 17          17        ABB        ABB       0.7765957  TRUE
## 18          18        BBB        BBB       0.7010309  TRUE
## 19          19        BCC        BCC       0.7727273  TRUE
## 20          20        BBC        BBC       0.6444444  TRUE
## 21          21        AAB        AAB       0.6625000  TRUE
## 22          22        BBB        BBB       0.8191489  TRUE
## 23          23        AAC        AAC       0.7356322  TRUE
## 24          24        ABC        ABC       0.6666667  TRUE
## 25          25        AAC        AAC       0.6853933  TRUE
## 26          26        BCC        BCC       0.6516854  TRUE
## 27          27        ABC        ABC       0.5875000  TRUE
## 28          28        ABC        ABC       0.6794872  TRUE
## 29          29        AAC        AAC       0.6630435  TRUE
## 30          30        AAA        AAA       0.7021277  TRUE
## 31          31        BCC        BCC       0.7209302  TRUE
## 32          32        ABB        ABB       0.6666667  TRUE
## 33          33        ABC        ABC       0.7333333  TRUE
## 34          34        AAB        AAB       0.7368421  TRUE
## 35          35        ABC        ABC       0.6987952  TRUE
## 36          36        AAA        AAA       0.7113402  TRUE
## 37          37        AAC        AAC       0.7752809  TRUE
## 38          38        ABC        ABC       0.6835443  TRUE
## 39          39        BBC        BBC       0.7790698  TRUE
## 40          40        ABB        ABB       0.6477273  TRUE
## 41          41        AAC        AAC       0.7840909  TRUE
## 42          42        ABC        ABC       0.6049383  TRUE
## 43          43        ABC        ABC       0.6962025  TRUE
## 44          44        ACC        ACC       0.6559140  TRUE
## 45          45        ABC        ABC       0.7297297  TRUE
## 46          46        AAC        AAC       0.6896552  TRUE
## 47          47        ABC        ABC       0.7402597  TRUE
## 48          48        BBC        BBC       0.6823529  TRUE
## 49          49        BCC        BCC       0.7840909  TRUE
## 50          50        AAC        AAC       0.6847826  TRUE
## 51          51        ABC        ABC       0.7594937  TRUE
## 52          52        CCC        CCC       0.6875000  TRUE
## 53          53        BBB        BBB       0.6489362  TRUE
## 54          54        ACC        ACC       0.7752809  TRUE
## 55          55        AAB        AAB       0.7282609  TRUE
## 56          56        BCC        BCC       0.7325581  TRUE
## 57          57        BCC        BCC       0.7727273  TRUE
## 58          58        CCC        CCC       0.7473684  TRUE
## 59          59        ABB        ABB       0.7415730  TRUE
## 60          60        ABC        ABC       0.5975610  TRUE
## 61          61        AAA        AAA       0.5957447  TRUE
## 62          62        BBC        BBC       0.6739130  TRUE
## 63          63        ACC        ACC       0.6853933  TRUE
## 64          64        ABC        ABC       0.6027397  TRUE
## 65          65        AAA        AAA       0.7010309  TRUE
## 66          66        ABB        ABB       0.7555556  TRUE
## 67          67        ABC        ABC       0.6904762  TRUE
## 68          68        ABC        ABC       0.7058824  TRUE
## 69          69        BCC        BCC       0.6703297  TRUE
## 70          70        BCC        BCC       0.7111111  TRUE
## 71          71        ABC        ABC       0.6575342  TRUE
## 72          72        ABC        ABC       0.6164384  TRUE
## 73          73        AAB        AAB       0.6451613  TRUE
## 74          74        BBC        BBC       0.7000000  TRUE
## 75          75        BCC        BCC       0.6860465  TRUE
## 76          76        ABB        ABB       0.6144578  TRUE
## 77          77        ACC        ACC       0.8000000  TRUE
## 78          78        BCC        BCC       0.7619048  TRUE
## 79          79        ABC        ABC       0.6629213  TRUE
## 80          80        AAA        AAA       0.7234043  TRUE
## 81          81        ABC        ABC       0.6666667  TRUE
## 82          82        BBC        BBC       0.6739130  TRUE
## 83          83        ABB        ABB       0.7586207  TRUE
## 84          84        BCC        BCC       0.8191489  TRUE
## 85          85        ABB        ABB       0.6235294  TRUE
## 86          86        BBC        BBC       0.7252747  TRUE
## 87          87        ABC        ABC       0.7260274  TRUE
## 88          88        AAB        AAB       0.6976744  TRUE
## 89          89        BCC        BCC       0.7303371  TRUE
## 90          90        ABC        ABC       0.6785714  TRUE
## 91          91        BBB        BBB       0.7187500  TRUE
## 92          92        ABC        ABC       0.7402597  TRUE
## 93          93        ABB        ABB       0.7582418  TRUE
## 94          94        ACC        ACC       0.7142857  TRUE
## 95          95        BCC        BCC       0.6888889  TRUE
## 96          96        ABC        ABC       0.7500000  TRUE
## 97          97        CCC        CCC       0.7634409  TRUE
## 98          98        ACC        ACC       0.7176471  TRUE
## 99          99        ABC        ABC       0.6623377  TRUE
## 100        100        BBB        BBB       0.7010309  TRUE

Try with 20 applications per universities.

# Load necessary libraries
library(ggplot2)
library(stringdist)

# Generate fake data
set.seed(123)
n <- 20
grades <- c("A", "B", "C")

# Generate all possible three-grade combinations
generate_combinations <- function(grades) {
  combinations <- expand.grid(grades, grades, grades)
  apply(combinations, 1, paste, collapse = "")
}

grade_combinations <- generate_combinations(grades)

# Function to check if a grade passes the cutoff
passes_cutoff <- function(grade, cutoff) {
  grade_split <- sort(unlist(strsplit(grade, "")))
  cutoff_split <- sort(unlist(strsplit(cutoff, "")))
  
  grade_counts <- table(factor(grade_split, levels = grades))
  cutoff_counts <- table(factor(cutoff_split, levels = grades))
  
  all(grade_counts >= cutoff_counts)
}

# Function to compute the distance based on the number of adjustments needed
compute_distance <- function(grade, cutoff) {
  stringdist(grade, cutoff, method = "lv")
}

# Function to estimate treatment effect at a given cutoff
estimate_treatment_effect <- function(cutoff, x, y) {
  left_indices <- sapply(x, function(g) !passes_cutoff(g, cutoff))
  right_indices <- sapply(x, function(g) passes_cutoff(g, cutoff))
  
  if (sum(left_indices) == 0 || sum(right_indices) == 0) {
    return(NA)  # Not enough data points to calculate the treatment effect
  }
  
  mean_right <- mean(y[right_indices])
  mean_left <- mean(y[left_indices])
  treatment_effect <- mean_right - mean_left
  return(treatment_effect)
}

# Simulate multiple runs
num_simulations <- 100
results <- data.frame(Simulation = integer(), TrueCutoff = character(), BestCutoff = character(), TreatmentEffect = numeric(), Match = logical())

for (sim in 1:num_simulations) {
  # Generate grades for applications
  x <- sample(grade_combinations, n, replace = TRUE)
  
  # Randomly select a true cutoff from the grade combinations
  true_cutoff <- sample(grade_combinations, 1)
  
  # Admission probabilities based on distance from the cutoff
  admission_prob <- sapply(x, function(g) {
    if (passes_cutoff(g, true_cutoff)) {
      return(1)
    } else {
      distance <- compute_distance(g, true_cutoff)
      return(max(0.2, 1 - 0.4 * distance))
    }
  })
  
  # Generate admission results based on probabilities
  y <- rbinom(n, 1, admission_prob)
  
  # Estimate treatment effects for all possible cutoffs
  treatment_effects <- sapply(grade_combinations, function(cutoff) estimate_treatment_effect(cutoff, x, y))
  
  # Remove NAs from treatment effects
  valid_cutoffs <- grade_combinations[!is.na(treatment_effects)]
  valid_treatment_effects <- treatment_effects[!is.na(treatment_effects)]
  
  # Find the cutoff with the largest treatment effect
  best_cutoff <- valid_cutoffs[which.max(valid_treatment_effects)]
  best_treatment_effect <- max(valid_treatment_effects)
  
  # Reorder best_cutoff and true_cutoff
  reorder_cutoff <- function(cutoff) {
    paste(sort(unlist(strsplit(cutoff, ""))), collapse = "")
  }
  
  best_cutoff <- reorder_cutoff(best_cutoff)
  true_cutoff <- reorder_cutoff(true_cutoff)
  
  # Document if the true and best cutoff match
  match <- best_cutoff == true_cutoff
  
  results <- rbind(results, data.frame(Simulation = sim, TrueCutoff = true_cutoff, BestCutoff = best_cutoff, TreatmentEffect = best_treatment_effect, Match = match))
}

# Summarize results
summary(results$Match)

##    Mode   FALSE    TRUE 
## logical      23      77

# Plot the distribution of treatment effects
hist(as.numeric(factor(results$BestCutoff, levels = grade_combinations)), breaks = length(grade_combinations), main = "Distribution of Best Cutoff Grades", xlab = "Best Cutoff Grade", col = "skyblue", xaxt='n')
axis(1, at=1:length(grade_combinations), labels=grade_combinations)

# Print results
print(results)

##     Simulation TrueCutoff BestCutoff TreatmentEffect Match
## 1            1        ACC        ACC       0.6470588  TRUE
## 2            2        AAA        AAC       0.9444444 FALSE
## 3            3        ABC        ABC       0.5714286  TRUE
## 4            4        AAC        AAC       0.7368421  TRUE
## 5            5        ABC        ABC       0.9375000  TRUE
## 6            6        AAB        AAB       0.6666667  TRUE
## 7            7        AAB        AAB       0.8333333  TRUE
## 8            8        ABC        ABB       0.4210526 FALSE
## 9            9        ABC        ABC       0.5625000  TRUE
## 10          10        BBC        BBC       0.8333333  TRUE
## 11          11        BBC        BBC       0.7222222  TRUE
## 12          12        AAA        AAB       0.6875000 FALSE
## 13          13        ABB        ABB       0.5555556  TRUE
## 14          14        AAB        AAB       0.5625000  TRUE
## 15          15        ABC        ABC       0.3571429  TRUE
## 16          16        AAB        ABB       0.4705882 FALSE
## 17          17        BCC        BCC       0.6250000  TRUE
## 18          18        CCC        BCC       0.6250000 FALSE
## 19          19        BBC        BBB       0.7368421 FALSE
## 20          20        AAC        AAC       0.8571429  TRUE
## 21          21        BCC        BCC       0.7777778  TRUE
## 22          22        ABB        ABB       0.7777778  TRUE
## 23          23        AAB        AAB       0.5333333  TRUE
## 24          24        AAA        AAB       0.7777778 FALSE
## 25          25        BBC        BBC       0.7333333  TRUE
## 26          26        AAC        AAC       0.7894737  TRUE
## 27          27        AAB        AAB       0.6666667  TRUE
## 28          28        AAA        AAA       0.7894737  TRUE
## 29          29        BBC        BBC       0.6315789  TRUE
## 30          30        AAB        AAB       0.6666667  TRUE
## 31          31        ABC        AAC       0.5294118 FALSE
## 32          32        ABC        ABC       0.5882353  TRUE
## 33          33        ABC        ABC       0.6111111  TRUE
## 34          34        ACC        ACC       0.8235294  TRUE
## 35          35        BBB        ABB       0.5294118 FALSE
## 36          36        BBB        BBB       0.5882353  TRUE
## 37          37        ABC        ABC       0.6875000  TRUE
## 38          38        ABC        ABC       0.8333333  TRUE
## 39          39        BCC        BCC       0.5882353  TRUE
## 40          40        ABB        ABB       0.8888889  TRUE
## 41          41        AAC        AAC       0.5555556  TRUE
## 42          42        AAC        AAC       0.7368421  TRUE
## 43          43        ABC        ABC       0.8235294  TRUE
## 44          44        BCC        BBB       0.7894737 FALSE
## 45          45        AAB        AAB       0.8235294  TRUE
## 46          46        BCC        BCC       0.7894737  TRUE
## 47          47        BCC        BCC       0.9444444  TRUE
## 48          48        BBB        ABB       0.8333333 FALSE
## 49          49        AAA        AAB       0.6470588 FALSE
## 50          50        AAB        AAB       0.6666667  TRUE
## 51          51        AAC        AAC       0.7647059  TRUE
## 52          52        AAB        ABC       0.2666667 FALSE
## 53          53        ABC        ABC       0.4285714  TRUE
## 54          54        BCC        BCC       0.7500000  TRUE
## 55          55        BBC        BBC       0.7222222  TRUE
## 56          56        ABC        AAB       0.4705882 FALSE
## 57          57        ACC        ACC       0.6842105  TRUE
## 58          58        ACC        AAB       0.7777778 FALSE
## 59          59        BCC        CCC       0.6666667 FALSE
## 60          60        ABC        ABC       0.5882353  TRUE
## 61          61        BCC        BCC       0.7222222  TRUE
## 62          62        ABC        ABC       0.7058824  TRUE
## 63          63        AAB        AAB       0.7647059  TRUE
## 64          64        ABB        ABB       0.6470588  TRUE
## 65          65        ABB        ABB       0.8421053  TRUE
## 66          66        ACC        ACC       0.5000000  TRUE
## 67          67        AAC        AAC       0.9473684  TRUE
## 68          68        BCC        AAC       0.7894737 FALSE
## 69          69        ABC        ABC       0.8571429  TRUE
## 70          70        AAA        AAB       0.8333333 FALSE
## 71          71        BCC        BCC       0.6111111  TRUE
## 72          72        AAC        AAC       0.7058824  TRUE
## 73          73        AAA        AAA       0.5789474  TRUE
## 74          74        AAC        AAC       0.8947368  TRUE
## 75          75        AAB        AAA       0.6842105 FALSE
## 76          76        BBC        BCC       0.5789474 FALSE
## 77          77        ABC        ABC       0.6923077  TRUE
## 78          78        AAB        AAB       0.5882353  TRUE
## 79          79        AAC        AAC       0.6666667  TRUE
## 80          80        ABC        ABC       0.5384615  TRUE
## 81          81        ABC        ABC       0.6875000  TRUE
## 82          82        ACC        ACC       0.5882353  TRUE
## 83          83        AAB        AAB       0.5882353  TRUE
## 84          84        ABC        ABC       0.7142857  TRUE
## 85          85        BBB        BBC       0.7894737 FALSE
## 86          86        BCC        BCC       0.5625000  TRUE
## 87          87        BCC        BCC       0.6111111  TRUE
## 88          88        AAB        AAB       0.6470588  TRUE
## 89          89        ABC        ABC       0.6666667  TRUE
## 90          90        ABC        ABC       0.6000000  TRUE
## 91          91        ABB        ABB       0.6250000  TRUE
## 92          92        ABC        ABC       0.5294118  TRUE
## 93          93        ABB        ABB       0.7222222  TRUE
## 94          94        BBC        BBC       0.5625000  TRUE
## 95          95        AAB        AAA       0.7368421 FALSE
## 96          96        AAB        AAB       0.7368421  TRUE
## 97          97        ACC        ACC       0.5555556  TRUE
## 98          98        BBB        BBC       0.6875000 FALSE
## 99          99        AAB        AAB       0.7333333  TRUE
## 100        100        AAC        AAC       0.5000000  TRUE

Keitaro_Cutoff_Estimation

Keitaro

2024-06-21

R Markdown