knitr::opts_chunk$set(echo = TRUE, warning = FALSE, message = FALSE)

library(ggplot2)
library(dplyr)
library(knitr)

new_df <- data.frame(
  University = c(
    "Arizona State University (Law)", "University of Virginia (Law)", "University of Nebraska (Law)",
    "William and Mary (Law)", "University of Arizona Law", "William and Mary (Law)",
    "Berkeley (Law)", "University of Virginia (Undergrad)", "University of Michigan (Undergrad)",
    "University of Maryland (Medical)", "North Carolina State (Undergrad)", "SUNY (Medical)",
    "Miami University (Undergrad)", "UCLA (Undergrad)", "US Naval Academy (Military)",
    "University of Washington (Medical)", "Ohio State University (Undergrad)", "US Military Academy (Military)",
    "George Mason University (Law)", "Median (All)", "Mean (All)"
  ),
  Black_OR = c(1115.434, 730.8, 442.39, 267, 2500.3, 167.51, 121.6, 106, 62.79, 20.63, 19, 9.44, 7.99, 5.15, 4.44, 4.01, 3.33, 1.94, 1.13, 20.63, 175.51),
  Hispanic_OR = c(89.45, 1.1, 89.63, 0.66, 18.15, 2.47, 18.2, 2.81, 4.782, 2.51, 1.93, 4.08, 2.16, 1.92, 3.32, 4.86, 4.3, 1.2, 1.09, 2.81, 15.43),
  Asian_OR = c(5.78, 1.86, 5.78, 0.66, 2.54, 3.29, 1.6, 0.94, 0.81, 0.68, 0.64, 0.75, 2.14, 0.85, 0.67, 0.9, 1.47, 0.88, 1.74, 0.94, 1.59)
)

str(df)
## function (x, df1, df2, ncp, log = FALSE)
# assuming log-normal distribution
ci_lower <- exp(log(new_df$Black_OR) - 1.96 * (log(new_df$Hispanic_OR) - log(new_df$Asian_OR)) / (2 * 1.35))
ci_upper <- exp(log(new_df$Black_OR) + 1.96 * (log(new_df$Hispanic_OR) - log(new_df$Asian_OR)) / (2 * 1.35))

new_df$CI_Lower <- ci_lower
new_df$CI_Upper <- ci_upper

print(new_df)
##                            University Black_OR Hispanic_OR Asian_OR    CI_Lower
## 1      Arizona State University (Law) 1115.434      89.450     5.78  152.701844
## 2        University of Virginia (Law)  730.800       1.100     1.86 1070.032451
## 3        University of Nebraska (Law)  442.390      89.630     5.78   60.474451
## 4              William and Mary (Law)  267.000       0.660     0.66  267.000000
## 5           University of Arizona Law 2500.300      18.150     2.54  599.819014
## 6              William and Mary (Law)  167.510       2.470     3.29  206.261300
## 7                      Berkeley (Law)  121.600      18.200     1.60   20.815703
## 8  University of Virginia (Undergrad)  106.000       2.810     0.94   47.870849
## 9  University of Michigan (Undergrad)   62.790       4.782     0.81   17.302592
## 10   University of Maryland (Medical)   20.630       2.510     0.68    7.994284
## 11   North Carolina State (Undergrad)   19.000       1.930     0.64    8.526311
## 12                     SUNY (Medical)    9.440       4.080     0.75    2.760461
## 13       Miami University (Undergrad)    7.990       2.160     2.14    7.936227
## 14                   UCLA (Undergrad)    5.150       1.920     0.85    2.850460
## 15        US Naval Academy (Military)    4.440       3.320     0.67    1.389372
## 16 University of Washington (Medical)    4.010       4.860     0.90    1.178910
## 17  Ohio State University (Undergrad)    3.330       4.300     1.47    1.527753
## 18     US Military Academy (Military)    1.940       1.200     0.88    1.548890
## 19      George Mason University (Law)    1.130       1.090     1.74    1.586830
## 20                       Median (All)   20.630       2.810     0.94    9.316751
## 21                         Mean (All)  175.510      15.430     1.59   33.715967
##        CI_Upper
## 1  8.147858e+03
## 2  4.991144e+02
## 3  3.236225e+03
## 4  2.670000e+02
## 5  1.042231e+04
## 6  1.360391e+02
## 7  7.103560e+02
## 8  2.347149e+02
## 9  2.278609e+02
## 10 5.323765e+01
## 11 4.233953e+01
## 12 3.228214e+01
## 13 8.044138e+00
## 14 9.304640e+00
## 15 1.418886e+01
## 16 1.363980e+01
## 17 7.258308e+00
## 18 2.429869e+00
## 19 8.046859e-01
## 20 4.568083e+01
## 21 9.136253e+02
forest_plot <- ggplot(new_df, aes(y = University, x = Black_OR)) +
  geom_point(size = 3) +
  geom_errorbarh(aes(xmin = CI_Lower, xmax = CI_Upper), height = 0.2) +
  geom_vline(xintercept = 1, linetype = "dashed", color = "red") +
  scale_x_log10(breaks = c(0.1, 0.5, 1, 2, 5, 10, 20)) +
  labs(title = "Forest Plot of Admission Odds Ratios by Race",
       x = "Odds Ratio (log scale)",
       y = "University") +
  theme_minimal()

new_df$SE <- (log(new_df$CI_Upper) - log(new_df$CI_Lower)) / (2 * 1.96)

funnel_plot <- ggplot(new_df, aes(x = Black_OR, y = SE)) +
  geom_point() +
  geom_vline(xintercept = 1, linetype = "dashed", color = "red") +
  scale_x_log10() +
  labs(title = "Funnel Plot for Publication Bias",
       x = "Odds Ratio (log scale)",
       y = "Standard Error") +
  theme_minimal()

print(forest_plot)

print(funnel_plot)