Matching methods
library(MatchIt)
## Warning: package 'MatchIt' was built under R version 4.4.3
library(openxlsx)
## Warning: package 'openxlsx' was built under R version 4.4.3
data <- read.csv("D:/OneDrive - IIT Kanpur/PhD_new/Papers/gingerpaper/Nepal_2014.csv")
# --- NEAREST - GLM - Replace TRUE
nearestglmT <- matchit(CONTRACT_FARMING_YN ~ AGE + EDU_INTERSCHOOL_YN + MIGRA_EMPLOY_YN,
data = data, method = "nearest", distance = "glm", replace = TRUE)
nearestglmT
## A `matchit` object
## - method: 1:1 nearest neighbor matching with replacement
## - distance: Propensity score
## - estimated with logistic regression
## - number of obs.: 461 (original), 363 (matched)
## - target estimand: ATT
## - covariates: AGE, EDU_INTERSCHOOL_YN, MIGRA_EMPLOY_YN
summary(nearestglmT)
##
## Call:
## matchit(formula = CONTRACT_FARMING_YN ~ AGE + EDU_INTERSCHOOL_YN +
## MIGRA_EMPLOY_YN, data = data, method = "nearest", distance = "glm",
## replace = TRUE)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.5333 0.5248 0.1889 0.9049
## AGE 45.1803 44.7373 0.0333 0.9602
## EDU_INTERSCHOOL_YN 0.3525 0.4009 -0.1014 .
## MIGRA_EMPLOY_YN 0.6352 0.5576 0.1613 .
## eCDF Mean eCDF Max
## distance 0.0528 0.0976
## AGE 0.0126 0.0583
## EDU_INTERSCHOOL_YN 0.0485 0.0485
## MIGRA_EMPLOY_YN 0.0776 0.0776
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.5333 0.5333 -0.0007 0.9896
## AGE 45.1803 45.0205 0.0120 0.9998
## EDU_INTERSCHOOL_YN 0.3525 0.3566 -0.0086 .
## MIGRA_EMPLOY_YN 0.6352 0.6393 -0.0085 .
## eCDF Mean eCDF Max Std. Pair Dist.
## distance 0.0021 0.0205 0.0049
## AGE 0.0058 0.0205 0.0477
## EDU_INTERSCHOOL_YN 0.0041 0.0041 0.0086
## MIGRA_EMPLOY_YN 0.0041 0.0041 0.0085
##
## Sample Sizes:
## Control Treated
## All 217. 244
## Matched (ESS) 82.46 244
## Matched 119. 244
## Unmatched 98. 0
## Discarded 0. 0
plot(nearestglmT, type = "histogram")

nearestglmT_data <- match.data(nearestglmT)
# --- NEAREST - GLM - Replace FALSE
nearestglmF <- matchit(CONTRACT_FARMING_YN ~ AGE + EDU_INTERSCHOOL_YN + MIGRA_EMPLOY_YN,
data = data, method = "nearest", distance = "glm", replace = FALSE)
## Warning: Fewer control units than treated units; not all treated units will get
## a match.
nearestglmF
## A `matchit` object
## - method: 1:1 nearest neighbor matching without replacement
## - distance: Propensity score
## - estimated with logistic regression
## - number of obs.: 461 (original), 434 (matched)
## - target estimand: ATT
## - covariates: AGE, EDU_INTERSCHOOL_YN, MIGRA_EMPLOY_YN
summary(nearestglmF)
##
## Call:
## matchit(formula = CONTRACT_FARMING_YN ~ AGE + EDU_INTERSCHOOL_YN +
## MIGRA_EMPLOY_YN, data = data, method = "nearest", distance = "glm",
## replace = FALSE)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.5333 0.5248 0.1889 0.9049
## AGE 45.1803 44.7373 0.0333 0.9602
## EDU_INTERSCHOOL_YN 0.3525 0.4009 -0.1014 .
## MIGRA_EMPLOY_YN 0.6352 0.5576 0.1613 .
## eCDF Mean eCDF Max
## distance 0.0528 0.0976
## AGE 0.0126 0.0583
## EDU_INTERSCHOOL_YN 0.0485 0.0485
## MIGRA_EMPLOY_YN 0.0776 0.0776
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.5433 0.5248 0.4130 0.6034
## AGE 45.4562 44.7373 0.0540 0.9959
## EDU_INTERSCHOOL_YN 0.2719 0.4009 -0.2701 .
## MIGRA_EMPLOY_YN 0.7143 0.5576 0.3255 .
## eCDF Mean eCDF Max Std. Pair Dist.
## distance 0.1108 0.1843 0.4179
## AGE 0.0151 0.0599 1.1452
## EDU_INTERSCHOOL_YN 0.1290 0.1290 0.9260
## MIGRA_EMPLOY_YN 0.1567 0.1567 0.3255
##
## Sample Sizes:
## Control Treated
## All 217 244
## Matched 217 217
## Unmatched 0 27
## Discarded 0 0
plot(nearestglmF, type = "histogram")

nearestglmF_data <- match.data(nearestglmF)
# --- NEAREST - MAHALANOBIS - Replace TRUE
nearestmahalanobisT <- matchit(CONTRACT_FARMING_YN ~ AGE + EDU_INTERSCHOOL_YN + MIGRA_EMPLOY_YN,
data = data, method = "nearest", distance = "mahalanobis", replace = TRUE)
nearestmahalanobisT
## A `matchit` object
## - method: 1:1 nearest neighbor matching with replacement
## - distance: Mahalanobis - number of obs.: 461 (original), 359 (matched)
## - target estimand: ATT
## - covariates: AGE, EDU_INTERSCHOOL_YN, MIGRA_EMPLOY_YN
summary(nearestmahalanobisT)
##
## Call:
## matchit(formula = CONTRACT_FARMING_YN ~ AGE + EDU_INTERSCHOOL_YN +
## MIGRA_EMPLOY_YN, data = data, method = "nearest", distance = "mahalanobis",
## replace = TRUE)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## AGE 45.1803 44.7373 0.0333 0.9602
## EDU_INTERSCHOOL_YN 0.3525 0.4009 -0.1014 .
## MIGRA_EMPLOY_YN 0.6352 0.5576 0.1613 .
## eCDF Mean eCDF Max
## AGE 0.0126 0.0583
## EDU_INTERSCHOOL_YN 0.0485 0.0485
## MIGRA_EMPLOY_YN 0.0776 0.0776
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## AGE 45.1803 45.1967 -0.0012 1.0004
## EDU_INTERSCHOOL_YN 0.3525 0.3525 0.0000 .
## MIGRA_EMPLOY_YN 0.6352 0.6352 0.0000 .
## eCDF Mean eCDF Max Std. Pair Dist.
## AGE 0.0047 0.0246 0.0333
## EDU_INTERSCHOOL_YN 0.0000 0.0000 0.0000
## MIGRA_EMPLOY_YN 0.0000 0.0000 0.0000
##
## Sample Sizes:
## Control Treated
## All 217. 244
## Matched (ESS) 80.24 244
## Matched 115. 244
## Unmatched 102. 0
## Discarded 0. 0
nearestmahalanobisT_data <- match.data(nearestmahalanobisT)
# --- NEAREST - MAHALANOBIS - Replace FALSE
nearestmahalanobisF <- matchit(CONTRACT_FARMING_YN ~ AGE + EDU_INTERSCHOOL_YN + MIGRA_EMPLOY_YN,
data = data, method = "nearest", distance = "mahalanobis", replace = FALSE)
## Warning: Fewer control units than treated units; not all treated units will get
## a match.
nearestmahalanobisF
## A `matchit` object
## - method: 1:1 nearest neighbor matching without replacement
## - distance: Mahalanobis - number of obs.: 461 (original), 434 (matched)
## - target estimand: ATT
## - covariates: AGE, EDU_INTERSCHOOL_YN, MIGRA_EMPLOY_YN
summary(nearestmahalanobisF)
##
## Call:
## matchit(formula = CONTRACT_FARMING_YN ~ AGE + EDU_INTERSCHOOL_YN +
## MIGRA_EMPLOY_YN, data = data, method = "nearest", distance = "mahalanobis",
## replace = FALSE)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## AGE 45.1803 44.7373 0.0333 0.9602
## EDU_INTERSCHOOL_YN 0.3525 0.4009 -0.1014 .
## MIGRA_EMPLOY_YN 0.6352 0.5576 0.1613 .
## eCDF Mean eCDF Max
## AGE 0.0126 0.0583
## EDU_INTERSCHOOL_YN 0.0485 0.0485
## MIGRA_EMPLOY_YN 0.0776 0.0776
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## AGE 44.9447 44.7373 0.0156 0.9561
## EDU_INTERSCHOOL_YN 0.3594 0.4009 -0.0868 .
## MIGRA_EMPLOY_YN 0.6221 0.5576 0.1340 .
## eCDF Mean eCDF Max Std. Pair Dist.
## AGE 0.0116 0.0507 0.1957
## EDU_INTERSCHOOL_YN 0.0415 0.0415 0.1061
## MIGRA_EMPLOY_YN 0.0645 0.0645 0.1340
##
## Sample Sizes:
## Control Treated
## All 217 244
## Matched 217 217
## Unmatched 0 27
## Discarded 0 0
nearestmahalanobisF_data <- match.data(nearestmahalanobisF)
# --- OPTIMAL - GLM
optimalglm <- matchit(CONTRACT_FARMING_YN ~ AGE + EDU_INTERSCHOOL_YN + MIGRA_EMPLOY_YN,
data = data, method = "optimal", distance = "glm")
## Warning: Fewer control units than treated units; not all treated units will get
## a match.
optimalglm
## A `matchit` object
## - method: 1:1 optimal pair matching
## - distance: Propensity score
## - estimated with logistic regression
## - number of obs.: 461 (original), 434 (matched)
## - target estimand: ATT
## - covariates: AGE, EDU_INTERSCHOOL_YN, MIGRA_EMPLOY_YN
summary(optimalglm)
##
## Call:
## matchit(formula = CONTRACT_FARMING_YN ~ AGE + EDU_INTERSCHOOL_YN +
## MIGRA_EMPLOY_YN, data = data, method = "optimal", distance = "glm")
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.5333 0.5248 0.1889 0.9049
## AGE 45.1803 44.7373 0.0333 0.9602
## EDU_INTERSCHOOL_YN 0.3525 0.4009 -0.1014 .
## MIGRA_EMPLOY_YN 0.6352 0.5576 0.1613 .
## eCDF Mean eCDF Max
## distance 0.0528 0.0976
## AGE 0.0126 0.0583
## EDU_INTERSCHOOL_YN 0.0485 0.0485
## MIGRA_EMPLOY_YN 0.0776 0.0776
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## distance 0.5278 0.5248 0.0676 0.8934
## AGE 44.9770 44.7373 0.0180 0.9845
## EDU_INTERSCHOOL_YN 0.3917 0.4009 -0.0193 .
## MIGRA_EMPLOY_YN 0.5899 0.5576 0.0670 .
## eCDF Mean eCDF Max Std. Pair Dist.
## distance 0.0196 0.0599 0.0719
## AGE 0.0125 0.0507 0.4344
## EDU_INTERSCHOOL_YN 0.0092 0.0092 0.1736
## MIGRA_EMPLOY_YN 0.0323 0.0323 0.0862
##
## Sample Sizes:
## Control Treated
## All 217 244
## Matched 217 217
## Unmatched 0 27
## Discarded 0 0
plot(optimalglm, type = "histogram")

optimalglm_data <- match.data(optimalglm)
# --- OPTIMAL - MAHALANOBIS
optimalmahalanobis <- matchit(CONTRACT_FARMING_YN ~ AGE + EDU_INTERSCHOOL_YN + MIGRA_EMPLOY_YN,
data = data, method = "optimal", distance = "mahalanobis")
## Warning: Fewer control units than treated units; not all treated units will get
## a match.
optimalmahalanobis
## A `matchit` object
## - method: 1:1 optimal pair matching
## - distance: Mahalanobis - number of obs.: 461 (original), 434 (matched)
## - target estimand: ATT
## - covariates: AGE, EDU_INTERSCHOOL_YN, MIGRA_EMPLOY_YN
summary(optimalmahalanobis)
##
## Call:
## matchit(formula = CONTRACT_FARMING_YN ~ AGE + EDU_INTERSCHOOL_YN +
## MIGRA_EMPLOY_YN, data = data, method = "optimal", distance = "mahalanobis")
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## AGE 45.1803 44.7373 0.0333 0.9602
## EDU_INTERSCHOOL_YN 0.3525 0.4009 -0.1014 .
## MIGRA_EMPLOY_YN 0.6352 0.5576 0.1613 .
## eCDF Mean eCDF Max
## AGE 0.0126 0.0583
## EDU_INTERSCHOOL_YN 0.0485 0.0485
## MIGRA_EMPLOY_YN 0.0776 0.0776
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio
## AGE 44.9401 44.7373 0.0152 0.9895
## EDU_INTERSCHOOL_YN 0.3871 0.4009 -0.0289 .
## MIGRA_EMPLOY_YN 0.5899 0.5576 0.0670 .
## eCDF Mean eCDF Max Std. Pair Dist.
## AGE 0.0127 0.0507 0.1268
## EDU_INTERSCHOOL_YN 0.0138 0.0138 0.0289
## MIGRA_EMPLOY_YN 0.0323 0.0323 0.0670
##
## Sample Sizes:
## Control Treated
## All 217 244
## Matched 217 217
## Unmatched 0 27
## Discarded 0 0
optimalmahalanobis_data <- match.data(optimalmahalanobis)
# --- Write both to Excel at your specified path ---
# output_file <- "D:/OneDrive - IIT Kanpur/PhD_new/Papers/gingerpaper/PSM_results - EDU_INTERSCHOOL.xlsx"
#
# wb <- createWorkbook()
#
# addWorksheet(wb, "nearestglmT_data")
# writeData(wb, "nearestglmT_data", nearestglmT_data)
# addWorksheet(wb, "nearestglmF_data")
# writeData(wb, "nearestglmF_data", nearestglmF_data)
#
# addWorksheet(wb, "nearestmahalanobisT_data")
# writeData(wb, "nearestmahalanobisT_data", nearestmahalanobisT_data)
# addWorksheet(wb, "nearestmahalanobisF_data")
# writeData(wb, "nearestmahalanobisF_data", nearestmahalanobisF_data)
#
# addWorksheet(wb, "optimalglm_data")
# writeData(wb, "optimalglm_data", optimalglm_data)
#
# addWorksheet(wb, "optimalmahalanobis_data")
# writeData(wb, "optimalmahalanobis_data", optimalmahalanobis_data)
#
# saveWorkbook(wb, output_file, overwrite = TRUE)
Conventional DEA
# Load package
library(Benchmarking)
## Warning: package 'Benchmarking' was built under R version 4.4.3
## Loading required package: lpSolveAPI
## Warning: package 'lpSolveAPI' was built under R version 4.4.3
## Loading required package: ucminf
## Loading required package: quadprog
library(openxlsx)
# File path and sheet name
file_path <- "D:/OneDrive - IIT Kanpur/PhD_new/Papers/gingerpaper/Nepal_2014_matcheddatasets.xlsx"
sheet_name <- "MatchedData_EDU-INTERSCHOOL"
# Read the data
data <- read.xlsx(file_path, sheet = sheet_name)
# --- Prepare input and output matrices ---
X <- as.matrix(data[, c("Q_SEED", "Q_MANURE", "M_DAYS_LABOUR", "PLOT_AREA")])
Y <- as.matrix(data[, "PLOT_PROD"])
# --- IO-BCC Model
deaio <- dea(X,Y, RTS = "vrs", ORIENTATION = "in", SLACK = TRUE) #Use RTS = "crs" for CCR model AND ORIENTATION = "out" for output orientation
summary(deaio)
## Summary of efficiencies
## VRS technology and input orientated efficiency
## Number of firms with efficiency==1 are 56 out of 359
## Mean efficiency: 0.77
## ---
## Eff range # %
## 0.3<= E <0.4 6 1.7
## 0.4<= E <0.5 12 3.3
## 0.5<= E <0.6 41 11.4
## 0.6<= E <0.7 86 24.0
## 0.7<= E <0.8 56 15.6
## 0.8<= E <0.9 46 12.8
## 0.9<= E <1 56 15.6
## E ==1 56 15.6
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.3183 0.6377 0.7481 0.7701 0.9378 1.0000
results_deaio <- data.frame(data$ID,data$CONTRACT_FARMING,deaio$eff,deaio$slack,deaio$sx,deaio$sy)
# --- OO-BCC Model
deaoo <- dea(X,Y, RTS = "vrs", ORIENTATION = "out", SLACK = TRUE) #Use RTS = "crs" for CCR model AND ORIENTATION = "in" for input orientation
summary(deaoo)
## Summary of efficiencies
## VRS technology and output orientated efficiency
## Number of firms with efficiency==1 are 24 out of 359
## Mean efficiency: 1.67
## ---
## Eff range # %
## F ==1 24 6.7
## 1< F =<1.1 22 6.1
## 1.1< F =<1.2 27 7.5
## 1.2< F =<1.3 26 7.2
## 1.3< F =<1.5 44 12.3
## 1.5< F =< 2 141 39.3
## 2< F =< 5 75 20.9
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.261 1.611 1.671 1.985 3.347
results_deaoo <- data.frame(data$ID,data$CONTRACT_FARMING,deaoo$eff,deaoo$slack,deaoo$sx,deaoo$sy)
# # --- Write both to Excel at your specified path ---
# output_file <- "D:/OneDrive - IIT Kanpur/PhD_new/Papers/gingerpaper/VRS_DEA_Conv_MatchedData_EDU-INTERSCHOOL_Results.xlsx"
#
# wb <- createWorkbook()
# addWorksheet(wb, "Input_Oriented")
# writeData(wb, "Input_Oriented", results_deaio)
#
# addWorksheet(wb, "Output_Oriented")
# writeData(wb, "Output_Oriented", results_deaoo)
#
# saveWorkbook(wb, output_file, overwrite = TRUE)
Robust DEA
# Load package
library(rDEA)
## Warning: package 'rDEA' was built under R version 4.4.3
## Using the GLPK callable library version 5.0
##
## Attaching package: 'rDEA'
## The following object is masked from 'package:Benchmarking':
##
## dea
library(openxlsx)
# File path and sheet name
file_path <- "D:/OneDrive - IIT Kanpur/PhD_new/Papers/gingerpaper/Nepal_2014_matcheddatasets.xlsx"
sheet_name <- "MatchedData_EDU-HIGHSCHOOL"
# Read the data
data <- read.xlsx(file_path, sheet = sheet_name)
# --- Prepare input and output matrices ---
X <- as.matrix(data[, c("Q_SEED", "Q_MANURE", "M_DAYS_LABOUR", "PLOT_AREA")])
Y <- as.matrix(data[, "PLOT_PROD"])
if (any(X <= 0) | any(Y <= 0)) {
stop("DEA requires strictly positive inputs and outputs.")
}
set.seed(123)
# --- Input-oriented Robust DEA ---
dea_robust_io <- dea.robust(
X = X, Y = Y,
model = "input", # input-oriented, Use model = "output" for output orientation
RTS = "variable", # variable returns to scale (BCC), Use RTS = "constant" for CCR model
B = 500, # bootstrap replications
alpha = 0.05, # 95% CI
bw = "cv" # cross-validation bandwidth
)
data_io <- data
data_io$DEA_BiasCorrected_IO <- dea_robust_io$theta_hat_hat # bias-corrected efficiency scores
data_io$DEA_Bias_IO <- dea_robust_io$bias # bootstrap-estimated bias
data_io$DEA_CI_Low_IO <- dea_robust_io$theta_ci_low # lower CI bounds
data_io$DEA_CI_High_IO <- dea_robust_io$theta_ci_high # upper CI bounds
# --- Output-oriented Robust DEA ---
dea_robust_oo <- dea.robust(
X = X, Y = Y,
model = "output", # output-oriented, Use model = "input" for input orientation
RTS = "variable", # variable returns to scale (BCC), Use RTS = "constant" for CCR model
B = 500, # bootstrap replications
alpha = 0.05, # 95% CI
bw = "cv" # cross-validation bandwidth
)
data_oo <- data
data_oo$DEA_BiasCorrected_OO <- dea_robust_oo$theta_hat_hat # bias-corrected efficiency scores
data_oo$DEA_Bias_OO <- dea_robust_oo$bias # bootstrap-estimated bias
data_oo$DEA_CI_Low_OO <- dea_robust_oo$theta_ci_low # lower CI bounds
data_oo$DEA_CI_High_OO <- dea_robust_oo$theta_ci_high # upper CI bounds
# # --- Write both to Excel at your specified path ---
# output_file <- "D:/OneDrive - IIT Kanpur/PhD_new/Papers/gingerpaper/VRS_DEA_Robust_MatchedData_EDU-HIGHSCHOOL_Results.xlsx"
#
# wb <- createWorkbook()
# addWorksheet(wb, "Input_Oriented")
# writeData(wb, "Input_Oriented", data_io)
#
# addWorksheet(wb, "Output_Oriented")
# writeData(wb, "Output_Oriented", data_oo)
#
# saveWorkbook(wb, output_file, overwrite = TRUE)
#
# cat("Results exported to:\n", output_file, "\nwith two sheets: Input_Oriented and Output_Oriented\n")
Kolmogorov-Smirnov (K-S) and Kruskal-Wallis (K-W) tests
# Load libraries
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# File path and sheet name
file_path <- "D:/OneDrive - IIT Kanpur/PhD_new/Papers/gingerpaper/Nepal_2014_matcheddatasets.xlsx"
sheet_name <- "MatchedData_EDU-INTERSCHOOL"
# Read data
df <- read_excel(file_path, sheet = sheet_name)
# Efficiency variables
eff_vars <- c("CRS.DEA_Robust_OO.IS",
"VRS.DEA_Robust_OO.IS",
"Scale.Eff.Robust.OO.IS",
"CRS.DEA_Robust_IO.IS",
"VRS.DEA_Robust_IO.IS",
"CRS.DEA_Conv_OO.IS",
"VRS.DEA_Conv_OO.IS",
"Scale.Eff.Conv.OO.IS",
"CRS.DEA_Conv_IO.IS",
"VRS.DEA_Conv_IO.IS",
"PLOT_YIELD")
group_var <- "CONTRACT_FARMING_YN"
# Function to add significance stars
stars <- function(p) {
if (p <= 0.01) return("***")
else if (p <= 0.05) return("**")
else if (p <= 0.1) return("*")
else return("")
}
# Create results dataframe
results_list <- list()
for (var in eff_vars) {
ks_test <- ks.test(df[[var]][df[[group_var]] == 1],
df[[var]][df[[group_var]] == 0]) #Kolmogorov-Smirnov (K-S) test
kw_test <- kruskal.test(df[[var]] ~ as.factor(df[[group_var]]), data = df) # Kruskal-Wallis (K-W) test
results_list[[var]] <- data.frame(
Variable = var,
KS_Statistic = round(ks_test$statistic, 3),
KS_p_value = round(ks_test$p.value, 4),
KS_Sig = stars(ks_test$p.value),
KW_Chi_sq = round(kw_test$statistic, 3),
KW_p_value = round(kw_test$p.value, 4),
KW_Sig = stars(kw_test$p.value),
Summary = ifelse(kw_test$p.value <= 0.05,
"Significant difference",
"No significant difference")
)
}
## Warning in ks.test.default(df[[var]][df[[group_var]] == 1],
## df[[var]][df[[group_var]] == : p-value will be approximate in the presence of
## ties
## Warning in ks.test.default(df[[var]][df[[group_var]] == 1],
## df[[var]][df[[group_var]] == : p-value will be approximate in the presence of
## ties
## Warning in ks.test.default(df[[var]][df[[group_var]] == 1],
## df[[var]][df[[group_var]] == : p-value will be approximate in the presence of
## ties
## Warning in ks.test.default(df[[var]][df[[group_var]] == 1],
## df[[var]][df[[group_var]] == : p-value will be approximate in the presence of
## ties
## Warning in ks.test.default(df[[var]][df[[group_var]] == 1],
## df[[var]][df[[group_var]] == : p-value will be approximate in the presence of
## ties
## Warning in ks.test.default(df[[var]][df[[group_var]] == 1],
## df[[var]][df[[group_var]] == : p-value will be approximate in the presence of
## ties
## Warning in ks.test.default(df[[var]][df[[group_var]] == 1],
## df[[var]][df[[group_var]] == : p-value will be approximate in the presence of
## ties
## Warning in ks.test.default(df[[var]][df[[group_var]] == 1],
## df[[var]][df[[group_var]] == : p-value will be approximate in the presence of
## ties
## Warning in ks.test.default(df[[var]][df[[group_var]] == 1],
## df[[var]][df[[group_var]] == : p-value will be approximate in the presence of
## ties
## Warning in ks.test.default(df[[var]][df[[group_var]] == 1],
## df[[var]][df[[group_var]] == : p-value will be approximate in the presence of
## ties
## Warning in ks.test.default(df[[var]][df[[group_var]] == 1],
## df[[var]][df[[group_var]] == : p-value will be approximate in the presence of
## ties
results_df <- bind_rows(results_list)
print(results_df)
## Variable KS_Statistic KS_p_value KS_Sig KW_Chi_sq
## D...1 CRS.DEA_Robust_OO.IS 0.256 0.0001 *** 11.821
## D...2 VRS.DEA_Robust_OO.IS 0.248 0.0001 *** 12.421
## D...3 Scale.Eff.Robust.OO.IS 0.090 0.5457 0.003
## D...4 CRS.DEA_Robust_IO.IS 0.256 0.0001 *** 11.634
## D...5 VRS.DEA_Robust_IO.IS 0.146 0.0730 * 2.387
## D...6 CRS.DEA_Conv_OO.IS 0.210 0.0020 *** 12.718
## D...7 VRS.DEA_Conv_OO.IS 0.230 0.0005 *** 7.797
## D...8 Scale.Eff.Conv.OO.IS 0.077 0.7423 0.000
## D...9 CRS.DEA_Conv_IO.IS 0.210 0.0020 *** 12.559
## D...10 VRS.DEA_Conv_IO.IS 0.108 0.3226 1.439
## D...11 PLOT_YIELD 0.251 0.0001 *** 10.963
## KW_p_value KW_Sig Summary
## D...1 0.0006 *** Significant difference
## D...2 0.0004 *** Significant difference
## D...3 0.9561 No significant difference
## D...4 0.0006 *** Significant difference
## D...5 0.1224 No significant difference
## D...6 0.0004 *** Significant difference
## D...7 0.0052 *** Significant difference
## D...8 0.9851 No significant difference
## D...9 0.0004 *** Significant difference
## D...10 0.2303 No significant difference
## D...11 0.0009 *** Significant difference