# Load necessary libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(car)
## Carregando pacotes exigidos: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(multcomp)
## Warning: package 'multcomp' was built under R version 4.3.2
## Carregando pacotes exigidos: mvtnorm
## Warning: package 'mvtnorm' was built under R version 4.3.2
## Carregando pacotes exigidos: survival
## Carregando pacotes exigidos: TH.data
## Warning: package 'TH.data' was built under R version 4.3.2
## Carregando pacotes exigidos: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
## Attaching package: 'TH.data'
##
## The following object is masked from 'package:MASS':
##
## geyser
# Load your data
library(readr)
Taxi <- read_csv("c:/users/dell/downloads/Taxi.csv")
## Rows: 348 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): origin, income, rushhour, dgender, overcharge
## dbl (3): triple, dage, overtreatment
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Extract overtreatment indices for 'high' and 'low' income levels
overtreatment_high <- as.numeric(Taxi$overtreatment[Taxi$income == "high"])
overtreatment_low <- as.numeric(Taxi$overtreatment[Taxi$income == "low"])
# Function for robust permutation test based on medians
robust_permutation_test_median <- function(x, y, n_permutations = 1000) {
observed_difference <- median(x) - median(y)
combined_data <- c(x, y)
# Initialize an empty vector to store permuted differences
permuted_differences <- numeric(n_permutations)
# Perform permutations
set.seed(123) # Set seed for reproducibility
for (i in 1:n_permutations) {
permuted_data <- sample(combined_data)
permuted_x <- permuted_data[1:length(x)]
permuted_y <- permuted_data[(length(x) + 1):length(combined_data)]
permuted_differences[i] <- median(permuted_x) - median(permuted_y)
}
# Calculate p-value
p_value <- mean(abs(permuted_differences) >= abs(observed_difference))
# Return results
result <- list(
observed_difference = observed_difference,
permuted_differences = permuted_differences,
p_value = p_value
)
return(result)
}
robust_permutation_test_upper_quartile <- function(x, y, n_permutations = 1000) {
observed_difference <- quantile(x, 0.75, na.rm = TRUE) - quantile(y, 0.75, na.rm = TRUE)
combined_data <- c(x, y)
# Initialize an empty vector to store permuted differences
permuted_differences <- numeric(n_permutations)
# Perform permutations
set.seed(123) # Set seed for reproducibility
for (i in 1:n_permutations) {
permuted_data <- sample(combined_data)
permuted_x <- permuted_data[1:length(x)]
permuted_y <- permuted_data[(length(x) + 1):length(combined_data)]
permuted_differences[i] <- quantile(permuted_x, 0.75, na.rm = TRUE) - quantile(permuted_y, 0.75, na.rm = TRUE)
}
# Calculate p-value
p_value <- mean(abs(permuted_differences) >= abs(observed_difference))
# Return results
result <- list(
observed_difference = observed_difference,
permuted_differences = permuted_differences,
p_value = p_value
)
return(result)
}
result_median <- robust_permutation_test_median(overtreatment_high, overtreatment_low)
result_upper_quartile <- robust_permutation_test_upper_quartile(overtreatment_high, overtreatment_low)
# Interpret the results
cat("Robust Permutation Test Based on Medians:\n")
## Robust Permutation Test Based on Medians:
cat("Observed Difference:", result_median$observed_difference, "\n")
## Observed Difference: 0.0099897
cat("P-value:", result_median$p_value, "\n\n")
## P-value: 0.081
cat("Robust Permutation Test Based on Upper Quartile:\n")
## Robust Permutation Test Based on Upper Quartile:
cat("Observed Difference:", result_upper_quartile$observed_difference, "\n")
## Observed Difference: 0.04290212
cat("P-value:", result_upper_quartile$p_value, "\n")
## P-value: 0.033
These findings offer insights into potential variations in the positioning of the overtreatment index across diverse income levels. The upper quartile test implies a more pronounced difference than the median test.