Mean vs. Median

rm(list=ls())

# Number of samples
M <- 10000
# Sample size
ns <- 100

##############################################################################
# Data generated from a N(20,1)
##############################################################################
sim.mean <- sim.median <- vector()

for(i in 1:M){
  set.seed(i)
  data0 <- rnorm(ns, 20, 1)
  sim.mean[i] <- mean(data0)
  sim.median[i] <- median(data0)
}

# Histogram of the Means
hist(sim.mean, probability = T, xlab = "Mean", ylab = "Density", main = "Simulated Mean", breaks = 50, 
     cex.axis = 1.5, cex.lab = 1.5)
box()
abline(v = 20, lwd = 2, col = "red")

# Histogram of the Medians
hist(sim.median, probability = T, xlab = "Median", ylab = "Density", main = "Simulated Median", breaks = 50, 
     cex.axis = 1.5, cex.lab = 1.5)
box()
abline(v = 20, lwd = 2, col = "red")

# Smooth density comparison
plot(density(sim.mean), col="black", ylim = c(0,4), lwd = 2, 
     xlab = "Estimates", ylab = "Density", main = "Mean vs. Median",
     cex.axis = 1.5, cex.lab = 1.5)
points(density(sim.median), col="blue", type = "l", lwd = 2)
abline(v = 20, lwd = 2, col = "red")
legend(20.2, 4, legend=c("Mean", "Median"),
       col=c("black", "blue"), lty=1, cex=1, lwd = 2)

# Boxplot comparison
boxplot(sim.mean, sim.median, names = c("Mean","Median"), col=c('lightgray','blue'),
        xlab = "Estimates", ylab = "Density", main = "Mean vs. Median")
abline(h = 20, lwd = 2, col = "red")
legend(1.3, 20.4, legend=c("Mean", "Median"),
       col=c('lightgray','blue'), lty=1, cex=1, lwd = 2)        

##############################################################################
# Data generated from a normal distribution N(20,9)
##############################################################################
sim.mean<- sim.median <- vector()

for(i in 1:M){
  set.seed(i)
  data0 <- rnorm(ns, 20, 3)
  sim.mean[i] <- mean(data0)
  sim.median[i] <- median(data0)
}

# Histogram of the Means
hist(sim.mean, probability = T, xlab = "Mean", ylab = "Density", main = "Simulated Mean", breaks = 50, 
     cex.axis = 1.5, cex.lab = 1.5)
box()
abline(v = 20, lwd = 2, col = "red")

# Histogram of the Medians
hist(sim.median, probability = T, xlab = "Median", ylab = "Density", main = "Simulated Median", breaks = 50, 
     cex.axis = 1.5, cex.lab = 1.5)
box()
abline(v = 20, lwd = 2, col = "red")

# Smooth density comparison
plot(density(sim.mean), col="black", ylim = c(0,1.5), lwd = 2, 
     xlab = "Estimates", ylab = "Density", main = "Mean vs. Median",
     cex.axis = 1.5, cex.lab = 1.5)
points(density(sim.median), col="blue", type = "l", lwd = 2)
abline(v = 20, lwd = 2, col = "red")
legend(20.2, 4, legend=c("Mean", "Median"),
       col=c("black", "blue"), lty=1, cex=1, lwd = 2)

# Boxplot comparison
boxplot(sim.mean, sim.median, names = c("Mean","Median"), col=c('lightgray','blue'),
        xlab = "Estimates", ylab = "Density", main = "Mean vs. Median")
abline(h = 20, lwd = 2, col = "red")
legend(1.3, 21, legend=c("Mean", "Median"),
       col=c('lightgray','blue'), lty=1, cex=1, lwd = 2)    

#####################################################################################################
# Data simulated from a contaminated normal data
# 95% simulated from a N(20,1) and 5% simulated from a t distribution with 2 df and location 20
#####################################################################################################

sim.mean <- sim.median <- vector()

for(i in 1:M){
  set.seed(i)
  data0 <- c(rnorm(ns*0.95, 20, 1), 20+rt(ns*0.05, df = 2) )
  sim.mean[i] <- mean(data0)
  sim.median[i] <- median(data0)
}

# Histogram of the Means
hist(sim.mean, probability = T, xlab = "Mean", ylab = "Density", main = "Simulated Mean", breaks = 50, 
     cex.axis = 1.5, cex.lab = 1.5)
box()
abline(v = 20, lwd = 2, col = "red")

# Proportion of estimates smaller than 19.75 and greater than 20.25
mean(sim.mean > 20.25) + mean(sim.mean < 19.75)
## [1] 0.0331
# Histogram of the Medians
hist(sim.median, probability = T, xlab = "Median", ylab = "Density", main = "Simulated Median", breaks = 50, 
     cex.axis = 1.5, cex.lab = 1.5)
box()
abline(v = 20, lwd = 2, col = "red")

# Smooth density comparison
plot(density(sim.mean), col="black", ylim = c(0,4), lwd = 2, 
     xlab = "Estimates", ylab = "Density", main = "Mean vs. Median",
     cex.axis = 1.5, cex.lab = 1.5)
points(density(sim.median), col="blue", type = "l", lwd = 2)
abline(v = 20, lwd = 2, col = "red")
legend(20.2, 4, legend=c("Mean", "Median"),
       col=c("black", "blue"), lty=1, cex=1, lwd = 2)

# Boxplot comparison
boxplot(sim.mean, sim.median, names = c("Mean","Median"), col=c('lightgray','blue'),
        xlab = "Estimates", ylab = "Density", main = "Mean vs. Median")
abline(h = 20, lwd = 2, col = "red")
legend(1.3, 24, legend=c("Mean", "Median"),
       col=c('lightgray','blue'), lty=1, cex=1, lwd = 2) 

###################################################################################################
# Data simulated from a contaminated normal data
# 90% simulated from a N(20,1) and 10% simulated from a t distribution with 2 df
###################################################################################################


sim.mean <- sim.median <- vector()

for(i in 1:M){
  set.seed(i)
  data0 <- c(rnorm(ns*0.90, 20, 1), 20+rt(ns*0.10, df = 2) )
  sim.mean[i] <- mean(data0)
  sim.median[i] <- median(data0)
}

# Histogram of the Means
hist(sim.mean, probability = T, xlab = "Mean", ylab = "Density", main = "Simulated Mean", breaks = 50, 
     cex.axis = 1.5, cex.lab = 1.5)
box()
abline(v = 20, lwd = 2, col = "red")

# Histogram of the Medians
hist(sim.median, probability = T, xlab = "Median", ylab = "Density", main = "Simulated Median", breaks = 50, 
     cex.axis = 1.5, cex.lab = 1.5)
box()
abline(v = 20, lwd = 2, col = "red")

# Smooth density comparison
plot(density(sim.mean), col="black", ylim = c(0,4), lwd = 2, 
     xlab = "Estimates", ylab = "Density", main = "Mean vs. Median",
     cex.axis = 1.5, cex.lab = 1.5)
points(density(sim.median), col="blue", type = "l", lwd = 2)
abline(v = 20, lwd = 2, col = "red")
legend(20.2, 4, legend=c("Mean", "Median"),
       col=c("black", "blue"), lty=1, cex=1, lwd = 2)

# Boxplot comparison
boxplot(sim.mean, sim.median, names = c("Mean","Median"), col=c('lightgray','blue'),
        xlab = "Estimates", ylab = "Density", main = "Mean vs. Median")
abline(h = 20, lwd = 2, col = "red")
legend(1.3, 25, legend=c("Mean", "Median"),
       col=c('lightgray','blue'), lty=1, cex=1, lwd = 2)