library(ggplot2)

# Load the height/weight sample; its columns are Index, Height and Weight.
df <- read.csv("https://raw.githubusercontent.com/RWorkshop/Statistics-With-R-Workshop/master/data/socrHW.csv")

# Calculate Mahalanobis Distance with height and weight distributions.
# The two measurements are selected by name: column 1 is the row Index, so
# positional indexing with 1:2 would measure distance in (Index, Height)
# instead of (Height, Weight).
hw <- df[, c("Height", "Weight")]
m_dist <- mahalanobis(hw, colMeans(hw), cov(hw))
df$m_dist <- round(m_dist, 2)  # rounded copy, kept for display

# Mahalanobis Outliers - Threshold set to 12
# The unrounded distances are compared so that display rounding cannot move
# a point across the threshold.
maha_threshold <- 12
df$outlier_maha <- "No"
df$outlier_maha[m_dist > maha_threshold] <- "Yes"

# Preview the first rows: Index, Height, Weight, m_dist, outlier_maha.
head(df)
# Scatterplot with Maha Outliers
# The source data hold Height in inches and Weight in pounds (first row:
# 65.78, 112.99), while the axis labels and breaks below are metric.
# Convert to cm / kg for plotting so the labels are correct and the breaks
# fall inside the data range.
plot_df <- transform(
  df,
  Height_cm = Height * 2.54,        # inches -> centimetres
  Weight_kg = Weight * 0.45359237   # pounds -> kilograms
)

p <- ggplot(plot_df, aes(x = Weight_kg, y = Height_cm, color = outlier_maha)) +
  geom_point(size = 5, alpha = 0.6) +
  labs(
    title = "Weight vs Height",
    subtitle = "Outlier Detection in weight vs height data - Using Mahalanobis Distances",
    caption = "Source: http://wiki.stat.ucla.edu/socr/index.php/SOCR_Data_Dinov_020108_HeightsWeights"
  ) +
  ylab("Height in cm") +
  xlab("Weight in kg") +
  scale_y_continuous(breaks = seq(160, 200, 5)) +
  scale_x_continuous(breaks = seq(35, 80, 5))

# Print the plot.
p