November 11, 2020

Outlier detection and limits

How the data was generated

library(plotly)
library(dplyr)

# Simulate 100 draws each from N(mean = 5, sd = 2) and N(mean = 6, sd = 3).
# The fixed seed makes the sample reproducible.
set.seed(123)
x <- rnorm(100, 5, 2)
y <- rnorm(100, 6, 3)

# A value counts as an outlier when it lies outside the central 95% of the
# distribution it was drawn from (below the 2.5% or above the 97.5% quantile).
# The logical masks are kept so they can be reused below instead of being
# reconstructed by matching floating-point values.
x_is_outlier <- x < qnorm(.025, 5, 2) | x > qnorm(.975, 5, 2)
y_is_outlier <- y < qnorm(.025, 6, 3) | y > qnorm(.975, 6, 3)

# The outlying values themselves, per coordinate.
outliers_x <- x[x_is_outlier]
outliers_y <- y[y_is_outlier]

ds <- data.frame(x = x,
                 y = y)

# A point is labelled 'Outlier' if either of its coordinates is an outlier.
ds$outlier <- ifelse(x_is_outlier | y_is_outlier, 'Outlier', 'Regular')

How the plot was generated

# Lower and upper bounds of the central 95% region of each distribution:
# the 2.5% and 97.5% quantiles of N(5, 2) for x and of N(6, 3) for y.
x_limits <- qnorm(c(.025, .975), 5, 2)
y_limits <- qnorm(c(.025, .975), 6, 3)

# Scatter plot of the points, coloured by their outlier label. The trace type
# is given explicitly so plotly does not have to infer it (and does not emit
# its "No trace type specified" message).
fig <- plot_ly(ds, x = ~x, y = ~y, type = 'scatter', mode = 'markers',
               color = ~outlier)

# Shade the rectangle spanned by the limits. `shapes` takes a list of shape
# definitions, so the single rectangle is wrapped in an outer list.
fig <- layout(fig,
              shapes = list(
                  list(type = 'rect',
                       xref = 'x',
                       x0 = x_limits[1],
                       x1 = x_limits[2],
                       yref = 'y',
                       y0 = y_limits[1],
                       y1 = y_limits[2],
                       fillcolor = 'gray',
                       opacity = .2)))

fig

Thank you!