Normality test against skew-normality: BMI data.

We illustrate the use of the proposed priors (Rubio 2026) for testing normality using the Australian athletes data set (sn R package). We analyse the Body Mass Index (BMI \(=\) weight (kg) / height (m)\(^2\)) of \(n = 100\) Australian female athletes using the Jeffreys prior (Liseo and Loperfido 2006) (Rubio and Liseo 2014) (approximated with a Student-t distribution with \(1/2\) degrees of freedom), the MOOMIN prior (Rubio 2026) (approximated with a moment Student-t distribution), and the DIMOM prior (Rubio 2026).

Application

Data preparation (full data)

rm(list=ls())

# Required packages
library(sn)
## Loading required package: stats4
## 
## Attaching package: 'sn'
## The following object is masked from 'package:stats':
## 
##     sd
library(numDeriv)
library(ggplot2)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(knitr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# Routines
source("~/Documents/GitHub/OMOM/R/routines/routines_sn.R")
#source("C:/Users/Javier/Documents/GitHub/OMOM/R/routines/routines_sn.R")


#  Data: BMI Australian athletes
data(ais)
data <- ais$BMI[ais$sex == "female"]
length(data)
## [1] 100
# histogram
hist(data, breaks = 15, xlab = "BMI", ylab = "Density", probability = TRUE,
     cex.lab = 1.5, cex.axis = 1.5, main = "Full data") 
box()

# Boxplot
boxplot(data, ylab = "BMI",
     cex.lab = 1.5, cex.axis = 1.5, main = "Full data")

# Q-Q plot 
qqnorm(data)
qqline(data, col = "red", lwd = 2)

Priors

curve(tprior_app_min, -20, 20, lwd = 2, n = 500, xlab = expression(lambda), ylab = "Density",
      cex.lab = 1.5, cex.axis = 1.5, ylim = c(0,0.2))
curve(d_mom, -20, 20, add = TRUE, col="red", lwd = 2, n = 500, lty = 2)
curve(da_jeff,-20,20, add=TRUE, col="blue", lwd = 2, n = 500, lty = 3)
legend( "topright", legend = c("MOOMIN", "DIMOM", "Jeffreys"), lwd = 2, col = c("black","red","blue"), lty = c(1,2,3))

Normality tests (full data)

# Normality tests
NormTest <- snNormalityTest(data, method = "ILA")

# Posterior probability of sn model based on Jeffreys prior
probsn_J <- NormTest$probsn_J
print(probsn_J)
## [1] 0.5226577
# Posterior probability of sn model based on DIMOM prior
probsn_MOM <- NormTest$probsn_MOM
print(probsn_MOM)
## [1] 0.7276699
# Posterior probability of sn model based on MOOMIN prior
probsn_MOOMIN <- NormTest$probsn_MOOMIN
print(probsn_MOOMIN)
## [1] 0.5168648
# BIC sn model
BICSN <- NormTest$BICSN
print(BICSN)
## [1] 484.8234
# BIC normal model
BICN <- NormTest$BICN
print(BICN)
## [1] 486.1509
# p-value
psw <- NormTest$pval
print(psw)
## [1] 0.03453873
# Estimators

EST <- rbind(c(NormTest$MLEN,NA), NormTest$MLESN,
             c(NormTest$MAPN,NA),
             NormTest$MAPSN_J, 
             NormTest$MAPSN_MOM, NormTest$MAPSN_MOOMIN)

rownames(EST) <- c("MLE_N", "MLE_SN",
                  "MAP_N", 
                  "MAPSN_J", 
                  "MAPSN_DIMOM", "MAPSN_MOOMIN")

kable(EST, digits = 3, format = "html") %>%
  kable_styling(full_width = FALSE) %>%
  column_spec(1:ncol(EST), extra_css = "padding-left: 15px; padding-right: 15px;")
mu sigma lambda
MLE_N 21.989 0.966 NA
MLE_SN 19.228 1.338 2.235
MAP_N 21.989 0.966 NA
MAPSN_J 19.403 1.299 1.920
MAPSN_DIMOM 19.204 1.343 2.283
MAPSN_MOOMIN 19.178 1.349 2.337

Data preparation (removing outlier)

##################################################################################################
# Robust outlier detection
# Location estimated with the median
# Scale estimated with the normalised mean absolute deviation
# https://rpubs.com/FJRubio/Outlier
##################################################################################################


# Normalised Median Absolute Deviation
b <- 1/qnorm(0.75) # Normalisation constant
NMAD <- function(data)  b*median( abs(data - median(data)))

# Number of NMADS from the Median
a <- 3

# NMAD and Median
NMAD.data <- NMAD(data)
med <- median(data)

c(med,NMAD.data)
## [1] 21.815000  2.364751
# Robust outlier detection
Rob.Out.detect <- Vectorize(function(x){
  val <- abs((x - med)/NMAD.data)
  return(ifelse(val>a, TRUE, FALSE))
}  )

# Identifying outliers in the data with the robust method
which(Rob.Out.detect(data))
## [1] 75
# Removing outlier
datao <- data[!Rob.Out.detect(data)]

length(datao)
## [1] 99
# Histogram
hist(datao, breaks = 15, xlab = "BMI", ylab = "Density", probability = TRUE,
     cex.lab = 1.5, cex.axis = 1.5, main = "Removing outlier") 
box()

# Boxplot
boxplot(datao, ylab = "BMI",
     cex.lab = 1.5, cex.axis = 1.5, main = "Removing outlier")

# Q-Q plot 
qqnorm(datao)
qqline(datao, col = "red", lwd = 2)

Normality tests (after removing outlier)

# Normality tests
NormTestO <- snNormalityTest(datao, method = "ILA")

# Posterior probability of sn model based on Jeffreys prior
Oprobsn_J <- NormTestO$probsn_J
print(Oprobsn_J)
## [1] 0.2305043
# Posterior probability of sn model based on DIMOM prior
Oprobsn_MOM <- NormTestO$probsn_MOM
print(Oprobsn_MOM)
## [1] 0.2502103
# Posterior probability of sn model based on MOOMIN prior
Oprobsn_MOOMIN <- NormTestO$probsn_MOOMIN
print(Oprobsn_MOOMIN)
## [1] 0.1052804
# BIC sn model
BICSNO <- NormTestO$BICSN
print(BICSNO)
## [1] 469.6179
# BIC normal model
BICNO <- NormTestO$BICN
print(BICNO)
## [1] 466.8868
# p-value
pswo <- NormTestO$pval
print(pswo)
## [1] 0.6071626
# Estimators

ESTO <- rbind(c(NormTestO$MLEN,NA), NormTestO$MLESN,
             c(NormTestO$MAPN,NA),
             NormTestO$MAPSN_J, 
             NormTestO$MAPSN_MOM, NormTestO$MAPSN_MOOMIN)

rownames(ESTO) <- c("MLE_N", "MLE_SN",
                  "MAP_N", 
                  "MAPSN_J",
                  "MAPSN_DIMOM",  "MAPSN_MOOMIN")


kable(ESTO, digits = 3, format = "html") %>%
  kable_styling(full_width = FALSE) %>%
  column_spec(1:ncol(EST), extra_css = "padding-left: 15px; padding-right: 15px;")
mu sigma lambda
MLE_N 21.889 0.893 NA
MLE_SN 19.579 1.212 1.687
MAP_N 21.889 0.893 NA
MAPSN_J 19.929 1.137 1.272
MAPSN_DIMOM 19.423 1.248 1.921
MAPSN_MOOMIN 19.393 1.255 1.972

References

Liseo, B., and N. Loperfido. 2006. “A Note on Reference Priors for the Scalar Skew-Normal Distribution.” Journal of Statistical Planning and Inference 136 (2): 373–89.
Rubio, F. J. 2026. “An Objective Non-Local Prior for Skew-Symmetric Models.” Preprint NA (NA): NA–.
Rubio, F. J., and B. Liseo. 2014. “On the Independence Jeffreys Prior for Skew-Symmetric Models.” Statistics & Probability Letters 85: 91–97.