Application
Data preparation (full data)
rm(list=ls())
# Required packages
library(sn)
## Loading required package: stats4
##
## Attaching package: 'sn'
## The following object is masked from 'package:stats':
##
## sd
library(numDeriv)
library(ggplot2)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Routines
source("~/Documents/GitHub/OMOM/R/routines/routines_sn.R")
#source("C:/Users/Javier/Documents/GitHub/OMOM/R/routines/routines_sn.R")
# Data: BMI Australian athletes
data(ais)
data <- ais$BMI[ais$sex == "female"]
length(data)
## [1] 100
# histogram
hist(data, breaks = 15, xlab = "BMI", ylab = "Density", probability = TRUE,
cex.lab = 1.5, cex.axis = 1.5, main = "Full data")
box()

# Boxplot
boxplot(data, ylab = "BMI",
cex.lab = 1.5, cex.axis = 1.5, main = "Full data")

# Q-Q plot
qqnorm(data)
qqline(data, col = "red", lwd = 2)

Priors
curve(tprior_app_min, -20, 20, lwd = 2, n = 500, xlab = expression(lambda), ylab = "Density",
cex.lab = 1.5, cex.axis = 1.5, ylim = c(0,0.2))
curve(d_mom, -20, 20, add = TRUE, col="red", lwd = 2, n = 500, lty = 2)
curve(da_jeff,-20,20, add=TRUE, col="blue", lwd = 2, n = 500, lty = 3)
legend( "topright", legend = c("MOOMIN", "DIMOM", "Jeffreys"), lwd = 2, col = c("black","red","blue"), lty = c(1,2,3))

Normality tests (full data)
# Normality tests
NormTest <- snNormalityTest(data, method = "ILA")
# Posterior probability of sn model based on Jeffreys prior
probsn_J <- NormTest$probsn_J
print(probsn_J)
## [1] 0.5226577
# Posterior probability of sn model based on DIMOM prior
probsn_MOM <- NormTest$probsn_MOM
print(probsn_MOM)
## [1] 0.7276699
# Posterior probability of sn model based on MOOMIN prior
probsn_MOOMIN <- NormTest$probsn_MOOMIN
print(probsn_MOOMIN)
## [1] 0.5168648
# BIC sn model
BICSN <- NormTest$BICSN
print(BICSN)
## [1] 484.8234
# BIC normal model
BICN <- NormTest$BICN
print(BICN)
## [1] 486.1509
# p-value
psw <- NormTest$pval
print(psw)
## [1] 0.03453873
# Estimators
EST <- rbind(c(NormTest$MLEN,NA), NormTest$MLESN,
c(NormTest$MAPN,NA),
NormTest$MAPSN_J,
NormTest$MAPSN_MOM, NormTest$MAPSN_MOOMIN)
rownames(EST) <- c("MLE_N", "MLE_SN",
"MAP_N",
"MAPSN_J",
"MAPSN_DIMOM", "MAPSN_MOOMIN")
kable(EST, digits = 3, format = "html") %>%
kable_styling(full_width = FALSE) %>%
column_spec(1:ncol(EST), extra_css = "padding-left: 15px; padding-right: 15px;")
|
|
mu
|
sigma
|
lambda
|
|
MLE_N
|
21.989
|
0.966
|
NA
|
|
MLE_SN
|
19.228
|
1.338
|
2.235
|
|
MAP_N
|
21.989
|
0.966
|
NA
|
|
MAPSN_J
|
19.403
|
1.299
|
1.920
|
|
MAPSN_DIMOM
|
19.204
|
1.343
|
2.283
|
|
MAPSN_MOOMIN
|
19.178
|
1.349
|
2.337
|
Data preparation (removing outlier)
##################################################################################################
# Robust outlier detection
# Location estimated with the median
# Scale estimated with the normalised mean absolute deviation
# https://rpubs.com/FJRubio/Outlier
##################################################################################################
# Normalised Median Absolute Deviation
b <- 1/qnorm(0.75) # Normalisation constant
NMAD <- function(data) b*median( abs(data - median(data)))
# Number of NMADS from the Median
a <- 3
# NMAD and Median
NMAD.data <- NMAD(data)
med <- median(data)
c(med,NMAD.data)
## [1] 21.815000 2.364751
# Robust outlier detection
Rob.Out.detect <- Vectorize(function(x){
val <- abs((x - med)/NMAD.data)
return(ifelse(val>a, TRUE, FALSE))
} )
# Identifying outliers in the data with the robust method
which(Rob.Out.detect(data))
## [1] 75
# Removing outlier
datao <- data[!Rob.Out.detect(data)]
length(datao)
## [1] 99
# Histogram
hist(datao, breaks = 15, xlab = "BMI", ylab = "Density", probability = TRUE,
cex.lab = 1.5, cex.axis = 1.5, main = "Removing outlier")
box()

# Boxplot
boxplot(datao, ylab = "BMI",
cex.lab = 1.5, cex.axis = 1.5, main = "Removing outlier")

# Q-Q plot
qqnorm(datao)
qqline(datao, col = "red", lwd = 2)

Normality tests (after removing outlier)
# Normality tests
NormTestO <- snNormalityTest(datao, method = "ILA")
# Posterior probability of sn model based on Jeffreys prior
Oprobsn_J <- NormTestO$probsn_J
print(Oprobsn_J)
## [1] 0.2305043
# Posterior probability of sn model based on DIMOM prior
Oprobsn_MOM <- NormTestO$probsn_MOM
print(Oprobsn_MOM)
## [1] 0.2502103
# Posterior probability of sn model based on MOOMIN prior
Oprobsn_MOOMIN <- NormTestO$probsn_MOOMIN
print(Oprobsn_MOOMIN)
## [1] 0.1052804
# BIC sn model
BICSNO <- NormTestO$BICSN
print(BICSNO)
## [1] 469.6179
# BIC normal model
BICNO <- NormTestO$BICN
print(BICNO)
## [1] 466.8868
# p-value
pswo <- NormTestO$pval
print(pswo)
## [1] 0.6071626
# Estimators
ESTO <- rbind(c(NormTestO$MLEN,NA), NormTestO$MLESN,
c(NormTestO$MAPN,NA),
NormTestO$MAPSN_J,
NormTestO$MAPSN_MOM, NormTestO$MAPSN_MOOMIN)
rownames(ESTO) <- c("MLE_N", "MLE_SN",
"MAP_N",
"MAPSN_J",
"MAPSN_DIMOM", "MAPSN_MOOMIN")
kable(ESTO, digits = 3, format = "html") %>%
kable_styling(full_width = FALSE) %>%
column_spec(1:ncol(EST), extra_css = "padding-left: 15px; padding-right: 15px;")
|
|
mu
|
sigma
|
lambda
|
|
MLE_N
|
21.889
|
0.893
|
NA
|
|
MLE_SN
|
19.579
|
1.212
|
1.687
|
|
MAP_N
|
21.889
|
0.893
|
NA
|
|
MAPSN_J
|
19.929
|
1.137
|
1.272
|
|
MAPSN_DIMOM
|
19.423
|
1.248
|
1.921
|
|
MAPSN_MOOMIN
|
19.393
|
1.255
|
1.972
|