remotes::install_github('tingtingzhan/fmx')Finite Mixture Distribution, an S4 framework
Introduction
This vignette of package fmx (CRAN, Github) documents ..
Prerequisite
New features are first implemented on Github.
And eventually make their way to CRAN.
utils::install.packages('fmx')Note to Users
Examples in this vignette require that the search path has
library(fmx)
library(TukeyGH77)
library(sn)
#> Loading required package: stats4
#>
#> Attaching package: 'sn'
#> The following object is masked from 'package:stats':
#>
#> sd
library(ggplot2)
#library(geomtextpath)S4 Class 'fmx'
Definition
The S4 class stores the parameters of a mixture of distribution. Currently the normal distribution and Tukey’s GH distribution is supported.
Example below shows a mixture of two normal distributions N(0,1) and N(3,1.3) with (50\%, 50\%) mixing probability (i.e., the mixing probability w is normalize to so that the summation of w is 1).
(e1 = fmx('norm', mean = c(0,3), sd = c(1, .7), w = c(1, 1)))
#> Parameters
#> norm mean sd w
#> 1-comp. 0.00 1.00 50.0%
#> 2-comp. 3.00 0.70 50.0%class(e1)
#> [1] "fmx"
#> attr(,"package")
#> [1] "fmx"isS4(e1)
#> [1] TRUEExample below shows a mixture of two Tukey’s g-and-h distributions (A,B,g,h)=(0,1,.2,.2) and (3,1,.3,.1) (i.e., by default the location parameter A=0 and the scale parameter B=1) with (20\%,30\%) mixing probability.
(e2 = fmx('GH', A = c(0,3), g = c(.2, .3), h = c(.2, .1), w = c(2, 3)))
#> Parameters
#> GH A B g h w
#> 1-comp. 0.00 1.00 0.20 0.20 40.0%
#> 2-comp. 3.00 1.00 0.30 0.10 60.0%The fmx framework also accepts a single-component distribution.
(e3 = fmx('GH', g = .2, h = .2))
#> Parameters
#> GH A B g h
#> 1-comp. 0.00 1.00 0.20 0.20Density
Function dfmx() …
par(mar = c(4, 5, 1, 0))
curve(dfmx(x, dist = e1), xlim = c(-3,7), ylab = 'Density')ggplot() +
geom_function(fun = dfmx, args = list(dist = e1), xlim = c(-3,7))Probability
par(mar = c(4, 5, 1, 0))
curve(pfmx(x, dist = e1), xlim = c(-3,7), ylab = 'Probability')ggplot() +
geom_function(fun = pfmx, args = list(dist = e1), xlim = c(-3,7)) +
scale_y_continuous(labels = scales::label_percent())Quantile
par(mar = c(4, 5, 1, 0))
curve(qfmx(x, dist = e1), xlim = c(.001,.999), ylab = 'Quantile')ggplot() +
geom_function(fun = qfmx, args = list(dist = e1), n = 501L, xlim = c(.001, .999)) +
scale_x_continuous(labels = scales::label_percent())Simulation
par(mar = c(4, 5, 2, 0))
r1 = e1 |> rfmx(n = 1e3L)
r1 |> hist(main = '1000 obs from e1')new(Class = 'fmx', e1, data = r1)
#> Parameters
#> norm mean sd w
#> 1-comp. 0.00 1.00 50.0%
#> 2-comp. 3.00 0.70 50.0%
#>
#> Number of Observations: 1000
#>
#> 'log Lik.' -1841.197 (df=5)
#>
#> Kolmogorov-Smirnov Distance: 0.03080
#> Cramer-Von Mises Distance: 0.12372
#> Kullback-Leibler divergence: 0.00640Extract Subset
(d = fmx('norm', mean = c(1, 4, 7), w = c(1, 1, 1)))
#> Parameters
#> norm mean sd w
#> 1-comp. 1.00 1.00 33.3%
#> 2-comp. 4.00 1.00 33.3%
#> 3-comp. 7.00 1.00 33.3%d[1:2]
#> Parameters
#> norm mean sd w
#> 1-comp. 1.00 1.00 50.0%
#> 2-comp. 4.00 1.00 50.0%Multiple 'fmx' Objects
ggplot() +
geom_function(fun = qfmx, args = list(dist=e1), mapping = aes(color='Normal')) +
geom_function(fun = qfmx, args = list(dist=e2), mapping = aes(color='Tukey gh')) +
scale_x_continuous(labels = scales::label_percent(), limits = c(.001, .999)) +
labs(x = NULL, y = 'Quantile', color = 'Mixture\nModels')Tukey’s g-&-h Mixture
Consider three Tukey’s g-&-h mixture distributions.
(d0 = fmx('GH', A = c(1,4), g = c(.2,.1), h = c(.05,.1), w = c(1,1)))
#> Parameters
#> GH A B g h w
#> 1-comp. 1.00 1.00 0.20 0.05 50.0%
#> 2-comp. 4.00 1.00 0.10 0.10 50.0%
(d1 = fmx('GH', A = c(1,4), g = c(.2,0), h = c(0,.1), w = c(1,1)))
#> Parameters
#> GH A B g h w
#> 1-comp. 1.00 1.00 0.20 . 50.0%
#> 2-comp. 4.00 1.00 . 0.10 50.0%
(d2 = fmx('GH', A = c(1,4), g = c(.2,0), h = c(.15,.1), w = c(1,1)))
#> Parameters
#> GH A B g h w
#> 1-comp. 1.00 1.00 0.20 0.15 50.0%
#> 2-comp. 4.00 1.00 . 0.10 50.0%gh-Constraints
d0 |> fmx_constraint()
#> integer(0)d1 |> fmx_constraint()
#> [1] 6 7
#> attr(,"user")
#> [1] "g2" "h1"
#> attr(,"tex")
#> [1] "g_{2}" "h_{1}"
#> attr(,"gid")
#> [1] 2
#> attr(,"hid")
#> [1] 1d2 |> fmx_constraint()
#> [1] 6
#> attr(,"user")
#> [1] "g2"
#> attr(,"tex")
#> [1] "g_{2}"
#> attr(,"gid")
#> [1] 2
#> attr(,"hid")
#> integer(0)getTeX(d0)
#> [1] "Full GH2"getTeX(d1)
#> [1] "$g_{2}=h_{1}=0$"getTeX(d2)
#> [1] "$g_{2}=0$"Moment per Component
From 'fmx' to moments
Function moment_fmx() …
d2 |> moment_fmx()
#> Mean: 1.129, 4.000
#> Standard Deviation: 1.367, 1.182
#> Skewness: 1.601, 0.000
#> (Excess) Kurtosis: 17.736, 2.508From moments to 'fmx'
Function moment2fmx() …
m = c(-1.5, 1.5)
s = c(.9, 1.1)
sk = c(.2, -.3)
kt = c(.5, .75)
w = c(2, 3)(m1 = moment2fmx(distname = 'GH', w = w, mean = m, sd = s, skewness = sk, kurtosis = kt))
#> Parameters
#> GH A B g h w
#> 1-comp. -1.53 0.86 0.06 0.03 40.0%
#> 2-comp. 1.55 1.03 -0.08 0.04 60.0%moment_fmx(m1)
#> Mean: -1.500, 1.500
#> Standard Deviation: 0.900, 1.100
#> Skewness: 0.200, -0.300
#> (Excess) Kurtosis: 0.500, 0.750(m2 = moment2fmx(distname = 'st', w = w, mean = m, sd = s, skewness = sk, kurtosis = kt))
#> Parameters
#> st xi omega alpha nu w
#> 1-comp. -2.01 0.98 0.81 18.12 40.0%
#> 2-comp. 2.20 1.21 -0.95 14.04 60.0%moment_fmx(m2)
#> Distribution: st
#> Mean: -1.500, 1.500
#> Standard Deviation: 0.900, 1.100
#> Skewness: 0.200, -0.300
#> (Excess) Kurtosis: 0.500, 0.750ggplot() +
geom_function(aes(color = 'GH'), fun = dfmx, args = list(dist=m1), n = 1001L) +
geom_function(aes(color = 'st'), fun = dfmx, args = list(dist=m2), n = 1001L) +
xlim(-5, 6)Two curves looks really close, but actually not identical.
x = seq.int(from = -5L, to = 6L, length.out = 1001L)
all.equal.numeric(dfmx(x, dist = m1), dfmx(x, dist = m2))
#> [1] "Attributes: < Component \"posterior\": Mean relative difference: 0.002626751 >"
#> [2] "Mean relative difference: 0.00224298"Appendix
Terms and Abbreviations
| Term / Abbreviation | Description |
|---|---|
|> |
Forward pipe operator introduced in R 4.1.0 |
CRAN, R |
The Comprehensive R Archive Network |
curve |
Function plots |
fitdist |
Mixture of distribution from package fitdistrplus |