The categorical distribution is a discrete distribution that determines the probability of observing one of K possible outcomes. If K = 2, it may equal to Berunoulli distribution.
We will concern about the world having four possible parameters (K = 4), where 0 =< λ k <= 1 ,sum(λ k) = 1 (k = 1, 2, …, K).
set.seed(126)
n<-1000
lambda<-c(0.1,0.2,0.4,0.3)
k<-length(lambda)
catk<-rmultinom(n,1 ,prob=lambda)
row.names(catk)<-as.character(1:k)
barplot(apply(catk,1,sum)/n,col=3,main="Categolical distribution n=1000")
#
cat30<-rmultinom(30,1,prob=lambda)
row.names(cat30)<-as.character(1:k)
barplot(apply(cat30,1,sum)/30,col=4,main="Categolical distribution n=30")
Its probability for data xki = 1 in whole x is λ k.
We can make function named as PrCat(x, lambda), where x is K sets of vectors x = (x1,x2,…,xK) having series of 1 or 0. xk has mk numbers of 1 (xk = c(1,0,1,0,…,0),sum(xki) = mk).
This function will back the value of λ k^sum(x[k,])/nK (k = 1 to K, nK = sum(x[1:K,])).
PrCat has not constant value;
gamma(n)/(gamma(n1)…gamma(nk))*Πλ k^nK.
PrCat<-function(x,lambda){
k<-nrow(x)
n<-ncol(x)
p<-1
for(ki in 1:k){ p<- p*(lambda[ki])^(sum(x[ki,])/n)}
p}
#
PrCat(x=matrix(c(1,0,0,0,1,0,0,0,1),nrow=3),lambda=c(.333,.333,.333))
## [1] 0.333
PrCat(x=catk,lambda=lambda)
## [1] 0.278675
PrCat(x=cat30,lambda=lambda)
## [1] 0.2665648
Distribution of λin the world may be estimated and visualized as a graph from the data x. Maximum likelihood of world λ may be gained also form this distribution.
Four categolical parameters as the axies of the world are generated by the function seek4lambda.
Where 0 =< λ < =1 ,sum(λ )=1.
seek4lambda<-function(bigin=c(0,0,0,0),end=c(1,1,1,1),step=c(0.1,0.1,0.1,0.1)){
la<-NULL
for(l4 in seq(bigin[4],end[4], step[4])){
for(l3 in seq(bigin[3] ,min(1-l4,end[3]),step[3])){
for(l2 in seq(bigin[2], min(1-l4-l3,end[2]),step[2])){
l1=1-l4-l3-l2
if (l1>=bigin[1] && l1<=end[1]) la<-rbind(la,c(l1,l2,l3,l4))
}}}
la}
#####
la<-seek4lambda(step=c(0.025,0.025,0.025,0.025))
head(la)
## [,1] [,2] [,3] [,4]
## [1,] 1.000 0.000 0 0
## [2,] 0.975 0.025 0 0
## [3,] 0.950 0.050 0 0
## [4,] 0.925 0.075 0 0
## [5,] 0.900 0.100 0 0
## [6,] 0.875 0.125 0 0
Along this axies probabilities of the likelifood will be estimated.
Likelifood.Cat<-function(x,lam){
Lk<-NULL
for (i in 1:nrow(lam)){
p<-PrCat(x=x,lambda=lam[i,])
if( p==Inf) p<-0 #
Lk<-rbind(Lk,c(lam[i,],p))
}
Lk<-data.frame(Lk)
sp<-sum(Lk[,5]) # 1/sp = CONSTANT
Lk[,5]<-Lk[,5]/sp
names(Lk)<-c("L1","L2","L3","L4","Likelihood")
Lk
}
#####
library(scatterplot3d)
col.n<-100
ch.col<-topo.colors(col.n+1,alpha=0.4)
#####
#n=1000
LkCat<-Likelifood.Cat(x=catk,lam=la)
ch.max<-max(LkCat$Likelihood)
col<-ch.col[(1-LkCat$Likelihood/ch.max)*col.n+1]
scatterplot3d(LkCat[,1:3],color=col,angle=140, pch = 16,main="Likelihood distribution of λ n=1000")
#ML
LkCat[LkCat$Likelihood==max(LkCat$Likelihood),]
## L1 L2 L3 L4 Likelihood
## 8199 0.1 0.2 0.4 0.3 0.0001633475
#n=30
LkCat<-Likelifood.Cat(x=cat30,lam=la)
ch.max<-max(LkCat$Likelihood)
col<-ch.col[(1-LkCat$Likelihood/ch.max)*col.n+1]
scatterplot3d(LkCat[,1:3],color=col,angle=140,pch=16,main="Likelihood distribution of λ n=30")
#ML
LkCat[LkCat$Likelihood==max(LkCat$Likelihood),]
## L1 L2 L3 L4 Likelihood
## 9321 0.1 0.225 0.3 0.375 0.0001608031
Dirichlet function could represent distribution of the value lambda.
Drch<-function(a,lambda){
PrD<-NULL
for(n in 1:nrow(lambda)){
lam<-lambda[n,]
p<-1
for(ki in 1:length(a)){
p<-p*lam[ki]^(a[ki]-1)}
PrD<-rbind(PrD,c(lam,p))}
sp<-sum(PrD[,5])
PrD[,5]<-PrD[,5]/sp
PrD<-data.frame(PrD)
names(PrD)<-c("L1","L2","L3","L4","prior")
PrD
}
#
a<-c(1,2,4,3)*2
prior<-Drch(a=a,lambda=la)
ch.max<-max(prior$prior)
col<-ch.col[(1-prior$prior/ch.max)*col.n+1]
scatterplot3d(prior[,1:3],color=col,angle=140,pch=16,main="Dirichlet distribution of λ ")
As you see Categolical is conjugate to Dirichlet.
It means both have the same form
λ i^xi
So
PrCat(x,lambda)*Drch(a,lambda)=k(x,a)Drch((x/n+a),lambda).
where k(x,a) is constant.
Previously we called PrCat(x,λ) as likelihood. This time we may call Drch(x,a,lambda) as prior distribution of λ.
According to Bayes’ rule
Posterior = Liklihood * Prior / Evidence.
In upper case
Liklihood = PrCat(x,λ); Pr(x|λ)
Prior = Drch(a,λ); Pr(λ)
Evidence = ∫ PrCat(x,λ)dλ
Posterior distribution of λin the world may be estimated and visualized as a graph from the data x.
Posterior.Cat<-function(x,a,lam){
k<-length(a)
n<-ncol(x)
for(ki in 1:k){ a[ki]<- a[ki]+sum(x[ki,])/n-1}
p<-Drch(a=a,lambda=lam)
names(p)[5]<- "posterior"
p}
#n=1000
Post<-Posterior.Cat(x=catk,a=a,lam=la)
ch.max<-max(Post$posterior)
col<-ch.col[(1-Post$posterior/ch.max)*col.n+1]
scatterplot3d(Post[,1:3],color=col,angle=140,pch=16,main="Posterior distribution of λ n=1000")
#ML
Post[Post$posterior==max(Post$posterior),]
## L1 L2 L3 L4 posterior
## 8650 0.025 0.175 0.475 0.325 0.002874661
#
Post<-Posterior.Cat(x=cat30,a=a,lam=la)
ch.max<-max(Post$posterior)
col<-ch.col[(1-Post$posterior/ch.max)*col.n+1]
scatterplot3d(Post[,1:3],color=col,angle=140,pch=16,main="Posterior distribution of λ n=30")
#ML
Post[Post$posterior==max(Post$posterior),]
## L1 L2 L3 L4 posterior
## 8650 0.025 0.175 0.475 0.325 0.002867657
If your data is not large, then the shape of the distribution will be dull also in the case of posterior estimate as in the case of the likelifood.
You may use MCmultinomdirichlet {MCMCpack}.
library(MCMCpack)
## Loading required package: coda
## Loading required package: MASS
## ##
## ## Markov Chain Monte Carlo Package (MCMCpack)
## ## Copyright (C) 2003-2016 Andrew D. Martin, Kevin M. Quinn, and Jong Hee Park
## ##
## ## Support provided by the U.S. National Science Foundation
## ## (Grants SES-0350646 and SES-0350613)
## ##
posterior <- MCmultinomdirichlet(apply(catk,1,sum), a, mc=10000)
summary(posterior)
##
## Iterations = 1:10000
## Thinning interval = 1
## Number of chains = 1
## Sample size per chain = 10000
##
## 1. Empirical mean and standard deviation for each variable,
## plus standard error of the mean:
##
## Mean SD Naive SE Time-series SE
## pi.1 0.1030 0.009591 9.591e-05 9.591e-05
## pi.2 0.1885 0.012310 1.231e-04 1.210e-04
## pi.3 0.4016 0.015352 1.535e-04 1.574e-04
## pi.4 0.3068 0.014458 1.446e-04 1.414e-04
##
## 2. Quantiles for each variable:
##
## 2.5% 25% 50% 75% 97.5%
## pi.1 0.08487 0.09649 0.1027 0.1092 0.1230
## pi.2 0.16477 0.18006 0.1883 0.1967 0.2134
## pi.3 0.37192 0.39135 0.4014 0.4120 0.4317
## pi.4 0.27901 0.29704 0.3067 0.3165 0.3357
plot(posterior)
#Estimate categolical probability at the given lambda
PrCat<-function(x,lambda){
k<-nrow(x)
n<-ncol(x)
p<-1
for(ki in 1:k){ p<- p*(lambda[ki])^(sum(x[ki,])/n)}
p
}
#Categolical Likelifood along lambda sequence
Likelifood.Cat<-function(x,lam){
Lk<-NULL
for (i in 1:nrow(lam)){
p<-PrCat(x=x,lambda=lam[i,])
if( p==Inf) p<-0 #
Lk<-rbind(Lk,c(lam[i,],p))
}
Lk<-data.frame(Lk)
sp<-sum(Lk[,5]) # 1/sp = CONSTANT
Lk[,5]<-Lk[,5]/sp
names(Lk)<-c("L1","L2","L3","L4","Likelihood")
Lk
}
#Dirichlet distribution as prior
Drch<-function(a,lambda){
PrD<-NULL
for(n in 1:nrow(lambda)){
lam<-lambda[n,]
p<-1
for(ki in 1:length(a)){
p<-p*lam[ki]^(a[ki]-1)}
PrD<-rbind(PrD,c(lam,p))}
sp<-sum(PrD[,5])
PrD[,5]<-PrD[,5]/sp
PrD<-data.frame(PrD)
names(PrD)<-c("L1","L2","L3","L4","prior")
PrD
}
#Distribution of the posterior probability along lambda
Posterior.Cat<-function(x,a,lam){
k<-length(a)
n<-ncol(x)
for(ki in 1:k){ a[ki]<- a[ki]+sum(x[ki,])/n-1}
p<-Drch(a=a,lambda=lam)
names(p)[5]<- "posterior"
p
}