Example using Mean Shift Clustering

Generate the simulated data of example 1

# Simulated data for example 1: 30 observations of 9 variables, each variable
# standardised to a z-score and bound column-wise into the matrix `simMV`.
# The raw draws below must stay in this exact order: they all consume the
# random stream seeded here, so reordering them would change every value.
set.seed(1)

# Standardise a numeric vector: subtract its mean, divide by its sample sd.
toZScore <- function(v) {
  (v - mean(v)) / sd(v)
}

# ---- Raw draws (order matters) ----

# 10 values from 1-50, 15 from 100-200 and 5 from 250-300.
ctgr <- c(
  sample(1:50, size = 10, replace = TRUE),
  sample(100:200, size = 15, replace = TRUE),
  sample(250:300, size = 5, replace = TRUE)
)

# 15 values from 1-10 followed by 15 from 20-30.
nCtgr <- c(
  sample(1:10, size = 15, replace = TRUE),
  sample(20:30, size = 15, replace = TRUE)
)

# 20 values from 10-30 followed by 10 from 1-10.
nBrnd <- c(
  sample(10:30, size = 20, replace = TRUE),
  sample(1:10, size = 10, replace = TRUE)
)

# 20 values from 50-100 followed by 10 from 1-50.
nMdl <- c(
  sample(50:100, size = 20, replace = TRUE),
  sample(1:50, size = 10, replace = TRUE)
)

# 30 values drawn uniformly from 1-300 and from 1-100 respectively.
pPrdct <- sample(1:300, size = 30, replace = TRUE)
pCtgr <- sample(1:100, size = 30, replace = TRUE)

# 20 values from 30-70 followed by 10 from 1-100.
pNp <- c(
  sample(30:70, size = 20, replace = TRUE),
  sample(1:100, size = 10, replace = TRUE)
)

# Three groups of 10, each a random permutation of a mostly-zero pool.
larget <- sample(
  c(rep(0, 5), sample(10:30, size = 5, replace = TRUE)),
  size = 10
)
mediant <- sample(c(rep(0, 6), rep(100, 2), 20, 10), size = 10)
smallt <- sample(c(rep(0, 6), rep(100, 4)), size = 10)
pUt <- c(larget, mediant, smallt)

# Same construction with different non-zero values.
largeu <- sample(c(rep(0, 8), 3, 5), size = 10)
medianu <- sample(c(rep(0, 7), 3, 5, 8), size = 10)
smallu <- sample(
  c(rep(0, 6), sample(1:20, size = 4, replace = TRUE)),
  size = 10
)
pUp <- c(largeu, medianu, smallu)

# ---- Standardisation (no randomness involved) ----
ctgrscore <- toZScore(ctgr)
nCtgrscore <- toZScore(nCtgr)
nBrndscore <- toZScore(nBrnd)
nMdlscore <- toZScore(nMdl)
pPrdctscore <- toZScore(pPrdct)
pCtgrscore <- toZScore(pCtgr)
pNpscore <- toZScore(pNp)
pUtscore <- toZScore(pUt)
pUpscore <- toZScore(pUp)

# 30 x 9 matrix; column names are taken from the variable names.
simMV <- cbind(
  ctgrscore, nCtgrscore, nBrndscore,
  nMdlscore, pPrdctscore, pCtgrscore,
  pNpscore, pUtscore, pUpscore
)

Read in data of example 2 and adjust it into the required form.

# Load the 2014 auto-insurance average-premium table and reduce it to three
# per-row averages, bound column-wise into the matrix `simCI`.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file exists.
cidata <- read.csv(file = "/Users/YanYang/code/Car_Insurance_Example/2014 Auto Insurance Average Premium Data.csv")

# Each vector holds, for every row of `cidata`, the mean of one block of five
# columns (3-7, 8-12, 13-17). The values are coerced with as.numeric() first,
# as in the original code.
# NOTE(review): presumably the three blocks are different coverage types
# (suggested by the names lap / cap / cpap) -- confirm against the CSV header.
# seq_len() keeps an empty table from being iterated as rows c(1, 0), and
# vapply() guarantees a plain numeric vector whatever the number of rows.
lap <- vapply(
  seq_len(nrow(cidata)),
  function(i) mean(as.numeric(cidata[i, 3:7])),
  numeric(1)
)
cap <- vapply(
  seq_len(nrow(cidata)),
  function(i) mean(as.numeric(cidata[i, 8:12])),
  numeric(1)
)
cpap <- vapply(
  seq_len(nrow(cidata)),
  function(i) mean(as.numeric(cidata[i, 13:17])),
  numeric(1)
)

# One row per row of `cidata`; columns are named lap, cap, cpap.
simCI <- cbind(lap, cap, cpap)

Load the package of Mean Shift Clustering

library(MeanShift)

Application of Mean Shift Clustering to the simulated data that was used for K-means Clustering in the first example.

Find a suitable bandwidth by observing how the number of groups changes

# Red line chart of `v` against its index, titled as the number of clusters
# per bandwidth quantile.
# NOTE(review): `v` is not defined in the visible part of this file; it is
# presumably computed in an omitted chunk -- confirm before running.
plot(
  v,
  type = "l",
  col = "red",
  main = "Change of Cluster number as increasing bandwidth",
  xlab = "Index of quantiles",
  ylab = "Number of clusters"
)

Plot the clusters with quantile labels equal to 10 based on observation

# Scatter plot of the first two columns of `simMV`, one small dot per
# observation, coloured by cluster label (label k is drawn in colour k + 2).
# NOTE(review): `cluMV` is not defined in the visible part of this file; it is
# presumably the clustering result from an omitted chunk -- confirm.
plot(
  x = simMV[, 1],
  y = simMV[, 2],
  pch = 16,
  cex = 0.8,
  col = cluMV$labels + 2,
  xlab = "Number of category available in wholesaler",
  ylab = "Average number of products in each category"
)

# Overlay rows 1 and 2 of `cluMV$components` as larger dots, one per column;
# column j is drawn in colour j + 2, the same offset used for the labels.
points(
  x = cluMV$components[1, ],
  y = cluMV$components[2, ],
  pch = 16,
  cex = 1.8,
  col = 2 + (1:ncol(cluMV$components))
)

Find a suitable bandwidth by observing how the within-group distance changes

# Red line chart of `v` against its index, titled as the summed within-group
# distance per bandwidth quantile.
# NOTE(review): `v` is not defined in the visible part of this file; it is
# presumably recomputed in an omitted chunk -- confirm before running.
plot(
  v,
  type = "l",
  col = "red",
  main = "Change of distance sum as increasing bandwidth",
  xlab = "Index of quantiles",
  ylab = "Sum of distance within each group"
)

Plot the clusters with quantile labels equal to 22 based on observation

# Scatter plot of the first two columns of `simMV`, one small dot per
# observation, coloured by cluster label (label k is drawn in colour k + 2).
# NOTE(review): `cluMV` is not defined in the visible part of this file; it is
# presumably the clustering result from an omitted chunk -- confirm.
plot(
  x = simMV[, 1],
  y = simMV[, 2],
  pch = 16,
  cex = 0.8,
  col = cluMV$labels + 2,
  xlab = "Number of category available in wholesaler",
  ylab = "Average number of products in each category"
)

# Overlay rows 1 and 2 of `cluMV$components` as larger dots, one per column;
# column j is drawn in colour j + 2, the same offset used for the labels.
points(
  x = cluMV$components[1, ],
  y = cluMV$components[2, ],
  pch = 16,
  cex = 1.8,
  col = 2 + (1:ncol(cluMV$components))
)

Application of Mean Shift Clustering to the real data that was used for K-means Clustering in the second example.