# Standardise a numeric vector to z-scores: subtract its mean, then divide by
# its sample standard deviation.
standardise_score <- function(x) {
  (x - mean(x)) / sd(x)
}

# Fixed seed so the simulated data set is reproducible. The sample() calls
# below consume the random stream in order, so their sequence must not change.
set.seed(1)

# Every variable holds 30 simulated observations. Each raw vector gets a
# z-scored companion named <variable>score.

# ctgr: 10 draws from 1:50, 15 from 100:200 and 5 from 250:300.
ctgr <- c(
  sample(1:50, 10, replace = TRUE),
  sample(100:200, 15, replace = TRUE),
  sample(250:300, 5, replace = TRUE)
)
ctgrscore <- standardise_score(ctgr)

# nCtgr: 15 draws from 1:10 followed by 15 from 20:30.
nCtgr <- c(
  sample(1:10, 15, replace = TRUE),
  sample(20:30, 15, replace = TRUE)
)
nCtgrscore <- standardise_score(nCtgr)

# nBrnd: 20 draws from 10:30 followed by 10 from 1:10.
nBrnd <- c(
  sample(10:30, 20, replace = TRUE),
  sample(1:10, 10, replace = TRUE)
)
nBrndscore <- standardise_score(nBrnd)

# nMdl: 20 draws from 50:100 followed by 10 from 1:50.
nMdl <- c(
  sample(50:100, 20, replace = TRUE),
  sample(1:50, 10, replace = TRUE)
)
nMdlscore <- standardise_score(nMdl)

# pPrdct and pCtgr: 30 draws each from a single range.
pPrdct <- sample(1:300, size = 30, replace = TRUE)
pPrdctscore <- standardise_score(pPrdct)

pCtgr <- sample(1:100, size = 30, replace = TRUE)
pCtgrscore <- standardise_score(pCtgr)

# pNp: 20 draws from 30:70 followed by 10 from 1:100.
pNp <- c(
  sample(30:70, size = 20, replace = TRUE),
  sample(1:100, size = 10, replace = TRUE)
)
pNpscore <- standardise_score(pNp)

# pUt: three groups of 10, each a random permutation of a mostly-zero pool
# (sample() without replacement over all 10 pool elements).
larget <- sample(
  c(rep(0, 5), sample(10:30, size = 5, replace = TRUE)),
  size = 10
)
mediant <- sample(c(rep(0, 6), rep(100, 2), 20, 10), size = 10)
smallt <- sample(c(rep(0, 6), rep(100, 4)), size = 10)
pUt <- c(larget, mediant, smallt)
pUtscore <- standardise_score(pUt)

# pUp: built the same way as pUt, from three different pools.
largeu <- sample(c(rep(0, 8), 3, 5), size = 10)
medianu <- sample(c(rep(0, 7), 3, 5, 8), size = 10)
smallu <- sample(
  c(rep(0, 6), sample(1:20, size = 4, replace = TRUE)),
  size = 10
)
pUp <- c(largeu, medianu, smallu)
pUpscore <- standardise_score(pUp)

# 30 x 9 matrix of the standardised variables; column names come from the
# variable names passed to cbind().
simMV <- cbind(
  ctgrscore,
  nCtgrscore,
  nBrndscore,
  nMdlscore,
  pPrdctscore,
  pCtgrscore,
  pNpscore,
  pUtscore,
  pUpscore
)
# Load the 2014 auto-insurance average premium table.
# NOTE(review): the path is absolute and machine-specific, so this only runs
# where the file exists at exactly this location.
cidata <- read.csv(
  file = "/Users/YanYang/code/Car_Insurance_Example/2014 Auto Insurance Average Premium Data.csv"
)

# Row-wise mean over the columns `cols` of `data`; returns one numeric value
# per row. seq_len() gives an empty index for a zero-row table (1:nrow()
# would give c(1, 0)), and vapply() always returns a numeric vector, never a
# list.
row_block_mean <- function(data, cols) {
  vapply(
    seq_len(nrow(data)),
    function(i) mean(as.numeric(data[i, cols])),
    numeric(1)
  )
}

# Per-row averages over three five-column blocks of the table.
# lap and cap are the first two columns of simCI, which the plot further down
# labels as liability and collision premiums; cpap (columns 13:17) is
# presumably the comprehensive premium -- TODO confirm against the CSV header.
lap <- row_block_mean(cidata, 3:7)
cap <- row_block_mean(cidata, 8:12)
cpap <- row_block_mean(cidata, 13:17)

# One row per CSV row, columns named lap, cap, cpap.
simCI <- cbind(lap, cap, cpap)
library(MeanShift)
# Find a suitable bandwidth by observing how the number of clusters changes
# Red line chart of `v` against its index.
# NOTE(review): `v` is not defined in the visible part of the script;
# judging by the axis labels it presumably holds the number of clusters found
# at each bandwidth quantile -- confirm.
plot(
  v,
  type = "l",
  col = "red",
  main = "Change of Cluster number as increasing bandwidth",
  xlab = "Index of quantiles",
  ylab = "Number of clusters"
)
# Plot the clusters with the quantile label set to 10, chosen from the observation above
# Scatter plot of the first two columns of simMV, one small dot per
# observation, coloured by its cluster label. The +2 offset shifts label k to
# palette colour k + 2.
# NOTE(review): cluMV is not defined in the visible part of the script; it is
# presumably a MeanShift clustering result with `labels` and `components`.
plot(
  x = simMV[, 1],
  y = simMV[, 2],
  xlab = "Number of category available in wholesaler",
  ylab = "Average number of products in each category",
  pch = 16,
  cex = 0.8,
  col = 2 + cluMV$labels
)

# Overlay one larger dot per column of cluMV$components, taking rows 1 and 2
# as the x/y coordinates and using the same +2 colour offset as above.
points(
  x = cluMV$components[1, ],
  y = cluMV$components[2, ],
  pch = 16,
  cex = 1.8,
  col = (1:ncol(cluMV$components)) + 2
)
# Find a suitable bandwidth by observing how the in-group distance changes
# Red line chart of `v` against its index.
# NOTE(review): `v` is not defined in the visible part of the script;
# judging by the axis labels it presumably holds the summed within-group
# distance at each bandwidth quantile -- confirm.
plot(
  v,
  type = "l",
  col = "red",
  main = "Change of distance sum as increasing bandwidth",
  xlab = "Index of quantiles",
  ylab = "Sum of distance within each group"
)
# Plot the clusters with the quantile label set to 22, chosen from the observation above
# Scatter plot of simMV column 1 against column 2; each observation is a
# small filled dot whose colour is its cluster label shifted by 2 in the
# palette.
# NOTE(review): cluMV is not defined in the visible part of the script; it is
# presumably a MeanShift clustering result with `labels` and `components`.
plot(
  x = simMV[, 1],
  y = simMV[, 2],
  xlab = "Number of category available in wholesaler",
  ylab = "Average number of products in each category",
  pch = 16,
  cex = 0.8,
  col = 2 + cluMV$labels
)

# Draw one larger dot per column of cluMV$components (rows 1 and 2 give the
# x/y position), coloured with the same +2 offset so it matches its cluster.
points(
  x = cluMV$components[1, ],
  y = cluMV$components[2, ],
  pch = 16,
  cex = 1.8,
  col = (1:ncol(cluMV$components)) + 2
)
# Find a suitable bandwidth by observing how the in-group distance changes
# Red line chart of `v` against its index.
# NOTE(review): `v` is not defined in the visible part of the script;
# judging by the axis labels it presumably holds the summed within-group
# distance at each bandwidth quantile -- confirm.
plot(
  v,
  type = "l",
  col = "red",
  main = "Change of distance sum as increasing bandwidth",
  xlab = "Index of quantiles",
  ylab = "Sum of distance within each group"
)
# Plot the clusters with the quantile label set to 17, chosen from the observation above
# Scatter plot of the first two columns of simCI, one small dot per row,
# coloured by cluster label. The +2 offset shifts label k to palette colour
# k + 2.
# NOTE(review): cluCI is not defined in the visible part of the script; it is
# presumably a MeanShift clustering result with `labels` and `components`.
plot(
  x = simCI[, 1],
  y = simCI[, 2],
  xlab = "Average Liability Premium",  # spelling fixed (was "Liablity")
  ylab = "Average Collision Premium",
  pch = 16,
  cex = 0.8,
  col = cluCI$labels + 2
)

# Overlay one larger dot per column of cluCI$components (rows 1 and 2 give the
# x/y position) with the same colour offset. seq_len() keeps the colour
# vector empty if there are no columns, where 1:ncol() would yield c(1, 0).
points(
  x = cluCI$components[1, ],
  y = cluCI$components[2, ],
  pch = 16,
  cex = 1.8,
  col = 2 + seq_len(ncol(cluCI$components))
)