对于不同类型的数据,聚类分析都可以使用,只是采取的具体方法不同。其目的是把分类对象按一定的规则分成若干类,分类的依据是对象之间的相异度(如距离)或相似度(如相关系数)。

  聚类分析类型:

  聚类分析方法:

# Toy data set: five observations measured on two variables.
x1 <- c(5, 7, 3, 6, 6)
x2 <- c(7, 1, 2, 5, 6)

# Scatter plot of the five observations, each annotated with its index
# (1..5). `adj` is passed to text() to position the label relative to
# its point.
plot(x1, x2)
text(x1, x2, labels = seq_len(5), adj = -0.5)

# Stack the two coordinate vectors column-wise: one row per observation.
X <- cbind(x1, x2)

# Pairwise distances; Euclidean is the default metric of dist().
dist(X)
##          1        2        3        4
## 2 6.324555
## 3 5.385165 4.123106
## 4 2.236068 4.123106 4.242641
## 5 1.414214 5.099020 5.000000 1.000000

# diag = TRUE additionally prints the (zero) main diagonal.
dist(X, diag = TRUE)
##          1        2        3        4        5
## 1 0.000000
## 2 6.324555 0.000000
## 3 5.385165 4.123106 0.000000
## 4 2.236068 4.123106 4.242641 0.000000
## 5 1.414214 5.099020 5.000000 1.000000 0.000000

# Manhattan distance.
dist(X, method = "manhattan")
##   1 2 3 4
## 2 8
## 3 7 5
## 4 3 5 6
## 5 2 6 7 1

# Minkowski distance with p = 1 reproduces the Manhattan distances above.
dist(X, method = "minkowski", p = 1)
##   1 2 3 4
## 2 8
## 3 7 5
## 4 3 5 6
## 5 2 6 7 1

# upper = TRUE additionally prints the upper triangle.
dist(X, upper = TRUE)
##          1        2        3        4        5
## 1          6.324555 5.385165 2.236068 1.414214
## 2 6.324555          4.123106 4.123106 5.099020
## 3 5.385165 4.123106          4.242641 5.000000
## 4 2.236068 4.123106 4.242641          1.000000
## 5 1.414214 5.099020 5.000000 1.000000

# Minkowski distance with p = 2 reproduces the Euclidean distances.
dist(X, method = "minkowski", p = 2)
##          1        2        3        4
## 2 6.324555
## 3 5.385165 4.123106
## 4 2.236068 4.123106 4.242641
## 5 1.414214 5.099020 5.000000 1.000000
# Hierarchical clustering of the Euclidean distances using single linkage
# (nearest-neighbour method).
hc <- hclust(dist(X), method = "single")

# Merge history: columns 1-2 are hc$merge, column 3 is the merge height.
# In hc$merge a negative entry denotes a single observation and a
# positive entry refers to the cluster formed at that earlier step.
cbind(hc$merge, hc$height)
##      [,1] [,2]     [,3]
## [1,]   -4   -5 1.000000
## [2,]   -1    1 1.414214
## [3,]   -2    2 4.123106
## [4,]   -3    3 4.123106

# Dendrogram of the clustering.
plot(hc)

# Hierarchical clustering of the Euclidean distances using the "ward.D"
# agglomeration method.
# NOTE(review): per the hclust() documentation, "ward.D" applied to
# unsquared distances does not implement Ward's (1963) criterion;
# "ward.D2" does. Kept as "ward.D" so the output below stays valid --
# confirm which variant is intended.
hc <- hclust(dist(X), method = "ward.D")

# Merge history: columns 1-2 are hc$merge, column 3 is the merge height.
cbind(hc$merge, hc$height)
##      [,1] [,2]     [,3]
## [1,]   -4   -5 1.000000
## [2,]   -1    1 2.100188
## [3,]   -2   -3 4.123106
## [4,]    2    3 8.355856

# Dendrogram of the clustering.
plot(hc)

# Load the example data set d7.2 and draw its default plot.
# NOTE(review): setwd() with a hard-coded, machine-specific path makes the
# script non-portable. It is kept here because code outside this section
# may rely on the working directory -- consider a project-relative path.
setwd("C:/Users/lenovo/Desktop")

# The first line of the file holds the column names. `TRUE` is used
# instead of `T` because `T` is an ordinary variable that can be
# reassigned, whereas `TRUE` is a reserved word.
d7.2 <- read.table("d7.2.txt", header = TRUE)
plot(d7.2)

library(mvstats)
# Compare agglomeration methods on d7.2 using mvstats::H.clust with
# Euclidean distances. Each call also draws its dendrogram (plot = TRUE);
# `TRUE` is spelled out because `T` is a reassignable variable.

# Single linkage (nearest neighbour).
H.clust(d7.2, "euclidean", "single", plot = TRUE)

## 
## Call:
## hclust(d = D, method = m)
## 
## Cluster method   : single 
## Distance         : euclidean 
## Number of objects: 31

# Complete linkage (furthest neighbour).
H.clust(d7.2, "euclidean", "complete", plot = TRUE)

## 
## Call:
## hclust(d = D, method = m)
## 
## Cluster method   : complete 
## Distance         : euclidean 
## Number of objects: 31

# Median linkage.
H.clust(d7.2, "euclidean", "median", plot = TRUE)

## 
## Call:
## hclust(d = D, method = m)
## 
## Cluster method   : median 
## Distance         : euclidean 
## Number of objects: 31

# Average linkage (group average).
H.clust(d7.2, "euclidean", "average", plot = TRUE)

## 
## Call:
## hclust(d = D, method = m)
## 
## Cluster method   : average 
## Distance         : euclidean 
## Number of objects: 31

# Centroid linkage.
H.clust(d7.2, "euclidean", "centroid", plot = TRUE)

## 
## Call:
## hclust(d = D, method = m)
## 
## Cluster method   : centroid 
## Distance         : euclidean 
## Number of objects: 31

# Ward's method.
# NOTE(review): "ward" is a deprecated name; as the message below shows,
# hclust() maps it to "ward.D". Left unchanged because H.clust's own
# handling of the method string is not visible here -- confirm before
# switching to "ward.D" or "ward.D2".
H.clust(d7.2, "euclidean", "ward", plot = TRUE)
## The "ward" method has been renamed to "ward.D"; note new "ward.D2"

## 
## Call:
## hclust(d = D, method = m)
## 
## Cluster method   : ward.D 
## Distance         : euclidean 
## Number of objects: 31
# Simulate two groups of 100 observations in 10 dimensions:
#   x1: 100 x 10 matrix of normal draws with mean 0 and sd 0.3
#   x2: 100 x 10 matrix of normal draws with mean 1 and sd 0.3
# (The original comment stated mean 1 for x1, contradicting the code.)
x1 <- matrix(rnorm(1000, mean = 0, sd = 0.3), ncol = 10)
x2 <- matrix(rnorm(1000, mean = 1, sd = 0.3), ncol = 10)

# Stack both groups: rows of x1 first, then rows of x2.
x <- rbind(x1, x2)

# Hierarchical clustering (complete linkage, Euclidean distance).
H.clust(x, "euclidean", "complete")

## 
## Call:
## hclust(d = D, method = m)
## 
## Cluster method   : complete 
## Distance         : euclidean 
## Number of objects: 200

# k-means with 2 clusters. No seed is set, so the draws above and the
# random initial centres differ from run to run.
cl <- kmeans(x, 2)

# Plot symbols encode the true group ("1" for rows from x1, "2" for rows
# from x2); the counts are taken from the matrices rather than hard-coded.
pch1 <- rep("1", nrow(x1))
pch2 <- rep("2", nrow(x2))

# Colour encodes the k-means assignment. When given a matrix with more
# than two columns, plot()/points() use only its first two columns, so
# this is a 2-D view of the 10-D data.
plot(x, col = cl$cluster, pch = c(pch1, pch2), cex = 0.7)
# Overlay the fitted cluster centres as large stars in palette colour 3.
points(cl$centers, col = 3, pch = "*", cex = 3)

# Same simulation with 1000 observations per group:
#   x1: 1000 x 10 matrix of normal draws with mean 0 and sd 0.3
#   x2: 1000 x 10 matrix of normal draws with mean 1 and sd 0.3
# (The original comment stated mean 1 for x1, contradicting the code.)
x1 <- matrix(rnorm(10000, mean = 0, sd = 0.3), ncol = 10)
x2 <- matrix(rnorm(10000, mean = 1, sd = 0.3), ncol = 10)

# Stack both groups: rows of x1 first, then rows of x2.
x <- rbind(x1, x2)

# k-means with 2 clusters. No seed is set, so the draws above and the
# random initial centres differ from run to run.
cl <- kmeans(x, 2)

# Plot symbols encode the true group ("1" for rows from x1, "2" for rows
# from x2); the counts are taken from the matrices rather than hard-coded.
pch1 <- rep("1", nrow(x1))
pch2 <- rep("2", nrow(x2))

# Colour encodes the k-means assignment. When given a matrix with more
# than two columns, plot()/points() use only its first two columns, so
# this is a 2-D view of the 10-D data.
plot(x, col = cl$cluster, pch = c(pch1, pch2), cex = 0.7)
# Overlay the fitted cluster centres as large stars in palette colour 3.
points(cl$centers, col = 3, pch = "*", cex = 3)