1 例題：クラスター分析

事例：アイリスに関する次の特徴量（変数）をもつデータがあるとする。

(1)ガクの長さ
(2)ガクの幅
(3)花弁の長さ
(4)花弁の幅

これらの特徴に基づき、アイリスの類型を析出したい。上記4つの変数に基づき、各サンプル間の（非）類似度をユークリッド距離で計算し、Ward法を用いて階層的クラスター分析を行う。

1.1 データの読み込み

# Work with the iris data set that ships with R
dat <- datasets::iris
head(dat, n = 6L)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

# Summary statistics for every column of the data
summary(dat)

##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
##

1.2 階層的クラスター分析

# Build a data set that holds only the variables needed for the cluster
# analysis (the first four columns)
dat2 <- dat[, seq_len(4)]

# Pairwise distances between the cases, based on the Euclidean distance
D <- dist(dat2, method = "euclidean")

# Peek at the result for reference (first five rows and columns only)
as.matrix(D)[seq_len(5), seq_len(5)]

##           1         2        3         4         5
## 1 0.0000000 0.5385165 0.509902 0.6480741 0.1414214
## 2 0.5385165 0.0000000 0.300000 0.3316625 0.6082763
## 3 0.5099020 0.3000000 0.000000 0.2449490 0.5099020
## 4 0.6480741 0.3316625 0.244949 0.0000000 0.6480741
## 5 0.1414214 0.6082763 0.509902 0.6480741 0.0000000

# Hierarchical clustering with Ward's method
C <- hclust(d = D, method = "ward.D2")

# Draw the dendrogram
plot(C)

# Cut the tree into groups; the number of clusters is set to 3 here
gr <- cutree(tree = C, k = 3)

# Resulting cluster membership of every case
print(gr)

##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##  [75] 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 3 3 3 3 2 3 3 3 3
## [112] 3 3 2 2 3 3 3 3 2 3 2 3 2 3 3 2 2 3 3 3 3 3 2 2 3 3 3 2 3 3 3 2 3 3 3 2 3
## [149] 3 2

1.3 グループの特徴を調べる

# Number of cases in each cluster
table(gr)

## gr
##  1  2  3 
## 50 64 36

# Mean of each feature within each cluster
tapply(dat[["Sepal.Length"]], gr, mean)  # sepal length

##        1        2        3 
## 5.006000 5.920312 6.869444

tapply(dat[["Sepal.Width"]], gr, mean)   # sepal width

##        1        2        3 
## 3.428000 2.751562 3.086111

tapply(dat[["Petal.Length"]], gr, mean)  # petal length

##        1        2        3 
## 1.462000 4.420312 5.769444

tapply(dat[["Petal.Width"]], gr, mean)   # petal width

##        1        2        3 
## 0.246000 1.434375 2.105556

# Combine the cluster sizes and the per-cluster means into a single table.
# table(gr) is converted to a plain vector first: passing the table object
# itself to data.frame() expands it into two columns (n.gr and n.Freq).
group.table <- data.frame(
  n = as.vector(table(gr)),                                            # cluster size
  Sepal.Length = tapply(X = dat$Sepal.Length, INDEX = gr, FUN = mean), # sepal length
  Sepal.Width = tapply(X = dat$Sepal.Width, INDEX = gr, FUN = mean),   # sepal width
  Petal.Length = tapply(X = dat$Petal.Length, INDEX = gr, FUN = mean), # petal length
  Petal.Width = tapply(X = dat$Petal.Width, INDEX = gr, FUN = mean)    # petal width
)
group.table

##    n Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 50     5.006000    3.428000     1.462000    0.246000
## 2 64     5.920312    2.751562     4.420312    1.434375
## 3 36     6.869444    3.086111     5.769444    2.105556

2 さまざまな距離関数

# Load the data and keep the first four columns
dat <- datasets::iris
dat2 <- dat[, seq_len(4)]

2.1 マンハッタン距離

# Manhattan distance between the cases
D1 <- dist(dat2, method = "manhattan")

2.2 チェビシェフ距離

【注】これ以降の距離では、proxyパッケージが必要となる

# The distance measures from here on require an additional package (proxy)
library(proxy)
# Chebyshev distance between the cases
D2 <- dist(dat2, method = "Chebyshev")

2.3 マハラノビス距離

# Mahalanobis distance between the cases
D3 <- dist(dat2, method = "Mahalanobis")

2.4 コサイン類似度に基づく距離

【注】以下の指標は距離ではなく類似度である。類似度をクラスター分析に用いるには、本来、類似度を距離に変換する（「1－類似度」を計算するなど）必要があるが、proxyパッケージのdist関数がデフォルトで距離に変換して計算してくれるため、そのまま用いればよい。

# Distance based on the cosine similarity between the cases
D4 <- dist(dat2, method = "cosine")

2.5 相関係数に基づく距離

# Distance based on the correlation coefficient between the cases
D5 <- dist(dat2, method = "correlation")

2.6 多変量版Jaccard係数に基づく距離

# Distance based on the multivariate (extended) Jaccard coefficient
D6 <- dist(dat2, method = "eJaccard")

3 さまざまな連結関数

# Load the data and keep the first four columns
dat <- datasets::iris
dat2 <- dat[, seq_len(4)]

# Euclidean distance
D <- dist(dat2, method = "euclidean")

# Cluster analysis with four different linkage methods --------
C1 <- hclust(d = D, method = "ward.D2")   # Ward's method
C2 <- hclust(d = D, method = "single")    # single linkage (nearest neighbour)
C3 <- hclust(d = D, method = "complete")  # complete linkage (furthest neighbour)
C4 <- hclust(d = D, method = "average")   # group average

4 分類評価

# Cluster analysis: Euclidean distance, Ward's method, three clusters
dat2 <- iris[, seq_len(4)]
D <- dist(dat2, method = "euclidean")
C <- hclust(d = D, method = "ward.D2")
gr <- cutree(tree = C, k = 3)

# Cluster validation statistics
library(fpc)
clu.stats <- cluster.stats(d = D, clustering = gr)

clu.stats[["average.within"]]   # average within-cluster distance

## [1] 0.9200867

clu.stats[["average.between"]]  # average between-cluster distance

## [1] 3.400042

clu.stats[["ch"]]               # pseudo-F

## [1] 558.058

clu.stats[["dunn"]]             # Dunn index

## [1] 0.1127947

# Silhouette index
library(cluster)
sil <- silhouette(x = gr, dist = D)
plot(sil)

5 k-means法によるクラスター分析

dat2 <- iris[, 1:4]

# kmeans() starts from randomly chosen centres. Fix the seed so the result
# (including the cluster numbering) is reproducible, and use several random
# starts so that a single unlucky start cannot leave us in a poor local optimum.
set.seed(123)

# Cluster analysis (k-means)
out <- kmeans(dat2,                         # input data
              centers = 3,                  # number of clusters
              nstart = 25,                  # keep the best of 25 random starts
              algorithm = "Hartigan-Wong")  # generally said to give the best results

out

## K-means clustering with 3 clusters of sizes 50, 62, 38
## 
## Cluster means:
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     5.006000    3.428000     1.462000    0.246000
## 2     5.901613    2.748387     4.393548    1.433871
## 3     6.850000    3.073684     5.742105    2.071053
## 
## Clustering vector:
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##  [75] 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 3 3 3 3 2 3 3 3 3
## [112] 3 3 2 2 3 3 3 3 2 3 2 3 2 3 3 2 2 3 3 3 3 3 2 3 3 3 3 2 3 3 3 2 3 3 3 2 3
## [149] 3 2
## 
## Within cluster sum of squares by cluster:
## [1] 15.15100 39.82097 23.87947
##  (between_SS / total_SS =  88.4 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"

6 Fuzzy c-means法によるクラスター分析

# Data
dat2 <- iris[, 1:4]

# cmeans() is given only the number of clusters, so its starting centres are
# chosen at random; fix the seed so that the memberships and the cluster
# numbering are reproducible from run to run
set.seed(123)

# Cluster analysis (fuzzy c-means)
library(e1071)
out <- cmeans(dat2,
              centers = 3,
              method = "cmeans")
out

## Fuzzy c-means clustering with 3 clusters
## 
## Cluster centers:
##   Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1     5.003966    3.414086     1.482821   0.2535486
## 2     6.775087    3.052404     5.646877   2.0535832
## 3     5.888995    2.761092     4.364044   1.3973624
## 
## Memberships:
##                  1            2            3
##   [1,] 0.996623580 0.0010720476 0.0023043725
##   [2,] 0.975854198 0.0074975340 0.0166482683
##   [3,] 0.979826999 0.0064143117 0.0137586897
##   [4,] 0.967429637 0.0101069797 0.0224633833
##   [5,] 0.994470394 0.0017679407 0.0037616650
##   [6,] 0.934577652 0.0206187077 0.0448036404
##   [7,] 0.979492786 0.0065042372 0.0140029765
##   [8,] 0.999547293 0.0001411972 0.0003115101
##   [9,] 0.930383959 0.0219013930 0.0477146481
##  [10,] 0.982724212 0.0053412480 0.0119345402
##  [11,] 0.968043858 0.0102005394 0.0217556028
##  [12,] 0.992137777 0.0024308977 0.0054313250
##  [13,] 0.970642026 0.0091767068 0.0201812669
##  [14,] 0.922973676 0.0252350133 0.0517913107
##  [15,] 0.889763041 0.0376304300 0.0726065290
##  [16,] 0.841350753 0.0543040753 0.1043451719
##  [17,] 0.946927545 0.0174949615 0.0355774934
##  [18,] 0.996652776 0.0010587845 0.0022884395
##  [19,] 0.904140359 0.0303065902 0.0655530510
##  [20,] 0.979189970 0.0066534079 0.0141566220
##  [21,] 0.968606737 0.0095396911 0.0218535715
##  [22,] 0.984833697 0.0047936657 0.0103726370
##  [23,] 0.958659460 0.0138324578 0.0275080817
##  [24,] 0.979429451 0.0061109401 0.0144596093
##  [25,] 0.966922047 0.0098374549 0.0232404981
##  [26,] 0.973570299 0.0079728721 0.0184568289
##  [27,] 0.994846111 0.0015730808 0.0035808081
##  [28,] 0.993348570 0.0020866529 0.0045647772
##  [29,] 0.993676217 0.0019959170 0.0043278663
##  [30,] 0.979517878 0.0062821315 0.0141999906
##  [31,] 0.978729605 0.0064729640 0.0147974308
##  [32,] 0.974366265 0.0079389587 0.0176947759
##  [33,] 0.938523322 0.0203333609 0.0411433168
##  [34,] 0.904174149 0.0323470833 0.0634787679
##  [35,] 0.985066558 0.0045936615 0.0103397803
##  [36,] 0.984994283 0.0048050309 0.0102006860
##  [37,] 0.964186243 0.0115636153 0.0242501420
##  [38,] 0.990891136 0.0029265305 0.0061823336
##  [39,] 0.939687689 0.0192316056 0.0410807052
##  [40,] 0.998289156 0.0005332111 0.0011776326
##  [41,] 0.994727792 0.0016883371 0.0035838708
##  [42,] 0.850743887 0.0469923323 0.1022637811
##  [43,] 0.952617977 0.0152077117 0.0321743114
##  [44,] 0.979289763 0.0063289551 0.0143812816
##  [45,] 0.945275105 0.0165995708 0.0381253240
##  [46,] 0.972148406 0.0086309426 0.0192206519
##  [47,] 0.976794242 0.0073646150 0.0158411431
##  [48,] 0.974222498 0.0081263353 0.0176511671
##  [49,] 0.977221457 0.0072592576 0.0155192857
##  [50,] 0.997072632 0.0009192756 0.0020080921
##  [51,] 0.044575390 0.5010985191 0.4543260912
##  [52,] 0.029157433 0.2067488401 0.7640937269
##  [53,] 0.031268337 0.5998953973 0.3688362662
##  [54,] 0.049363885 0.0804545367 0.8701815778
##  [55,] 0.024102548 0.2170728858 0.7588245663
##  [56,] 0.005740357 0.0204620551 0.9737975883
##  [57,] 0.029791713 0.2971161723 0.6730921145
##  [58,] 0.285213291 0.1323099022 0.5824768069
##  [59,] 0.031227914 0.2476727782 0.7210993079
##  [60,] 0.074778296 0.0947100784 0.8305116259
##  [61,] 0.218447249 0.1450648583 0.6364878925
##  [62,] 0.009187078 0.0287377544 0.9620751677
##  [63,] 0.055639603 0.1011258690 0.8432345285
##  [64,] 0.012107334 0.0881843310 0.8997083346
##  [65,] 0.091668782 0.0921954492 0.8161357685
##  [66,] 0.041715666 0.2683409346 0.6899433994
##  [67,] 0.014152739 0.0526661219 0.9331811396
##  [68,] 0.025904444 0.0483837511 0.9257118052
##  [69,] 0.027141508 0.1373894448 0.8354690468
##  [70,] 0.051576295 0.0707857497 0.8776379555
##  [71,] 0.027611651 0.2506570200 0.7217313295
##  [72,] 0.019499385 0.0462378247 0.9342627903
##  [73,] 0.023966516 0.2704944861 0.7055389983
##  [74,] 0.013928195 0.0832874528 0.9027843518
##  [75,] 0.022902157 0.1011551790 0.8759426641
##  [76,] 0.033935958 0.2112175932 0.7548464485
##  [77,] 0.033604923 0.4426406168 0.5237544601
##  [78,] 0.021189817 0.6723689291 0.3064412539
##  [79,] 0.004931495 0.0261606646 0.9689078400
##  [80,] 0.128349114 0.1046684816 0.7669824046
##  [81,] 0.077827905 0.0899015460 0.8322705487
##  [82,] 0.103805798 0.1007543919 0.7954398099
##  [83,] 0.030996163 0.0503206881 0.9186831488
##  [84,] 0.023967179 0.3196034298 0.6564293913
##  [85,] 0.026382525 0.0824433108 0.8911741646
##  [86,] 0.032137899 0.1706541086 0.7972079925
##  [87,] 0.033451993 0.4110337290 0.5555142780
##  [88,] 0.026862206 0.1154515053 0.8576862891
##  [89,] 0.024100705 0.0469966172 0.9289026773
##  [90,] 0.038193123 0.0623924369 0.8994144396
##  [91,] 0.019635779 0.0492323229 0.9311318979
##  [92,] 0.011611234 0.0726806512 0.9157081147
##  [93,] 0.022529480 0.0418033764 0.9356671441
##  [94,] 0.268968061 0.1327423182 0.5982896211
##  [95,] 0.012688586 0.0284548849 0.9588565286
##  [96,] 0.016790068 0.0376864607 0.9455234709
##  [97,] 0.009583298 0.0230247716 0.9673919302
##  [98,] 0.011404421 0.0447117790 0.9438838003
##  [99,] 0.355391789 0.1245117878 0.5200964237
## [100,] 0.012699822 0.0267539562 0.9605462219
## [101,] 0.019355834 0.8599048937 0.1207392723
## [102,] 0.029294215 0.3550229824 0.6156828026
## [103,] 0.006069621 0.9558111547 0.0381192239
## [104,] 0.012528454 0.8454589853 0.1420125606
## [105,] 0.004754170 0.9576012734 0.0376445563
## [106,] 0.035447764 0.8119129282 0.1526393082
## [107,] 0.072979714 0.1670296918 0.7599905944
## [108,] 0.021888794 0.8630051947 0.1151060111
## [109,] 0.013992332 0.8686534849 0.1173541835
## [110,] 0.024406891 0.8610337112 0.1145593974
## [111,] 0.016768216 0.7732483685 0.2099834160
## [112,] 0.015758969 0.7610002947 0.2232407368
## [113,] 0.001167641 0.9888025197 0.0100298389
## [114,] 0.034360954 0.3055994936 0.6600395523
## [115,] 0.038369668 0.5005573496 0.4610729829
## [116,] 0.014066295 0.8498082625 0.1361254422
## [117,] 0.007165079 0.9131562680 0.0796786528
## [118,] 0.050575015 0.7636357389 0.1857892463
## [119,] 0.049169990 0.7576028397 0.1932271708
## [120,] 0.032183132 0.2570294165 0.7107874516
## [121,] 0.003824296 0.9704952708 0.0256804333
## [122,] 0.033635988 0.2593202234 0.7070437882
## [123,] 0.041958728 0.7832435017 0.1747977699
## [124,] 0.022797828 0.3813198112 0.5958823608
## [125,] 0.002912586 0.9753479014 0.0217395127
## [126,] 0.012900286 0.9119637573 0.0751359569
## [127,] 0.020972608 0.2711385934 0.7078887981
## [128,] 0.023086476 0.3298918286 0.6470216957
## [129,] 0.008360432 0.9085942637 0.0830453043
## [130,] 0.014440942 0.8908272627 0.0947317954
## [131,] 0.019777706 0.8728073458 0.1074149480
## [132,] 0.050890458 0.7601429065 0.1889666354
## [133,] 0.008958505 0.9061619076 0.0848795878
## [134,] 0.023387830 0.4362802409 0.5403319289
## [135,] 0.031184370 0.5760120827 0.3928035475
## [136,] 0.028650566 0.8396094166 0.1317400171
## [137,] 0.017226810 0.8540521833 0.1287210070
## [138,] 0.009787228 0.8800732944 0.1101394775
## [139,] 0.021717466 0.2282862466 0.7499962870
## [140,] 0.003490065 0.9675360046 0.0289739300
## [141,] 0.005076420 0.9572456314 0.0376779486
## [142,] 0.015403032 0.8550805954 0.1295163723
## [143,] 0.029294215 0.3550229824 0.6156828026
## [144,] 0.005245027 0.9610449473 0.0337100255
## [145,] 0.009698265 0.9271306711 0.0631710643
## [146,] 0.011265043 0.8823047596 0.1064301971
## [147,] 0.025795862 0.4665656060 0.5076385319
## [148,] 0.012118086 0.8313543632 0.1565275508
## [149,] 0.021583723 0.7893315152 0.1890847616
## [150,] 0.026917533 0.3911901775 0.5818922899
## 
## Closest hard clustering:
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
##  [75] 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 2 2 2 2 3 2 2 2 2
## [112] 2 2 3 2 2 2 2 2 3 2 3 2 3 2 2 3 3 2 2 2 2 2 3 2 2 2 2 3 2 2 2 3 2 2 2 3 2
## [149] 2 3
## 
## Available components:
## [1] "centers"     "size"        "cluster"     "membership"  "iter"       
## [6] "withinerror" "call"

7 【参考】階層的クラスター分析を一括で行う自作関数を用いる

階層的クラスター分析は非常にオプションが多く、かつステップも多いので、それらを一つの関数に統合した自作関数を用いて分析を行う。

はじめに以下の関数を読み込む（そのままスクリプトに貼り付け、実行すればよい）。

#' Hierarchical cluster analysis in a single call
#'
#' Computes a distance matrix with proxy::dist(), clusters it with hclust(),
#' cuts the tree into `num.clust` groups and, on request, adds a per-cluster
#' summary table and cluster validation statistics. The cluster membership
#' vector is always printed.
#'
#' @param dat Data to cluster (data frame or matrix, one case per row).
#'   Required. The previous default `dat = dat` referred to itself and could
#'   never be evaluated, so omitting `dat` already failed.
#' @param dist.method Distance method name passed to proxy::dist().
#' @param clust.method Agglomeration method passed to hclust().
#' @param num.clust Number of clusters to cut the tree into.
#' @param plot.dendrogram If TRUE, plot the dendrogram.
#' @param group.summary If TRUE, build a table with the cluster sizes (`n`) and
#'   the per-cluster mean of every column, plus a final row `p` holding the
#'   p-value of oneway.test() for each column. oneway.test() is called with
#'   its defaults, i.e. Welch's test rather than the equal-variance ANOVA.
#'   A p-value that cannot be computed is reported as NA with a warning.
#' @param clust.stat If TRUE, compute fpc::cluster.stats().
#' @return Invisibly, a list with `gr` (cluster membership), `gr.table`
#'   (summary table or NULL) and `cluster.stat` (validation statistics or NULL).
myclust <- function(dat,
                    dist.method = "Euclidean",
                    clust.method = "ward.D2",
                    num.clust = 3,
                    plot.dendrogram = FALSE,
                    group.summary = FALSE,
                    clust.stat = FALSE) {
  if (missing(dat)) {
    stop("`dat` must be supplied (a data frame or matrix with one case per row).",
         call. = FALSE)
  }

  # Distance matrix
  D <- proxy::dist(dat, method = dist.method)
  # Hierarchical clustering
  C <- hclust(D, method = clust.method)
  # Dendrogram
  if (isTRUE(plot.dendrogram)) {
    plot(C)
  }
  # Cluster membership
  gr <- cutree(C, k = num.clust)

  # Summary table
  if (isTRUE(group.summary)) {
    gr.n <- table(gr)
    p <- ncol(dat)
    group.table <- matrix(NA_real_, nrow = num.clust, ncol = p)
    colnames(group.table) <- colnames(dat)
    rownames(group.table) <- seq_len(num.clust)
    aov.result <- rep(NA_real_, p)
    for (i in seq_len(p)) {
      group.table[, i] <- tapply(dat[, i], gr, mean, na.rm = TRUE)
      # A failing test (e.g. only one cluster) must not abort the whole
      # analysis: report it and leave the p-value as NA.
      aov.result[i] <- tryCatch(
        oneway.test(dat[, i] ~ gr)$p.value,
        error = function(e) {
          warning("oneway.test() failed for column ", i, ": ",
                  conditionMessage(e), call. = FALSE)
          NA_real_
        }
      )
    }
    summary.table <- cbind(
      n = c(gr.n, p = NA),
      rbind(group.table, p = aov.result)
    )
  } else {
    summary.table <- NULL
  }

  # Cluster validation statistics
  if (isTRUE(clust.stat)) {
    clust.eval <- fpc::cluster.stats(d = D, clustering = gr)
  } else {
    clust.eval <- NULL
  }

  # Output
  print(gr)
  out <- list(gr = gr,
              gr.table = summary.table,
              cluster.stat = clust.eval)
  invisible(out)
}

つぎに、この自作関数で用いているパッケージも読み込んでおく。

# パッケージを読み込む
library(proxy); library(fpc)

あとは通常通りクラスター分析を行う。

# Data
dat <- iris[, 1:4]

# Cluster analysis
out <- myclust(dat = dat,                  # input data
               dist.method = "Euclidean",  # distance measure (passed to proxy::dist)
               clust.method = "ward.D2",   # clustering method (passed to hclust)
               num.clust = 3,              # number of clusters
               plot.dendrogram = FALSE,    # draw the dendrogram?
               group.summary = TRUE,       # per-cluster feature table? (p: oneway.test, Welch by default)
               clust.stat = TRUE)          # compute cluster validation statistics?

##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##  [75] 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 3 3 3 3 2 3 3 3 3
## [112] 3 3 2 2 3 3 3 3 2 3 2 3 2 3 3 2 2 3 3 3 3 3 2 2 3 3 3 2 3 3 3 2 3 3 3 2 3
## [149] 3 2

out$gr            # cluster membership

##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
##  [75] 2 2 2 3 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 3 2 3 3 3 3 2 3 3 3 3
## [112] 3 3 2 2 3 3 3 3 2 3 2 3 2 3 3 2 2 3 3 3 3 3 2 2 3 3 3 2 3 3 3 2 3 3 3 2 3
## [149] 3 2

out$gr.table      # per-cluster feature table

##    n Sepal.Length  Sepal.Width Petal.Length  Petal.Width
## 1 50 5.006000e+00 3.428000e+00 1.462000e+00 2.460000e-01
## 2 64 5.920312e+00 2.751562e+00 4.420312e+00 1.434375e+00
## 3 36 6.869444e+00 3.086111e+00 5.769444e+00 2.105556e+00
## p NA 6.292119e-33 2.905386e-16 5.045025e-63 3.119516e-57

out$cluster.stat  # cluster validation statistics

## $n
## [1] 150
## 
## $cluster.number
## [1] 3
## 
## $cluster.size
## [1] 50 64 36
## 
## $min.cluster.size
## [1] 36
## 
## $noisen
## [1] 0
## 
## $diameter
## [1] 2.428992 2.803569 2.267157
## 
## $average.distance
## [1] 0.6968169 1.0528456 0.9941680
## 
## $median.distance
## [1] 0.6164414 0.9949874 0.8831761
## 
## $separation
## [1] 1.6401219 0.3162278 0.3162278
## 
## $average.toother
## [1] 4.062683 2.840353 3.343346
## 
## $separation.matrix
##          [,1]      [,2]      [,3]
## [1,] 0.000000 1.6401219 3.8065733
## [2,] 1.640122 0.0000000 0.3162278
## [3,] 3.806573 0.3162278 0.0000000
## 
## $ave.between.matrix
##          [,1]     [,2]     [,3]
## [1,] 0.000000 3.472754 5.111445
## [2,] 3.472754 0.000000 1.962019
## [3,] 5.111445 1.962019 0.000000
## 
## $average.between
## [1] 3.400042
## 
## $average.within
## [1] 0.9200867
## 
## $n.between
## [1] 7304
## 
## $n.within
## [1] 3871
## 
## $max.diameter
## [1] 2.803569
## 
## $min.separation
## [1] 0.3162278
## 
## $within.cluster.ss
## [1] 79.29713
## 
## $clus.avg.silwidths
##         1         2         3 
## 0.7997791 0.4081896 0.4732071 
## 
## $avg.silwidth
## [1] 0.5543237
## 
## $g2
## NULL
## 
## $g3
## NULL
## 
## $pearsongamma
## [1] 0.7189928
## 
## $dunn
## [1] 0.1127947
## 
## $dunn2
## [1] 1.863539
## 
## $entropy
## [1] 1.072126
## 
## $wb.ratio
## [1] 0.2706104
## 
## $ch
## [1] 558.058
## 
## $cwidegap
## [1] 0.6244998 0.7348469 0.8185353
## 
## $widestgap
## [1] 0.8185353
## 
## $sindex
## [1] 0.4523616
## 
## $corrected.rand
## NULL
## 
## $vi
## NULL

参考文献

新納浩幸, 2007, 『Rで学ぶクラスタ解析』オーム社．

クラスター分析

佐藤圭一

2023-10-19