Import the dataset

library(readr)
schoolExpenditure <- read_csv("C:/Users/dnred/Downloads/schoolExpenditure.csv")
## Rows: 220 Columns: 14
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (14): expreg, expspecial, expbil, expocc, exptot, scratio, special, lunc...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(schoolExpenditure)

Load the packages

library(mlbench)
## Warning: package 'mlbench' was built under R version 4.3.3
library(e1071)
## Warning: package 'e1071' was built under R version 4.3.3
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: ggplot2
## Loading required package: lattice
library(dendextend)
## Warning: package 'dendextend' was built under R version 4.3.3
## 
## ---------------------
## Welcome to dendextend version 1.17.1
## Type citation('dendextend') for how to cite the package.
## 
## Type browseVignettes(package = 'dendextend') for the package vignette.
## The github page is: https://github.com/talgalili/dendextend/
## 
## Suggestions and bug-reports can be submitted at: https://github.com/talgalili/dendextend/issues
## You may ask questions at stackoverflow, use the r and dendextend tags: 
##   https://stackoverflow.com/questions/tagged/dendextend
## 
##  To suppress this message use:  suppressPackageStartupMessages(library(dendextend))
## ---------------------
## 
## Attaching package: 'dendextend'
## The following object is masked from 'package:stats':
## 
##     cutree
library(cluster)
library(fpc)
## Warning: package 'fpc' was built under R version 4.3.3
library(clValid)
## Warning: package 'clValid' was built under R version 4.3.3

Statistical Analysis

se <- schoolExpenditure
str(se)
## spc_tbl_ [220 × 14] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ expreg    : num [1:220] 4201 4129 3627 4015 4273 ...
##  $ expspecial: num [1:220] 7376 8574 8082 8181 7037 ...
##  $ expbil    : num [1:220] 0 0 0 0 0 ...
##  $ expocc    : num [1:220] 0 0 0 0 0 ...
##  $ exptot    : num [1:220] 4646 4930 4281 4826 4824 ...
##  $ scratio   : num [1:220] 16.6 5.7 7.5 8.6 6.1 ...
##  $ special   : num [1:220] 14.6 17.4 12.1 21.1 16.8 ...
##  $ lunch     : num [1:220] 11.8 2.5 14.1 12.1 17.4 ...
##  $ stratio   : num [1:220] 19 22.6 19.3 17.9 17.5 ...
##  $ income    : num [1:220] 16.4 25.8 14 16.1 15.4 ...
##  $ score4    : num [1:220] 714 731 704 704 701 714 725 717 702 701 ...
##  $ score8    : num [1:220] 691 NA 693 691 699 NA 728 715 705 688 ...
##  $ salary    : num [1:220] 34.4 38.1 32.5 33.1 34.4 ...
##  $ english   : num [1:220] 0 1.246 0 0.323 0 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   expreg = col_double(),
##   ..   expspecial = col_double(),
##   ..   expbil = col_double(),
##   ..   expocc = col_double(),
##   ..   exptot = col_double(),
##   ..   scratio = col_double(),
##   ..   special = col_double(),
##   ..   lunch = col_double(),
##   ..   stratio = col_double(),
##   ..   income = col_double(),
##   ..   score4 = col_double(),
##   ..   score8 = col_double(),
##   ..   salary = col_double(),
##   ..   english = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
View(se)
#summarize the dataset
summary(se)
##      expreg       expspecial        expbil           expocc          exptot    
##  Min.   :2905   Min.   : 3832   Min.   :     0   Min.   :    0   Min.   :3465  
##  1st Qu.:4065   1st Qu.: 7442   1st Qu.:     0   1st Qu.:    0   1st Qu.:4730  
##  Median :4488   Median : 8354   Median :     0   Median :    0   Median :5155  
##  Mean   :4605   Mean   : 8901   Mean   :  3037   Mean   : 1104   Mean   :5370  
##  3rd Qu.:4972   3rd Qu.: 9722   3rd Qu.:     0   3rd Qu.:    0   3rd Qu.:5789  
##  Max.   :8759   Max.   :53569   Max.   :295140   Max.   :15088   Max.   :9868  
##                                                                                
##     scratio          special          lunch          stratio     
##  Min.   : 2.300   Min.   : 8.10   Min.   : 0.40   Min.   :11.40  
##  1st Qu.: 6.100   1st Qu.:13.38   1st Qu.: 5.30   1st Qu.:15.80  
##  Median : 7.800   Median :15.45   Median :10.55   Median :17.10  
##  Mean   : 8.107   Mean   :15.97   Mean   :15.32   Mean   :17.34  
##  3rd Qu.: 9.800   3rd Qu.:17.93   3rd Qu.:20.02   3rd Qu.:19.02  
##  Max.   :18.400   Max.   :34.30   Max.   :76.20   Max.   :27.00  
##  NA's   :9                                                       
##      income           score4          score8          salary     
##  Min.   : 9.686   Min.   :658.0   Min.   :641.0   Min.   :24.96  
##  1st Qu.:15.223   1st Qu.:701.0   1st Qu.:685.0   1st Qu.:33.80  
##  Median :17.128   Median :711.0   Median :698.0   Median :35.88  
##  Mean   :18.747   Mean   :709.8   Mean   :698.4   Mean   :35.99  
##  3rd Qu.:20.376   3rd Qu.:720.0   3rd Qu.:712.0   3rd Qu.:37.96  
##  Max.   :46.855   Max.   :740.0   Max.   :747.0   Max.   :44.49  
##                                   NA's   :40      NA's   :25     
##     english       
##  Min.   : 0.0000  
##  1st Qu.: 0.0000  
##  Median : 0.0000  
##  Mean   : 1.1177  
##  3rd Qu.: 0.8859  
##  Max.   :24.4939  
## 
#display first 20 rows of data
head(se, n=20)
## # A tibble: 20 × 14
##    expreg expspecial expbil expocc exptot scratio special lunch stratio income
##     <dbl>      <dbl>  <dbl>  <dbl>  <dbl>   <dbl>   <dbl> <dbl>   <dbl>  <dbl>
##  1   4201      7376.      0      0   4646   16.6     14.6 11.8     19     16.4
##  2   4129      8574.      0      0   4930    5.70    17.4  2.5     22.6   25.8
##  3   3627      8082.      0      0   4281    7.5     12.1 14.1     19.3   14.0
##  4   4015      8181.      0      0   4826    8.60    21.1 12.1     17.9   16.1
##  5   4273      7037.      0      0   4824    6.10    16.8 17.4     17.5   15.4
##  6   5183     10596.   6235      0   6454    7.70    17.2 26.8     15.7   11.1
##  7   4685     12280.      0      0   5537    5.40    11.3  3.30    17.1   26.3
##  8   5518     10055.      0      0   6405    7.10    20.4 11.2     16.8   21.4
##  9   5009      8841.      0      0   5649   10.6     13.9  8.60    17.3   21.9
## 10   3823      9547.  12943  11519   4814    6.70    13.2 20.7     20.5   15.0
## 11   4625      8213.      0      0   5210   12.5     11.1 10.7     15.6   17.5
## 12   4777     11875.      0      0   5615    7.60    13.3 11.9     15.5   16.2
## 13   6554     12105.      0      0   7389    4.20    16.8 23.9     11.4   14.6
## 14   4484      9321.   1741      0   5323    8.80    14.9 18.7     17.6   17.4
## 15   6240     10993.      0      0   7234    4.80    17.2  2.90    14.8   24.6
## 16   4254      7516.   5281   1470   5048    9.40    20    8.10    19.6   15.9
## 17   5323      8725.      0      0   6065   15       15.1  6.5     17.3   26.8
## 18   3079      7755.      0      0   3930   13.8     19.6  5.60    20.1   14.6
## 19   4836     10220.      0      0   6121    6       16.6  7.5     14.9   19.1
## 20   4205      8288.      0      0   4961    9.80    17.3 15.3     17     18.4
## # ℹ 4 more variables: score4 <dbl>, score8 <dbl>, salary <dbl>, english <dbl>
#display the dimensions of the dataset
dim(se)
## [1] 220  14
#list types for each attribute
sapply(se, class)
##     expreg expspecial     expbil     expocc     exptot    scratio    special 
##  "numeric"  "numeric"  "numeric"  "numeric"  "numeric"  "numeric"  "numeric" 
##      lunch    stratio     income     score4     score8     salary    english 
##  "numeric"  "numeric"  "numeric"  "numeric"  "numeric"  "numeric"  "numeric"
# distribution of class variable
y <- se$expreg
cbind(freq=table(y), percentage=prop.table(table(y))*100)
##      freq percentage
## 2905    1  0.4545455
## 2956    1  0.4545455
## 3023    1  0.4545455
## 3079    1  0.4545455
## 3149    1  0.4545455
## 3287    1  0.4545455
## 3326    1  0.4545455
## 3333    1  0.4545455
## 3337    1  0.4545455
## 3400    1  0.4545455
## 3407    1  0.4545455
## 3465    1  0.4545455
## 3522    1  0.4545455
## 3533    1  0.4545455
## 3555    1  0.4545455
## 3580    1  0.4545455
## 3614    1  0.4545455
## 3627    1  0.4545455
## 3647    1  0.4545455
## 3679    2  0.9090909
## 3687    1  0.4545455
## 3693    1  0.4545455
## 3703    1  0.4545455
## 3704    1  0.4545455
## 3744    1  0.4545455
## 3747    1  0.4545455
## 3748    1  0.4545455
## 3795    1  0.4545455
## 3802    1  0.4545455
## 3823    2  0.9090909
## 3839    1  0.4545455
## 3845    1  0.4545455
## 3850    1  0.4545455
## 3858    1  0.4545455
## 3862    1  0.4545455
## 3867    1  0.4545455
## 3903    1  0.4545455
## 3905    1  0.4545455
## 3906    1  0.4545455
## 3908    1  0.4545455
## 3913    2  0.9090909
## 3928    1  0.4545455
## 3935    1  0.4545455
## 3969    1  0.4545455
## 3979    1  0.4545455
## 3986    1  0.4545455
## 4009    1  0.4545455
## 4013    1  0.4545455
## 4015    1  0.4545455
## 4016    1  0.4545455
## 4034    1  0.4545455
## 4062    1  0.4545455
## 4066    1  0.4545455
## 4071    1  0.4545455
## 4078    1  0.4545455
## 4079    1  0.4545455
## 4100    2  0.9090909
## 4105    1  0.4545455
## 4106    1  0.4545455
## 4122    1  0.4545455
## 4129    1  0.4545455
## 4134    1  0.4545455
## 4138    1  0.4545455
## 4150    1  0.4545455
## 4166    1  0.4545455
## 4177    1  0.4545455
## 4189    1  0.4545455
## 4201    1  0.4545455
## 4204    1  0.4545455
## 4205    2  0.9090909
## 4221    2  0.9090909
## 4234    1  0.4545455
## 4239    1  0.4545455
## 4240    1  0.4545455
## 4242    1  0.4545455
## 4247    1  0.4545455
## 4254    1  0.4545455
## 4255    1  0.4545455
## 4263    1  0.4545455
## 4267    1  0.4545455
## 4271    1  0.4545455
## 4273    1  0.4545455
## 4283    1  0.4545455
## 4287    1  0.4545455
## 4297    1  0.4545455
## 4312    2  0.9090909
## 4328    1  0.4545455
## 4344    1  0.4545455
## 4389    1  0.4545455
## 4398    1  0.4545455
## 4411    1  0.4545455
## 4413    1  0.4545455
## 4415    1  0.4545455
## 4436    1  0.4545455
## 4439    1  0.4545455
## 4440    1  0.4545455
## 4442    2  0.9090909
## 4447    1  0.4545455
## 4464    1  0.4545455
## 4465    1  0.4545455
## 4472    1  0.4545455
## 4484    1  0.4545455
## 4493    1  0.4545455
## 4494    1  0.4545455
## 4500    1  0.4545455
## 4506    1  0.4545455
## 4511    1  0.4545455
## 4512    2  0.9090909
## 4521    1  0.4545455
## 4529    1  0.4545455
## 4537    1  0.4545455
## 4539    1  0.4545455
## 4545    1  0.4545455
## 4557    1  0.4545455
## 4558    1  0.4545455
## 4562    1  0.4545455
## 4564    1  0.4545455
## 4566    1  0.4545455
## 4578    1  0.4545455
## 4579    1  0.4545455
## 4586    1  0.4545455
## 4598    1  0.4545455
## 4606    1  0.4545455
## 4609    1  0.4545455
## 4611    1  0.4545455
## 4617    1  0.4545455
## 4623    2  0.9090909
## 4625    1  0.4545455
## 4638    1  0.4545455
## 4640    1  0.4545455
## 4643    1  0.4545455
## 4660    1  0.4545455
## 4682    1  0.4545455
## 4685    1  0.4545455
## 4686    1  0.4545455
## 4707    1  0.4545455
## 4716    1  0.4545455
## 4754    1  0.4545455
## 4756    1  0.4545455
## 4764    1  0.4545455
## 4777    1  0.4545455
## 4793    1  0.4545455
## 4810    1  0.4545455
## 4821    1  0.4545455
## 4825    1  0.4545455
## 4832    1  0.4545455
## 4836    1  0.4545455
## 4859    2  0.9090909
## 4899    1  0.4545455
## 4922    1  0.4545455
## 4932    1  0.4545455
## 4945    1  0.4545455
## 4961    2  0.9090909
## 5004    1  0.4545455
## 5006    1  0.4545455
## 5009    1  0.4545455
## 5014    1  0.4545455
## 5048    1  0.4545455
## 5066    1  0.4545455
## 5087    1  0.4545455
## 5107    1  0.4545455
## 5146    1  0.4545455
## 5152    1  0.4545455
## 5183    1  0.4545455
## 5196    1  0.4545455
## 5242    1  0.4545455
## 5254    1  0.4545455
## 5273    1  0.4545455
## 5304    1  0.4545455
## 5323    1  0.4545455
## 5338    1  0.4545455
## 5345    1  0.4545455
## 5372    1  0.4545455
## 5384    1  0.4545455
## 5387    1  0.4545455
## 5396    1  0.4545455
## 5455    1  0.4545455
## 5457    1  0.4545455
## 5518    1  0.4545455
## 5548    1  0.4545455
## 5556    1  0.4545455
## 5558    1  0.4545455
## 5608    1  0.4545455
## 5613    1  0.4545455
## 5631    1  0.4545455
## 5690    1  0.4545455
## 5764    1  0.4545455
## 5770    1  0.4545455
## 5859    1  0.4545455
## 5896    1  0.4545455
## 5937    1  0.4545455
## 5978    1  0.4545455
## 6049    1  0.4545455
## 6152    1  0.4545455
## 6158    1  0.4545455
## 6240    1  0.4545455
## 6277    1  0.4545455
## 6337    1  0.4545455
## 6370    1  0.4545455
## 6415    1  0.4545455
## 6554    1  0.4545455
## 6672    2  0.9090909
## 6881    1  0.4545455
## 6902    1  0.4545455
## 7763    1  0.4545455
## 7944    1  0.4545455
## 8759    1  0.4545455
#calculate standard deviation for all attributes
sapply(se[,1:14], sd)
##       expreg   expspecial       expbil       expocc       exptot      scratio 
##   880.252436  3511.696142 20259.258149  2732.448792   977.040427           NA 
##      special        lunch      stratio       income       score4       score8 
##     3.538000    15.060068     2.276666     5.807637    15.126474           NA 
##       salary      english 
##           NA     2.900940
#calculate skewness for each variable
skew <- apply(se[,1:14], 2, skewness)
# display skewness, larger/smaller deviations from 0 show more skew
print(skew)
##     expreg expspecial     expbil     expocc     exptot    scratio    special 
##  1.2853907  9.4422682 13.6557024  2.6068895  1.3033470         NA  0.9859238 
##      lunch    stratio     income     score4     score8     salary    english 
##  1.9386044  0.3256516  1.7373340 -0.5278722         NA         NA  4.5268155
#calculate a correlation matrix for numeric variables
correlations <- cor(se[,1:8])
#display the correlation matrix
print(correlations)
##                   expreg  expspecial        expbil     expocc      exptot
## expreg      1.0000000000  0.40147653 -0.0001108855 0.06408180 0.965985748
## expspecial  0.4014765288  1.00000000 -0.0422537622 0.03031629 0.442009487
## expbil     -0.0001108855 -0.04225376  1.0000000000 0.05660895 0.001748274
## expocc      0.0640818035  0.03031629  0.0566089499 1.00000000 0.142213495
## exptot      0.9659857477  0.44200949  0.0017482739 0.14221350 1.000000000
## scratio               NA          NA            NA         NA          NA
## special     0.0071764034 -0.07084139  0.1278220698 0.09571385 0.055256770
## lunch      -0.0210092797  0.03905312  0.0628743969 0.52063374 0.070473676
##            scratio      special       lunch
## expreg          NA  0.007176403 -0.02100928
## expspecial      NA -0.070841393  0.03905312
## expbil          NA  0.127822070  0.06287440
## expocc          NA  0.095713851  0.52063374
## exptot          NA  0.055256770  0.07047368
## scratio          1           NA          NA
## special         NA  1.000000000  0.19377989
## lunch           NA  0.193779894  1.00000000

Removing null values

any(is.na(se))
## [1] TRUE
se<-na.omit(se)
str(se)
## tibble [155 × 14] (S3: tbl_df/tbl/data.frame)
##  $ expreg    : num [1:155] 4201 3627 4015 4273 4685 ...
##  $ expspecial: num [1:155] 7376 8082 8181 7037 12280 ...
##  $ expbil    : num [1:155] 0 0 0 0 0 ...
##  $ expocc    : num [1:155] 0 0 0 0 0 ...
##  $ exptot    : num [1:155] 4646 4281 4826 4824 5537 ...
##  $ scratio   : num [1:155] 16.6 7.5 8.6 6.1 5.4 ...
##  $ special   : num [1:155] 14.6 12.1 21.1 16.8 11.3 ...
##  $ lunch     : num [1:155] 11.8 14.1 12.1 17.4 3.3 ...
##  $ stratio   : num [1:155] 19 19.3 17.9 17.5 17.1 ...
##  $ income    : num [1:155] 16.4 14 16.1 15.4 26.3 ...
##  $ score4    : num [1:155] 714 704 704 701 725 717 702 701 713 703 ...
##  $ score8    : num [1:155] 691 693 691 699 728 715 705 688 703 680 ...
##  $ salary    : num [1:155] 34.4 32.5 33.1 34.4 41.6 ...
##  $ english   : num [1:155] 0 0 0.323 0 0 ...
##  - attr(*, "na.action")= 'omit' Named int [1:65] 2 6 12 19 21 24 25 26 28 29 ...
##   ..- attr(*, "names")= chr [1:65] "2" "6" "12" "19" ...
View(se)

Scale Data

#summarize data
summary(se[,1:4])
##      expreg       expspecial        expbil           expocc     
##  Min.   :3023   Min.   : 3832   Min.   :     0   Min.   :    0  
##  1st Qu.:4144   1st Qu.: 7421   1st Qu.:     0   1st Qu.:    0  
##  Median :4512   Median : 8288   Median :     0   Median :    0  
##  Mean   :4637   Mean   : 8592   Mean   :  4119   Mean   : 1318  
##  3rd Qu.:5008   3rd Qu.: 9540   3rd Qu.:  3628   3rd Qu.:    0  
##  Max.   :7944   Max.   :15741   Max.   :295140   Max.   :15088
#calculate the pre-process parameters from the dataset
preprocessParams <- preProcess(se[,1:4], method=c("scale"))
#summarize transform parameters
print(preprocessParams)
## Created from 155 samples and 4 variables
## 
## Pre-processing:
##   - ignored (0)
##   - scaled (4)
#transform the dataset using the parameters
transformed <- predict(preprocessParams, se[,1:4])
#summarize the transformed dataset
summary(transformed)
##      expreg         expspecial        expbil            expocc      
##  Min.   : 3.931   Min.   :2.242   Min.   : 0.0000   Min.   :0.0000  
##  1st Qu.: 5.389   1st Qu.:4.341   1st Qu.: 0.0000   1st Qu.:0.0000  
##  Median : 5.867   Median :4.849   Median : 0.0000   Median :0.0000  
##  Mean   : 6.030   Mean   :5.027   Mean   : 0.1712   Mean   :0.4537  
##  3rd Qu.: 6.512   3rd Qu.:5.581   3rd Qu.: 0.1508   3rd Qu.:0.0000  
##  Max.   :10.330   Max.   :9.208   Max.   :12.2714   Max.   :5.1929

Center Data

#calculate the pre-process parameters from the dataset
preprocessParams <- preProcess(se[,1:4], method=c("center"))
#summarize transform parameters
print(preprocessParams)
## Created from 155 samples and 4 variables
## 
## Pre-processing:
##   - centered (4)
##   - ignored (0)
#transform the dataset using the parameters
transformed <- predict(preprocessParams, se[,1:4])
#summarize the transformed dataset
summary(transformed)
##      expreg          expspecial          expbil             expocc     
##  Min.   :-1614.4   Min.   :-4760.0   Min.   : -4118.6   Min.   :-1318  
##  1st Qu.: -493.4   1st Qu.:-1171.6   1st Qu.: -4118.6   1st Qu.:-1318  
##  Median : -125.4   Median : -303.8   Median : -4118.6   Median :-1318  
##  Mean   :    0.0   Mean   :    0.0   Mean   :     0.0   Mean   :    0  
##  3rd Qu.:  370.1   3rd Qu.:  947.3   3rd Qu.:  -490.6   3rd Qu.:-1318  
##  Max.   : 3306.6   Max.   : 7148.4   Max.   :291021.4   Max.   :13770

Standardize Data

#calculate the pre-process parameters from the dataset
preprocessParams <- preProcess(se[,1:4], method=c("center", "scale"))
#summarize transform parameters
print(preprocessParams)
## Created from 155 samples and 4 variables
## 
## Pre-processing:
##   - centered (4)
##   - ignored (0)
##   - scaled (4)
#transform the dataset using the parameters
transformed <- predict(preprocessParams, se[,1:4])
#summarize the transformed dataset
summary(transformed)
##      expreg          expspecial          expbil            expocc       
##  Min.   :-2.0993   Min.   :-2.7847   Min.   :-0.1712   Min.   :-0.4537  
##  1st Qu.:-0.6416   1st Qu.:-0.6854   1st Qu.:-0.1712   1st Qu.:-0.4537  
##  Median :-0.1631   Median :-0.1777   Median :-0.1712   Median :-0.4537  
##  Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000   Mean   : 0.0000  
##  3rd Qu.: 0.4813   3rd Qu.: 0.5542   3rd Qu.:-0.0204   3rd Qu.:-0.4537  
##  Max.   : 4.2999   Max.   : 4.1819   Max.   :12.1002   Max.   : 4.7391

Normalize Data

#calculate the pre-process parameters from the dataset
preprocessParams <- preProcess(se[,1:4], method=c("range"))
#summarize transform parameters
print(preprocessParams)
## Created from 155 samples and 4 variables
## 
## Pre-processing:
##   - ignored (0)
##   - re-scaling to [0, 1] (4)
#transform the dataset using the parameters
transformed <- predict(preprocessParams, se[,1:4])
#summarize the transformed dataset
summary(transformed)
##      expreg         expspecial         expbil            expocc       
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.00000   Min.   :0.00000  
##  1st Qu.:0.2278   1st Qu.:0.3013   1st Qu.:0.00000   1st Qu.:0.00000  
##  Median :0.3026   Median :0.3742   Median :0.00000   Median :0.00000  
##  Mean   :0.3281   Mean   :0.3997   Mean   :0.01395   Mean   :0.08738  
##  3rd Qu.:0.4033   3rd Qu.:0.4793   3rd Qu.:0.01229   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.00000   Max.   :1.00000

K VALUE OF 6

Hierarchical Clustering

df_se<-as.data.frame(scale(se))
str(df_se)
## 'data.frame':    155 obs. of  14 variables:
##  $ expreg    : num  -0.5675 -1.3139 -0.8094 -0.4739 0.0619 ...
##  $ expspecial: num  -0.712 -0.299 -0.24 -0.91 2.157 ...
##  $ expbil    : num  -0.171 -0.171 -0.171 -0.171 -0.171 ...
##  $ expocc    : num  -0.454 -0.454 -0.454 -0.454 -0.454 ...
##  $ exptot    : num  -0.864 -1.288 -0.655 -0.657 0.171 ...
##  $ scratio   : num  3.173 -0.254 0.161 -0.781 -1.044 ...
##  $ special   : num  -0.415 -1.181 1.575 0.258 -1.426 ...
##  $ lunch     : num  -0.2955 -0.157 -0.2774 0.0418 -0.8074 ...
##  $ stratio   : num  0.7484 0.8861 0.2434 0.0598 -0.1238 ...
##  $ income    : num  -0.366 -0.801 -0.416 -0.544 1.483 ...
##  $ score4    : num  0.352 -0.27 -0.27 -0.456 1.036 ...
##  $ score8    : num  -0.309 -0.215 -0.309 0.064 1.415 ...
##  $ salary    : num  -0.561 -1.14 -0.949 -0.537 1.688 ...
##  $ english   : num  -0.413 -0.413 -0.314 -0.413 -0.413 ...
dist_mat<-dist(df_se, method='euclidean')
#View(dist_mat)
hclust_avg<-hclust(dist_mat, method='average')
plot(hclust_avg, hang=- 0.1, cex=0.3)

cut_avg<-cutree(hclust_avg, k=6)
plot(hclust_avg)
rect.hclust(hclust_avg , k = 6, border = 2:6)
abline(h = 6, col = "red")

avg_dend_obj <- as.dendrogram(hclust_avg)
avg_col_dend <- color_branches(avg_dend_obj, h = 6)
plot(avg_col_dend)

K-Means Clustering

#setting a seed for the random number generator
set.seed (1234)
se3 <- kmeans(se, center=6, iter.max=200)
se3
## K-means clustering with 6 clusters of sizes 10, 16, 57, 49, 1, 22
## 
## Cluster means:
##     expreg expspecial       expbil     expocc   exptot  scratio  special
## 1 4688.700   8486.307  15791.10000  548.50000 5606.200 8.470000 17.46000
## 2 6035.875  11212.531   1442.93750   43.87500 6983.688 6.143750 15.08750
## 3 4138.702   7219.053    486.54386   77.52632 4742.246 8.303509 16.32105
## 4 4790.531   9078.824     35.53061  582.87755 5518.265 8.628571 15.01837
## 5 4239.000   6270.070 295140.00000    0.00000 4757.000 9.900000 20.70000
## 6 4566.091   9314.210   6035.18182 7508.18182 5551.545 8.086364 16.83636
##      lunch  stratio   income   score4   score8   salary   english
## 1 18.17000 17.16000 18.37410 706.3000 699.5000 37.63980 1.6255800
## 2 11.53750 15.39375 24.94112 719.3125 717.0000 39.36941 1.8215147
## 3 13.56316 17.71579 16.96451 709.1404 697.4035 35.36532 0.4777922
## 4 10.23469 16.97551 19.76882 713.2653 704.4490 36.08104 0.3375868
## 5 13.90000 17.60000 14.12200 699.0000 684.0000 35.82300 0.0000000
## 6 42.48636 18.87273 14.16468 688.6364 668.6818 35.46720 5.4283942
## 
## Clustering vector:
##   [1] 3 3 3 3 2 4 4 6 4 2 4 2 3 4 3 3 6 4 3 6 2 4 4 5 2 3 2 6 4 3 4 2 4 3 4 3 3
##  [38] 3 3 3 3 4 6 4 6 3 3 3 2 3 3 4 4 3 3 3 4 3 2 4 4 4 4 6 4 1 4 3 6 6 3 2 2 4
##  [75] 4 6 1 3 6 4 2 3 4 3 4 3 3 6 3 3 6 4 1 3 2 4 1 4 1 4 4 3 3 4 1 3 3 3 4 4 1
## [112] 1 6 4 3 6 4 4 1 4 3 3 6 4 3 3 6 4 6 4 3 4 3 6 3 3 4 4 3 3 1 4 2 3 3 4 6 3
## [149] 2 3 2 6 4 3 6
## 
## Within cluster sum of squares by cluster:
## [1] 882141509 166488539 192843723 167726291         0 456768965
##  (between_SS / total_SS =  98.0 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
se3$betweenss
## [1] 89170633404
table(se3$cluster, se$expreg)
##    
##     3023 3079 3287 3400 3627 3647 3679 3693 3703 3704 3744 3747 3802 3823 3839
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   3    1    1    1    1    1    1    1    1    1    1    1    1    1    0    1
##   4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0
##    
##     3850 3858 3862 3867 3903 3913 3928 3935 3969 3979 4009 4015 4034 4062 4071
##   1    1    0    0    0    0    0    1    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   3    0    1    1    1    1    1    0    0    1    1    0    1    1    0    0
##   4    0    0    0    0    0    0    0    1    0    0    0    0    0    1    1
##   5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    0    0    0    0    0    1    0    0    0    0    1    0    0    0    0
##    
##     4079 4100 4105 4122 4134 4138 4150 4166 4177 4189 4201 4204 4205 4221 4234
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   3    0    1    1    1    1    1    0    0    1    1    1    1    1    1    0
##   4    0    1    0    0    0    0    1    1    0    0    0    0    0    1    1
##   5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    1    0    0    0    0    0    0    0    0    0    0    0    1    0    0
##    
##     4239 4240 4242 4247 4254 4255 4267 4273 4283 4287 4297 4312 4344 4389 4411
##   1    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    1
##   3    0    0    1    0    1    0    0    1    0    1    0    1    0    1    0
##   4    0    0    0    1    0    1    1    0    0    0    1    0    1    0    0
##   5    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0
##    
##     4413 4415 4439 4440 4442 4447 4465 4472 4484 4494 4500 4512 4521 4529 4539
##   1    1    0    0    0    0    0    0    0    0    0    0    1    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   3    0    1    0    1    0    0    1    1    0    0    0    0    0    0    0
##   4    0    0    0    0    1    1    0    0    1    0    1    1    0    1    1
##   5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    0    0    1    0    0    0    0    0    0    1    0    0    1    0    0
##    
##     4545 4557 4562 4564 4579 4586 4598 4606 4609 4611 4617 4623 4625 4638 4643
##   1    0    0    0    1    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   3    0    0    1    0    1    1    0    1    0    1    0    1    0    0    1
##   4    0    1    0    0    0    0    0    0    1    0    0    1    1    0    0
##   5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    1    0    0    0    0    0    1    0    0    0    1    0    0    1    0
##    
##     4682 4685 4686 4707 4716 4754 4756 4764 4793 4810 4821 4859 4899 4922 4932
##   1    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0
##   2    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0
##   3    1    0    0    0    0    0    1    0    1    1    0    0    0    1    0
##   4    0    0    1    0    0    0    0    0    0    0    1    0    1    0    0
##   5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    0    0    0    1    1    1    0    0    0    0    0    1    0    0    1
##    
##     4961 5004 5006 5009 5014 5048 5066 5107 5146 5152 5196 5242 5254 5273 5304
##   1    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0
##   3    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0
##   4    2    1    1    1    0    1    1    0    0    1    0    1    1    1    0
##   5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    0    0    0    0    0    0    0    0    1    0    0    0    0    0    1
##    
##     5323 5338 5345 5372 5387 5396 5457 5518 5548 5558 5608 5613 5690 5764 5770
##   1    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0
##   2    0    0    0    0    0    0    0    0    0    1    1    0    0    0    1
##   3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   4    1    1    1    1    1    1    1    1    1    0    0    0    1    1    0
##   5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##    
##     5896 5937 5978 6049 6158 6240 6337 6415 6554 6881 6902 7944
##   1    1    0    0    0    0    0    0    0    0    0    0    0
##   2    0    1    1    0    1    1    1    1    1    1    1    1
##   3    0    0    0    0    0    0    0    0    0    0    0    0
##   4    0    0    0    0    0    0    0    0    0    0    0    0
##   5    0    0    0    0    0    0    0    0    0    0    0    0
##   6    0    0    0    1    0    0    0    0    0    0    0    0
cm<- table(se3$cluster, se$expreg)
1-sum(diag(cm))/sum(cm)
## [1] 0.9935484

Silhouette coefficient

s <- silhouette(se3$cluster, dist(se))
plot(s)

sol <- pamk(se, krange=2:10, criterion="asw", usepam=TRUE)
sol
## $pamobject
## Medoids:
##      ID expreg expspecial expbil expocc exptot scratio special lunch stratio
## [1,] 18   4512    8280.89      0      0   5203    15.5    12.2  21.5    18.7
## [2,] 24   4239    6270.07 295140      0   4757     9.9    20.7  13.9    17.6
##      income score4 score8 salary english
## [1,] 14.962    702    685 36.670       0
## [2,] 14.122    699    684 35.823       0
## Clustering vector:
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [149] 1 1 1 1 1 1 1
## Objective function:
##    build     swap 
## 4181.385 4181.385 
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"      
## 
## $nc
## [1] 2
## 
## $crit
##  [1] 0.0000000 0.9717478 0.5807691 0.6034074 0.5934414 0.3885390 0.3211006
##  [8] 0.3412241 0.3506747 0.3479333
set.seed(1234)
d <- dist(scale(se))
methds <- c('complete', 'single', 'average')
avgS <- matrix(NA, ncol=3, nrow=5,
               dimnames=list(2:6, methds))
for(k in 2:6)
  for(m in seq_along(methds)) {
    h <- hclust(d, meth=methds[m])
    c <- cutree(h, k)
    s <- silhouette(c, d)
    avgS[k-1, m] <- mean(s[ , 3])
  }
avgS
##    complete    single   average
## 2 0.6321872 0.6321872 0.6321872
## 3 0.2442047 0.4808063 0.5030745
## 4 0.2536492 0.4806255 0.4188783
## 5 0.2309225 0.4044959 0.4044959
## 6 0.2060003 0.2295902 0.2797655

Dunn’s coefficient

dunn(dist_mat,cut_avg)
## [1] 0.2443944

K VALUE OF 4

Hierarchical Clustering

df_se<-as.data.frame(scale(se))
str(df_se)
## 'data.frame':    155 obs. of  14 variables:
##  $ expreg    : num  -0.5675 -1.3139 -0.8094 -0.4739 0.0619 ...
##  $ expspecial: num  -0.712 -0.299 -0.24 -0.91 2.157 ...
##  $ expbil    : num  -0.171 -0.171 -0.171 -0.171 -0.171 ...
##  $ expocc    : num  -0.454 -0.454 -0.454 -0.454 -0.454 ...
##  $ exptot    : num  -0.864 -1.288 -0.655 -0.657 0.171 ...
##  $ scratio   : num  3.173 -0.254 0.161 -0.781 -1.044 ...
##  $ special   : num  -0.415 -1.181 1.575 0.258 -1.426 ...
##  $ lunch     : num  -0.2955 -0.157 -0.2774 0.0418 -0.8074 ...
##  $ stratio   : num  0.7484 0.8861 0.2434 0.0598 -0.1238 ...
##  $ income    : num  -0.366 -0.801 -0.416 -0.544 1.483 ...
##  $ score4    : num  0.352 -0.27 -0.27 -0.456 1.036 ...
##  $ score8    : num  -0.309 -0.215 -0.309 0.064 1.415 ...
##  $ salary    : num  -0.561 -1.14 -0.949 -0.537 1.688 ...
##  $ english   : num  -0.413 -0.413 -0.314 -0.413 -0.413 ...
dist_mat<-dist(df_se, method='euclidean')
#View(dist_mat)
hclust_avg<-hclust(dist_mat, method='average')
plot(hclust_avg, hang=- 0.1, cex=0.3)

cut_avg<-cutree(hclust_avg, k=4)
plot(hclust_avg)
rect.hclust(hclust_avg , k = 4, border = 2:6)
abline(h = 4, col = "red")

avg_dend_obj <- as.dendrogram(hclust_avg)
avg_col_dend <- color_branches(avg_dend_obj, h = 4)
plot(avg_col_dend)

K-Means Clustering

#setting a seed for the random number generator
set.seed (1234)
se3 <- kmeans(se, center=4, iter.max=200)
se3
## K-means clustering with 4 clusters of sizes 1, 33, 77, 44
## 
## Cluster means:
##     expreg expspecial      expbil     expocc   exptot  scratio  special
## 1 4239.000   6270.070 295140.0000    0.00000 4757.000 9.900000 20.70000
## 2 4686.606   9167.305   9481.6667 4692.48485 5650.818 8.003030 17.04848
## 3 4257.701   7512.618    264.0130  594.41558 4889.636 8.429870 16.16883
## 4 5274.000  10102.999    227.7727   84.65909 6083.159 7.813636 14.65909
##      lunch  stratio   income   score4   score8   salary   english
## 1 13.90000 17.60000 14.12200 699.0000 684.0000 35.82300 0.0000000
## 2 34.86364 18.31818 15.95339 694.9091 679.9091 36.38385 4.4629281
## 3 13.45065 17.49481 17.04842 709.1429 696.8312 35.24503 0.3890154
## 4  8.85000 16.43409 22.52002 717.2045 712.6136 37.63288 0.7129501
## 
## Clustering vector:
##   [1] 3 3 3 3 4 4 4 2 3 4 4 4 3 4 3 3 2 3 3 2 2 4 3 1 4 3 4 2 3 2 4 4 4 3 3 3 3
##  [38] 3 3 3 3 4 2 4 2 3 3 3 2 3 3 3 3 3 3 3 4 3 4 4 4 4 4 2 3 2 3 3 2 3 3 4 4 4
##  [75] 4 2 2 3 2 4 4 3 4 3 3 3 3 2 3 3 2 3 2 3 4 4 2 4 2 3 4 3 3 4 2 3 3 3 4 4 2
## [112] 2 2 3 3 2 4 4 2 3 3 3 2 4 3 3 2 4 2 4 3 3 3 2 3 3 3 3 3 3 2 3 4 3 3 4 2 3
## [149] 4 3 4 3 3 3 2
## 
## Within cluster sum of squares by cluster:
## [1]          0 2242470845  399972756  177871627
##  (between_SS / total_SS =  96.9 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
se3$betweenss
## [1] 88216287203
table(se3$cluster, se$expreg)
##    
##     3023 3079 3287 3400 3627 3647 3679 3693 3703 3704 3744 3747 3802 3823 3839
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0
##   3    1    1    1    1    1    1    1    1    1    1    1    1    1    0    1
##   4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##    
##     3850 3858 3862 3867 3903 3913 3928 3935 3969 3979 4009 4015 4034 4062 4071
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    1    0    0    0    0    1    1    0    0    0    1    0    0    0    0
##   3    0    1    1    1    1    1    0    0    1    1    0    1    1    1    0
##   4    0    0    0    0    0    0    0    1    0    0    0    0    0    0    1
##    
##     4079 4100 4105 4122 4134 4138 4150 4166 4177 4189 4201 4204 4205 4221 4234
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    1    0    0    0    0    0    0    0    0    0    0    0    1    0    0
##   3    0    2    1    1    1    1    1    0    1    1    1    1    1    1    1
##   4    0    0    0    0    0    0    0    1    0    0    0    0    0    1    0
##    
##     4239 4240 4242 4247 4254 4255 4267 4273 4283 4287 4297 4312 4344 4389 4411
##   1    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    1    0    0    0    0    0    0    1    0    0    0    0    0    0
##   3    0    0    1    0    1    1    0    1    0    1    0    1    0    1    0
##   4    0    0    0    1    0    0    1    0    0    0    1    0    1    0    1
##    
##     4413 4415 4439 4440 4442 4447 4465 4472 4484 4494 4500 4512 4521 4529 4539
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    1    1    0    0    0    0    0    0    0    1    0    1    1    0    0
##   3    0    0    1    1    1    1    1    1    0    0    1    1    0    1    1
##   4    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0
##    
##     4545 4557 4562 4564 4579 4586 4598 4606 4609 4611 4617 4623 4625 4638 4643
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    1    0    0    1    0    0    1    0    0    0    1    0    0    1    0
##   3    0    1    1    0    1    1    0    1    0    1    0    1    1    0    1
##   4    0    0    0    0    0    0    0    0    1    0    0    1    0    0    0
##    
##     4682 4685 4686 4707 4716 4754 4756 4764 4793 4810 4821 4859 4899 4922 4932
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    1    1    0    0    1    0    0    0    1    0    0    1
##   3    1    0    1    0    0    1    1    0    1    1    1    0    1    1    0
##   4    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0
##    
##     4961 5004 5006 5009 5014 5048 5066 5107 5146 5152 5196 5242 5254 5273 5304
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    1    1    0    0    0    0    0    1
##   3    0    0    1    0    1    0    0    0    0    0    0    0    0    0    0
##   4    2    1    0    1    0    1    1    0    0    1    1    1    1    1    0
##    
##     5323 5338 5345 5372 5387 5396 5457 5518 5548 5558 5608 5613 5690 5764 5770
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0
##   3    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0
##   4    1    0    1    1    1    1    0    1    1    1    1    0    1    1    1
##    
##     5896 5937 5978 6049 6158 6240 6337 6415 6554 6881 6902 7944
##   1    0    0    0    0    0    0    0    0    0    0    0    0
##   2    1    1    0    1    1    0    0    0    0    0    0    0
##   3    0    0    0    0    0    0    0    0    0    0    0    0
##   4    0    0    1    0    0    1    1    1    1    1    1    1
cm<- table(se3$cluster, se$expreg)
1-sum(diag(cm))/sum(cm)
## [1] 0.9935484

Silhouette coefficient

s <- silhouette(se3$cluster, dist(se))
plot(s)

sol <- pamk(se, krange=2:10, criterion="asw", usepam=TRUE)
sol
## $pamobject
## Medoids:
##      ID expreg expspecial expbil expocc exptot scratio special lunch stratio
## [1,] 18   4512    8280.89      0      0   5203    15.5    12.2  21.5    18.7
## [2,] 24   4239    6270.07 295140      0   4757     9.9    20.7  13.9    17.6
##      income score4 score8 salary english
## [1,] 14.962    702    685 36.670       0
## [2,] 14.122    699    684 35.823       0
## Clustering vector:
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [149] 1 1 1 1 1 1 1
## Objective function:
##    build     swap 
## 4181.385 4181.385 
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"      
## 
## $nc
## [1] 2
## 
## $crit
##  [1] 0.0000000 0.9717478 0.5807691 0.6034074 0.5934414 0.3885390 0.3211006
##  [8] 0.3412241 0.3506747 0.3479333
set.seed(1234)
d <- dist(scale(se))
methds <- c('complete', 'single', 'average')
avgS <- matrix(NA, ncol=3, nrow=5,
               dimnames=list(2:6, methds))
for(k in 2:6)
  for(m in seq_along(methds)) {
    h <- hclust(d, meth=methds[m])
    c <- cutree(h, k)
    s <- silhouette(c, d)
    avgS[k-1, m] <- mean(s[ , 3])
  }
avgS
##    complete    single   average
## 2 0.6321872 0.6321872 0.6321872
## 3 0.2442047 0.4808063 0.5030745
## 4 0.2536492 0.4806255 0.4188783
## 5 0.2309225 0.4044959 0.4044959
## 6 0.2060003 0.2295902 0.2797655

Dunn’s coefficient

dunn(dist_mat,cut_avg)
## [1] 0.377325

K VALUE OF 3

Hierarchical Clustering

df_se<-as.data.frame(scale(se))
str(df_se)
## 'data.frame':    155 obs. of  14 variables:
##  $ expreg    : num  -0.5675 -1.3139 -0.8094 -0.4739 0.0619 ...
##  $ expspecial: num  -0.712 -0.299 -0.24 -0.91 2.157 ...
##  $ expbil    : num  -0.171 -0.171 -0.171 -0.171 -0.171 ...
##  $ expocc    : num  -0.454 -0.454 -0.454 -0.454 -0.454 ...
##  $ exptot    : num  -0.864 -1.288 -0.655 -0.657 0.171 ...
##  $ scratio   : num  3.173 -0.254 0.161 -0.781 -1.044 ...
##  $ special   : num  -0.415 -1.181 1.575 0.258 -1.426 ...
##  $ lunch     : num  -0.2955 -0.157 -0.2774 0.0418 -0.8074 ...
##  $ stratio   : num  0.7484 0.8861 0.2434 0.0598 -0.1238 ...
##  $ income    : num  -0.366 -0.801 -0.416 -0.544 1.483 ...
##  $ score4    : num  0.352 -0.27 -0.27 -0.456 1.036 ...
##  $ score8    : num  -0.309 -0.215 -0.309 0.064 1.415 ...
##  $ salary    : num  -0.561 -1.14 -0.949 -0.537 1.688 ...
##  $ english   : num  -0.413 -0.413 -0.314 -0.413 -0.413 ...
dist_mat<-dist(df_se, method='euclidean')
#View(dist_mat)
hclust_avg<-hclust(dist_mat, method='average')
plot(hclust_avg, hang=- 0.1, cex=0.3)

cut_avg<-cutree(hclust_avg, k=3)
plot(hclust_avg)
rect.hclust(hclust_avg , k = 3, border = 2:6)
abline(h = 3, col = "red")

avg_dend_obj <- as.dendrogram(hclust_avg)
avg_col_dend <- color_branches(avg_dend_obj, h = 3)
plot(avg_col_dend)

K-Means Clustering

#setting a seed for the random number generator
set.seed (1234)
se3 <- kmeans(se, center=3, iter.max=200)
se3
## K-means clustering with 3 clusters of sizes 1, 33, 121
## 
## Cluster means:
##     expreg expspecial      expbil    expocc   exptot  scratio  special    lunch
## 1 4239.000   6270.070 295140.0000    0.0000 4757.000 9.900000 20.70000 13.90000
## 2 4686.606   9167.305   9481.6667 4692.4848 5650.818 8.003030 17.04848 34.86364
## 3 4627.264   8454.575    250.8347  409.0496 5323.645 8.205785 15.61983 11.77769
##    stratio   income   score4   score8   salary   english
## 1 17.60000 14.12200 699.0000 684.0000 35.82300 0.0000000
## 2 18.31818 15.95339 694.9091 679.9091 36.38385 4.4629281
## 3 17.10909 19.03809 712.0744 702.5702 36.11334 0.5068098
## 
## Clustering vector:
##   [1] 3 3 3 3 3 3 3 2 3 3 3 3 3 3 3 3 2 3 3 2 2 3 3 1 3 3 3 2 3 2 3 3 3 3 3 3 3
##  [38] 3 3 3 3 3 2 3 2 3 3 3 2 3 3 3 3 3 3 3 3 3 3 3 3 3 3 2 3 2 3 3 2 3 3 3 3 3
##  [75] 3 2 2 3 2 3 3 3 3 3 3 3 3 2 3 3 2 3 2 3 3 3 2 3 2 3 3 3 3 3 2 3 3 3 3 3 2
## [112] 2 2 3 3 2 3 3 2 3 3 3 2 3 3 3 2 3 2 3 3 3 3 2 3 3 3 3 3 3 2 3 3 3 3 3 2 3
## [149] 3 3 3 3 3 3 2
## 
## Within cluster sum of squares by cluster:
## [1]          0 2242470845  841855740
##  (between_SS / total_SS =  96.6 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
se3$betweenss
## [1] 87952275845
table(se3$cluster, se$expreg)
##    
##     3023 3079 3287 3400 3627 3647 3679 3693 3703 3704 3744 3747 3802 3823 3839
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0
##   3    1    1    1    1    1    1    1    1    1    1    1    1    1    0    1
##    
##     3850 3858 3862 3867 3903 3913 3928 3935 3969 3979 4009 4015 4034 4062 4071
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    1    0    0    0    0    1    1    0    0    0    1    0    0    0    0
##   3    0    1    1    1    1    1    0    1    1    1    0    1    1    1    1
##    
##     4079 4100 4105 4122 4134 4138 4150 4166 4177 4189 4201 4204 4205 4221 4234
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    1    0    0    0    0    0    0    0    0    0    0    0    1    0    0
##   3    0    2    1    1    1    1    1    1    1    1    1    1    1    2    1
##    
##     4239 4240 4242 4247 4254 4255 4267 4273 4283 4287 4297 4312 4344 4389 4411
##   1    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    1    0    0    0    0    0    0    1    0    0    0    0    0    0
##   3    0    0    1    1    1    1    1    1    0    1    1    1    1    1    1
##    
##     4413 4415 4439 4440 4442 4447 4465 4472 4484 4494 4500 4512 4521 4529 4539
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    1    1    0    0    0    0    0    0    0    1    0    1    1    0    0
##   3    0    0    1    1    1    1    1    1    1    0    1    1    0    1    1
##    
##     4545 4557 4562 4564 4579 4586 4598 4606 4609 4611 4617 4623 4625 4638 4643
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    1    0    0    1    0    0    1    0    0    0    1    0    0    1    0
##   3    0    1    1    0    1    1    0    1    1    1    0    2    1    0    1
##    
##     4682 4685 4686 4707 4716 4754 4756 4764 4793 4810 4821 4859 4899 4922 4932
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    1    1    0    0    1    0    0    0    1    0    0    1
##   3    1    1    1    0    0    1    1    0    1    1    1    0    1    1    0
##    
##     4961 5004 5006 5009 5014 5048 5066 5107 5146 5152 5196 5242 5254 5273 5304
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    1    1    0    0    0    0    0    1
##   3    2    1    1    1    1    1    1    0    0    1    1    1    1    1    0
##    
##     5323 5338 5345 5372 5387 5396 5457 5518 5548 5558 5608 5613 5690 5764 5770
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0
##   3    1    1    1    1    1    1    1    1    1    1    1    0    1    1    1
##    
##     5896 5937 5978 6049 6158 6240 6337 6415 6554 6881 6902 7944
##   1    0    0    0    0    0    0    0    0    0    0    0    0
##   2    1    1    0    1    1    0    0    0    0    0    0    0
##   3    0    0    1    0    0    1    1    1    1    1    1    1
cm<- table(se3$cluster, se$expreg)
1-sum(diag(cm))/sum(cm)
## [1] 0.9935484

Silhouette coefficient

s <- silhouette(se3$cluster, dist(se))
plot(s)

sol <- pamk(se, krange=2:10, criterion="asw", usepam=TRUE)
sol
## $pamobject
## Medoids:
##      ID expreg expspecial expbil expocc exptot scratio special lunch stratio
## [1,] 18   4512    8280.89      0      0   5203    15.5    12.2  21.5    18.7
## [2,] 24   4239    6270.07 295140      0   4757     9.9    20.7  13.9    17.6
##      income score4 score8 salary english
## [1,] 14.962    702    685 36.670       0
## [2,] 14.122    699    684 35.823       0
## Clustering vector:
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [149] 1 1 1 1 1 1 1
## Objective function:
##    build     swap 
## 4181.385 4181.385 
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"      
## 
## $nc
## [1] 2
## 
## $crit
##  [1] 0.0000000 0.9717478 0.5807691 0.6034074 0.5934414 0.3885390 0.3211006
##  [8] 0.3412241 0.3506747 0.3479333
set.seed(1234)
d <- dist(scale(se))
methds <- c('complete', 'single', 'average')
avgS <- matrix(NA, ncol=3, nrow=5,
               dimnames=list(2:6, methds))
for(k in 2:6)
  for(m in seq_along(methds)) {
    h <- hclust(d, meth=methds[m])
    c <- cutree(h, k)
    s <- silhouette(c, d)
    avgS[k-1, m] <- mean(s[ , 3])
  }
avgS
##    complete    single   average
## 2 0.6321872 0.6321872 0.6321872
## 3 0.2442047 0.4808063 0.5030745
## 4 0.2536492 0.4806255 0.4188783
## 5 0.2309225 0.4044959 0.4044959
## 6 0.2060003 0.2295902 0.2797655

#Dunn’s coefficient

dunn(dist_mat,cut_avg)
## [1] 0.3876777

K VALUE OF 8

Hierarchical Clustering

df_se<-as.data.frame(scale(se))
str(df_se)
## 'data.frame':    155 obs. of  14 variables:
##  $ expreg    : num  -0.5675 -1.3139 -0.8094 -0.4739 0.0619 ...
##  $ expspecial: num  -0.712 -0.299 -0.24 -0.91 2.157 ...
##  $ expbil    : num  -0.171 -0.171 -0.171 -0.171 -0.171 ...
##  $ expocc    : num  -0.454 -0.454 -0.454 -0.454 -0.454 ...
##  $ exptot    : num  -0.864 -1.288 -0.655 -0.657 0.171 ...
##  $ scratio   : num  3.173 -0.254 0.161 -0.781 -1.044 ...
##  $ special   : num  -0.415 -1.181 1.575 0.258 -1.426 ...
##  $ lunch     : num  -0.2955 -0.157 -0.2774 0.0418 -0.8074 ...
##  $ stratio   : num  0.7484 0.8861 0.2434 0.0598 -0.1238 ...
##  $ income    : num  -0.366 -0.801 -0.416 -0.544 1.483 ...
##  $ score4    : num  0.352 -0.27 -0.27 -0.456 1.036 ...
##  $ score8    : num  -0.309 -0.215 -0.309 0.064 1.415 ...
##  $ salary    : num  -0.561 -1.14 -0.949 -0.537 1.688 ...
##  $ english   : num  -0.413 -0.413 -0.314 -0.413 -0.413 ...
dist_mat<-dist(df_se, method='euclidean')
#View(dist_mat)
hclust_avg<-hclust(dist_mat, method='average')
plot(hclust_avg, hang=- 0.1, cex=0.3)

cut_avg<-cutree(hclust_avg, k=8)
plot(hclust_avg)
rect.hclust(hclust_avg , k = 8, border = 2:6)
abline(h = 8, col = "red")

avg_dend_obj <- as.dendrogram(hclust_avg)
avg_col_dend <- color_branches(avg_dend_obj, h = 8)
plot(avg_col_dend)

K-Means Clustering

#setting a seed for the random number generator
set.seed (1234)
se3 <- kmeans(se, center=8, iter.max=200)
se3
## K-means clustering with 8 clusters of sizes 3, 13, 8, 41, 1, 19, 14, 56
## 
## Cluster means:
##     expreg expspecial       expbil     expocc   exptot  scratio  special
## 1 5307.333   9249.610  29109.33333 1828.33333 6305.333 6.266667 17.93333
## 2 6159.077  11309.758    267.53846    0.00000 7119.000 5.876923 14.86154
## 3 4689.625   8294.774      0.00000 5353.75000 5360.000 7.375000 16.57500
## 4 4809.683   9223.730     42.46341    0.00000 5545.902 8.736585 14.70488
## 5 4239.000   6270.070 295140.00000    0.00000 4757.000 9.900000 20.70000
## 6 4564.000   9447.155   6643.84211 7669.15789 5564.579 8.078947 16.56316
## 7 4633.857   8724.561   8208.42857  365.92857 5520.429 8.642857 17.22857
## 8 4147.554   7224.607    170.50000   92.76786 4748.214 8.394643 16.32500
##       lunch  stratio   income   score4   score8   salary   english
## 1  9.133333 16.63333 23.43200 717.6667 717.6667 40.39317 1.2555644
## 2  6.253846 14.96923 26.00369 723.3077 723.3846 39.60015 0.6455729
## 3 15.200000 16.73750 17.44413 710.6250 694.3750 34.29319 0.1849637
## 4  9.329268 16.99268 20.26588 714.1951 706.6341 36.37010 0.3415171
## 5 13.900000 17.60000 14.12200 699.0000 684.0000 35.82300 0.0000000
## 6 44.921053 19.32105 13.92363 686.0526 666.7368 35.48597 6.1129774
## 7 28.114286 17.66429 16.51321 700.0714 687.0000 36.40754 3.3531593
## 8 12.780357 17.59286 17.06320 709.5357 697.8214 35.44538 0.3190851
## 
## Clustering vector:
##   [1] 8 8 8 8 2 4 4 6 4 2 4 2 7 4 8 8 6 4 8 6 7 3 4 5 2 8 7 6 4 7 4 2 4 8 4 8 8
##  [38] 8 8 8 8 4 6 4 6 8 7 8 7 8 8 4 3 8 8 8 4 8 2 4 4 4 4 6 4 7 3 8 6 3 8 2 2 4
##  [75] 4 6 7 8 6 4 2 8 4 8 3 8 8 6 8 8 6 3 7 8 2 4 7 4 1 4 4 8 8 4 7 8 8 8 4 4 7
## [112] 1 6 4 8 6 4 4 7 4 8 8 6 4 8 8 6 4 6 4 8 4 8 6 8 8 3 4 8 8 1 8 2 8 8 4 6 8
## [149] 2 8 2 3 8 8 7
## 
## Within cluster sum of squares by cluster:
## [1]  72137725  46418519  32474772  49023226         0 348893454 145712315
## [8]  95398513
##  (between_SS / total_SS =  99.1 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
se3$betweenss
## [1] 90246543907
table(se3$cluster, se$expreg)
##    
##     3023 3079 3287 3400 3627 3647 3679 3693 3703 3704 3744 3747 3802 3823 3839
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   4    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    0    0    0    0    0    0    0    0    0    0    0    0    0    2    0
##   7    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   8    1    1    1    1    1    1    1    1    1    1    1    1    1    0    1
##    
##     3850 3858 3862 3867 3903 3913 3928 3935 3969 3979 4009 4015 4034 4062 4071
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   4    0    0    0    0    0    0    0    1    0    0    0    0    0    1    1
##   5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    0    0    0    0    0    1    0    0    0    0    1    0    0    0    0
##   7    1    0    0    0    0    0    1    0    0    0    0    0    0    0    0
##   8    0    1    1    1    1    1    0    0    1    1    0    1    1    0    0
##    
##     4079 4100 4105 4122 4134 4138 4150 4166 4177 4189 4201 4204 4205 4221 4234
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   3    0    1    0    0    0    0    1    0    0    0    0    0    0    0    0
##   4    0    0    0    0    0    0    0    1    0    0    0    0    0    1    1
##   5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    1    0    0    0    0    0    0    0    0    0    0    0    1    0    0
##   7    0    0    0    0    0    0    0    0    0    1    0    0    0    0    0
##   8    0    1    1    1    1    1    0    0    1    0    1    1    1    1    0
##    
##     4239 4240 4242 4247 4254 4255 4267 4273 4283 4287 4297 4312 4344 4389 4411
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   3    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   4    0    0    0    1    0    1    1    0    0    0    1    0    1    0    0
##   5    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    0    0    0    0    0    0    0    0    1    0    0    0    0    0    0
##   7    0    1    0    0    1    0    0    0    0    0    0    0    0    0    1
##   8    0    0    1    0    0    0    0    1    0    1    0    1    0    1    0
##    
##     4413 4415 4439 4440 4442 4447 4465 4472 4484 4494 4500 4512 4521 4529 4539
##   1    1    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   3    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0
##   4    0    0    0    0    1    1    0    0    1    0    1    1    0    0    1
##   5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    0    0    0    0    0    0    0    0    0    1    0    0    1    0    0
##   7    0    1    0    0    0    0    0    0    0    0    0    1    0    0    0
##   8    0    0    0    1    0    0    1    1    0    0    0    0    0    1    0
##    
##     4545 4557 4562 4564 4579 4586 4598 4606 4609 4611 4617 4623 4625 4638 4643
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   3    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0
##   4    0    0    0    0    0    0    0    0    1    0    0    1    1    0    0
##   5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    0    0    0    0    0    0    1    0    0    0    1    0    0    1    0
##   7    1    0    0    1    0    0    0    0    0    0    0    0    0    0    0
##   8    0    0    1    0    1    1    0    1    0    1    0    1    0    0    1
##    
##     4682 4685 4686 4707 4716 4754 4756 4764 4793 4810 4821 4859 4899 4922 4932
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    1    0    0    0    0    0    0    0    0    0    0    0    0    0
##   3    0    0    0    0    0    1    0    0    0    0    1    0    0    0    0
##   4    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0
##   5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    0    0    0    1    1    0    0    0    0    0    0    1    0    0    1
##   7    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0
##   8    1    0    1    0    0    0    1    0    1    1    0    0    0    1    0
##    
##     4961 5004 5006 5009 5014 5048 5066 5107 5146 5152 5196 5242 5254 5273 5304
##   1    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    0    0    0    0    0    0    0    0    1    0    0    0    0
##   3    0    0    1    0    0    0    0    0    0    0    0    0    0    0    0
##   4    2    1    0    1    0    1    1    0    0    1    0    1    1    1    0
##   5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    0    0    0    0    0    0    0    0    1    0    0    0    0    0    1
##   7    0    0    0    0    0    0    0    1    0    0    0    0    0    0    0
##   8    0    0    0    0    1    0    0    0    0    0    0    0    0    0    0
##    
##     5323 5338 5345 5372 5387 5396 5457 5518 5548 5558 5608 5613 5690 5764 5770
##   1    0    0    0    0    0    0    0    0    0    0    0    1    0    0    0
##   2    0    0    0    0    0    0    0    0    0    1    1    0    0    0    1
##   3    0    0    0    0    0    0    0    0    0    0    0    0    1    0    0
##   4    1    1    1    1    1    1    1    1    1    0    0    0    0    1    0
##   5    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   6    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   7    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##   8    0    0    0    0    0    0    0    0    0    0    0    0    0    0    0
##    
##     5896 5937 5978 6049 6158 6240 6337 6415 6554 6881 6902 7944
##   1    1    0    0    0    0    0    0    0    0    0    0    0
##   2    0    0    1    0    0    1    1    1    1    1    1    1
##   3    0    0    0    0    0    0    0    0    0    0    0    0
##   4    0    0    0    0    0    0    0    0    0    0    0    0
##   5    0    0    0    0    0    0    0    0    0    0    0    0
##   6    0    0    0    1    0    0    0    0    0    0    0    0
##   7    0    1    0    0    1    0    0    0    0    0    0    0
##   8    0    0    0    0    0    0    0    0    0    0    0    0
cm<- table(se3$cluster, se$expreg)
1-sum(diag(cm))/sum(cm)
## [1] 0.9935484

Silhouette coefficient

s <- silhouette(se3$cluster, dist(se))
plot(s)

sol <- pamk(se, krange=2:10, criterion="asw", usepam=TRUE)
sol
## $pamobject
## Medoids:
##      ID expreg expspecial expbil expocc exptot scratio special lunch stratio
## [1,] 18   4512    8280.89      0      0   5203    15.5    12.2  21.5    18.7
## [2,] 24   4239    6270.07 295140      0   4757     9.9    20.7  13.9    17.6
##      income score4 score8 salary english
## [1,] 14.962    702    685 36.670       0
## [2,] 14.122    699    684 35.823       0
## Clustering vector:
##   [1] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [38] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
##  [75] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [112] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## [149] 1 1 1 1 1 1 1
## Objective function:
##    build     swap 
## 4181.385 4181.385 
## 
## Available components:
##  [1] "medoids"    "id.med"     "clustering" "objective"  "isolation" 
##  [6] "clusinfo"   "silinfo"    "diss"       "call"       "data"      
## 
## $nc
## [1] 2
## 
## $crit
##  [1] 0.0000000 0.9717478 0.5807691 0.6034074 0.5934414 0.3885390 0.3211006
##  [8] 0.3412241 0.3506747 0.3479333
set.seed(1234)
d <- dist(scale(se))
methds <- c('complete', 'single', 'average')
avgS <- matrix(NA, ncol=3, nrow=5,
               dimnames=list(2:6, methds))
for(k in 2:6)
  for(m in seq_along(methds)) {
    h <- hclust(d, meth=methds[m])
    c <- cutree(h, k)
    s <- silhouette(c, d)
    avgS[k-1, m] <- mean(s[ , 3])
  }
avgS
##    complete    single   average
## 2 0.6321872 0.6321872 0.6321872
## 3 0.2442047 0.4808063 0.5030745
## 4 0.2536492 0.4806255 0.4188783
## 5 0.2309225 0.4044959 0.4044959
## 6 0.2060003 0.2295902 0.2797655

Dunn’s coefficient

dunn(dist_mat,cut_avg)
## [1] 0.2428229

A higher Dunn’s coefficient indicates better clustering, where clusters are more compact and well-separated. A k value of 3 gives the best result.