OPTICS algorithm

omon das

2024-02-17

# Keep every column of `economics` except `date`.
ecoTib <- dplyr::select(economics, -date)

# Standardise the columns with scale() (default centring and scaling).
ecoscale <- scale(ecoTib)

# Fit the OPTICS ordering on the standardised data with minPts = 10.
lrn.optics <- optics(ecoscale, minPts = 10)

# Plot the fitted OPTICS object.
plot(lrn.optics)




# Collect internal validation indices for one clustering of `data`.
#
# Arguments:
#   data        - observations, passed to clusterSim::index.DB() and
#                 clusterSim::index.G1().
#   clusters    - one cluster label per observation.
#   dist_matrix - distance object, passed to clValid::dunn().
#
# Returns a named list with elements `db`, `G1`, `dunn` and `clusters`
# (the number of distinct labels found in `clusters`).
#
# NOTE(review): every distinct label is counted, so if callers pass labels
# in which 0 marks noise, noise is counted as a cluster - confirm intent.
cluster_metrics <- function(data, clusters, dist_matrix) {
  db_value <- clusterSim::index.DB(data, clusters)$DB
  g1_value <- clusterSim::index.G1(data, clusters)
  dunn_value <- clValid::dunn(dist_matrix, clusters)
  n_labels <- length(unique(clusters))

  list(
    db = db_value,
    G1 = g1_value,
    dunn = dunn_value,
    clusters = n_labels
  )
}

# Extract a clustering from the OPTICS ordering with the Xi method, xi = 0.4.
clust <- extractXi(lrn.optics, xi = 0.4)

# Show the resulting object.
# NOTE(review): the recorded output below lists no `cluster` field, which
# suggests that no clusters were extracted at xi = 0.4 - confirm.
clust
## OPTICS ordering/clustering for 574 objects.
## Parameters: minPts = 10, eps = 1.6148938033089, eps_cl = NA, xi = 0.4
## Available fields: order, reachdist, coredist, predecessor, minPts, eps,
##                   eps_cl, xi
plot(clust)

# Build 10 bootstrap resamples of the standardised data: each element is a
# tibble with as many rows as `ecoscale`, drawn with replacement.
# NOTE(review): no RNG seed is set in the visible code, so the resamples
# (and everything derived from them) differ from run to run.
ecoBoot <- map(1:10, function(i) {
  scaled_tbl <- as_tibble(ecoscale)
  sample_n(scaled_tbl, size = nrow(scaled_tbl), replace = TRUE)
})

library(purrr)


# Tune the Xi threshold on the bootstrap resamples.
#
# For every resample in `ecoBoot` an OPTICS ordering is fitted once
# (minPts = 10); a clustering is then extracted for each xi in
# 0.01, 0.02, ..., 0.20 and scored with cluster_metrics().
#
# `tune` is a list with one element per resample, each itself a list holding
# one single-row tibble (db, G1, dunn, clusters) per xi value.
# `measure` stacks all of those rows into one tibble.
#
# NOTE(review): the recorded output shows dunn = 0 in every printed row;
# presumably duplicate rows created by resampling with replacement give a
# zero between-cluster distance - confirm.
tune <- map(ecoBoot, function(boot) {
  boot_optics <- optics(boot, minPts = 10)

  # The pairwise distances depend only on the resample, not on xi, so they
  # are computed once per resample instead of once per xi value.
  boot_dist <- dist(boot)

  map(seq(0.01, 0.2, 0.01), function(k) {
    xi_clust <- extractXi(boot_optics, xi = k)

    # Pass the resample and its labels directly; binding the labels onto the
    # data only to drop them again yields the same inputs.
    scores <- cluster_metrics(
      data = boot,
      clusters = xi_clust$cluster,
      dist_matrix = boot_dist
    )

    as_tibble(scores)
  })
})

# bind_rows() flattens the nested list into one row per (resample, xi) pair.
measure <- bind_rows(tune)

measure
## # A tibble: 200 × 4
##       db    G1  dunn clusters
##    <dbl> <dbl> <dbl>    <int>
##  1  2.00  305.     0       32
##  2  2.01  253.     0       33
##  3  2.07  269.     0       32
##  4  2.05  291.     0       29
##  5  2.23  276.     0       28
##  6  2.57  189.     0       26
##  7  2.63  196.     0       25
##  8  2.16  156.     0       24
##  9  2.10  146.     0       22
## 10  2.03  157.     0       17
## # ℹ 190 more rows
# Reshape the metrics to long format: one row per (clusters, metric) pair,
# with the metric name in `variable` and its score in `value`.
# NOTE(review): tidyr documents gather() as superseded by pivot_longer();
# it is kept here because the row order of the result differs between them.
plot <- gather(measure, key = "variable", value = "value", -clusters)

plot
## # A tibble: 600 × 3
##    clusters variable value
##       <int> <chr>    <dbl>
##  1       32 db        2.00
##  2       33 db        2.01
##  3       32 db        2.07
##  4       29 db        2.05
##  5       28 db        2.23
##  6       26 db        2.57
##  7       25 db        2.63
##  8       24 db        2.16
##  9       22 db        2.10
## 10       17 db        2.03
## # ℹ 590 more rows
library(ggplot2)

# Plot each metric against the number of clusters, one facet per metric,
# with an independent y axis for every facet.
ggplot(plot, aes(x = clusters, y = value)) +
  geom_point(shape = "circle", size = 1.5, colour = "#112446") +
  geom_line() +
  facet_wrap(vars(variable), scales = "free_y") +
  theme_minimal()