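This function takes a numeric vector "x" as input and returns its discretized version. The values of "x" are sorted and split into consecutive clusters of "step" elements each, and every value in a cluster is replaced by that cluster's median. If "step" is even, the median is the average of the two middle elements. Any leftover values that do not fill a complete cluster (the largest ones) are left unchanged, and the result is returned in the original order of "x".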

## Discretize a numeric vector by replacing its values with group medians.
##
## The values of `x` are sorted in increasing order and split into consecutive
## groups of `step` elements. Every value in a group is replaced by the median
## of that group (for an even `step`, the mean of the two middle elements).
## The result is returned in the original order of `x`.
##
## Arguments:
##   x     Numeric vector to discretize.
##   step  Group size: a single finite whole number >= 1.
##
## Value:
##   An unnamed vector of the same length as `x`. The largest
##   `length(x) %% step` values, which do not fill a complete group, are
##   returned unchanged. If `x` holds fewer than `step` elements (including
##   the empty vector) there is no complete group and `x` is returned as is.
discretize <- function(x, step) {
  stopifnot(
    is.numeric(step), length(step) == 1L, is.finite(step),
    step >= 1, step == floor(step)
  )
  x <- unname(x)
  n_full <- (length(x) %/% step) * step # elements covered by complete groups
  ## Nothing to discretize: without this guard seq() below would fail with
  ## "wrong sign in 'by' argument" for inputs shorter than `step`.
  if (n_full == 0) {
    return(x)
  }
  ord <- order(x)
  sorted <- x[ord]
  ## Positions (in the sorted vector) of the two middle elements of each
  ## group; they coincide when `step` is odd.
  starts <- seq(from = 1, to = n_full, by = step)
  lower <- starts + floor((step - 1) / 2)
  upper <- starts + ceiling((step - 1) / 2)
  medians <- (sorted[lower] + sorted[upper]) / 2
  ## Overwrite every complete group with its median; the tail is untouched.
  sorted[seq_len(n_full)] <- rep(medians, each = step)
  ## order(ord) is the inverse permutation: it restores the input order.
  sorted[order(ord)]
}

## Demo / smoke test for the discretize() function.
##
## To load the function from a file first:
#source("discretize.R")

## Three columns of 30 normal draws with means 0, 2 and 5.
## The data are random, so the printed values below are only illustrative.
data <- cbind(rnorm(30, mean = 0), rnorm(30, mean = 2), rnorm(30, mean = 5))
head(data)
##             [,1]      [,2]     [,3]
## [1,]  0.43961012 0.6509330 5.688139
## [2,] -0.02170716 3.4614587 7.039521
## [3,] -0.52733417 1.1165835 4.833965
## [4,]  0.18599627 2.7678167 5.186015
## [5,]  2.05573155 1.9584715 6.047592
## [6,]  0.89456137 0.9744364 5.127185

## Discretize every column independently, using groups of 4 values.
discret_data <- apply(data, 2, discretize, step = 4)
head(discret_data)
##             [,1]     [,2]     [,3]
## [1,]  0.45617732 0.599541 5.623603
## [2,]  0.08388144 3.050305 6.634291
## [3,] -0.52067768 1.169088 5.018349
## [4,]  0.08388144 2.514256 5.259404
## [5,]  2.05573155 1.946419 5.885117
## [6,]  0.86455971 0.599541 5.018349

## Compare the distributions before and after discretization.
boxplot(data)

boxplot(discret_data)