classIntervals

Example rendered

library(classInt)

# spData is an optional dependency, used in these examples only as the source
# of the jenks71 data set. Check that it is installed without attaching it:
# the data is loaded below with data(..., package = "spData"), so nothing from
# the package has to be on the search path.
run <- requireNamespace("spData", quietly = TRUE)
if (!run) {
  message("spData package needed for examples")
}

## Warning: package 'spData' was built under R version 3.5.3

if (run) {
  # First page of plots: six classification styles on a 2 x 3 grid.
  data(jenks71, package = "spData")
  pal1 <- c("wheat1", "red3")
  # Remember the previous graphics settings; par(opar) is called after the
  # second page of plots.
  opar <- par(mfrow = c(2, 3))
  plot(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "fixed",
      fixedBreaks = c(15.57, 25, 50, 75, 100, 155.30)
    ),
    pal = pal1, main = "Fixed"
  )
  plot(
    classIntervals(jenks71$jenks71, n = 5, style = "sd"),
    pal = pal1, main = "Pretty standard deviations"
  )
  plot(
    classIntervals(jenks71$jenks71, n = 5, style = "equal"),
    pal = pal1, main = "Equal intervals"
  )
  plot(
    classIntervals(jenks71$jenks71, n = 5, style = "quantile"),
    pal = pal1, main = "Quantile"
  )
  # Fix the seed before the k-means classification.
  set.seed(1)
  plot(
    classIntervals(jenks71$jenks71, n = 5, style = "kmeans"),
    pal = pal1, main = "K-means"
  )
  plot(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "hclust", method = "complete"
    ),
    pal = pal1, main = "Complete cluster"
  )
}

if (run) {
  # Second page of plots: six more classification styles.
  plot(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "hclust", method = "single"
    ),
    pal = pal1, main = "Single cluster"
  )
  # Fix the seed before the bagged-cluster classification.
  set.seed(1)
  plot(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "bclust", verbose = FALSE
    ),
    pal = pal1, main = "Bagged cluster"
  )
  plot(
    classIntervals(jenks71$jenks71, n = 5, style = "fisher"),
    pal = pal1, main = "Fisher's method"
  )
  plot(
    classIntervals(jenks71$jenks71, n = 5, style = "jenks"),
    pal = pal1, main = "Jenks' method"
  )
  # No n is passed for the dpih and headtails styles.
  plot(
    classIntervals(jenks71$jenks71, style = "dpih"),
    pal = pal1, main = "dpih method"
  )
  plot(
    classIntervals(jenks71$jenks71, style = "headtails", thr = 1),
    pal = pal1, main = "Head Tails method"
  )
  # Put back the graphics settings saved in opar.
  par(opar)
}

if (run) {
  # Fixed style: the break points are supplied explicitly via fixedBreaks.
  print(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "fixed",
      fixedBreaks = c(15.57, 25, 50, 75, 100, 155.30)
    )
  )
}

## style: fixed
##   one of 3,921,225 possible partitions of this variable into 5 classes
##  [15.57,25)     [25,50)     [50,75)    [75,100) [100,155.3] 
##           6          34          42          14           6

if (run) {
  # Standard-deviation style, requesting n = 5.
  print(
    classIntervals(jenks71$jenks71, n = 5, style = "sd")
  )
}

## style: sd
##   one of 75,287,520 possible partitions of this variable into 6 classes
## [5.514407,30.89691) [30.89691,56.27941) [56.27941,81.66191) [81.66191,107.0444) 
##                  10                  47                  31                   9 
## [107.0444,132.4269) [132.4269,157.8094] 
##                   4                   1

if (run) {
  # Equal-interval style with five classes.
  print(
    classIntervals(jenks71$jenks71, n = 5, style = "equal")
  )
}

## style: equal
##   one of 3,921,225 possible partitions of this variable into 5 classes
##   [15.57,43.516)  [43.516,71.462)  [71.462,99.408) [99.408,127.354) 
##               35               44               17                4 
##  [127.354,155.3] 
##                2

if (run) {
  # Quantile style with five classes.
  print(
    classIntervals(jenks71$jenks71, n = 5, style = "quantile")
  )
}

## style: quantile
##   one of 3,921,225 possible partitions of this variable into 5 classes
##  [15.57,33.822) [33.822,50.114) [50.114,57.454) [57.454,73.368)  [73.368,155.3] 
##              21              20              20              20              21

if (run) {
  # Fix the seed, then print the k-means classification.
  set.seed(1)
  print(
    classIntervals(jenks71$jenks71, n = 5, style = "kmeans")
  )
}

## style: kmeans
##   one of 3,921,225 possible partitions of this variable into 5 classes
##  [15.57,36.905)  [36.905,53.33)  [53.33,72.185) [72.185,105.95)  [105.95,155.3] 
##              25              26              29              17               5

if (run) {
  # Same k-means call, but with intervalClosure = "right".
  set.seed(1)
  print(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "kmeans", intervalClosure = "right"
    )
  )
}

## style: kmeans
##   one of 3,921,225 possible partitions of this variable into 5 classes
##  [15.57,36.905]  (36.905,53.33]  (53.33,72.185] (72.185,105.95]  (105.95,155.3] 
##              25              26              29              17               5

if (run) {
  # Same k-means call, but with dataPrecision = 0.
  set.seed(1)
  print(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "kmeans", dataPrecision = 0
    )
  )
}

## style: kmeans
##   one of 3,921,225 possible partitions of this variable into 5 classes
##   [16,37)   [37,54)   [54,73)  [73,106) [106,156] 
##        25        26        29        17         5

if (run) {
  # cutlabels = FALSE is an argument to print(), not to classIntervals().
  set.seed(1)
  print(
    classIntervals(jenks71$jenks71, n = 5, style = "kmeans"),
    cutlabels = FALSE
  )
}

## style: kmeans
##   one of 3,921,225 possible partitions of this variable into 5 classes
##    under 36.905  36.905 - 53.33  53.33 - 72.185 72.185 - 105.95     over 105.95 
##              25              26              29              17               5

if (run) {
  # hclust style with method = "complete".
  print(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "hclust", method = "complete"
    )
  )
}

## style: hclust
##   one of 3,921,225 possible partitions of this variable into 5 classes
##   [15.57,54.81)  [54.81,74.405) [74.405,105.95)  [105.95,143.4)   [143.4,155.3] 
##              55              27              15               4               1

if (run) {
  # hclust style with method = "single".
  print(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "hclust", method = "single"
    )
  )
}

## style: hclust
##   one of 3,921,225 possible partitions of this variable into 5 classes
##  [15.57,20.995) [20.995,105.95)  [105.95,125.7)   [125.7,143.4)   [143.4,155.3] 
##               4              93               3               1               1

if (run) {
  # Fix the seed, then print the bclust classification.
  set.seed(1)
  print(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "bclust", verbose = FALSE
    )
  )
}

## style: bclust
##   one of 3,921,225 possible partitions of this variable into 5 classes
##    [15.57,43.3)   [43.3,82.175) [82.175,105.95)  [105.95,143.4)   [143.4,155.3] 
##              35              53               9               4               1

if (run) {
  # bclust style with hclust.method = "complete"; no seed is set in this chunk.
  print(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "bclust",
      hclust.method = "complete", verbose = FALSE
    )
  )
}

## style: bclust
##   one of 3,921,225 possible partitions of this variable into 5 classes
## [15.57,29.84) [29.84,54.81) [54.81,90.16) [90.16,125.7) [125.7,155.3] 
##            10            45            38             7             2

if (run) {
  # Fisher style with five classes.
  print(
    classIntervals(jenks71$jenks71, n = 5, style = "fisher")
  )
}

## style: fisher
##   one of 3,921,225 possible partitions of this variable into 5 classes
##    [15.57,43.3)    [43.3,61.36)  [61.36,78.475) [78.475,105.95)  [105.95,155.3] 
##              35              33              18              11               5

if (run) {
  # Jenks style with five classes.
  print(
    classIntervals(jenks71$jenks71, n = 5, style = "jenks")
  )
}

## style: jenks
##   one of 3,921,225 possible partitions of this variable into 5 classes
##  [15.57,41.2]  (41.2,60.66] (60.66,77.29] (77.29,100.1] (100.1,155.3] 
##            35            33            18            11             5

if (run) {
  # dpih style; no n is passed.
  print(
    classIntervals(jenks71$jenks71, style = "dpih")
  )
}

## style: dpih
##   one of 16,007,560,800 possible partitions of this variable into 8 classes
##    [15.57,31.70792) [31.70792,47.84584) [47.84584,63.98376) [63.98376,80.12169) 
##                  14                  25                  33                  15 
## [80.12169,96.25961) [96.25961,112.3975) [112.3975,128.5355) [128.5355,144.6734] 
##                   7                   4                   2                   2

if (run) {
  # dpih style with range.x set to c(0, 160).
  print(
    classIntervals(jenks71$jenks71, style = "dpih", range.x = c(0, 160))
  )
}

## style: dpih
##   one of 186,087,894,300 possible partitions of this variable into 9 classes
##        [0,16.26458) [16.26458,32.52917) [32.52917,48.79375) [48.79375,65.05834) 
##                   2                  16                  21                  34 
## [65.05834,81.32292) [81.32292,97.58751) [97.58751,113.8521) [113.8521,130.1167) 
##                  15                   8                   2                   2 
## [130.1167,146.3813] 
##                   2

if (run) {
  # headtails style, called without a thr argument.
  print(
    classIntervals(jenks71$jenks71, style = "headtails")
  )
}

## style: headtails
##   one of 100 possible partitions of this variable into 2 classes
## [15.57,56.27941) [56.27941,155.3] 
##               57               45

if (run) {
  # headtails style with thr = 0.45.
  print(
    classIntervals(jenks71$jenks71, style = "headtails", thr = 0.45)
  )
}

## style: headtails
##   one of 75,287,520 possible partitions of this variable into 6 classes
##    [15.57,56.27941) [56.27941,77.74533) [77.74533,100.5925)   [100.5925,126.98) 
##                  57                  29                  11                   3 
##      [126.98,143.4)       [143.4,155.3] 
##                   1                   1

# Small vector with repeated zeros, split into three classes (fisher style).
x <- c(0, 0, 0, 1, 2, 50)
print(
  classIntervals(x, n = 3, style = "fisher")
)

## style: fisher
##   one of 3 possible partitions of this variable into 3 classes
##  [0,0.5) [0.5,26)  [26,50] 
##        3        2        1

# Three classes on x again, this time with the jenks style.
print(
  classIntervals(x, n = 3, style = "jenks")
)

## style: jenks
##   one of 3 possible partitions of this variable into 3 classes
##  [0,0]  (0,2] (2,50] 
##      3      2      1

# The 'unique' argument of print() collapses the label of any class that
# holds a single value. That is most helpful for 'censored' variables,
# for example ones containing many zeros.

data_censored <- c(
  rep(0, 10),
  rnorm(100, mean = 20, sd = 1),
  rep(26, 10)
)
plot(density(data_censored))

cl2 <- classIntervals(data_censored, n = 5, style = "jenks", dataPrecision = 2)
print(cl2, unique = FALSE)

## style: jenks
##   one of 4,082,925 possible partitions of this variable into 5 classes
##         [0,0]     (0,18.91] (18.91,20.33] (20.33,23.15]    (23.15,26] 
##            10            21            43            36            10

# Print cl2 again, now with unique = TRUE.
print(cl2, unique = TRUE)

## style: jenks
##   one of 4,082,925 possible partitions of this variable into 5 classes
## Class found with one single (possibly repeated) value: changed label
##             0     (0,18.91] (18.91,20.33] (20.33,23.15]            26 
##            10            21            43            36            10

## Not run: 
# Seeded vector of 100,000 uniform draws, used by the calls that follow.
set.seed(1)
n <- 1e5
x <- runif(n)
# sd style on the large vector; the result is auto-printed.
classIntervals(x, n = 5, style = "sd")

## style: sd
## [-0.07925682,0.06546355)   [0.06546355,0.2101839)    [0.2101839,0.3549043) 
##                     6597                    14579                    14403 
##    [0.3549043,0.4996247)     [0.4996247,0.644345)     [0.644345,0.7890654) 
##                    14395                    14446                    14439 
##    [0.7890654,0.9337858)     [0.9337858,1.078506] 
##                    14463                     6678

# pretty style on the large vector.
classIntervals(x, n = 5, style = "pretty")

## style: pretty
##   [0,0.2) [0.2,0.4) [0.4,0.6) [0.6,0.8)   [0.8,1] 
##     20142     19936     19888     19993     20041

# equal style on the large vector.
classIntervals(x, n = 5, style = "equal")

## style: equal
## [3.895489e-06,0.1999925)    [0.1999925,0.3999812)    [0.3999812,0.5999698) 
##                    20141                    19935                    19888 
##    [0.5999698,0.7999584)    [0.7999584,0.9999471] 
##                    19991                    20045

# quantile style on the large vector.
classIntervals(x, n = 5, style = "quantile")

## style: quantile
## [3.895489e-06,0.1984408)    [0.1984408,0.3993007)    [0.3993007,0.6003913) 
##                    20000                    20000                    20000 
##    [0.6003913,0.8003984)    [0.8003984,0.9999471] 
##                    20000                    20000

# Because of sampling, the class intervals found differ slightly between runs.
classIntervals(x, n = 5, style = "kmeans")

## Warning in classIntervals(x, n = 5, style = "kmeans"): N is large, and some
## styles will run very slowly; sampling imposed

## style: kmeans
## [3.895489e-06,0.1986138)    [0.1986138,0.3990233)    [0.3990233,0.5997581) 
##                    20017                    19944                    19977 
##    [0.5997581,0.8000952)    [0.8000952,0.9999471] 
##                    20025                    20037

# fisher style on the large vector (first of three identical calls).
classIntervals(x, n = 5, style = "fisher")

## Warning in classIntervals(x, n = 5, style = "fisher"): N is large, and some
## styles will run very slowly; sampling imposed

## style: fisher
## [3.895489e-06,0.2010968)    [0.2010968,0.4025035)    [0.4025035,0.6055526) 
##                    20265                    20046                    20192 
##     [0.6055526,0.804706)     [0.804706,0.9999471] 
##                    19943                    19554

# fisher style on the large vector (second of three identical calls).
classIntervals(x, n = 5, style = "fisher")

## Warning in classIntervals(x, n = 5, style = "fisher"): N is large, and some
## styles will run very slowly; sampling imposed

## style: fisher
## [3.895489e-06,0.1961348)    [0.1961348,0.3955674)    [0.3955674,0.5937815) 
##                    19779                    19841                    19709 
##    [0.5937815,0.7946715)    [0.7946715,0.9999471] 
##                    20102                    20569

# fisher style on the large vector (third of three identical calls).
classIntervals(x, n = 5, style = "fisher")

## Warning in classIntervals(x, n = 5, style = "fisher"): N is large, and some
## styles will run very slowly; sampling imposed

## style: fisher
## [3.895489e-06,0.2019671)    [0.2019671,0.4060473)    [0.4060473,0.6084683) 
##                    20359                    20297                    20122 
##    [0.6084683,0.8057677)    [0.8057677,0.9999471] 
##                    19768                    19454

## End(Not run)
# TRUE when the units package could be attached; set_units() is called
# unqualified in the chunks guarded by this flag.
have_units <- require(units, quietly = TRUE)

## Warning: package 'units' was built under R version 3.5.3

## udunits system database from C:/Users/Diego/Documents/R/win-library/3.5/units/share/udunits

if (have_units) {
  # Sample 100 values from 1, 1.25, ..., 100 and tag them as km/h.
  set.seed(1)
  x_units <- set_units(
    sample(seq(1, 100, by = 0.25), size = 100),
    km / h
  )
  classIntervals(x_units, n = 5, style = "sd")
}

## style: sd
##   one of 14,887,031,544 possible partitions of this [km/h] variable into 8 classes
## [-4.418012,10.15212)  [10.15212,24.72224)  [24.72224,39.29237) 
##                    7                   12                   19 
##   [39.29237,53.8625)   [53.8625,68.43263)  [68.43263,83.00276) 
##                    8                   19                   14 
##  [83.00276,97.57288)   [97.57288,112.143] 
##                   17                    4

if (have_units) {
  # pretty style on the km/h vector.
  classIntervals(x_units, n = 5, style = "pretty")
}

## style: pretty
##   one of 3,764,376 possible partitions of this [km/h] variable into 5 classes
##   [0,20)  [20,40)  [40,60)  [60,80) [80,100] 
##       16       22       15       23       24

if (have_units) {
  # equal style on the km/h vector.
  classIntervals(x_units, n = 5, style = "equal")
}

## style: equal
##   one of 3,764,376 possible partitions of this [km/h] variable into 5 classes
##    [2,21.55) [21.55,41.1) [41.1,60.65) [60.65,80.2) [80.2,99.75] 
##           17           22           15           23           23

if (have_units) {
  # quantile style on the km/h vector.
  classIntervals(x_units, n = 5, style = "quantile")
}

## style: quantile
##   one of 3,764,376 possible partitions of this [km/h] variable into 5 classes
##      [2,25.7)  [25.7,42.55)  [42.55,64.3)  [64.3,84.65) [84.65,99.75] 
##            20            20            20            20            20

if (have_units) {
  # kmeans style on the km/h vector; no seed is set in this chunk.
  classIntervals(x_units, n = 5, style = "kmeans")
}

## style: kmeans
##   one of 3,764,376 possible partitions of this [km/h] variable into 5 classes
##      [2,23.875)  [23.875,46.75)  [46.75,68.375) [68.375,84.875)  [84.875,99.75] 
##              19              24              22              15              20

if (have_units) {
  # fisher style on the km/h vector.
  classIntervals(x_units, n = 5, style = "fisher")
}

## style: fisher
##   one of 3,764,376 possible partitions of this [km/h] variable into 5 classes
##      [2,23.875)  [23.875,46.75)  [46.75,68.375) [68.375,84.875)  [84.875,99.75] 
##              19              24              22              15              20

if (have_units) {
  # headtails style on the km/h vector; no n is passed.
  classIntervals(x_units, style = "headtails")
}

## style: headtails
##   one of 99 possible partitions of this [km/h] variable into 2 classes
##     [2,53.8625) [53.8625,99.75] 
##              46              54

# Date-time example: 100 distinct hourly time stamps drawn from the 501 hours
# starting now, plus six fixed breaks spaced 100 hours apart.
st <- Sys.time()
x_POSIXt <- sample(st + (0:500) * 3600, size = 100)
fx <- st + (0:5) * 3600 * 100
# fixed style on the date-times, using fx as the break points.
classIntervals(x_POSIXt, style = "fixed", fixedBreaks = fx)

## style: fixed
##   one of 3,764,376 possible partitions of this variable into 5 classes
## [2020-03-20 18:30:00,2020-03-24 22:30:00) 
##                                        18 
## [2020-03-24 22:30:00,2020-03-29 03:30:00) 
##                                        18 
## [2020-03-29 03:30:00,2020-04-02 07:30:00) 
##                                        26 
## [2020-04-02 07:30:00,2020-04-06 11:30:00) 
##                                        23 
## [2020-04-06 11:30:00,2020-04-10 15:30:00] 
##                                        15

# sd style on the date-times.
classIntervals(x_POSIXt, n = 5, style = "sd")

## style: sd
##   one of 156,849 possible partitions of this variable into 4 classes
## [2020-03-19 17:47:39,2020-03-25 11:54:26) 
##                                        22 
## [2020-03-25 11:54:26,2020-03-31 07:01:12) 
##                                        28 
## [2020-03-31 07:01:12,2020-04-06 01:07:58) 
##                                        32 
## [2020-04-06 01:07:58,2020-04-11 19:14:44] 
##                                        18

# pretty style on the date-times.
classIntervals(x_POSIXt, n = 5, style = "pretty")

## style: pretty
##   one of 3,764,376 possible partitions of this variable into 5 classes
## [2020-03-18 03:53:20,2020-03-23 22:46:40) 
##                                        11 
## [2020-03-23 22:46:40,2020-03-29 18:40:00) 
##                                        28 
## [2020-03-29 18:40:00,2020-04-04 13:33:20) 
##                                        34 
## [2020-04-04 13:33:20,2020-04-10 08:26:40) 
##                                        25 
## [2020-04-10 08:26:40,2020-04-16 03:20:00] 
##                                         2

# equal style on the date-times.
classIntervals(x_POSIXt, n = 5, style = "equal")

## style: equal
##   one of 3,764,376 possible partitions of this variable into 5 classes
## [2020-03-21 00:30:00,2020-03-25 02:54:00) 
##                                        18 
## [2020-03-25 02:54:00,2020-03-29 06:18:00) 
##                                        18 
## [2020-03-29 06:18:00,2020-04-02 08:42:00) 
##                                        26 
## [2020-04-02 08:42:00,2020-04-06 11:06:00) 
##                                        23 
## [2020-04-06 11:06:00,2020-04-10 13:30:00] 
##                                        15

# quantile style on the date-times.
classIntervals(x_POSIXt, n = 5, style = "quantile")

## style: quantile
##   one of 3,764,376 possible partitions of this variable into 5 classes
## [2020-03-21 00:30:00,2020-03-25 07:18:00) 
##                                        20 
## [2020-03-25 07:18:00,2020-03-29 21:06:00) 
##                                        20 
## [2020-03-29 21:06:00,2020-04-01 20:18:00) 
##                                        20 
## [2020-04-01 20:18:00,2020-04-05 18:06:00) 
##                                        20 
## [2020-04-05 18:06:00,2020-04-10 13:30:00] 
##                                        20

# kmeans style on the date-times.
classIntervals(x_POSIXt, n = 5, style = "kmeans")

## style: kmeans
##   one of 3,764,376 possible partitions of this variable into 5 classes
## [2020-03-21 00:30:00,2020-03-24 14:00:00) 
##                                        16 
## [2020-03-24 14:00:00,2020-03-28 16:30:00) 
##                                        19 
## [2020-03-28 16:30:00,2020-04-02 15:00:00) 
##                                        29 
## [2020-04-02 15:00:00,2020-04-06 23:30:00) 
##                                        22 
## [2020-04-06 23:30:00,2020-04-10 13:30:00] 
##                                        14

# fisher style on the date-times.
classIntervals(x_POSIXt, n = 5, style = "fisher")

## style: fisher
##   one of 3,764,376 possible partitions of this variable into 5 classes
## [2020-03-21 00:30:00,2020-03-24 14:00:00) 
##                                        16 
## [2020-03-24 14:00:00,2020-03-28 16:30:00) 
##                                        19 
## [2020-03-28 16:30:00,2020-04-02 15:00:00) 
##                                        29 
## [2020-04-02 15:00:00,2020-04-06 23:30:00) 
##                                        22 
## [2020-04-06 23:30:00,2020-04-10 13:30:00] 
##                                        14

# headtails style on the date-times; no n is passed.
classIntervals(x_POSIXt, style = "headtails")

## style: headtails
##   one of 99 possible partitions of this variable into 2 classes
## [2020-03-21 00:30:00,2020-03-31 07:01:12) 
##                                        50 
## [2020-03-31 07:01:12,2020-04-10 13:30:00] 
##                                        50

# The Head/Tails method suits right-skewed, heavy-tailed distributions.
set.seed(1234)
# Heavy-tailed samples ----
# Pareto sample (a = 7, b = 14), built from uniform draws
paretodist <- 7 / (1 - runif(1000))^(1 / 14)
# Log-normal sample
lognormdist <- rlnorm(1000)
# Weibull sample (shape 1, scale 5)
weibulldist <- rweibull(1000, shape = 1, scale = 5)

# Top row: headtails breaks; bottom row: fisher breaks with a fixed n,
# for the same three samples.
pal1 <- c("wheat1", "red3")
opar <- par(mfrow = c(2, 3))
plot(
  classIntervals(paretodist, style = "headtails"),
  pal = pal1, main = "HeadTails: Pareto Dist."
)
plot(
  classIntervals(lognormdist, style = "headtails"),
  pal = pal1, main = "HeadTails: LogNormal Dist."
)
plot(
  classIntervals(weibulldist, style = "headtails"),
  pal = pal1, main = "HeadTails: Weibull Dist."
)
plot(
  classIntervals(paretodist, n = 5, style = "fisher"),
  pal = pal1, main = "Fisher: Pareto Dist."
)
plot(
  classIntervals(lognormdist, n = 7, style = "fisher"),
  pal = pal1, main = "Fisher: LogNormal Dist."
)
plot(
  classIntervals(weibulldist, n = 4, style = "fisher"),
  pal = pal1, main = "Fisher: Weibull Dist."
)

# Put back the graphics settings saved in opar.
par(opar)


# Samples without heavy tails; thr should be increased for these ----

# Standard normal sample
normdist <- rnorm(1000)
# Left part of the normal sample (values below its mean), repeated twice
leftnorm <- rep(normdist[normdist < mean(normdist)], times = 2)
# Uniform sample
unifdist <- runif(1000)
# Top row: headtails called without thr; bottom row: the same samples
# with thr = 0.6.
opar <- par(mfrow = c(2, 3))
plot(
  classIntervals(normdist, style = "headtails"),
  pal = pal1, main = "Normal Dist."
)
plot(
  classIntervals(leftnorm, style = "headtails"),
  pal = pal1, main = "Truncated Normal Dist."
)
plot(
  classIntervals(unifdist, style = "headtails"),
  pal = pal1, main = "Uniform Dist."
)
# For distributions that are not heavy-tailed, thr should be increased.
plot(classIntervals(normdist, style = "headtails", thr = 0.6),
     pal = pal1,
     main = "Normal Dist. thr = .6")
plot(classIntervals(leftnorm, style = "headtails", thr = 0.6),
     pal = pal1,
     main = "Truncated Normal Distribution thr = .6")
plot(classIntervals(unifdist, style = "headtails", thr = 0.6),
     pal = pal1,
     main = "Uniform Distribution thr = .6")

# Put back the graphics settings saved in opar.
par(opar)