classIntervals
Example rendered
library(classInt)
# The jenks71 example data ships with the optional spData package. Only the
# data set is read (via data(..., package = "spData")), so it is enough to
# check that the package can be loaded; it does not need to be attached.
# `run` gates every later chunk that depends on jenks71.
run <- requireNamespace("spData", quietly = TRUE)
if (!run) {
  message("spData package needed for examples")
}
## Warning: package 'spData' was built under R version 3.5.3
# First page of plots, laid out on a 2 x 3 grid: one panel per
# classification style, coloured with the two-colour ramp in `pal1`.
# `opar` keeps the previous par() settings so they can be restored later.
if (run) {
  data(jenks71, package = "spData")
  pal1 <- c("wheat1", "red3")
  opar <- par(mfrow = c(2, 3))

  # Breaks supplied by hand.
  plot(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "fixed",
      fixedBreaks = c(15.57, 25, 50, 75, 100, 155.30)
    ),
    pal = pal1, main = "Fixed"
  )
  plot(
    classIntervals(jenks71$jenks71, n = 5, style = "sd"),
    pal = pal1, main = "Pretty standard deviations"
  )
  plot(
    classIntervals(jenks71$jenks71, n = 5, style = "equal"),
    pal = pal1, main = "Equal intervals"
  )
  plot(
    classIntervals(jenks71$jenks71, n = 5, style = "quantile"),
    pal = pal1, main = "Quantile"
  )

  # Seed is fixed immediately before the k-means panel.
  set.seed(1)
  plot(
    classIntervals(jenks71$jenks71, n = 5, style = "kmeans"),
    pal = pal1, main = "K-means"
  )
  plot(
    classIntervals(jenks71$jenks71, n = 5, style = "hclust", method = "complete"),
    pal = pal1, main = "Complete cluster"
  )
}

# Second page of plots: the remaining classification styles, followed by
# restoring the graphics parameters saved in `opar`.
if (run) {
  plot(
    classIntervals(jenks71$jenks71, n = 5, style = "hclust", method = "single"),
    pal = pal1, main = "Single cluster"
  )

  # Seed is fixed immediately before the bagged-clustering panel.
  set.seed(1)
  plot(
    classIntervals(jenks71$jenks71, n = 5, style = "bclust", verbose = FALSE),
    pal = pal1, main = "Bagged cluster"
  )
  plot(
    classIntervals(jenks71$jenks71, n = 5, style = "fisher"),
    pal = pal1, main = "Fisher's method"
  )
  plot(
    classIntervals(jenks71$jenks71, n = 5, style = "jenks"),
    pal = pal1, main = "Jenks' method"
  )

  # dpih and headtails are called without n.
  plot(
    classIntervals(jenks71$jenks71, style = "dpih"),
    pal = pal1, main = "dpih method"
  )
  plot(
    classIntervals(jenks71$jenks71, style = "headtails", thr = 1),
    pal = pal1, main = "Head Tails method"
  )

  par(opar)
}

# Printed class intervals for the fixed, sd, equal and quantile styles.
# Lines starting with "##" are the recorded console output.

# Hand-specified breaks.
if (run) {
  print(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "fixed",
      fixedBreaks = c(15.57, 25, 50, 75, 100, 155.30)
    )
  )
}
## style: fixed
## one of 3,921,225 possible partitions of this variable into 5 classes
## [15.57,25) [25,50) [50,75) [75,100) [100,155.3]
## 6 34 42 14 6

# Standard-deviation style; the recorded output has 6 classes for n = 5.
if (run) {
  print(
    classIntervals(jenks71$jenks71, n = 5, style = "sd")
  )
}
## style: sd
## one of 75,287,520 possible partitions of this variable into 6 classes
## [5.514407,30.89691) [30.89691,56.27941) [56.27941,81.66191) [81.66191,107.0444)
## 10 47 31 9
## [107.0444,132.4269) [132.4269,157.8094]
## 4 1

# Equal-width intervals.
if (run) {
  print(
    classIntervals(jenks71$jenks71, n = 5, style = "equal")
  )
}
## style: equal
## one of 3,921,225 possible partitions of this variable into 5 classes
## [15.57,43.516) [43.516,71.462) [71.462,99.408) [99.408,127.354)
## 35 44 17 4
## [127.354,155.3]
## 2

# Quantile breaks.
if (run) {
  print(
    classIntervals(jenks71$jenks71, n = 5, style = "quantile")
  )
}
## style: quantile
## one of 3,921,225 possible partitions of this variable into 5 classes
## [15.57,33.822) [33.822,50.114) [50.114,57.454) [57.454,73.368) [73.368,155.3]
## 21 20 20 20 21
# k-means breaks printed four ways. The seed is reset before every call so
# each variant is computed from the same random state.

# Default settings.
if (run) {
  set.seed(1)
  print(
    classIntervals(jenks71$jenks71, n = 5, style = "kmeans")
  )
}
## style: kmeans
## one of 3,921,225 possible partitions of this variable into 5 classes
## [15.57,36.905) [36.905,53.33) [53.33,72.185) [72.185,105.95) [105.95,155.3]
## 25 26 29 17 5

# Intervals closed on the right instead of the left.
if (run) {
  set.seed(1)
  print(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "kmeans", intervalClosure = "right"
    )
  )
}
## style: kmeans
## one of 3,921,225 possible partitions of this variable into 5 classes
## [15.57,36.905] (36.905,53.33] (53.33,72.185] (72.185,105.95] (105.95,155.3]
## 25 26 29 17 5

# dataPrecision = 0: the recorded breaks are whole numbers.
if (run) {
  set.seed(1)
  print(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "kmeans", dataPrecision = 0
    )
  )
}
## style: kmeans
## one of 3,921,225 possible partitions of this variable into 5 classes
## [16,37) [37,54) [54,73) [73,106) [106,156]
## 25 26 29 17 5

# cutlabels = FALSE is an argument to print(): labels use under/over wording.
if (run) {
  set.seed(1)
  print(
    classIntervals(jenks71$jenks71, n = 5, style = "kmeans"),
    cutlabels = FALSE
  )
}
## style: kmeans
## one of 3,921,225 possible partitions of this variable into 5 classes
## under 36.905 36.905 - 53.33 53.33 - 72.185 72.185 - 105.95 over 105.95
## 25 26 29 17 5
# Printed class intervals for the hierarchical and bagged clustering styles.

# Hierarchical clustering, complete linkage.
if (run) {
  print(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "hclust", method = "complete"
    )
  )
}
## style: hclust
## one of 3,921,225 possible partitions of this variable into 5 classes
## [15.57,54.81) [54.81,74.405) [74.405,105.95) [105.95,143.4) [143.4,155.3]
## 55 27 15 4 1

# Hierarchical clustering, single linkage.
if (run) {
  print(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "hclust", method = "single"
    )
  )
}
## style: hclust
## one of 3,921,225 possible partitions of this variable into 5 classes
## [15.57,20.995) [20.995,105.95) [105.95,125.7) [125.7,143.4) [143.4,155.3]
## 4 93 3 1 1

# Bagged clustering with a fixed seed.
if (run) {
  set.seed(1)
  print(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "bclust", verbose = FALSE
    )
  )
}
## style: bclust
## one of 3,921,225 possible partitions of this variable into 5 classes
## [15.57,43.3) [43.3,82.175) [82.175,105.95) [105.95,143.4) [143.4,155.3]
## 35 53 9 4 1

# Bagged clustering again, passing hclust.method = "complete". The seed is
# not reset here, so the random state carries over from the previous chunk.
if (run) {
  print(
    classIntervals(
      jenks71$jenks71,
      n = 5, style = "bclust",
      hclust.method = "complete", verbose = FALSE
    )
  )
}
## style: bclust
## one of 3,921,225 possible partitions of this variable into 5 classes
## [15.57,29.84) [29.84,54.81) [54.81,90.16) [90.16,125.7) [125.7,155.3]
## 10 45 38 7 2
# Printed class intervals for the fisher, jenks, dpih and headtails styles.

# Fisher: recorded intervals are closed on the left.
if (run) {
  print(
    classIntervals(jenks71$jenks71, n = 5, style = "fisher")
  )
}
## style: fisher
## one of 3,921,225 possible partitions of this variable into 5 classes
## [15.57,43.3) [43.3,61.36) [61.36,78.475) [78.475,105.95) [105.95,155.3]
## 35 33 18 11 5

# Jenks: same class counts as fisher, recorded intervals closed on the right.
if (run) {
  print(
    classIntervals(jenks71$jenks71, n = 5, style = "jenks")
  )
}
## style: jenks
## one of 3,921,225 possible partitions of this variable into 5 classes
## [15.57,41.2] (41.2,60.66] (60.66,77.29] (77.29,100.1] (100.1,155.3]
## 35 33 18 11 5

# dpih is called without n.
if (run) {
  print(
    classIntervals(jenks71$jenks71, style = "dpih")
  )
}
## style: dpih
## one of 16,007,560,800 possible partitions of this variable into 8 classes
## [15.57,31.70792) [31.70792,47.84584) [47.84584,63.98376) [63.98376,80.12169)
## 14 25 33 15
## [80.12169,96.25961) [96.25961,112.3975) [112.3975,128.5355) [128.5355,144.6734]
## 7 4 2 2

# dpih with an explicit range.x of 0 to 160.
if (run) {
  print(
    classIntervals(jenks71$jenks71, style = "dpih", range.x = c(0, 160))
  )
}
## style: dpih
## one of 186,087,894,300 possible partitions of this variable into 9 classes
## [0,16.26458) [16.26458,32.52917) [32.52917,48.79375) [48.79375,65.05834)
## 15 8 2 2
## [65.05834,81.32292) [81.32292,97.58751) [97.58751,113.8521) [113.8521,130.1167)
## 15 8 2 2
## [130.1167,146.3813]
## 2

# headtails with its default thr.
if (run) {
  print(
    classIntervals(jenks71$jenks71, style = "headtails")
  )
}
## style: headtails
## one of 100 possible partitions of this variable into 2 classes
## [15.57,56.27941) [56.27941,155.3]
## 57 45

# headtails with thr = 0.45; the recorded output has 6 classes instead of 2.
if (run) {
  print(
    classIntervals(jenks71$jenks71, style = "headtails", thr = 0.45)
  )
}
## style: headtails
## one of 75,287,520 possible partitions of this variable into 6 classes
## [15.57,56.27941) [56.27941,77.74533) [77.74533,100.5925) [100.5925,126.98)
## 57 29 11 3
## [126.98,143.4) [143.4,155.3]
## 1 1
# Tiny vector with repeated zeros and one large value, classified into
# three classes with both fisher and jenks for comparison.
x <- c(0, 0, 0, 1, 2, 50)

print(
  classIntervals(x, n = 3, style = "fisher")
)
## style: fisher
## one of 3 possible partitions of this variable into 3 classes
## [0,0.5) [0.5,26) [26,50]
## 3 2 1

print(
  classIntervals(x, n = 3, style = "jenks")
)
## style: jenks
## one of 3 possible partitions of this variable into 3 classes
## [0,0] (0,2] (2,50]
## 3 2 1
# Argument 'unique' will collapse the label of classes containing a
# single value. This is particularly useful for 'censored' variables
# that contain for example many zeros.
# The sample below has 10 zeros, 100 normal draws around 20, and 10 values
# of 26, so the lowest and highest classes each hold one repeated value.
data_censored <- c(rep(0, 10), rnorm(100, mean = 20, sd = 1), rep(26, 10))
plot(density(data_censored))

cl2 <- classIntervals(
  data_censored,
  n = 5, style = "jenks", dataPrecision = 2
)

# unique = FALSE: every class keeps its interval label.
print(cl2, unique = FALSE)
## style: jenks
## one of 4,082,925 possible partitions of this variable into 5 classes
## [0,0] (0,18.91] (18.91,20.33] (20.33,23.15] (23.15,26]
## 10 21 43 36 10

# unique = TRUE: single-value classes are labelled with the value itself.
# This call produces the second recorded output below.
print(cl2, unique = TRUE)
## style: jenks
## one of 4,082,925 possible partitions of this variable into 5 classes
## Class found with one single (possibly repeated) value: changed label
## 0 (0,18.91] (18.91,20.33] (20.33,23.15] 26
## 10 21 43 36 10
## Not run:
# Large-sample examples: 100,000 uniform draws classified with several
# styles. The recorded warnings show that kmeans and fisher impose sampling
# at this N.
set.seed(1)
n <- 1e5
x <- runif(n)

classIntervals(x, n = 5, style = "sd")
## style: sd
## [-0.07925682,0.06546355) [0.06546355,0.2101839) [0.2101839,0.3549043)
## 6597 14579 14403
## [0.3549043,0.4996247) [0.4996247,0.644345) [0.644345,0.7890654)
## 14395 14446 14439
## [0.7890654,0.9337858) [0.9337858,1.078506]
## 14463 6678

classIntervals(x, n = 5, style = "pretty")
## style: pretty
## [0,0.2) [0.2,0.4) [0.4,0.6) [0.6,0.8) [0.8,1]
## 20142 19936 19888 19993 20041

classIntervals(x, n = 5, style = "equal")
## style: equal
## [3.895489e-06,0.1999925) [0.1999925,0.3999812) [0.3999812,0.5999698)
## 20141 19935 19888
## [0.5999698,0.7999584) [0.7999584,0.9999471]
## 19991 20045

classIntervals(x, n = 5, style = "quantile")
## style: quantile
## [3.895489e-06,0.1984408) [0.1984408,0.3993007) [0.3993007,0.6003913)
## 20000 20000 20000
## [0.6003913,0.8003984) [0.8003984,0.9999471]
## 20000 20000

# the class intervals found vary a little because of sampling
classIntervals(x, n = 5, style = "kmeans")
## Warning in classIntervals(x, n = 5, style = "kmeans"): N is large, and some
## styles will run very slowly; sampling imposed
## style: kmeans
## [3.895489e-06,0.1986138) [0.1986138,0.3990233) [0.3990233,0.5997581)
## 20017 19944 19977
## [0.5997581,0.8000952) [0.8000952,0.9999471]
## 20025 20037

# The same fisher call is repeated three times; the recorded breaks differ
# slightly between runs.
classIntervals(x, n = 5, style = "fisher")
## Warning in classIntervals(x, n = 5, style = "fisher"): N is large, and some
## styles will run very slowly; sampling imposed
## style: fisher
## [3.895489e-06,0.2010968) [0.2010968,0.4025035) [0.4025035,0.6055526)
## 20265 20046 20192
## [0.6055526,0.804706) [0.804706,0.9999471]
## 19943 19554

classIntervals(x, n = 5, style = "fisher")
## Warning in classIntervals(x, n = 5, style = "fisher"): N is large, and some
## styles will run very slowly; sampling imposed
## style: fisher
## [3.895489e-06,0.1961348) [0.1961348,0.3955674) [0.3955674,0.5937815)
## 19779 19841 19709
## [0.5937815,0.7946715) [0.7946715,0.9999471]
## 20102 20569

classIntervals(x, n = 5, style = "fisher")
## Warning in classIntervals(x, n = 5, style = "fisher"): N is large, and some
## styles will run very slowly; sampling imposed
## style: fisher
## [3.895489e-06,0.2019671) [0.2019671,0.4060473) [0.4060473,0.6084683)
## 20359 20297 20122
## [0.6084683,0.8057677) [0.8057677,0.9999471]
## 19768 19454
## End(Not run)
# TRUE only when the optional units package could be attached; the units
# examples below are skipped otherwise.
have_units <- isTRUE(require(units, quietly = TRUE))
## Warning: package 'units' was built under R version 3.5.3
## udunits system database from C:/Users/Diego/Documents/R/win-library/3.5/units/share/udunits
# Class intervals for a variable carrying units: 100 values sampled from
# seq(1, 100, 0.25) and tagged as km/h. The recorded headers mention the
# unit ("this [km/h] variable").
if (have_units) {
  set.seed(1)
  speed_grid <- seq(1, 100, 0.25)
  x_units <- set_units(sample(speed_grid, 100), km/h)
  classIntervals(x_units, n = 5, style = "sd")
}
## style: sd
## one of 14,887,031,544 possible partitions of this [km/h] variable into 8 classes
## [-4.418012,10.15212) [10.15212,24.72224) [24.72224,39.29237)
## 7 12 19
## [39.29237,53.8625) [53.8625,68.43263) [68.43263,83.00276)
## 8 19 14
## [83.00276,97.57288) [97.57288,112.143]
## 17 4

if (have_units) {
  classIntervals(x_units, n = 5, style = "pretty")
}
## style: pretty
## one of 3,764,376 possible partitions of this [km/h] variable into 5 classes
## [0,20) [20,40) [40,60) [60,80) [80,100]
## 16 22 15 23 24

if (have_units) {
  classIntervals(x_units, n = 5, style = "equal")
}
## style: equal
## one of 3,764,376 possible partitions of this [km/h] variable into 5 classes
## [2,21.55) [21.55,41.1) [41.1,60.65) [60.65,80.2) [80.2,99.75]
## 17 22 15 23 23

if (have_units) {
  classIntervals(x_units, n = 5, style = "quantile")
}
## style: quantile
## one of 3,764,376 possible partitions of this [km/h] variable into 5 classes
## [2,25.7) [25.7,42.55) [42.55,64.3) [64.3,84.65) [84.65,99.75]
## 20 20 20 20 20

if (have_units) {
  classIntervals(x_units, n = 5, style = "kmeans")
}
## style: kmeans
## one of 3,764,376 possible partitions of this [km/h] variable into 5 classes
## [2,23.875) [23.875,46.75) [46.75,68.375) [68.375,84.875) [84.875,99.75]
## 19 24 22 15 20

if (have_units) {
  classIntervals(x_units, n = 5, style = "fisher")
}
## style: fisher
## one of 3,764,376 possible partitions of this [km/h] variable into 5 classes
## [2,23.875) [23.875,46.75) [46.75,68.375) [68.375,84.875) [84.875,99.75]
## 19 24 22 15 20

# headtails is called without n.
if (have_units) {
  classIntervals(x_units, style = "headtails")
}
## style: headtails
## one of 99 possible partitions of this [km/h] variable into 2 classes
## [2,53.8625) [53.8625,99.75]
## 46 54
# Class intervals for date-times. The data are 100 timestamps sampled from
# an hourly sequence covering the 500 hours after the current time; `fx`
# holds six fixed breaks spaced 100 hours apart from the same start.
# Because the start is Sys.time(), the recorded dates depend on when the
# examples were run.
st <- Sys.time()
hourly_grid <- st + ((0:500) * 3600)
x_POSIXt <- sample(hourly_grid, 100)
fx <- st + ((0:5) * 3600) * 100

classIntervals(x_POSIXt, style = "fixed", fixedBreaks = fx)
## style: fixed
## one of 3,764,376 possible partitions of this variable into 5 classes
## [2020-03-20 18:30:00,2020-03-24 22:30:00)
## 18
## [2020-03-24 22:30:00,2020-03-29 03:30:00)
## 18
## [2020-03-29 03:30:00,2020-04-02 07:30:00)
## 26
## [2020-04-02 07:30:00,2020-04-06 11:30:00)
## 23
## [2020-04-06 11:30:00,2020-04-10 15:30:00]
## 15

classIntervals(x_POSIXt, n = 5, style = "sd")
## style: sd
## one of 156,849 possible partitions of this variable into 4 classes
## [2020-03-19 17:47:39,2020-03-25 11:54:26)
## 22
## [2020-03-25 11:54:26,2020-03-31 07:01:12)
## 28
## [2020-03-31 07:01:12,2020-04-06 01:07:58)
## 32
## [2020-04-06 01:07:58,2020-04-11 19:14:44]
## 18

classIntervals(x_POSIXt, n = 5, style = "pretty")
## style: pretty
## one of 3,764,376 possible partitions of this variable into 5 classes
## [2020-03-18 03:53:20,2020-03-23 22:46:40)
## 11
## [2020-03-23 22:46:40,2020-03-29 18:40:00)
## 28
## [2020-03-29 18:40:00,2020-04-04 13:33:20)
## 34
## [2020-04-04 13:33:20,2020-04-10 08:26:40)
## 25
## [2020-04-10 08:26:40,2020-04-16 03:20:00]
## 2

classIntervals(x_POSIXt, n = 5, style = "equal")
## style: equal
## one of 3,764,376 possible partitions of this variable into 5 classes
## [2020-03-21 00:30:00,2020-03-25 02:54:00)
## 18
## [2020-03-25 02:54:00,2020-03-29 06:18:00)
## 18
## [2020-03-29 06:18:00,2020-04-02 08:42:00)
## 26
## [2020-04-02 08:42:00,2020-04-06 11:06:00)
## 23
## [2020-04-06 11:06:00,2020-04-10 13:30:00]
## 15

classIntervals(x_POSIXt, n = 5, style = "quantile")
## style: quantile
## one of 3,764,376 possible partitions of this variable into 5 classes
## [2020-03-21 00:30:00,2020-03-25 07:18:00)
## 20
## [2020-03-25 07:18:00,2020-03-29 21:06:00)
## 20
## [2020-03-29 21:06:00,2020-04-01 20:18:00)
## 20
## [2020-04-01 20:18:00,2020-04-05 18:06:00)
## 20
## [2020-04-05 18:06:00,2020-04-10 13:30:00]
## 20

classIntervals(x_POSIXt, n = 5, style = "kmeans")
## style: kmeans
## one of 3,764,376 possible partitions of this variable into 5 classes
## [2020-03-21 00:30:00,2020-03-24 14:00:00)
## 16
## [2020-03-24 14:00:00,2020-03-28 16:30:00)
## 19
## [2020-03-28 16:30:00,2020-04-02 15:00:00)
## 29
## [2020-04-02 15:00:00,2020-04-06 23:30:00)
## 22
## [2020-04-06 23:30:00,2020-04-10 13:30:00]
## 14

classIntervals(x_POSIXt, n = 5, style = "fisher")
## style: fisher
## one of 3,764,376 possible partitions of this variable into 5 classes
## [2020-03-21 00:30:00,2020-03-24 14:00:00)
## 16
## [2020-03-24 14:00:00,2020-03-28 16:30:00)
## 19
## [2020-03-28 16:30:00,2020-04-02 15:00:00)
## 29
## [2020-04-02 15:00:00,2020-04-06 23:30:00)
## 22
## [2020-04-06 23:30:00,2020-04-10 13:30:00]
## 14

# headtails is called without n.
classIntervals(x_POSIXt, style = "headtails")
## style: headtails
## one of 99 possible partitions of this variable into 2 classes
## [2020-03-21 00:30:00,2020-03-31 07:01:12)
## 50
## [2020-03-31 07:01:12,2020-04-10 13:30:00]
## 50
# Head Tails method is suitable for right-sided heavy-tailed distributions
# Three samples of 1000 draws each are generated from a fixed seed.
set.seed(1234)

# Heavy tails-----
# Pareto distribution (a = 7, b = 14), built by transforming uniform draws
paretodist <- 7 / (1 - runif(n = 1000)) ^ (1 / 14)

# Log-normal with default parameters
lognormdist <- rlnorm(n = 1000)

# Weibull with shape 1 and scale 5
weibulldist <- rweibull(n = 1000, shape = 1, scale = 5)
# 2 x 3 grid: top row uses the headtails style on each heavy-tailed sample,
# bottom row uses fisher on the same samples with a hand-picked n.
pal1 <- c("wheat1", "red3")
opar <- par(mfrow = c(2, 3))

# Top row: headtails
plot(
  classIntervals(paretodist, style = "headtails"),
  pal = pal1, main = "HeadTails: Pareto Dist."
)
plot(
  classIntervals(lognormdist, style = "headtails"),
  pal = pal1, main = "HeadTails: LogNormal Dist."
)
plot(
  classIntervals(weibulldist, style = "headtails"),
  pal = pal1, main = "HeadTails: Weibull Dist."
)

# Bottom row: fisher
plot(
  classIntervals(paretodist, n = 5, style = "fisher"),
  pal = pal1, main = "Fisher: Pareto Dist."
)
plot(
  classIntervals(lognormdist, n = 7, style = "fisher"),
  pal = pal1, main = "Fisher: LogNormal Dist."
)
plot(
  classIntervals(weibulldist, n = 4, style = "fisher"),
  pal = pal1, main = "Fisher: Weibull Dist."
)

# Put the graphics parameters back as they were.
par(opar)
# Non heavy tails, thr should be increased-----
# Standard normal sample
normdist <- rnorm(n = 1000)

# Left-tailed truncated normal: the draws below the sample mean, repeated
# twice
leftnorm <- rep(normdist[normdist < mean(normdist)], times = 2)

# Uniform sample on [0, 1]
unifdist <- runif(n = 1000)
# 2 x 3 grid for the non-heavy-tailed samples: top row uses headtails with
# its default thr, bottom row repeats the same samples with thr = .6.
opar <- par(mfrow = c(2, 3))

# Top row: default thr
plot(
  classIntervals(normdist, style = "headtails"),
  pal = pal1,
  main = "Normal Dist."
)
plot(
  classIntervals(leftnorm, style = "headtails"),
  pal = pal1,
  main = "Truncated Normal Dist."
)
plot(
  classIntervals(unifdist, style = "headtails"),
  pal = pal1,
  main = "Uniform Dist."
)

# thr should be increased for non heavy-tailed distributions
plot(
  classIntervals(normdist, style = "headtails", thr = .6),
  pal = pal1,
  main = "Normal Dist. thr = .6"
)
plot(
  classIntervals(leftnorm, style = "headtails", thr = .6),
  pal = pal1,
  main = "Truncated Normal Distribution thr = .6"
)
plot(
  classIntervals(unifdist, style = "headtails", thr = .6),
  pal = pal1,
  main = "Uniform Distribution thr = .6"
)

# Restore the graphics parameters saved above, as the earlier plotting
# sections do, so the 2 x 3 layout does not leak into later plots.
par(opar)
