# Numeric overview (min, quartiles, mean, max) of the four USArrests variables.
summary(USArrests)
## Murder Assault UrbanPop Rape
## Min. : 0.800 Min. : 45.0 Min. :32.00 Min. : 7.30
## 1st Qu.: 4.075 1st Qu.:109.0 1st Qu.:54.50 1st Qu.:15.07
## Median : 7.250 Median :159.0 Median :66.00 Median :20.10
## Mean : 7.788 Mean :170.8 Mean :65.54 Mean :21.23
## 3rd Qu.:11.250 3rd Qu.:249.0 3rd Qu.:77.75 3rd Qu.:26.18
## Max. :17.400 Max. :337.0 Max. :91.00 Max. :46.00
# library() stops with an error when the package cannot be attached, whereas
# require() merely returns FALSE and lets the script fail later on.
library(graphics)
# Scatterplot matrix of every pair of variables, with a smoother per panel.
pairs(USArrests, panel = panel.smooth, main = "USArrests data")
library(SOMbrero)
## Loading required package: igraph
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
##
## ***********************************************************
##
## This is 'SOMbrero' package, v 1.2
##
## Citation details with citation('SOMbrero')
##
## Further information with help(SOMbrero)...
##
## Use sombreroGUI() to start the Graphical Interface.
##
## ***********************************************************
# Fix the RNG state so that the training run below is reproducible.
set.seed(255)
# Train a SOM on the four USArrests columns. verbose = TRUE prints the
# parameters and the progress; nb.save = 5 asks for five intermediate backups.
USArrests.som <- trainSOM(
  x.data = USArrests[, 1:4],
  verbose = TRUE,
  nb.save = 5
)
## Self-Organizing Map algorithm...
##
## Parameters of the SOM
##
## SOM mode : online
## SOM type : numeric
## Affectation type : standard
## Grid :
## Self-Organizing Map structure
##
## Features :
## topology : square
## x dimension : 5
## y dimension : 5
## distance type: euclidean
##
## Number of iterations : 250
## Number of intermediate backups : 5
## Initializing prototypes method : random
## Data pre-processing type : unitvar
## Neighbourhood type : gaussian
##
## 0 % done
## 10 % done
## 20 % done
## 30 % done
## 40 % done
## 50 % done
## 60 % done
## 70 % done
## 80 % done
## 90 % done
## 100 % done
# Display the complete data set: one row per state, four numeric variables.
USArrests
## Murder Assault UrbanPop Rape
## Alabama 13.2 236 58 21.2
## Alaska 10.0 263 48 44.5
## Arizona 8.1 294 80 31.0
## Arkansas 8.8 190 50 19.5
## California 9.0 276 91 40.6
## Colorado 7.9 204 78 38.7
## Connecticut 3.3 110 77 11.1
## Delaware 5.9 238 72 15.8
## Florida 15.4 335 80 31.9
## Georgia 17.4 211 60 25.8
## Hawaii 5.3 46 83 20.2
## Idaho 2.6 120 54 14.2
## Illinois 10.4 249 83 24.0
## Indiana 7.2 113 65 21.0
## Iowa 2.2 56 57 11.3
## Kansas 6.0 115 66 18.0
## Kentucky 9.7 109 52 16.3
## Louisiana 15.4 249 66 22.2
## Maine 2.1 83 51 7.8
## Maryland 11.3 300 67 27.8
## Massachusetts 4.4 149 85 16.3
## Michigan 12.1 255 74 35.1
## Minnesota 2.7 72 66 14.9
## Mississippi 16.1 259 44 17.1
## Missouri 9.0 178 70 28.2
## Montana 6.0 109 53 16.4
## Nebraska 4.3 102 62 16.5
## Nevada 12.2 252 81 46.0
## New Hampshire 2.1 57 56 9.5
## New Jersey 7.4 159 89 18.8
## New Mexico 11.4 285 70 32.1
## New York 11.1 254 86 26.1
## North Carolina 13.0 337 45 16.1
## North Dakota 0.8 45 44 7.3
## Ohio 7.3 120 75 21.4
## Oklahoma 6.6 151 68 20.0
## Oregon 4.9 159 67 29.3
## Pennsylvania 6.3 106 72 14.9
## Rhode Island 3.4 174 87 8.3
## South Carolina 14.4 279 48 22.5
## South Dakota 3.8 86 45 12.8
## Tennessee 13.2 188 59 26.9
## Texas 12.7 201 80 25.5
## Utah 3.2 120 80 22.9
## Vermont 2.2 48 32 11.2
## Virginia 8.5 156 63 20.7
## Washington 4.0 145 73 26.2
## West Virginia 5.7 81 39 9.3
## Wisconsin 2.6 53 66 10.8
## Wyoming 6.8 161 60 15.6
# Energy plot of the trained map.
plot(USArrests.som, what = "energy")
# Hitmap view of the observations on the grid.
plot(USArrests.som, what = "obs", type = "hitmap")
# Variable ranges, repeated here as a reference for reading the colour maps.
summary(USArrests)
## Murder Assault UrbanPop Rape
## Min. : 0.800 Min. : 45.0 Min. :32.00 Min. : 7.30
## 1st Qu.: 4.075 1st Qu.:109.0 1st Qu.:54.50 1st Qu.:15.07
## Median : 7.250 Median :159.0 Median :66.00 Median :20.10
## Mean : 7.788 Mean :170.8 Mean :65.54 Mean :21.23
## 3rd Qu.:11.250 3rd Qu.:249.0 3rd Qu.:77.75 3rd Qu.:26.18
## Max. :17.400 Max. :337.0 Max. :91.00 Max. :46.00
# Arrange the four per-variable colour maps on a 2 x 2 grid. par() returns the
# previous settings; they are restored afterwards so that later plots are not
# drawn into a leftover 2 x 2 layout.
old_par <- par(mfrow = c(2, 2))
plot(USArrests.som, what = "obs", type = "color", variable = 1,
     print.title = TRUE, main = "Murder")
plot(USArrests.som, what = "obs", type = "color", variable = 2,
     print.title = TRUE, main = "Assault")
plot(USArrests.som, what = "obs", type = "color", variable = 3,
     print.title = TRUE, main = "UrbanPop")
plot(USArrests.som, what = "obs", type = "color", variable = 4,
     print.title = TRUE, main = "Rape")
par(old_par)
# Prototypes drawn as line profiles.
plot(USArrests.som, what = "prototypes", type = "lines", print.title = TRUE)
# Observations shown as bar plots.
plot(USArrests.som, what = "obs", type = "barplot", print.title = TRUE)
# Observations shown as radar plots, with explicit key location and margins.
plot(USArrests.som, what = "obs", type = "radar",
     key.loc = c(-0.5, 5), mar = c(0, 10, 2, 0))
# Boxplot ----
# Observations shown as box plots.
plot(USArrests.som, what = "obs", type = "boxplot", print.title = TRUE)
# Row names of the data set, i.e. the state names.
rownames(USArrests)
## [1] "Alabama" "Alaska" "Arizona" "Arkansas"
## [5] "California" "Colorado" "Connecticut" "Delaware"
## [9] "Florida" "Georgia" "Hawaii" "Idaho"
## [13] "Illinois" "Indiana" "Iowa" "Kansas"
## [17] "Kentucky" "Louisiana" "Maine" "Maryland"
## [21] "Massachusetts" "Michigan" "Minnesota" "Mississippi"
## [25] "Missouri" "Montana" "Nebraska" "Nevada"
## [29] "New Hampshire" "New Jersey" "New Mexico" "New York"
## [33] "North Carolina" "North Dakota" "Ohio" "Oklahoma"
## [37] "Oregon" "Pennsylvania" "Rhode Island" "South Carolina"
## [41] "South Dakota" "Tennessee" "Texas" "Utah"
## [45] "Vermont" "Virginia" "Washington" "West Virginia"
## [49] "Wisconsin" "Wyoming"
# Observation names placed on the map; scale is forwarded to the word cloud.
plot(USArrests.som, what = "obs", type = "names", print.title = TRUE,
     scale = c(0.9, 0.5))
## Warning in wordcloud(scale = c(0.9, 0.5), colors = structure(c("#6A51A3", :
## New York could not be fit on page. It will not be plotted.
# Quality analysis of the projection ----
# Quality measures of the trained map; the printed list has a $topographic and
# a $quantization component.
quality(USArrests.som)
## $topographic
## [1] 0
##
## $quantization
## [1] 1.53119
# NOTE(review): superClass() is called here without choosing a number of
# clusters or a cut height, and the recorded run warns "no super clusters".
# Presumably a k or h argument is needed to obtain super-class rectangles;
# confirm against the SOMbrero documentation.
plot(superClass(USArrests.som))
## Warning in plot.somSC(superClass(USArrests.som)): Impossible to plot the rectangles: no super clusters.
# Open the documentation page of the HairEyeColor data set.
help(HairEyeColor)
# library() stops with an error when the package cannot be attached, whereas
# require() merely returns FALSE and lets the script fail later on.
library(graphics)
## Full mosaic
mosaicplot(HairEyeColor)
## Aggregate over sex (as in Snee's original data)
# Summing over margins 1 and 2 keeps the Hair x Eye table and collapses the
# third dimension.
x <- apply(HairEyeColor, c(1, 2), sum)
x
## Eye
## Hair Brown Blue Hazel Green
## Black 68 20 15 5
## Brown 119 84 54 29
## Red 26 17 14 14
## Blond 7 94 10 16
mosaicplot(x, main = "Relation between hair and eye color")
# Load the data set into the workspace, then total the counts over margin 2
# of the table (the eye colours shown in the output below).
data(HairEyeColor)
apply(HairEyeColor, MARGIN = 2, FUN = sum)
## Brown Blue Hazel Green
## 220 215 93 64
# Seed the RNG for a reproducible run (R reads 01091407 as the number 1091407).
set.seed(01091407)
# Korresp SOM on an 8 x 8 grid with chi2 scaling. It is fitted to the first
# slice of the table's third dimension only, not to the aggregated counts.
HairEyeColor.som <- trainSOM(
  x.data = HairEyeColor[, , 1],
  dimension = c(8, 8),
  type = "korresp",
  scaling = "chi2",
  nb.save = 10,
  radius.type = "letremy"
)
# Print a short description of the fitted map.
HairEyeColor.som
## Self-Organizing Map object...
## online learning, type: korresp
## 8 x 8 grid with square topology
## neighbourhood type: letremy
## distance type: letremy
# Independence test over all factors of the contingency table.
summary(HairEyeColor)
## Number of cases in table: 592
## Number of factors: 3
## Test for independence of all factors:
## Chisq = 164.92, df = 24, p-value = 5.321e-23
## Chi-squared approximation may be incorrect
# Energy plot of the trained map.
plot(HairEyeColor.som, what = "energy")
# Clustering component of the fitted map: one value per eye-colour and
# hair-colour level.
HairEyeColor.som$clustering
## Brown Blue Hazel Green Black Brown Red Blond
## 9 53 33 50 17 7 64 45
# Hitmap and names views of the observations.
plot(HairEyeColor.som, what = "obs", type = "hitmap")
plot(HairEyeColor.som, what = "obs", type = "names", scale = c(0.9, 0.5))
# Clustering ----
# Prototype line profiles, once with view = "r" and once with view = "c".
plot(HairEyeColor.som, what = "prototypes", type = "lines", view = "r",
     print.title = TRUE)
plot(HairEyeColor.som, what = "prototypes", type = "lines", view = "c",
     print.title = TRUE)
# Colour and 3d views of the prototypes.
plot(HairEyeColor.som, what = "prototypes", type = "color")
plot(HairEyeColor.som, what = "prototypes", type = "3d")
# Distance-based views of the prototypes.
plot(HairEyeColor.som, what = "prototypes", type = "poly.dist",
     print.title = TRUE)
plot(HairEyeColor.som, what = "prototypes", type = "umatrix",
     print.title = TRUE)
plot(HairEyeColor.som, what = "prototypes", type = "smooth.dist",
     print.title = TRUE)
plot(HairEyeColor.som, what = "prototypes", type = "mds")
plot(HairEyeColor.som, what = "prototypes", type = "grid.dist")
# Quality measures of the korresp map.
quality(HairEyeColor.som)
## $topographic
## [1] 0.25
##
## $quantization
## [1] 0.3439474