Analysis of the USArrests dataset

# Per-variable summary statistics (min, quartiles, mean, max) of USArrests.
summary(USArrests)
##      Murder          Assault         UrbanPop          Rape      
##  Min.   : 0.800   Min.   : 45.0   Min.   :32.00   Min.   : 7.30  
##  1st Qu.: 4.075   1st Qu.:109.0   1st Qu.:54.50   1st Qu.:15.07  
##  Median : 7.250   Median :159.0   Median :66.00   Median :20.10  
##  Mean   : 7.788   Mean   :170.8   Mean   :65.54   Mean   :21.23  
##  3rd Qu.:11.250   3rd Qu.:249.0   3rd Qu.:77.75   3rd Qu.:26.18  
##  Max.   :17.400   Max.   :337.0   Max.   :91.00   Max.   :46.00
# library() stops with an error when the package cannot be attached, whereas
# require() only returns FALSE and lets the script carry on without it.
library(graphics)
# Scatterplot matrix of every pair of variables; panel.smooth adds a smoothed
# trend line to each panel.
pairs(USArrests, panel = panel.smooth, main = "USArrests data")

library(SOMbrero)
## Loading required package: igraph
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
## 
## ***********************************************************
## 
##       This is 'SOMbrero' package, v 1.2
## 
## Citation details with citation('SOMbrero')
## 
## Further information with help(SOMbrero)...
## 
## Use sombreroGUI() to start the Graphical Interface.
## 
## ***********************************************************
# Fix the RNG seed so that the training run can be reproduced.
set.seed(255)
# Train the SOM on the four USArrests columns, reporting progress while it
# runs (verbose) and keeping 5 intermediate backups (nb.save).
USArrests.som <- trainSOM(
  x.data = USArrests[, 1:4],
  verbose = TRUE,
  nb.save = 5
)
## Self-Organizing Map algorithm...
## 
##   Parameters of the SOM
## 
##     SOM mode                       :  online 
##     SOM type                       :  numeric 
##     Affectation type               :  standard 
##     Grid                           : 
##       Self-Organizing Map structure
## 
##         Features   :
##            topology     :  square 
##            x dimension  :  5 
##            y dimension  :  5 
##            distance type:  euclidean 
## 
##     Number of iterations           :  250 
##     Number of intermediate backups :  5 
##     Initializing prototypes method :  random 
##     Data pre-processing type       :  unitvar 
##     Neighbourhood type             :  gaussian 
## 
## 0 % done
## 10 % done
## 20 % done
## 30 % done
## 40 % done
## 50 % done
## 60 % done
## 70 % done
## 80 % done
## 90 % done
## 100 % done
# Print the full data set: one row per state, four numeric columns.
USArrests
##                Murder Assault UrbanPop Rape
## Alabama          13.2     236       58 21.2
## Alaska           10.0     263       48 44.5
## Arizona           8.1     294       80 31.0
## Arkansas          8.8     190       50 19.5
## California        9.0     276       91 40.6
## Colorado          7.9     204       78 38.7
## Connecticut       3.3     110       77 11.1
## Delaware          5.9     238       72 15.8
## Florida          15.4     335       80 31.9
## Georgia          17.4     211       60 25.8
## Hawaii            5.3      46       83 20.2
## Idaho             2.6     120       54 14.2
## Illinois         10.4     249       83 24.0
## Indiana           7.2     113       65 21.0
## Iowa              2.2      56       57 11.3
## Kansas            6.0     115       66 18.0
## Kentucky          9.7     109       52 16.3
## Louisiana        15.4     249       66 22.2
## Maine             2.1      83       51  7.8
## Maryland         11.3     300       67 27.8
## Massachusetts     4.4     149       85 16.3
## Michigan         12.1     255       74 35.1
## Minnesota         2.7      72       66 14.9
## Mississippi      16.1     259       44 17.1
## Missouri          9.0     178       70 28.2
## Montana           6.0     109       53 16.4
## Nebraska          4.3     102       62 16.5
## Nevada           12.2     252       81 46.0
## New Hampshire     2.1      57       56  9.5
## New Jersey        7.4     159       89 18.8
## New Mexico       11.4     285       70 32.1
## New York         11.1     254       86 26.1
## North Carolina   13.0     337       45 16.1
## North Dakota      0.8      45       44  7.3
## Ohio              7.3     120       75 21.4
## Oklahoma          6.6     151       68 20.0
## Oregon            4.9     159       67 29.3
## Pennsylvania      6.3     106       72 14.9
## Rhode Island      3.4     174       87  8.3
## South Carolina   14.4     279       48 22.5
## South Dakota      3.8      86       45 12.8
## Tennessee        13.2     188       59 26.9
## Texas            12.7     201       80 25.5
## Utah              3.2     120       80 22.9
## Vermont           2.2      48       32 11.2
## Virginia          8.5     156       63 20.7
## Washington        4.0     145       73 26.2
## West Virginia     5.7      81       39  9.3
## Wisconsin         2.6      53       66 10.8
## Wyoming           6.8     161       60 15.6
# Energy plot of the trained map.
plot(USArrests.som, what = "energy")

# Hitmap of the observations on the map.
plot(USArrests.som, what = "obs", type = "hitmap")

summary(USArrests)
##      Murder          Assault         UrbanPop          Rape      
##  Min.   : 0.800   Min.   : 45.0   Min.   :32.00   Min.   : 7.30  
##  1st Qu.: 4.075   1st Qu.:109.0   1st Qu.:54.50   1st Qu.:15.07  
##  Median : 7.250   Median :159.0   Median :66.00   Median :20.10  
##  Mean   : 7.788   Mean   :170.8   Mean   :65.54   Mean   :21.23  
##  3rd Qu.:11.250   3rd Qu.:249.0   3rd Qu.:77.75   3rd Qu.:26.18  
##  Max.   :17.400   Max.   :337.0   Max.   :91.00   Max.   :46.00
# Draw one colour map per variable on a 2 x 2 layout. par() returns the
# previous settings; they are restored once the four maps are drawn so the
# layout does not leak into the plots that follow.
old_par <- par(mfrow = c(2, 2))
plot(USArrests.som, what = "obs", type = "color", variable = 1,
     print.title = TRUE, main = "Murder")
plot(USArrests.som, what = "obs", type = "color", variable = 2,
     print.title = TRUE, main = "Assault")
plot(USArrests.som, what = "obs", type = "color", variable = 3,
     print.title = TRUE, main = "UrbanPop")
plot(USArrests.som, what = "obs", type = "color", variable = 4,
     print.title = TRUE, main = "Rape")
par(old_par)

# Prototype profiles drawn as lines.
plot(USArrests.som, what = "prototypes", type = "lines", print.title = TRUE)

# Observation-level views of the same map: bar plots, then radar plots.
plot(USArrests.som, what = "obs", type = "barplot", print.title = TRUE)

plot(USArrests.som, what = "obs", type = "radar", key.loc = c(-0.5, 5),
     mar = c(0, 10, 2, 0))

Boxplot

# Box plots of the observations on the map.
plot(
  USArrests.som,
  what = "obs",
  type = "boxplot",
  print.title = TRUE
)

# Row names (state names) used as labels in the "names" plot below.
rownames(USArrests)
##  [1] "Alabama"        "Alaska"         "Arizona"        "Arkansas"      
##  [5] "California"     "Colorado"       "Connecticut"    "Delaware"      
##  [9] "Florida"        "Georgia"        "Hawaii"         "Idaho"         
## [13] "Illinois"       "Indiana"        "Iowa"           "Kansas"        
## [17] "Kentucky"       "Louisiana"      "Maine"          "Maryland"      
## [21] "Massachusetts"  "Michigan"       "Minnesota"      "Mississippi"   
## [25] "Missouri"       "Montana"        "Nebraska"       "Nevada"        
## [29] "New Hampshire"  "New Jersey"     "New Mexico"     "New York"      
## [33] "North Carolina" "North Dakota"   "Ohio"           "Oklahoma"      
## [37] "Oregon"         "Pennsylvania"   "Rhode Island"   "South Carolina"
## [41] "South Dakota"   "Tennessee"      "Texas"          "Utah"          
## [45] "Vermont"        "Virginia"       "Washington"     "West Virginia" 
## [49] "Wisconsin"      "Wyoming"
# NOTE(review): the recorded run warns that "New York" could not be fit on the
# page, so that label is missing from the figure; a smaller `scale` may be
# needed -- confirm by re-running.
plot(
  USArrests.som,
  what = "obs",
  type = "names",
  print.title = TRUE,
  scale = c(0.9, 0.5)
)
## Warning in wordcloud(scale = c(0.9, 0.5), colors = structure(c("#6A51A3", :
## New York could not be fit on page. It will not be plotted.

Quality analysis of projection

# Quality criteria of the trained map; the result holds a topographic and a
# quantization component.
quality(USArrests.som)
## $topographic
## [1] 0
## 
## $quantization
## [1] 1.53119
# NOTE(review): the recorded run warns "Impossible to plot the rectangles: no
# super clusters" for this call. Presumably superClass() needs a number of
# clusters (k) or a cutting height (h) for super clusters to be defined --
# confirm against the SOMbrero documentation.
plot(superClass(USArrests.som))
## Warning in plot.somSC(superClass(USArrests.som)): Impossible to plot the rectangles: no super clusters.

SOM for Contingency Tables (HairEyeColor)

# Show the documentation page of the HairEyeColor data set.
help(HairEyeColor)
# library() stops with an error when the package cannot be attached, whereas
# require() only returns FALSE and lets the script carry on without it.
library(graphics)
## Full mosaic
mosaicplot(HairEyeColor)

## Aggregate over sex (as in Snee's original data)
x <- apply(HairEyeColor, c(1, 2), sum)
x
##        Eye
## Hair    Brown Blue Hazel Green
##   Black    68   20    15     5
##   Brown   119   84    54    29
##   Red      26   17    14    14
##   Blond     7   94    10    16
mosaicplot(x, main = "Relation between hair and eye color")

# NOTE(review): HairEyeColor has already been used above, so this data() call
# looks redundant; it is kept to leave the session behaviour unchanged.
data(HairEyeColor)
# Marginal totals over the second dimension (eye colour).
apply(HairEyeColor, 2, sum)
## Brown  Blue Hazel Green 
##   220   215    93    64
# Fix the RNG seed so that the training run can be reproduced.
set.seed(01091407)
# Train a "korresp" SOM on an 8 x 8 grid, keeping 10 intermediate backups.
# NOTE(review): HairEyeColor[, , 1] keeps only the first slice of the third
# dimension (sex), not the table aggregated over sex -- confirm this is
# intended.
HairEyeColor.som <- trainSOM(
  x.data = HairEyeColor[, , 1],
  dimension = c(8, 8),
  type = "korresp",
  scaling = "chi2",
  nb.save = 10,
  radius.type = "letremy"
)
# Print a short description of the fitted map.
HairEyeColor.som
##       Self-Organizing Map object...
##          online learning, type: korresp 
##          8 x 8 grid with square topology
##          neighbourhood type: letremy 
##          distance type: letremy
# Independence test summary for the full three-way table.
summary(HairEyeColor)
## Number of cases in table: 592 
## Number of factors: 3 
## Test for independence of all factors:
##  Chisq = 164.92, df = 24, p-value = 5.321e-23
##  Chi-squared approximation may be incorrect
# Energy plot of the trained map.
plot(HairEyeColor.som, what = "energy")

# Neuron assigned to each category of the table.
HairEyeColor.som$clustering
## Brown  Blue Hazel Green Black Brown   Red Blond 
##     9    53    33    50    17     7    64    45
# Hitmap of the categories on the map.
plot(HairEyeColor.som, what = "obs", type = "hitmap")

# Category names drawn on the neurons they were assigned to.
plot(
  HairEyeColor.som,
  what = "obs",
  type = "names",
  scale = c(0.9, 0.5)
)

Clustering

# Prototype profiles as lines, first with view "r", then with view "c".
plot(HairEyeColor.som, what = "prototypes", type = "lines",
     view = "r", print.title = TRUE)

plot(HairEyeColor.som, what = "prototypes", type = "lines",
     view = "c", print.title = TRUE)

# Colour and 3d views of the prototypes.
plot(HairEyeColor.som, what = "prototypes", type = "color")

plot(HairEyeColor.som, what = "prototypes", type = "3d")

# Views of the distances between prototypes.
plot(HairEyeColor.som, what = "prototypes", type = "poly.dist",
     print.title = TRUE)

plot(HairEyeColor.som, what = "prototypes", type = "umatrix",
     print.title = TRUE)

plot(HairEyeColor.som, what = "prototypes", type = "smooth.dist",
     print.title = TRUE)

plot(HairEyeColor.som, what = "prototypes", type = "mds")

plot(HairEyeColor.som, what = "prototypes", type = "grid.dist")

# Quality criteria of the trained map.
quality(HairEyeColor.som)
## $topographic
## [1] 0.25
## 
## $quantization
## [1] 0.3439474