Load libraries
library(kohonen)
## Warning: package 'kohonen' was built under R version 4.1.2
library(tidyr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.1.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggpubr)
## Loading required package: ggplot2
library(RColorBrewer)
library(countrycode)
## Warning: package 'countrycode' was built under R version 4.1.2
library(sf)
## Linking to GEOS 3.8.1, GDAL 3.2.1, PROJ 7.2.1
library(tmap)
## Registered S3 methods overwritten by 'stars':
## method from
## st_bbox.SpatRaster sf
## st_crs.SpatRaster sf
Read in Data
housing = read.csv("housing.csv")
head(housing)
## longitude latitude housing_median_age total_rooms total_bedrooms population
## 1 -122.23 37.88 41 880 129 322
## 2 -122.22 37.86 21 7099 1106 2401
## 3 -122.24 37.85 52 1467 190 496
## 4 -122.25 37.85 52 1274 235 558
## 5 -122.25 37.85 52 1627 280 565
## 6 -122.25 37.85 52 919 213 413
## households median_income median_house_value ocean_proximity
## 1 126 8.3252 452600 NEAR BAY
## 2 1138 8.3014 358500 NEAR BAY
## 3 177 7.2574 352100 NEAR BAY
## 4 219 5.6431 341300 NEAR BAY
## 5 259 3.8462 342200 NEAR BAY
## 6 193 4.0368 269700 NEAR BAY
Clean up data to only variables needed
housing <- housing %>%
filter(!is.na(total_bedrooms))
housing <- housing %>%
mutate(avg_rooms = total_rooms / households,
bedroom_ratio = total_bedrooms / total_rooms)
housing <- housing %>%
select(-longitude, -latitude, -total_rooms, -total_bedrooms,
-ocean_proximity, -households)
names(housing)
## [1] "housing_median_age" "population" "median_income"
## [4] "median_house_value" "avg_rooms" "bedroom_ratio"
housing_scale <- housing %>%
mutate(
housing_median_age = scale(log(housing_median_age)),
population = scale(log(population)),
median_income = scale(log(median_income)),
median_house_value = scale(log(median_house_value)),
avg_rooms = scale(log(avg_rooms)),
bedroom_ratio = scale(log(bedroom_ratio))
)
Train the SOM
som_grid = somgrid(xdim = 30, ydim = 20, topo = "hexagonal")
housing_mat = as.matrix(housing_scale)
housing_som = som(housing_mat, som_grid)
Plotting the change in the loss function
plot(housing_som, type = "changes")
Stabilize the loss function
housing_som = som(housing_mat, som_grid, rlen = 1000)
plot(housing_som, type = "changes")
A plot of the counts per nodes
plot(housing_som, type = "counts")
A plot of the codebook vectors
plot(housing_som, type = "code")
One or more heatmaps of the individual features on the SOM
plot(housing_som, type = "property", property = housing_som$codes[[1]][,"population"],
main = "population")
plot(housing_som, type = "property", property = housing_som$codes[[1]][,"housing_median_age"],
main = "housing_median_age")
A set of clusters constructed from the SOM nodes using the method in the lab (and a figure showing these on the SOM grid)
dist_codes = dist(housing_som$codes[[1]])
som_cluster = cutree(hclust(dist_codes), 6)
mypal = brewer.pal(6, "Set2")
plot(housing_som, type="mapping", bgcol = mypal[som_cluster],
main = "Clusters", pch = '.')
add.cluster.boundaries(housing_som, som_cluster)
Make clusters contiguous
dist_grid = unit.distances(som_grid)
dist_adj = as.matrix(dist_codes) * dist_grid
clust_adj = hclust(as.dist(dist_adj), 'ward.D2')
som_cluster_adj = cutree(clust_adj, 6)
plot(housing_som, type="mapping", bgcol = mypal[som_cluster_adj],
main = "Clusters", pch = '.')
add.cluster.boundaries(housing_som, som_cluster_adj)
A table given the average values of each feature per cluster
nodeByVariable <- housing_som$unit.classif
housing_scale$somclust <-
as.factor(som_cluster_adj[nodeByVariable])
housing$somclust <- housing_scale$somclust
housing_clusters <- merge(som_cluster_adj, housing_scale)
cluster_vals <- aggregate(housing[,1:6],
by = list(housing_scale$somclust), mean)
cluster_vals
## Group.1 housing_median_age population median_income median_house_value
## 1 1 29.59195 1306.1487 7.044211 362435.4
## 2 2 38.94354 863.0682 4.084544 251648.4
## 3 3 35.04350 1394.9215 2.309950 149567.6
## 4 4 17.53032 2009.6817 4.606481 202741.2
## 5 5 24.79307 1283.7228 2.899186 105242.1
## 6 6 24.23419 1524.2623 3.562486 254211.7
## avg_rooms bedroom_ratio
## 1 6.955522 0.1505183
## 2 5.260730 0.2004679
## 3 4.148897 0.2688367
## 4 5.856763 0.1839476
## 5 6.456228 0.2033079
## 6 4.608714 0.2411684