Sztuczne_sieci_neuronowe_SOM

Majkowska Agata

semestr letni 2025

Samoucząca się sieć neuronowa - SOM

Biblioteki

library(kohonen)
library(ggplot2)
library(clusterSim)
library(dplyr)

Zbiór danych

Wczytanie danych

ads <- read.csv("C:/Users/majko/OneDrive/Dokumenty/Zajecia_WZR/Zajecia 2021-2022/Podypolomowe_2021-22/Sztuczne_sieci_neuronowe/KAG_conversion_data.csv") %>% 
  glimpse()
## Rows: 1,143
## Columns: 11
## $ ad_id               <int> 708746, 708749, 708771, 708815, 708818, 708820, 70~
## $ xyz_campaign_id     <int> 916, 916, 916, 916, 916, 916, 916, 916, 916, 916, ~
## $ fb_campaign_id      <int> 103916, 103917, 103920, 103928, 103928, 103929, 10~
## $ age                 <chr> "30-34", "30-34", "30-34", "30-34", "30-34", "30-3~
## $ gender              <chr> "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", ~
## $ interest            <int> 15, 16, 20, 28, 28, 29, 15, 16, 27, 28, 31, 7, 16,~
## $ Impressions         <int> 7350, 17861, 693, 4259, 4133, 1915, 15615, 10951, ~
## $ Clicks              <int> 1, 2, 0, 1, 1, 0, 3, 1, 1, 3, 0, 0, 0, 0, 7, 0, 1,~
## $ Spent               <dbl> 1.43, 1.82, 0.00, 1.25, 1.29, 0.00, 4.77, 1.27, 1.~
## $ Total_Conversion    <int> 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,~
## $ Approved_Conversion <int> 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0,~

Przygotowanie danych

ads <- ads %>% 
  mutate(ad_id = as.factor(ad_id),
         xyz_campaign_id = as.factor(xyz_campaign_id),
         fb_campaign_id = as.factor(fb_campaign_id)) %>% 
  glimpse()
## Rows: 1,143
## Columns: 11
## $ ad_id               <fct> 708746, 708749, 708771, 708815, 708818, 708820, 70~
## $ xyz_campaign_id     <fct> 916, 916, 916, 916, 916, 916, 916, 916, 916, 916, ~
## $ fb_campaign_id      <fct> 103916, 103917, 103920, 103928, 103928, 103929, 10~
## $ age                 <chr> "30-34", "30-34", "30-34", "30-34", "30-34", "30-3~
## $ gender              <chr> "M", "M", "M", "M", "M", "M", "M", "M", "M", "M", ~
## $ interest            <int> 15, 16, 20, 28, 28, 29, 15, 16, 27, 28, 31, 7, 16,~
## $ Impressions         <int> 7350, 17861, 693, 4259, 4133, 1915, 15615, 10951, ~
## $ Clicks              <int> 1, 2, 0, 1, 1, 0, 3, 1, 1, 3, 0, 0, 0, 0, 7, 0, 1,~
## $ Spent               <dbl> 1.43, 1.82, 0.00, 1.25, 1.29, 0.00, 4.77, 1.27, 1.~
## $ Total_Conversion    <int> 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,~
## $ Approved_Conversion <int> 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0,~
levels(ads$xyz_campaign_id)
## [1] "916"  "936"  "1178"

Zmiana zmiennej przedziałowej na zmienne numeryczne

ads.s <- ads %>% 
  mutate(genderM = ifelse(gender == "M", 1, 0),
         age2 = ifelse(age == "35-39", 1, 0),
         age3 = ifelse(age == "40-44", 1, 0),
         age4 = ifelse(age == "45-49", 1, 0)) %>% 
  select(-c(1,3:5))

Skalowanie danych i utworzenie macierzy

ads.train <- as.matrix(scale(ads.s[,-1]))

Budowa sieci

Struktura sieci:

Liczba neuronów

Kształt sieci:

-arkusz

-cylinder

-toroid

Topologia sieci:

-prostokątna - “rectangular”

-heksagonalna - “hexagonal”

set.seed(100)
ads.grid <- somgrid(xdim = 5, ydim = 5, topo = "rectangular")

Utworzenie modelu SOM

set.seed(100)
ads.model <- som(ads.train, ads.grid, rlen = 500, radius = 2, keep.data = TRUE,
                 dist.fcts = "euclidean")
str(ads.model)
## List of 14
##  $ data            :List of 1
##   ..$ : num [1:1143, 1:10] -0.659 -0.622 -0.474 -0.177 -0.177 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : NULL
##   .. .. ..$ : chr [1:10] "interest" "Impressions" "Clicks" "Spent" ...
##   .. ..- attr(*, "scaled:center")= Named num [1:10] 3.28e+01 1.87e+05 3.34e+01 5.14e+01 2.86 ...
##   .. .. ..- attr(*, "names")= chr [1:10] "interest" "Impressions" "Clicks" "Spent" ...
##   .. ..- attr(*, "scaled:scale")= Named num [1:10] 2.70e+01 3.13e+05 5.69e+01 8.69e+01 4.48 ...
##   .. .. ..- attr(*, "names")= chr [1:10] "interest" "Impressions" "Clicks" "Spent" ...
##  $ unit.classif    : num [1:1143] 21 21 21 21 21 21 21 21 21 21 ...
##  $ distances       : num [1:1143] 0.418 0.42 0.42 0.472 0.439 ...
##  $ grid            :List of 6
##   ..$ pts              : int [1:25, 1:2] 1 2 3 4 5 1 2 3 4 5 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : NULL
##   .. .. ..$ : chr [1:2] "x" "y"
##   ..$ xdim             : num 5
##   ..$ ydim             : num 5
##   ..$ topo             : chr "rectangular"
##   ..$ neighbourhood.fct: Factor w/ 2 levels "bubble","gaussian": 1
##   ..$ toroidal         : logi FALSE
##   ..- attr(*, "class")= chr "somgrid"
##  $ codes           :List of 1
##   ..$ : num [1:25, 1:10] -0.0684 0.1402 -0.4992 -0.3235 -0.1598 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:25] "V1" "V2" "V3" "V4" ...
##   .. .. ..$ : chr [1:10] "interest" "Impressions" "Clicks" "Spent" ...
##  $ changes         : num [1:500, 1] 0.0197 0.0173 0.0164 0.0167 0.0177 ...
##  $ alpha           : num [1:2] 0.05 0.01
##  $ radius          : num [1:2] 2 0
##  $ na.rows         : int(0) 
##  $ user.weights    : num 1
##  $ distance.weights: num 1
##  $ whatmap         : int 1
##  $ maxNA.fraction  : int 0
##  $ dist.fcts       : chr "euclidean"
##  - attr(*, "class")= chr "kohonen"
head(ads.model$unit.classif, 10 )
##  [1] 21 21 21 21 21 21 21 21 21 21
table(ads.model$unit.classif)
## 
##   1   2   3   4   5   6   7   8   9  10  11  12  13  14  15  17  18  20  21  23 
##  28  18   3  19 156  87  21  21  23  35  72  27  16  18  79  39  24  81 168  19 
##  24  25 
## 108  81
ads.model$grid
## $pts
##       x y
##  [1,] 1 1
##  [2,] 2 1
##  [3,] 3 1
##  [4,] 4 1
##  [5,] 5 1
##  [6,] 1 2
##  [7,] 2 2
##  [8,] 3 2
##  [9,] 4 2
## [10,] 5 2
## [11,] 1 3
## [12,] 2 3
## [13,] 3 3
## [14,] 4 3
## [15,] 5 3
## [16,] 1 4
## [17,] 2 4
## [18,] 3 4
## [19,] 4 4
## [20,] 5 4
## [21,] 1 5
## [22,] 2 5
## [23,] 3 5
## [24,] 4 5
## [25,] 5 5
## 
## $xdim
## [1] 5
## 
## $ydim
## [1] 5
## 
## $topo
## [1] "rectangular"
## 
## $neighbourhood.fct
## [1] bubble
## Levels: bubble gaussian
## 
## $toroidal
## [1] FALSE
## 
## attr(,"class")
## [1] "somgrid"
plot(ads.model, type = "mapping", pchs = 19, shape = "round")

plot(ads.model, type = "codes", main = "Codes Plot", palette.name = rainbow)

plot(ads.model, type = "changes")

plot(ads.model, type = "counts")

plot(ads.model, type = "dist.neighbours")

Wkład poszczególnych zmiennych

heatmap.som <- function(model){
  for (i in 1:10) {
    plot(model, type = "property", property = getCodes(model)[,i], 
         main = colnames(getCodes(model))[i]) 
  }
}
par(mfrow=c(5,2))
heatmap.som(ads.model)

Zadanie 1

Porsze podzielić zbiór na uczący i testowy.

Zadanie 2

Proszę zmieniając liczbę neuronów na 7x7, 10x10 sprawdzić co się zmieni.

KLASYFIKACJA

set.seed(100)
ads.grid <- somgrid(xdim = 10, ydim = 10, topo = "hexagonal")

set.seed(100)
ads.model <- som(ads.train, ads.grid, rlen = 500, radius = 2, keep.data = TRUE,
                 dist.fcts = "euclidean")
str(ads.model)
## List of 14
##  $ data            :List of 1
##   ..$ : num [1:1143, 1:10] -0.659 -0.622 -0.474 -0.177 -0.177 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : NULL
##   .. .. ..$ : chr [1:10] "interest" "Impressions" "Clicks" "Spent" ...
##   .. ..- attr(*, "scaled:center")= Named num [1:10] 3.28e+01 1.87e+05 3.34e+01 5.14e+01 2.86 ...
##   .. .. ..- attr(*, "names")= chr [1:10] "interest" "Impressions" "Clicks" "Spent" ...
##   .. ..- attr(*, "scaled:scale")= Named num [1:10] 2.70e+01 3.13e+05 5.69e+01 8.69e+01 4.48 ...
##   .. .. ..- attr(*, "names")= chr [1:10] "interest" "Impressions" "Clicks" "Spent" ...
##  $ unit.classif    : num [1:1143] 78 70 70 80 79 79 70 78 80 80 ...
##  $ distances       : num [1:1143] 0.1651 0.2175 0.1915 0.0484 0.1467 ...
##  $ grid            :List of 6
##   ..$ pts              : num [1:100, 1:2] 1.5 2.5 3.5 4.5 5.5 6.5 7.5 8.5 9.5 10.5 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : NULL
##   .. .. ..$ : chr [1:2] "x" "y"
##   ..$ xdim             : num 10
##   ..$ ydim             : num 10
##   ..$ topo             : chr "hexagonal"
##   ..$ neighbourhood.fct: Factor w/ 2 levels "bubble","gaussian": 1
##   ..$ toroidal         : logi FALSE
##   ..- attr(*, "class")= chr "somgrid"
##  $ codes           :List of 1
##   ..$ : num [1:100, 1:10] -0.4458 -0.0535 -0.4109 -0.4974 -0.4126 ...
##   .. ..- attr(*, "dimnames")=List of 2
##   .. .. ..$ : chr [1:100] "V1" "V2" "V3" "V4" ...
##   .. .. ..$ : chr [1:10] "interest" "Impressions" "Clicks" "Spent" ...
##  $ changes         : num [1:500, 1] 0.0148 0.0125 0.013 0.0139 0.014 ...
##  $ alpha           : num [1:2] 0.05 0.01
##  $ radius          : num [1:2] 2 0
##  $ na.rows         : int(0) 
##  $ user.weights    : num 1
##  $ distance.weights: num 1
##  $ whatmap         : int 1
##  $ maxNA.fraction  : int 0
##  $ dist.fcts       : chr "euclidean"
##  - attr(*, "class")= chr "kohonen"
plot(ads.model, type = "mapping", pchs = 19, shape = "round")

neurony<-as.data.frame( ads.model$codes)
dd = (dist(neurony, method = "euclidean"))
fitc <- hclust(dd, method="ward.D")

plot(fitc, hang=-1)

source("http://addictedtor.free.fr/packages/A2R/lastVersion/R/code.R")
op = par(bg = "#EFEFEF")
par(mfrow=c(1,1))

wys<-c(0,fitc$height)
Mojena1<-mean(wys)+1.25* sd(wys)
Mojena1
## [1] 17.37922
A2Rplot(fitc, k = 6, boxes = TRUE, col.up = "gray50", 
        col.down = c("#FF6B6B","#8470FF","green4","#66CDAA","#8B7E66","grey"), main="Klasyfikacja" )

groupes <- cutree(fitc,k=6)
table(groupes)
## groupes
##  1  2  3  4  5  6 
## 21 23  1 23 17 15
plot(ads.model,type="mapping",bgcol=c("steelblue1","sienna1","yellowgreen","grey","pink")[groupes])
add.cluster.boundaries(ads.model,clustering=groupes)

wnk.korelacje.kofenetyczne <- c()
met.dist <- c( "euclidean")
met.hcl <- c("single", "complete", "average","ward.D","centroid")

for(i in 1:1){ for(j in 1:5){ dds = dist(neurony, method = met.dist[i])
hcdds = hclust((dds), method = met.hcl[j])
hcdds.c = cophenetic(hcdds)
korelacja.kofenetyczna = cor(hcdds.c, dds)
wnk.korelacje.kofenetyczne = c(wnk.korelacje.kofenetyczne, korelacja.kofenetyczna)}}

wnk.korelacje.kofenetyczne = matrix(round( wnk.korelacje.kofenetyczne, 5), ncol = 1)

colnames(wnk.korelacje.kofenetyczne)<-"euclidean"
met.hcl1 <- c("single", "complete", "average","ward.D", "centroid")
rownames(wnk.korelacje.kofenetyczne)<-met.hcl1
wnk.korelacje.kofenetyczne
##          euclidean
## single     0.89785
## complete   0.87158
## average    0.90906
## ward.D     0.63975
## centroid   0.86199