Sztuczne_sieci_neuronowe

Majkowska Agata

semestr letni 2025

Wstęp

Biblioteki

library(DALEX)
library(DALEXtra)
library(keras)
library(tensorflow)
library(titanic)
library(fastDummies)
library(ggplot2)
library(clusterSim)
library(dplyr)

Budowa zbiorów

funkcja liniowa

# Linear toy data: 50 evenly spaced x values in [0, 1] paired with 50 evenly
# spaced y values in [3, 6], i.e. points on the line y = 3 + 3 * x.
x <- seq(0, 1, length.out = 50)
y <- seq(3, 6, length.out = 50)

# Two-column matrix; cbind() takes the column names "x" and "y" from the
# variable names.
liniowa <- cbind(x, y)
plot(x, y)

funkcja sinusoidalna

# Fix the RNG seed so that any random draws made after this point are
# reproducible.
set.seed(123)

# Sinusoid sampled on [-10, 10] with a step of 0.01.
x <- seq(-10, 10, by = 0.01)
y <- sin(x)

# Build the two-column data set with data.frame(). The previous
# as.data.frame(x = x, y = y) converted only `x`; the `y` argument was
# swallowed by `...` and silently dropped.
data <- data.frame(x = x, y = y)

### Noisy data set

# Store the sinusoid plus Gaussian noise (mean 0, sd 0.2) in data$y.
data$y <- y + rnorm(n = nrow(data), mean = 0, sd = 0.2)

# Scatter plot of the noisy sinusoid.
ggplot(data, aes(x = x, y = y)) +
  geom_point() +
  labs(
    title = "Sinusoida",
    subtitle = "W przedziale od -10 do 10",
    x = "Oś X",
    y = "Oś Y"
  ) +
  theme_bw()

Pierwsza sztuczna sieć neuronowa: funkcja liniowa

Instalacja pakietów z biblioteki tensorflow Keras

# reticulate::install_miniconda()
# tensorflow::install_tensorflow()

Do poczytania:

https://tensorflow.rstudio.com/guide/keras/

https://cran.r-project.org/web/packages/keras/keras.pdf

https://www.tensorflow.org/api_docs/python/tf/keras/Model

Budowa modelu

# Small regression network for the linear data:
# 1 input feature -> 10 ReLU units -> 1 linear output unit.
model_keras <- keras_model_sequential()
model_keras <- layer_dense(
  model_keras,
  units = 10,
  activation = "relu",
  input_shape = c(1)
)
model_keras <- layer_dense(
  model_keras,
  units = 1,
  activation = "linear",
  kernel_initializer = "normal"
)

# Print the layer structure and parameter counts.
summary(model_keras)

## Model: "sequential"
## ________________________________________________________________________________
##  Layer (type)                       Output Shape                    Param #     
## ================================================================================
##  dense_1 (Dense)                    (None, 10)                      20          
##  dense (Dense)                      (None, 1)                       11          
## ================================================================================
## Total params: 31
## Trainable params: 31
## Non-trainable params: 0
## ________________________________________________________________________________

Etap kompilacji

# Configure training: SGD with learning rate 0.01, mean squared error as the
# loss, and MSE additionally reported as a metric.
compile(
  model_keras,
  optimizer = optimizer_sgd(learning_rate = 0.01),
  loss = "mean_squared_error",
  metrics = c("mse")
)

Trening sieci

# Train on the linear data: column 1 of `liniowa` is the input, column 2 the
# target. validation_split = 0 means no rows are held out for validation.
history <- fit(
  model_keras,
  liniowa[, 1],
  liniowa[, 2],
  batch_size = 10,
  epochs = 100,
  validation_split = 0.0
)

Predykcja

# Predict on the training inputs (passed as a one-column matrix). The result
# is computed once and reused; previously the identical predict() call was
# executed a second time just to build `pred`.
y <- predict(model_keras, as.matrix(liniowa[, 1]))
pred <- data.frame(y = y)

# Put predictions next to the original data. data.frame() makes duplicate
# names unique, so the columns are: y (prediction), x, y.1 (true value).
df <- data.frame(pred, liniowa)
head(df, 10)

##           y          x      y.1
## 1  3.079777 0.00000000 3.000000
## 2  3.138037 0.02040816 3.061224
## 3  3.196297 0.04081633 3.122449
## 4  3.254558 0.06122449 3.183673
## 5  3.312818 0.08163265 3.244898
## 6  3.371078 0.10204082 3.306122
## 7  3.429338 0.12244898 3.367347
## 8  3.487598 0.14285714 3.428571
## 9  3.545858 0.16326531 3.489796
## 10 3.604118 0.18367347 3.551020

Wizualizacja

# Overlay the true values (y.1) and the network predictions (y) against x.
# Only x is mapped at plot level: the former plot-level `y = value` pointed at
# a column that does not exist in df and `color = ` was left empty; both
# aesthetics are supplied per layer instead.
ggplot(data = df, aes(x = x)) +
  geom_point(aes(y = y.1, colour = "wartości rzeczywiste")) +
  geom_point(aes(y = y, colour = "predykcje")) +
  # Keep the y-axis title that the old plot-level mapping produced.
  labs(y = "value") +
  ggtitle("Funkcja liniowa", subtitle = " ") +
  theme_bw()

Funkcja sinusoidalna

Budowa modelu

# Deeper network for the sinusoid: five sigmoid hidden layers
# (128 -> 64 -> 32 -> 16 -> 8 units) followed by one linear output unit.
model_keras <- keras_model_sequential()

# First layer: `units` is the number of neurons, `activation` the activation
# function, `input_shape` the number of dimensions of the input data.
model_keras <- layer_dense(
  model_keras,
  units = 128,
  activation = "sigmoid",
  input_shape = c(1)
)

# Further hidden layers are added the same way, without the input dimension.
model_keras <- layer_dense(model_keras, units = 64, activation = "sigmoid")
model_keras <- layer_dense(model_keras, units = 32, activation = "sigmoid")
model_keras <- layer_dense(model_keras, units = 16, activation = "sigmoid")
model_keras <- layer_dense(model_keras, units = 8, activation = "sigmoid")

# Output layer: the number of neurons equals the dimension of the prediction.
model_keras <- layer_dense(
  model_keras,
  units = 1,
  activation = "linear",
  kernel_initializer = "normal"
)

Etap kompilacji

# Configure training for the sinusoid network.
compile(
  model_keras,
  # Weight optimisation method: SGD with learning rate 0.01.
  optimizer = optimizer_sgd(learning_rate = 0.01),
  # Cost function.
  loss = "mean_squared_error",
  # Metric reported during training.
  metrics = c("mse")
)

Trening sieci

# Train on the noisy sinusoid.
history <- fit(
  model_keras,
  data[, 1], # input set (first column of `data`)
  data[, 2], # expected value (second column of `data`)
  epochs = 100, # number of iterations
  # Share of the data set aside for validation, so no manual split is needed;
  # 0 keeps every row for training.
  validation_split = 0.0
)

Predykcja

# Predict on the training inputs (first column of `data`, as a one-column
# matrix) and show the first ten predictions.
y <- predict(model_keras, as.matrix(data[, 1]))
head(y, n = 10)

##              [,1]
##  [1,] 0.003263231
##  [2,] 0.003263395
##  [3,] 0.003263563
##  [4,] 0.003263704
##  [5,] 0.003263872
##  [6,] 0.003264036
##  [7,] 0.003264192
##  [8,] 0.003264349
##  [9,] 0.003264520
## [10,] 0.003264677

Wizualizacja

# Predictions next to the original data. data.frame() makes duplicate names
# unique, so `df` holds y (prediction), x and y.1 (the value from `data`).
pred <- data.frame(y = predict(model_keras, as.matrix(data[, 1])))
df <- data.frame(pred, data)

# Overlay the data values (y.1) and the network predictions (y) against x.
# Only x is mapped at plot level: the former plot-level `y = value` pointed at
# a column that does not exist in df and `color = ` was left empty; both
# aesthetics are supplied per layer instead.
ggplot(data = df, aes(x = x)) +
  geom_point(aes(y = y.1, colour = "wartości rzeczywiste")) +
  geom_point(aes(y = y, colour = "predykcje")) +
  # Keep the y-axis title that the old plot-level mapping produced.
  labs(y = "value") +
  ggtitle("Funkcja Sinusoidalna", subtitle = "Sztuczna sieć neuronowa ") +
  theme_bw()

Zadanie 1

Wytrenuj sieć, która dopasuje się do zbioru sinusoidy, zmieniając liczbę iteracji oraz funkcję aktywacji warstw ukrytych na ‘relu’.

Notowania cen

# Load the stock quotes from a CSV file.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs where this exact file exists.
dane <- read.csv(
  file = "C:/Users/majko/OneDrive/Dokumenty/Zajecia_WZR/Zajecia 2021-2022/Podypolomowe_2021-22/Sztuczne_sieci_neuronowe/Stock.csv"
)

# head(dane, 10)

# Per-column summary of the loaded data.
summary(dane)

##      Date                Low              Open            Volume        
##  Length:12356       Min.   : 2.625   Min.   : 0.000   Min.   :       0  
##  Class :character   1st Qu.: 6.625   1st Qu.: 4.125   1st Qu.:   21200  
##  Mode  :character   Median :13.125   Median :12.480   Median :  110900  
##                     Mean   :21.880   Mean   :20.390   Mean   :  838617  
##                     3rd Qu.:34.525   3rd Qu.:35.205   3rd Qu.: 1389475  
##                     Max.   :85.380   Max.   :88.080   Max.   :15153000  
##       High            Close        Adjusted.Close  
##  Min.   : 3.000   Min.   : 2.812   Min.   : 2.156  
##  1st Qu.: 6.875   1st Qu.: 6.750   1st Qu.: 5.274  
##  Median :13.500   Median :13.290   Median :10.591  
##  Mean   :22.624   Mean   :22.220   Mean   :19.283  
##  3rd Qu.:35.920   3rd Qu.:35.242   3rd Qu.:31.111  
##  Max.   :89.230   Max.   :87.130   Max.   :83.969

# Two-column working set: V1 is a running row index used as the time axis,
# V2 is the closing price. The index is derived from the actual number of
# rows instead of the previously hard-coded 1:12356. The names V1/V2 match
# what as.data.frame(cbind(...)) produced, since later code refers to them.
dane1 <- data.frame(
  V1 = as.numeric(seq_len(nrow(dane))),
  V2 = dane$Close
)

# dane1 <- as.matrix(dane1)

# Normalise the series stored in dane1 (clusterSim normalisation type "n5").
# The earlier code normalised dane[, 2] -- the second column of the raw file --
# and so overwrote the Close values selected above with a different column.
# NOTE(review): confirm that Close is the series that should be modelled.
dane1[, 2] <- data.Normalization(dane1[, 2], type = "n5")

head(dane1, 10)

##    V1         V2
## 1   1 -0.2835333
## 2   2 -0.2835333
## 3   3 -0.2835333
## 4   4 -0.2855018
## 5   5 -0.2855018
## 6   6 -0.2855018
## 7   7 -0.2795963
## 8   8 -0.2855018
## 9   9 -0.2835333
## 10 10 -0.2874702

# Keep the 1000 rows with the smallest V1 (a negative n makes top_n() select
# from the bottom), then summarise the reduced set.
dane1 <- top_n(dane1, -1000, V1)

summary(dane1)

##        V1               V2         
##  Min.   :   1.0   Min.   :-0.2993  
##  1st Qu.: 250.8   1st Qu.:-0.2816  
##  Median : 500.5   Median :-0.2638  
##  Mean   : 500.5   Mean   :-0.2592  
##  3rd Qu.: 750.2   3rd Qu.:-0.2461  
##  Max.   :1000.0   Max.   :-0.1615

# Line plot of the normalised series (V2) against the row index (V1);
# title and subtitle are intentionally empty strings.
ggplot(dane1, aes(x = V1, y = V2)) +
  geom_line() +
  labs(
    title = "",
    subtitle = "",
    x = "Oś X",
    y = "Oś Y"
  ) +
  theme_bw()

# Network for the price series: 1 input feature, six hidden layers
# (128, 64, 64, 10 ReLU units, then 64, 64 ELU units) and 1 linear output.
model_cena <- keras_model_sequential()
model_cena <- layer_dense(
  model_cena,
  units = 128,
  activation = "relu",
  input_shape = c(1)
)
model_cena <- layer_dense(
  model_cena,
  units = 64, activation = "relu", kernel_initializer = "normal"
)
model_cena <- layer_dense(
  model_cena,
  units = 64, activation = "relu", kernel_initializer = "normal"
)
model_cena <- layer_dense(model_cena, units = 10, activation = "relu")
model_cena <- layer_dense(
  model_cena,
  units = 64, activation = "elu", kernel_initializer = "normal"
)
model_cena <- layer_dense(
  model_cena,
  units = 64, activation = "elu", kernel_initializer = "normal"
)
model_cena <- layer_dense(
  model_cena,
  units = 1, activation = "linear", kernel_initializer = "normal"
)

# Configure training for the price network.
compile(
  model_cena,
  # Weight optimisation method: SGD with learning rate 0.01.
  optimizer = optimizer_sgd(learning_rate = 0.01),
  # Cost function.
  loss = "mean_squared_error",
  # Metric reported during training.
  metrics = c("mse")
)

Trening sieci

# Train the price network: column 1 of `dane1` is the input, column 2 the
# target.
history <- fit(
  model_cena,
  dane1[, 1],
  dane1[, 2],
  batch_size = 20,
  epochs = 100, # number of iterations
  # Share of the data set aside for validation, so no manual split is needed;
  # 0 keeps every row for training.
  validation_split = 0.0
)

Predykcja

# Predict on the training inputs (first column of `dane1`, as a one-column
# matrix) and show the first ten predictions.
p <- predict(model_cena, as.matrix(dane1[, 1]))
head(p, n = 10)

##             [,1]
##  [1,] -0.2750418
##  [2,] -0.2747234
##  [3,] -0.2744077
##  [4,] -0.2740923
##  [5,] -0.2737779
##  [6,] -0.2734644
##  [7,] -0.2731519
##  [8,] -0.2728405
##  [9,] -0.2725299
## [10,] -0.2722203

Wizualizacja

# Combine the predictions (column p) with the input data (V1, V2).
pred <- data.frame(p)
df <- data.frame(pred, dane1)

# Overlay the actual series (V2) and the network predictions (p) against V1.
# Only x is mapped at plot level: the former plot-level `y = value` pointed at
# a column that does not exist in df and `color = ` was left empty; both
# aesthetics are supplied per layer instead.
ggplot(data = df, aes(x = V1)) +
  geom_line(aes(y = V2, colour = "wartości rzeczywiste")) +
  geom_line(aes(y = p, colour = "predykcje")) +
  # Keep the y-axis title that the old plot-level mapping produced.
  labs(y = "value") +
  ggtitle("Notowania cen", subtitle = "Predykcja ") +
  theme_bw()