Torch_Deep learning

0.1 다층 신경망 모델

diamonds(ggplot2) 데이터 활용해 다이아몬드 가격 예측

torch 설치 문제

Windows에서 torch 설치 또는 로딩 중 오류가 발생하면, 아래 링크에서 최신 VC++ 재배포 가능 런타임(x64: vc_redist.x64.exe)을 내려받아 설치한 뒤 다시 시도한다: https://support.microsoft.com/en-us/help/2977003/the-latest-supported-visual-c-downloads

0.1.1 packages

library("torch")

library("Metrics")
library("ggplot2")
library("fastDummies")
library("caTools")
library("torch")

0.1.2 데이터 준비

# ggplot2 is used to inspect the error visually. In addition, the user-defined
# helper minmax() is defined here for normalization.
#
# Rescale a numeric vector to the [0, 1] range (min-max normalization).
#
# @param x A numeric vector.
# @return A numeric vector of the same length as `x`: the smallest value maps
#   to 0 and the largest to 1. If all values are equal the result is all
#   zeros (the plain formula would give 0 / 0 = NaN). Any NA in `x` makes the
#   whole result NA, because min()/max() are called without na.rm.
minmax = function(x){
  lo = min(x)
  span = max(x) - lo
  # A zero range would turn the whole vector into NaN; dividing by 1 instead
  # leaves the (all-zero) numerator as the result.
  if (!is.na(span) && span == 0) {
    span = 1
  }
  (x - lo) / span
}

# Data preparation: take the diamonds dataset from ggplot2 as a plain
# data.frame and preview its first six rows.

df = as.data.frame(ggplot2::diamonds)
head(df, n = 6L)

##   carat       cut color clarity depth table price    x    y    z
## 1  0.23     Ideal     E     SI2  61.5    55   326 3.95 3.98 2.43
## 2  0.21   Premium     E     SI1  59.8    61   326 3.89 3.84 2.31
## 3  0.23      Good     E     VS1  56.9    65   327 4.05 4.07 2.31
## 4  0.29   Premium     I     VS2  62.4    58   334 4.20 4.23 2.63
## 5  0.31      Good     J     SI2  63.3    58   335 4.34 4.35 2.75
## 6  0.24 Very Good     J    VVS2  62.8    57   336 3.94 3.96 2.48

# First class of every column, as a named character vector.
# vapply() pins the result to one string per column; the sapply() form would
# be simplified to a matrix whenever all columns carry the same number (> 1)
# of classes, and the element-wise "[" step would then return wrong results.
df_classes = vapply(df,
                    FUN = function(column) class(column)[1],
                    FUN.VALUE = character(1))
df_classes

##     carat       cut     color   clarity     depth     table     price         x 
## "numeric" "ordered" "ordered" "ordered" "numeric" "numeric" "integer" "numeric" 
##         y         z 
## "numeric" "numeric"

# According to the original author's note, the dummy-variable conversion does
# not work properly on ordered factors, so every ordered column is converted
# to character first.
pos_ordered = which(df_classes == "ordered")
# Convert column by column with list-style indexing; this avoids the detour
# through a character matrix that sapply() on `df[, pos_ordered]` produces.
df[pos_ordered] = lapply(df[pos_ordered], as.character)
# Re-check the (first) class of every column after the conversion.
vapply(df, FUN = function(column) class(column)[1], FUN.VALUE = character(1))

##       carat         cut       color     clarity       depth       table 
##   "numeric" "character" "character" "character"   "numeric"   "numeric" 
##       price           x           y           z 
##   "integer"   "numeric"   "numeric"   "numeric"

# The ordered variables could instead be converted to numeric (with
# as.numeric()) rather than to dummy variables, but here they are dummy-encoded
# with dummy_cols() from the fastDummies package.
# remove_selected_columns = TRUE drops the source columns of the encoding so
# they do not have to be removed by hand.
# Afterwards every column is normalized with the minmax() helper defined
# earlier to make training smoother. Per the original author's note, skipping
# this step may make training fail or hang (possibly resolved in newer torch
# versions), so take care.
df_dum = dummy_cols(df,
                    remove_selected_columns = TRUE,
                    remove_first_dummy = TRUE)
# apply() works on a matrix, so df_dum is a numeric matrix from here on.
df_dum = apply(as.matrix(df_dum), 2, minmax)
head(df_dum, n = 2)

##            carat     depth     table price         x          y          z
## [1,] 0.006237006 0.5138889 0.2307692     0 0.3677840 0.06757216 0.07641509
## [2,] 0.002079002 0.4666667 0.3461538     0 0.3621974 0.06519525 0.07264151
##      cut_Good cut_Ideal cut_Premium cut_Very Good color_E color_F color_G
## [1,]        0         1           0             0       1       0       0
## [2,]        0         0           1             0       1       0       0
##      color_H color_I color_J clarity_IF clarity_SI1 clarity_SI2 clarity_VS1
## [1,]       0       0       0          0           0           1           0
## [2,]       0       0       0          0           1           0           0
##      clarity_VS2 clarity_VVS1 clarity_VVS2
## [1,]           0            0            0
## [2,]           0            0            0

# Train/test split. caTools::sample.split() is used here; a plain sample()
# call would also work if something simpler is preferred.
set.seed(123)
# Logical mask over the rows, built from the first column with SplitRatio 0.8.
split_logical = sample.split(Y = df_dum[, 1], SplitRatio = 0.8)
head(split_logical, n = 6L)

## [1]  TRUE FALSE FALSE  TRUE  TRUE  TRUE

# Separate the target (price) from the predictors by column name rather than
# by the hard-coded position 4, so the split stays correct if the column
# order of df_dum ever changes.
is_price = colnames(df_dum) == "price"
stopifnot(sum(is_price) == 1)
df_dum_train_x = df_dum[split_logical, !is_price] # without price
df_dum_train_y = df_dum[split_logical,  is_price] # price
# Held-out rows keep all columns, including price.
df_dum_test  = df_dum[!split_logical, ]
head(df_dum_train_x, 2)

##            carat     depth     table         x          y          z cut_Good
## [1,] 0.006237006 0.5138889 0.2307692 0.3677840 0.06757216 0.07641509        0
## [2,] 0.018711019 0.5388889 0.2884615 0.3910615 0.07181664 0.08270440        0
##      cut_Ideal cut_Premium cut_Very Good color_E color_F color_G color_H
## [1,]         1           0             0       1       0       0       0
## [2,]         0           1             0       0       0       0       0
##      color_I color_J clarity_IF clarity_SI1 clarity_SI2 clarity_VS1 clarity_VS2
## [1,]       0       0          0           0           1           0           0
## [2,]       1       0          0           0           0           0           1
##      clarity_VVS1 clarity_VVS2
## [1,]            0            0
## [2,]            0            0

0.1.3 하이퍼파라미터 설정

# D_in is the number of input variables; it is read from the training matrix
# so it stays in sync with the encoded columns instead of being hard-coded.
# D_out is 1 because the only output is price.

device = torch_device("cpu") # cuda

D_in = ncol(df_dum_train_x)
H = 10 # number of units in the hidden layer
D_out = 1
learning_rate = 1e-3 # 1/1000

loss_fn = nnf_mse_loss # MSE
optimizer = optim_sgd # SGD Optimizer

# Convert the training data to tensors on the configured device.
# The features are fixed inputs, not trainable parameters, so gradient
# tracking is switched off for them; only the model parameters are updated by
# the optimizer.
tensor_train_x = torch_tensor(data = as.matrix(df_dum_train_x),
                              device = device,
                              requires_grad = FALSE)
# as.matrix() turns the price vector into a one-column matrix, giving the
# target the two-dimensional (n x 1) shape shown below.
tensor_train_y = torch_tensor(data = as.matrix(df_dum_train_y),
                              device = device,
                              requires_grad = FALSE)
dim(tensor_train_x)

## [1] 43152    23

dim(tensor_train_y)

## [1] 43152     1

0.1.4 신경망 구조 정의

# The network uses two linear layers. The input/output sizes of each layer are
# set in udf_init() and the network structure is defined in udf_str(); ReLU is
# the activation function placed between the two layers.

# Initializer passed to nn_module(): registers the two linear layers on `self`.
#   D_in  - number of input features
#   H     - number of hidden units
#   D_out - number of outputs
udf_init = function(D_in, H, D_out) {
  # input -> hidden
  self$layer_01 = nn_linear(D_in, H)
  # hidden -> output
  self$layer_02 = nn_linear(H, D_out)
}

# Forward pass passed to nn_module(): layer_01 -> ReLU -> layer_02.
# Returns the output of the second layer for the given input.
udf_str = function(input) {
  hidden = self$layer_01(input)
  activated = nnf_relu(hidden)
  self$layer_02(activated)
}

# torch_manual_seed() fixes the torch RNG so the initial values are the same on
# every run; the final network object `model` is then created from the module
# generator built with the initializer and forward function defined earlier.
torch_manual_seed(123)
net_layer_02 = nn_module(
  "two_layer_net",
  initialize = udf_init,
  forward = udf_str
)

model = net_layer_02(D_in = D_in, H = H, D_out = D_out)

0.1.5 학습 및 평가

# Training and evaluation.
# The optimizer is set up over the model parameters, `step` updates are run on
# the whole training tensor, an intermediate result is printed every 200
# steps, and the loss of every step is stored in loss_vec.

# NOTE: this rebinds `optimizer` from the optimizer constructor to the
# optimizer object, so this line cannot be re-run on its own without first
# re-running the hyperparameter setup.
optimizer = optimizer(model$parameters, lr = learning_rate)

step = 2000
# Preallocate the loss history instead of growing it with c() on every step.
loss_vec = numeric(step)
for(n_step in seq_len(step)){
  y_pred = model(tensor_train_x)

  loss = loss_fn(y_pred, tensor_train_y)
  # Pull the scalar out of the tensor once and reuse it for both the history
  # and the progress message.
  loss_value = loss$item()
  loss_vec[n_step] = loss_value
  if(n_step %% 200 == 0){
    cat("Step:", n_step, ":", loss_value, "\n")
  }

  # Clear old gradients, backpropagate the current loss, then update.
  optimizer$zero_grad()
  loss$backward()
  optimizer$step()
}

## Step: 200 : 0.05661283 
## Step: 400 : 0.05305122 
## Step: 600 : 0.05175806 
## Step: 800 : 0.05105871 
## Step: 1000 : 0.05053329 
## Step: 1200 : 0.05007544 
## Step: 1400 : 0.04965617 
## Step: 1600 : 0.04926623 
## Step: 1800 : 0.04890149 
## Step: 2000 : 0.04855897

0.1.6 학습에 따른 손실 그래프

# Plot the loss recorded at each training step.
# The step axis is derived from loss_vec itself, so both columns always have
# the same length (1:step would also misbehave for step == 0).
df_loss = data.frame(step = seq_along(loss_vec),
                     loss = loss_vec)
ggplot(data = df_loss,
       mapping = aes(x = step, y = loss)) +
  geom_line() +
  theme_bw()