Objetivo

O relatório é uma descrição do processo de feature engineering aplicado à base de dados construída anteriormente.

O código

suppressMessages(suppressWarnings(library(magick)))

# Location of the sample digit picture, kept in one place so the file is
# read only once.
img_path <- "D:/Users/Usuario/Downloads/RedesNeuraisDigit.jpg"

# Show the picture exactly as loaded, before any processing.
img <- image_read(img_path)
plot(img)

# Turn the picture into a 28x28 numeric matrix of gray levels.
# "28x28!" forces both dimensions to 28; a bare width of 28 keeps the aspect
# ratio, so a non-square picture would not yield the 784 values that
# matrix(nrow = 28, ncol = 28) expects.
img <- img %>%
  image_scale(geometry = "28x28!") %>%
  image_data(channels = "gray") %>%
  .[1, , ] %>% # keep the first slice along the first dimension
  as.numeric() %>%
  matrix(nrow = 28, ncol = 28, byrow = TRUE) %>%
  apply(2, rev) %>% # reverse each column so the digit is drawn upright
  t()

# image() plots a matrix as a grid of coloured cells.
img %>%
  image()

Nesse início do código, vamos usar o pacote magick, essencial para fazer edições em imagens, como é feito acima em algumas alterações. A imagem é convertida para tons de cinza e, logo após, plotada em forma de matriz com um esquema de cores de mapa de calor.

# 28x28 DCT matrix, built once and reused by every call to aplica_dct().
dct28 <- mrbsizeR::dctMatrix(28)

#' Apply the transform t(dct28) %*% Q %*% dct28 to one flattened 28x28 image.
#'
#' @param x Numeric vector (or matrix) holding 28 * 28 = 784 pixel values;
#'   the values are divided by 255 before the transform.
#' @return Numeric vector of length 784: the transformed 28x28 matrix
#'   flattened with as.numeric().
aplica_dct <- function(x) {
  # Fail loudly instead of letting matrix() recycle or truncate bad input.
  stopifnot(length(x) == 28 * 28)
  x <- x / 255
  # Rebuild the 28x28 grid row by row, then reverse each column and transpose.
  q <- x %>%
    matrix(28, 28, byrow = TRUE) %>%
    apply(2, rev) %>%
    t()
  q <- t(dct28) %*% q %*% dct28
  as.numeric(q)
}

# Preview the first rows of the transformed sample image, reshaped back into
# a 28x28 matrix.
img %>%
  aplica_dct() %>%
  matrix(28, 28, byrow = TRUE) %>%
  head()
##           [,1]       [,2]        [,3]         [,4]       [,5]        [,6]
## [1,] 17.562303 -5.3441382  4.34027006 -2.714131243  1.8284572 -1.12456568
## [2,] -5.076840  1.3756457 -1.40944686  1.057768241 -0.3662663  0.06546695
## [3,]  4.261970 -1.4304730  0.38616292 -0.214611420  0.6644516 -0.42282668
## [4,] -2.256486  0.7778634 -0.53449904  0.007334087 -0.2464381  0.44883571
## [5,]  2.081315 -0.5538839  0.45181465 -0.362750459  0.2623780 -0.06869007
## [6,] -1.536523  0.4252067  0.01021965  0.319901084 -0.3086368 -0.06672195
##             [,7]         [,8]        [,9]       [,10]       [,11]
## [1,]  1.70012473 -1.178604956  1.18540781 -0.40851739  0.98276068
## [2,] -0.67689381  0.495486495 -0.18232281  0.02605626 -0.44785992
## [3,]  0.34908440  0.002754063  0.08491686 -0.35737702  0.36583473
## [4,] -0.08392305 -0.045254498 -0.28917203  0.11865271  0.02430075
## [5,]  0.07853420 -0.218146598  0.27929773  0.02138392 -0.06201612
## [6,] -0.06589996  0.144141657 -0.03333580  0.08560158 -0.10105244
##            [,12]       [,13]       [,14]       [,15]        [,16]
## [1,] -0.66169018  0.78866885 -0.24057918  0.71462773 -0.234603970
## [2,]  0.21749439 -0.09130330  0.05055694 -0.33806578  0.085884972
## [3,]  0.10407613  0.02409175 -0.18197662  0.28480082  0.001479248
## [4,]  0.07494815 -0.19737822  0.07251261  0.02360130 -0.052226330
## [5,] -0.15230789  0.27156301  0.02888095 -0.10433168 -0.071594822
## [6,] -0.02273319 -0.03610431  0.01683664 -0.08416796  0.101010088
##            [,17]       [,18]       [,19]       [,20]       [,21]
## [1,]  0.61246291 -0.15159252  0.45379447 -0.05194074  0.48203513
## [2,] -0.06963867  0.03489345 -0.21862496  0.00724903 -0.08864592
## [3,] -0.04131617 -0.04567576  0.26977685  0.02935309 -0.03508404
## [4,] -0.18781814  0.10424978  0.04535343 -0.06316358 -0.12243171
## [5,]  0.24555265  0.01508982 -0.10144713 -0.02413937  0.19083375
## [6,]  0.02761969 -0.10169223 -0.10767724  0.10472828  0.01427389
##              [,22]       [,23]       [,24]       [,25]       [,26]
## [1,] -0.0006051326  0.35351339  0.10534565  0.31284031  0.08845171
## [2,]  0.0140463932 -0.12750268 -0.05075131 -0.09126270 -0.01584536
## [3,] -0.0229521257  0.15208364  0.02931851  0.03518324  0.05534414
## [4,]  0.0424074316 -0.02261758 -0.06053511 -0.04370488  0.03602405
## [5,]  0.0087010440 -0.07382461  0.01409086  0.12280976 -0.01413816
## [6,] -0.0872384161 -0.03777471  0.08655306 -0.01040531 -0.10138114
##            [,27]       [,28]
## [1,]  0.22820877  0.23528821
## [2,] -0.06221214 -0.07847949
## [3,]  0.08591323  0.01373002
## [4,] -0.01094851 -0.07118311
## [5,] -0.03827272  0.06662419
## [6,] -0.05777931  0.06437127
# NOTE(review): this chunk repeats the dct28 / aplica_dct definitions that
# already appear earlier in this report; consider dropping one of the copies.
# 28x28 DCT matrix, built once and reused by every call to aplica_dct().
dct28 <- mrbsizeR::dctMatrix(28)

#' Apply the transform t(dct28) %*% Q %*% dct28 to one flattened 28x28 image.
#'
#' @param x Numeric vector (or matrix) holding 28 * 28 = 784 pixel values;
#'   the values are divided by 255 before the transform.
#' @return Numeric vector of length 784: the transformed 28x28 matrix
#'   flattened with as.numeric().
aplica_dct <- function(x) {
  # Fail loudly instead of letting matrix() recycle or truncate bad input.
  stopifnot(length(x) == 28 * 28)
  x <- x / 255
  # Rebuild the 28x28 grid row by row, then reverse each column and transpose.
  q <- x %>%
    matrix(28, 28, byrow = TRUE) %>%
    apply(2, rev) %>%
    t()
  q <- t(dct28) %*% q %*% dct28
  as.numeric(q)
}

# Preview the first rows of the transformed sample image, reshaped back into
# a 28x28 matrix.
img %>%
  aplica_dct() %>%
  matrix(28, 28, byrow = TRUE) %>%
  head()
##           [,1]       [,2]        [,3]         [,4]       [,5]        [,6]
## [1,] 17.562303 -5.3441382  4.34027006 -2.714131243  1.8284572 -1.12456568
## [2,] -5.076840  1.3756457 -1.40944686  1.057768241 -0.3662663  0.06546695
## [3,]  4.261970 -1.4304730  0.38616292 -0.214611420  0.6644516 -0.42282668
## [4,] -2.256486  0.7778634 -0.53449904  0.007334087 -0.2464381  0.44883571
## [5,]  2.081315 -0.5538839  0.45181465 -0.362750459  0.2623780 -0.06869007
## [6,] -1.536523  0.4252067  0.01021965  0.319901084 -0.3086368 -0.06672195
##             [,7]         [,8]        [,9]       [,10]       [,11]
## [1,]  1.70012473 -1.178604956  1.18540781 -0.40851739  0.98276068
## [2,] -0.67689381  0.495486495 -0.18232281  0.02605626 -0.44785992
## [3,]  0.34908440  0.002754063  0.08491686 -0.35737702  0.36583473
## [4,] -0.08392305 -0.045254498 -0.28917203  0.11865271  0.02430075
## [5,]  0.07853420 -0.218146598  0.27929773  0.02138392 -0.06201612
## [6,] -0.06589996  0.144141657 -0.03333580  0.08560158 -0.10105244
##            [,12]       [,13]       [,14]       [,15]        [,16]
## [1,] -0.66169018  0.78866885 -0.24057918  0.71462773 -0.234603970
## [2,]  0.21749439 -0.09130330  0.05055694 -0.33806578  0.085884972
## [3,]  0.10407613  0.02409175 -0.18197662  0.28480082  0.001479248
## [4,]  0.07494815 -0.19737822  0.07251261  0.02360130 -0.052226330
## [5,] -0.15230789  0.27156301  0.02888095 -0.10433168 -0.071594822
## [6,] -0.02273319 -0.03610431  0.01683664 -0.08416796  0.101010088
##            [,17]       [,18]       [,19]       [,20]       [,21]
## [1,]  0.61246291 -0.15159252  0.45379447 -0.05194074  0.48203513
## [2,] -0.06963867  0.03489345 -0.21862496  0.00724903 -0.08864592
## [3,] -0.04131617 -0.04567576  0.26977685  0.02935309 -0.03508404
## [4,] -0.18781814  0.10424978  0.04535343 -0.06316358 -0.12243171
## [5,]  0.24555265  0.01508982 -0.10144713 -0.02413937  0.19083375
## [6,]  0.02761969 -0.10169223 -0.10767724  0.10472828  0.01427389
##              [,22]       [,23]       [,24]       [,25]       [,26]
## [1,] -0.0006051326  0.35351339  0.10534565  0.31284031  0.08845171
## [2,]  0.0140463932 -0.12750268 -0.05075131 -0.09126270 -0.01584536
## [3,] -0.0229521257  0.15208364  0.02931851  0.03518324  0.05534414
## [4,]  0.0424074316 -0.02261758 -0.06053511 -0.04370488  0.03602405
## [5,]  0.0087010440 -0.07382461  0.01409086  0.12280976 -0.01413816
## [6,] -0.0872384161 -0.03777471  0.08655306 -0.01040531 -0.10138114
##            [,27]       [,28]
## [1,]  0.22820877  0.23528821
## [2,] -0.06221214 -0.07847949
## [3,]  0.08591323  0.01373002
## [4,] -0.01094851 -0.07118311
## [5,] -0.03827272  0.06662419
## [6,] -0.05777931  0.06437127
# Image: draw the transformed sample as a matrix plot, using the same
# reverse-columns-and-transpose step applied when the sample was loaded.
img %>%
  aplica_dct() %>%
  matrix(28, 28, byrow = TRUE) %>%
  apply(2, rev) %>%
  t() %>%
  image()

Logo após ela é editada para ficar no formato 28x28.

# Load the digit-recognizer data stored as RDS files. Functions are
# namespace-qualified so the chunk does not depend on readr/dplyr being
# attached (an unqualified filter() would otherwise resolve to stats::filter).
train <- readr::read_rds("~/redesneurais/digit-recognizer/train.rds")
label <- readr::read_rds("~/redesneurais/digit-recognizer/label_test.rds")

set.seed(13092019)

# Rows with Flag == 0, selected once and reused for predictors and targets.
# NOTE(review): presumably Flag == 0 marks the training split - confirm.
train_flag0 <- train %>%
  dplyr::filter(Flag == 0)

# Predictors: every column except label and Flag, as a numeric matrix
# divided by 255 (one vectorised division instead of a per-column mutate).
x_train <- train_flag0 %>%
  dplyr::select(-label, -Flag) %>%
  as.matrix()
x_train <- x_train / 255

# Targets: label column expanded into 10 indicator columns.
y_train <- train_flag0 %>%
  dplyr::select(label) %>%
  as.matrix() %>%
  keras::to_categorical(num_classes = 10)

# Training set after the transform: aplica_dct() is applied to each row;
# apply() returns one column per row, hence the final transpose.
x_train_transf <- x_train %>%
  apply(1, aplica_dct) %>%
  t()

Rotacionando a imagem, temos:

#' Rotate one flattened 28x28 image.
#'
#' Despite the name ("inversao"), the function performs a rotation through
#' imager::rotate_xy().
#'
#' @param x Numeric vector with 28 * 28 = 784 values, reshaped row by row
#'   into a 28x28 matrix before rotating.
#' @param theta Rotation angle forwarded to imager::rotate_xy().
#' @param cx,cy Centre of rotation forwarded to imager::rotate_xy(). The
#'   defaults keep the previously hard-coded 15, 15.
#'   NOTE(review): confirm 15, 15 is the intended centre for a 28x28 image.
#' @return The rotated image flattened with as.vector().
aplica_inversao <- function(x, theta, cx = 15, cy = 15) {
  # Fail loudly instead of letting matrix() recycle or truncate bad input.
  stopifnot(length(x) == 28 * 28)
  x %>%
    matrix(28, 28, byrow = TRUE) %>%
    imager::as.cimg() %>%
    imager::rotate_xy(theta, cx, cy) %>%
    as.vector()
}

# 45-degree rotation of the sample image, drawn as a matrix plot.
as.numeric(img) %>%
  aplica_inversao(45) %>%
  matrix(28, 28, byrow = TRUE) %>%
  image()

E, aplicando à base de treinamento:

# Applying the rotations to the data set. apply() over rows returns one
# column per image, so every result is transposed back to one image per row.
x_train_inversao_45 <- t(apply(x_train, 1, aplica_inversao, 45))

x_train_inversao_90 <- t(apply(x_train, 1, aplica_inversao, 90))

# NOTE(review): the two "_dct" variants rotate the already transformed rows
# (x_train_transf); they do not transform the rotated images. Confirm that
# this order is intended.
x_train_inversao_45_dct <- t(apply(x_train_transf, 1, aplica_inversao, 45))

x_train_inversao_90_dct <- t(apply(x_train_transf, 1, aplica_inversao, 90))

# Joining the matrices: original and rotated rows are stacked, then the
# pixel block and the transformed block are placed side by side.
x_train_max <- cbind(
  rbind(x_train, x_train_inversao_45, x_train_inversao_90),
  rbind(x_train_transf, x_train_inversao_45_dct, x_train_inversao_90_dct)
)