Matrizes para reconhecimento de números

IMPORTAR DADOS

knitr::opts_chunk$set(echo = TRUE)
# `y` holds the digit label of each of the 1000 selected images.
# `x` holds the pixels of each image: x[i, ] is the vector of pixel values that forms image i.
# x[, j] holds the values of pixel j, which vary from image to image (1000 images).



library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.3
## v tidyr   1.0.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dslabs)
#install.packages("matrixStats")
library(matrixStats)
## 
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
## 
##     count
# Load the MNIST data only when no `mnist` object exists yet in the session.
if (!exists("mnist")) {
  mnist <- read_mnist()
}

# Inspect the class of the training image container.
class(mnist$train$images)
## [1] "matrix"
# Keep the first 1000 training images (rows) and their matching labels.
x <- mnist$train$images[seq_len(1000), ]
y <- mnist$train$labels[seq_len(1000)]

# Print the flat pixel vector of the first image.
x[1, ]
##   [1]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##  [18]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##  [35]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##  [52]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##  [69]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
##  [86]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## [103]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## [120]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## [137]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   3
## [154]  18  18  18 126 136 175  26 166 255 247 127   0   0   0   0   0   0
## [171]   0   0   0   0   0   0  30  36  94 154 170 253 253 253 253 253 225
## [188] 172 253 242 195  64   0   0   0   0   0   0   0   0   0   0   0  49
## [205] 238 253 253 253 253 253 253 253 253 251  93  82  82  56  39   0   0
## [222]   0   0   0   0   0   0   0   0   0   0  18 219 253 253 253 253 253
## [239] 198 182 247 241   0   0   0   0   0   0   0   0   0   0   0   0   0
## [256]   0   0   0   0   0  80 156 107 253 253 205  11   0  43 154   0   0
## [273]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## [290]  14   1 154 253  90   0   0   0   0   0   0   0   0   0   0   0   0
## [307]   0   0   0   0   0   0   0   0   0   0   0   0   0 139 253 190   2
## [324]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## [341]   0   0   0   0   0   0   0  11 190 253  70   0   0   0   0   0   0
## [358]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## [375]   0   0  35 241 225 160 108   1   0   0   0   0   0   0   0   0   0
## [392]   0   0   0   0   0   0   0   0   0   0   0   0   0   0  81 240 253
## [409] 253 119  25   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## [426]   0   0   0   0   0   0   0   0   0  45 186 253 253 150  27   0   0
## [443]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## [460]   0   0   0   0  16  93 252 253 187   0   0   0   0   0   0   0   0
## [477]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## [494] 249 253 249  64   0   0   0   0   0   0   0   0   0   0   0   0   0
## [511]   0   0   0   0   0   0   0   0  46 130 183 253 253 207   2   0   0
## [528]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## [545]  39 148 229 253 253 253 250 182   0   0   0   0   0   0   0   0   0
## [562]   0   0   0   0   0   0   0   0   0  24 114 221 253 253 253 253 201
## [579]  78   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## [596]   0  23  66 213 253 253 253 253 198  81   2   0   0   0   0   0   0
## [613]   0   0   0   0   0   0   0   0   0   0  18 171 219 253 253 253 253
## [630] 195  80   9   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## [647]   0   0  55 172 226 253 253 253 253 244 133  11   0   0   0   0   0
## [664]   0   0   0   0   0   0   0   0   0   0   0   0   0 136 253 253 253
## [681] 212 135 132  16   0   0   0   0   0   0   0   0   0   0   0   0   0
## [698]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## [715]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## [732]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## [749]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## [766]   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0   0
## [783]   0   0
head(y) # true digit labels of the first images
## [1] 5 0 4 1 9 2

DESCODIFICAR IMAGEM 3

# Decode image 3: reshape its flat pixel vector into a square grid.
dim(x) # rows are images, columns are pixels
## [1] 1000  784
# Side length of the square image, taken from the number of pixel columns.
k <- as.integer(round(sqrt(ncol(x))))
# Fail early when the pixel count is not a perfect square: matrix() would
# otherwise recycle or truncate values and produce a scrambled image.
stopifnot(k * k == ncol(x))
grid <- matrix(x[3, ], k, k)
grid
##       [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
##  [1,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [2,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [3,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [4,]    0    0    0    0    0    0    0    0    0     0    46   120   159
##  [5,]    0    0    0    0    0    0   62  126  220   222   245   254   254
##  [6,]    0    0    0    0    0    0   81  163  163   163   163   163   120
##  [7,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [8,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##  [9,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [10,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [11,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [12,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [13,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [14,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [15,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [16,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [17,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [18,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [19,]    0    0    0    0    0    0    0    0    0     0     0    23   163
## [20,]    0    0    0    0    0    0    0    2   27   183   198   231   254
## [21,]    0    0    0    0    0   67  120  153  254   254   254   254   216
## [22,]    0    0    0    0    0  232  180  210  162   125    56    29    16
## [23,]    0    0    0    0    0   39   39   40    0     0     0     0     0
## [24,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [25,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [26,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [27,]    0    0    0    0    0    0    0    0    0     0     0     0     0
## [28,]    0    0    0    0    0    0    0    0    0     0     0     0     0
##       [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
##  [1,]     0     0     0     0     0     0     0     0     0     0     0
##  [2,]     0     0     0     0     0     0     0     0     0     0     0
##  [3,]     0     0     0     0     0     0     0     0     0     0     0
##  [4,]   159   159   150     0     0     0     0     0     0     0     0
##  [5,]   254   254   253   119     0     0     0     0     0     0     0
##  [6,]    67    85   237   177     0     0     0     0     0     0     0
##  [7,]     0     0   207   177     0     0     0     0     0     0     0
##  [8,]     0     0   207   177     0     0     0     0     0     0     0
##  [9,]     0     0   207   177     0     0     0     0     0     0     0
## [10,]     0    47   253   177     0     0     0     0     0     0     0
## [11,]     0    49   254    98     0     0     0     0     0     0     0
## [12,]     0   116   250    56     0     0     0     0     0     0     0
## [13,]     0   144   240     0     0     0     0     0     0     0     0
## [14,]     0   150   198     0     0     0     0     0     0     0     0
## [15,]     0   241   143     0     0     0     0     0     0     0     0
## [16,]    14   243    91     0     0     0     0     0     0     0     0
## [17,]    86   234    28     0     0     0     0     0     0     0     0
## [18,]   178   179     5   102   169   169   169   169   169   169   169
## [19,]   248   241   233   254   254   254   254   255   254   254   255
## [20,]   254   252   250   220   137    57    57    94    96   153   153
## [21,]    91    40     0     0     0     0     0     0     0     0     0
## [22,]     0     0     0     0     0     0     0     0     0     0     0
## [23,]     0     0     0     0     0     0     0     0     0     0     0
## [24,]     0     0     0     0     0     0     0     0     0     0     0
## [25,]     0     0     0     0     0     0     0     0     0     0     0
## [26,]     0     0     0     0     0     0     0     0     0     0     0
## [27,]     0     0     0     0     0     0     0     0     0     0     0
## [28,]     0     0     0     0     0     0     0     0     0     0     0
##       [,25] [,26] [,27] [,28]
##  [1,]     0     0     0     0
##  [2,]     0     0     0     0
##  [3,]     0     0     0     0
##  [4,]     0     0     0     0
##  [5,]     0     0     0     0
##  [6,]     0     0     0     0
##  [7,]     0     0     0     0
##  [8,]     0     0     0     0
##  [9,]     0     0     0     0
## [10,]     0     0     0     0
## [11,]     0     0     0     0
## [12,]     0     0     0     0
## [13,]     0     0     0     0
## [14,]     0     0     0     0
## [15,]     0     0     0     0
## [16,]     0     0     0     0
## [17,]     0     0     0     0
## [18,]    96     0     0     0
## [19,]   254     0     0     0
## [20,]   153     0     0     0
## [21,]     0     0     0     0
## [22,]     0     0     0     0
## [23,]     0     0     0     0
## [24,]     0     0     0     0
## [25,]     0     0     0     0
## [26,]     0     0     0     0
## [27,]     0     0     0     0
## [28,]     0     0     0     0
image(1:k, 1:k, grid) # draw the k-by-k grid as an image

# Flip the image back by reversing the column order of the grid.
image(1:k, 1:k, grid[, k:1])  # same plot with the columns of the grid reversed

# Per-image intensity summaries: each row of `x` is one image.
sums <- rowSums(x) # total pixel intensity of each image
avg <- rowMeans(x) # mean pixel intensity of each image

# Box plot of the mean intensity of the images, grouped by digit label.
# tibble() is used because data_frame() is deprecated and emits a warning.
tibble(labels = as.factor(y), row_averages = avg) %>%
  qplot(labels, row_averages, data = ., geom = "boxplot")

# Standard deviation of every pixel across images (each pixel is a column of `x`).
sds <- colSds(x)

# Histogram of the per-pixel standard deviations (x axis: standard deviation,
# y axis: number of pixels). `bins` is given as a number rather than the
# string "30", so the call does not depend on implicit string coercion.
qplot(sds, bins = 30, color = I("black"))

# Standard deviation at each pixel location, laid out on the k-by-k image grid
# with the column order reversed, as in the digit plots.
image(1:k, 1:k, matrix(sds, k, k)[, k:1])

# Keep only the pixels whose standard deviation across images exceeds 60.
# The earlier filter compared each standard deviation with 2 / sds, which is
# the same as sd > sqrt(2) and did not match the intended cut-off of 60.
sd_threshold <- 60
keep_pixel <- colSds(x) > sd_threshold
new_x <- x[, keep_pixel, drop = FALSE]
dim(new_x) # images x retained pixels

# Histogram of the values of all retained pixels.
qplot(as.vector(new_x), bins = 30, color = I("black"))

# Show image 3 using only the retained pixels. The retained columns no longer
# form a square, so reshaping new_x[3, ] directly scrambles the picture (and
# matrix() warns about the data length). Instead, the discarded pixels are set
# to 0 on the original layout, which keeps every pixel at its true position.
k <- as.integer(round(sqrt(ncol(x))))
stopifnot(k * k == ncol(x))
masked_pixels <- x[3, ]
masked_pixels[!keep_pixel] <- 0
grid <- matrix(masked_pixels, k, k)
image(1:k, 1:k, grid)

# Flip the image back by reversing the column order of the grid.
image(1:k, 1:k, grid[, k:1])

# Disabled alternative kept for reference: zero out values below 50 in new_x.
# new_x[new_x < 50] <- 0

# Binarize the pixel matrix in a single pass: values above 255 / 2 become 1
# and every other value becomes 0. The earlier two-step version used two
# strict comparisons, so a value exactly equal to 255 / 2 was left unchanged.
bin_x <- (x > 255 / 2) * 1

dim(bin_x) # rows are images, columns are pixels
## [1] 1000  784
# Side length of the square image, taken from the number of pixel columns.
k <- as.integer(round(sqrt(ncol(bin_x))))
stopifnot(k * k == ncol(bin_x))
grid <- matrix(bin_x[3, ], k, k)
image(1:k, 1:k, grid) # draw the binarized image 3

# Flip the image back by reversing the column order of the grid.
image(1:k, 1:k, grid[, k:1])