Matrizes para reconhecimento de números
IMPORTAR DADOS
# Default chunk option: echo the R source of every chunk in the knitted output.
knitr::opts_chunk$set(echo = TRUE)
# y holds the digit value (label) of each of the 1000 images.
# x holds the pictures themselves: x[i, ] is the vector of pixels that draws image i.
# x[, j] holds the values of pixel j, which vary from image to image (1000 images).
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.2.1 v purrr 0.3.3
## v tibble 2.1.3 v dplyr 0.8.3
## v tidyr 1.0.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dslabs)
#install.packages("matrixStats")
library(matrixStats)
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
# Read the MNIST data only when it is not already present in the session.
if (!exists("mnist")) {
  mnist <- read_mnist()
}
class(mnist[["train"]][["images"]]) # inspect the class of the training images
## [1] "matrix"
# Work on the first 1000 training images and their labels.
x <- mnist[["train"]][["images"]][seq_len(1000), ]
y <- mnist[["train"]][["labels"]][seq_len(1000)]
# All pixel values of the first image.
x[1, ]
## [1] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [18] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [35] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [52] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [69] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [86] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [103] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [120] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [137] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 3
## [154] 18 18 18 126 136 175 26 166 255 247 127 0 0 0 0 0 0
## [171] 0 0 0 0 0 0 30 36 94 154 170 253 253 253 253 253 225
## [188] 172 253 242 195 64 0 0 0 0 0 0 0 0 0 0 0 49
## [205] 238 253 253 253 253 253 253 253 253 251 93 82 82 56 39 0 0
## [222] 0 0 0 0 0 0 0 0 0 0 18 219 253 253 253 253 253
## [239] 198 182 247 241 0 0 0 0 0 0 0 0 0 0 0 0 0
## [256] 0 0 0 0 0 80 156 107 253 253 205 11 0 43 154 0 0
## [273] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [290] 14 1 154 253 90 0 0 0 0 0 0 0 0 0 0 0 0
## [307] 0 0 0 0 0 0 0 0 0 0 0 0 0 139 253 190 2
## [324] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [341] 0 0 0 0 0 0 0 11 190 253 70 0 0 0 0 0 0
## [358] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [375] 0 0 35 241 225 160 108 1 0 0 0 0 0 0 0 0 0
## [392] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 81 240 253
## [409] 253 119 25 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [426] 0 0 0 0 0 0 0 0 0 45 186 253 253 150 27 0 0
## [443] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [460] 0 0 0 0 16 93 252 253 187 0 0 0 0 0 0 0 0
## [477] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [494] 249 253 249 64 0 0 0 0 0 0 0 0 0 0 0 0 0
## [511] 0 0 0 0 0 0 0 0 46 130 183 253 253 207 2 0 0
## [528] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [545] 39 148 229 253 253 253 250 182 0 0 0 0 0 0 0 0 0
## [562] 0 0 0 0 0 0 0 0 0 24 114 221 253 253 253 253 201
## [579] 78 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [596] 0 23 66 213 253 253 253 253 198 81 2 0 0 0 0 0 0
## [613] 0 0 0 0 0 0 0 0 0 0 18 171 219 253 253 253 253
## [630] 195 80 9 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [647] 0 0 55 172 226 253 253 253 253 244 133 11 0 0 0 0 0
## [664] 0 0 0 0 0 0 0 0 0 0 0 0 0 136 253 253 253
## [681] 212 135 132 16 0 0 0 0 0 0 0 0 0 0 0 0 0
## [698] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [715] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [732] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [749] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [766] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [783] 0 0
head(y) # true digit values (labels) of the first images
## [1] 5 0 4 1 9 2
DESCODIFICAR IMAGEM 3
dim(x) # rows = images, columns = pixels
## [1] 1000 784
# Every image is stored as one row of pixels; the side length of the square
# picture is the square root of the number of pixels per row.
k <- sqrt(ncol(x))
# Reshape the pixels of image 3 into a k x k matrix and print it.
grid <- matrix(x[3, ], nrow = k, ncol = k)
grid
## [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13]
## [1,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [2,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [3,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [4,] 0 0 0 0 0 0 0 0 0 0 46 120 159
## [5,] 0 0 0 0 0 0 62 126 220 222 245 254 254
## [6,] 0 0 0 0 0 0 81 163 163 163 163 163 120
## [7,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [8,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [9,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [10,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [11,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [12,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [13,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [14,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [15,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [16,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [17,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [18,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [19,] 0 0 0 0 0 0 0 0 0 0 0 23 163
## [20,] 0 0 0 0 0 0 0 2 27 183 198 231 254
## [21,] 0 0 0 0 0 67 120 153 254 254 254 254 216
## [22,] 0 0 0 0 0 232 180 210 162 125 56 29 16
## [23,] 0 0 0 0 0 39 39 40 0 0 0 0 0
## [24,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [25,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [26,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [27,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [28,] 0 0 0 0 0 0 0 0 0 0 0 0 0
## [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
## [1,] 0 0 0 0 0 0 0 0 0 0 0
## [2,] 0 0 0 0 0 0 0 0 0 0 0
## [3,] 0 0 0 0 0 0 0 0 0 0 0
## [4,] 159 159 150 0 0 0 0 0 0 0 0
## [5,] 254 254 253 119 0 0 0 0 0 0 0
## [6,] 67 85 237 177 0 0 0 0 0 0 0
## [7,] 0 0 207 177 0 0 0 0 0 0 0
## [8,] 0 0 207 177 0 0 0 0 0 0 0
## [9,] 0 0 207 177 0 0 0 0 0 0 0
## [10,] 0 47 253 177 0 0 0 0 0 0 0
## [11,] 0 49 254 98 0 0 0 0 0 0 0
## [12,] 0 116 250 56 0 0 0 0 0 0 0
## [13,] 0 144 240 0 0 0 0 0 0 0 0
## [14,] 0 150 198 0 0 0 0 0 0 0 0
## [15,] 0 241 143 0 0 0 0 0 0 0 0
## [16,] 14 243 91 0 0 0 0 0 0 0 0
## [17,] 86 234 28 0 0 0 0 0 0 0 0
## [18,] 178 179 5 102 169 169 169 169 169 169 169
## [19,] 248 241 233 254 254 254 254 255 254 254 255
## [20,] 254 252 250 220 137 57 57 94 96 153 153
## [21,] 91 40 0 0 0 0 0 0 0 0 0
## [22,] 0 0 0 0 0 0 0 0 0 0 0
## [23,] 0 0 0 0 0 0 0 0 0 0 0
## [24,] 0 0 0 0 0 0 0 0 0 0 0
## [25,] 0 0 0 0 0 0 0 0 0 0 0
## [26,] 0 0 0 0 0 0 0 0 0 0 0
## [27,] 0 0 0 0 0 0 0 0 0 0 0
## [28,] 0 0 0 0 0 0 0 0 0 0 0
## [,25] [,26] [,27] [,28]
## [1,] 0 0 0 0
## [2,] 0 0 0 0
## [3,] 0 0 0 0
## [4,] 0 0 0 0
## [5,] 0 0 0 0
## [6,] 0 0 0 0
## [7,] 0 0 0 0
## [8,] 0 0 0 0
## [9,] 0 0 0 0
## [10,] 0 0 0 0
## [11,] 0 0 0 0
## [12,] 0 0 0 0
## [13,] 0 0 0 0
## [14,] 0 0 0 0
## [15,] 0 0 0 0
## [16,] 0 0 0 0
## [17,] 0 0 0 0
## [18,] 96 0 0 0
## [19,] 254 0 0 0
## [20,] 153 0 0 0
## [21,] 0 0 0 0
## [22,] 0 0 0 0
## [23,] 0 0 0 0
## [24,] 0 0 0 0
## [25,] 0 0 0 0
## [26,] 0 0 0 0
## [27,] 0 0 0 0
## [28,] 0 0 0 0
# Draw the matrix of image 3 exactly as it is stored.
image(seq_len(k), seq_len(k), grid)
# Flip the image back: draw it again with the column order of grid reversed.
image(seq_len(k), seq_len(k), grid[, rev(seq_len(k))])
# Total and mean pixel intensity of every image (each row of x is one image).
sums <- rowSums(x)
avg <- rowMeans(x)
# Box plot of the mean intensity of the images, grouped by digit.
# tibble() replaces data_frame(), which is deprecated and raised a warning
# every time this chunk ran.
tibble(labels = as.factor(y), row_averages = avg) %>%
  qplot(labels, row_averages, data = ., geom = "boxplot")
# Standard deviation of every pixel across the images (each column of x is one
# pixel).
sds <- colSds(x)
# Histogram of the pixel standard deviations: x axis = standard deviation,
# y axis = number of pixels. `bins` must be a number, not the string "30".
qplot(sds, bins = 30, color = I("black"))
# Standard deviation shown at each pixel location of the k x k picture.
image(1:k, 1:k, matrix(sds, k, k)[, k:1])
# Keep only the pixels whose standard deviation exceeds 2 / sd, i.e. whose
# variance is above 2. Constant pixels (sd = 0) give 2 / 0 = Inf and are dropped.
# NOTE(review): the previous comment said the threshold was 60, which would be
# `sds > 60`; the 597 columns recorded below come from the condition as written.
# Confirm which threshold is intended.
# drop = FALSE keeps new_x a matrix even if a single column survives.
new_x <- x[, sds > 2 / sds, drop = FALSE]
dim(new_x)
## [1] 1000 597
# Histogram of the values of all remaining pixels.
qplot(as.vector(new_x), bins = 30, color = I("black"))
# Show image 3 again using only the pixels that were kept in new_x.
# A row of new_x cannot be reshaped into a square picture directly: it has 597
# values, sqrt(597) is not a whole number, and the surviving pixels would no
# longer sit at their original positions (the earlier version raised
# "data length [597] is not a sub-multiple or multiple of the number of rows").
# Instead the kept pixels are written back into a blank canvas of the original
# size; discarded pixels stay at 0.
dim(new_x) # rows = images, columns = pixels that survived the filter
## [1] 1000 597
kept <- sds > 2 / sds # same condition that was used to build new_x
stopifnot(sum(kept) == ncol(new_x)) # the mask must match the columns of new_x
k <- sqrt(ncol(x)) # side length of the original square picture
stopifnot(k == round(k))
pixels <- numeric(ncol(x))
pixels[kept] <- new_x[3, ]
grid <- matrix(pixels, k, k)
image(1:k, 1:k, grid) # draw the matrix as stored
# flip the image back
image(1:k, 1:k, grid[, k:1])
# Disabled alternative: new_x[new_x < 50] <- 0 would set every pixel value
# below 50 to zero.
# Binarize the pixel matrix: values below half of the maximum intensity (255 / 2)
# become 0 and values above it become 1.
bin_x <- replace(x, x < 255 / 2, 0)
bin_x <- replace(bin_x, bin_x > 255 / 2, 1)
dim(bin_x) # rows = images, columns = pixels
## [1] 1000 784
# Side length of the square picture, taken from the number of pixels per image.
k <- sqrt(ncol(bin_x))
# Reshape the binarized pixels of image 3 and draw them as stored.
grid <- matrix(bin_x[3, ], nrow = k, ncol = k)
image(seq_len(k), seq_len(k), grid)
# Flip the image back: draw it again with the column order of grid reversed.
image(seq_len(k), seq_len(k), grid[, rev(seq_len(k))])