Leitura da imagem

library(captchaReceita)
# Fetch the input with download(); `arq` is later handed to ler() and
# decodificar().
arq <- download()
# Read it into a table of pixels; the printed head shows the columns
# x, y, r, g, b, cor and id.
d <- ler(arq)
print(head(d))
##   x  y          r          g          b     cor id
## 1 1 50 0.07450980 0.07450980 0.07450980 #131313  1
## 2 1 49 0.09411765 0.09411765 0.09411765 #181818  2
## 3 1 47 0.56078431 0.56078431 0.56078431 #8F8F8F  4
## 4 1 46 0.93725490 0.93725490 0.93725490 #EFEFEF  5
## 5 1 42 0.68235294 0.68235294 0.68235294 #AEAEAE  9
## 6 1 41 0.52156863 0.52156863 0.52156863 #858585 10

Gráfico

# Draw the image exactly as it was read, before any processing.
desenhar(d)

Processamento

  • remover os pontos e riscos
  • separar as letras
  • alinhar a imagem

Cortando a imagem:

# Keep only the window 10 <= x <= 180, 12 <= y <= 42 of the raw image `d`,
# then shift both coordinates so the kept window starts at 0.
x <- dplyr::filter(d, x >= 10, x <= 180, y >= 12, y <= 42)
x <- dplyr::mutate(x, x = x - 10, y = y - 12)
desenhar(x)

Tirar a "sujeira"

# First cleaning pass over the whole image, with k = c(5, 5, 5).
x <- tirar_sujeira(x, k = c(5, 5, 5))
# Rebuild the hex colour column from the red channel only (grey: r, r, r).
x <- dplyr::mutate(x, cor = rgb(r, r, r))
desenhar(x)

"Picotar"

# Split the image with picotar(); presumably this is the step that adds the
# `grupo` column used by the following steps (confirm in picotar()).
x <- picotar(x)
# Refresh the grey hex colour from the red channel.
x <- dplyr::mutate(x, cor = rgb(r, r, r))
desenhar(x)

Limpar de novo

Uma vez dentro de cada grupo

# Second cleaning pass, applied separately to each `grupo`
# with c(4, 4, 4) as the second argument of tirar_sujeira().
x <- dplyr::group_by(x, grupo)
x <- dplyr::do(x, tirar_sujeira(., c(4, 4, 4)))
# Drop the grouping again so later steps see a plain table.
x <- dplyr::ungroup(x)
desenhar(x)

Picotar de novo

# Run picotar() a second time on the cleaned pieces, then refresh the grey
# hex colour from the red channel.
x <- picotar(x)
x <- dplyr::mutate(x, cor = rgb(r, r, r))
desenhar(x)

Alinhar

# Align the pieces with alinhar() and draw the result.
x <- alinhar(x)
desenhar(x)

Recortar

# Keep only the pixels with x <= 25 and y <= 25.
# Note: `x` is left grouped by `grupo` (there is no ungroup() in this step).
x <- dplyr::group_by(x, grupo)
x <- dplyr::filter(x, x <= 25, y <= 25)
desenhar(x)

Mudar a resolução

# Change the resolution by calling resize_image(., 20, 20) once per `grupo`.
# The grouping is stated explicitly so this step does not silently depend on
# `x` still being grouped from an earlier step; when `x` is already grouped
# by `grupo`, grouping again by the same column changes nothing.
x <- x %>%
  dplyr::group_by(grupo) %>%
  dplyr::do(resize_image(., 20, 20)) %>%
  dplyr::ungroup()
desenhar(x)

Montar banco de dados

# Build the model matrix: one row per `grupo` and one column per pixel key
# "xNN_yNN", holding 1 where the group has that pixel and 0 (the fill value)
# otherwise. The `grupo` column itself is dropped at the end.
# Note: no sorting by `grupo` (arrange) is applied.
x %>%
  dplyr::transmute(grupo, xy = sprintf("x%02d_y%02d", x, y), um = 1) %>%
  tidyr::spread(xy, um, fill = 0) %>%
  dplyr::select(-grupo)
## Source: local data frame [6 x 371]
## 
##   x01_y03 x01_y11 x01_y12 x01_y13 x01_y14 x01_y15 x01_y17 x01_y18 x01_y19
## 1       0       0       0       1       0       0       0       0       0
## 2       0       0       0       0       0       0       0       0       1
## 3       1       0       0       0       0       0       0       0       0
## 4       0       0       0       0       0       0       1       1       0
## 5       0       0       0       0       0       0       0       0       0
## 6       0       1       1       1       1       1       0       0       0
## Variables not shown: x01_y20 (dbl), x02_y01 (dbl), x02_y02 (dbl), x02_y03
##   (dbl), x02_y04 (dbl), x02_y09 (dbl), x02_y10 (dbl), x02_y11 (dbl),
##   x02_y12 (dbl), x02_y13 (dbl), x02_y14 (dbl), x02_y15 (dbl), x02_y16
##   (dbl), x02_y17 (dbl), x02_y18 (dbl), x02_y19 (dbl), x02_y20 (dbl),
##   x03_y01 (dbl), x03_y02 (dbl), x03_y03 (dbl), x03_y04 (dbl), x03_y05
##   (dbl), x03_y08 (dbl), x03_y09 (dbl), x03_y10 (dbl), x03_y11 (dbl),
##   x03_y12 (dbl), x03_y13 (dbl), x03_y14 (dbl), x03_y15 (dbl), x03_y16
##   (dbl), x03_y17 (dbl), x03_y18 (dbl), x03_y19 (dbl), x03_y20 (dbl),
##   x04_y01 (dbl), x04_y02 (dbl), x04_y03 (dbl), x04_y04 (dbl), x04_y05
##   (dbl), x04_y06 (dbl), x04_y07 (dbl), x04_y08 (dbl), x04_y09 (dbl),
##   x04_y10 (dbl), x04_y11 (dbl), x04_y12 (dbl), x04_y13 (dbl), x04_y14
##   (dbl), x04_y15 (dbl), x04_y16 (dbl), x04_y17 (dbl), x04_y18 (dbl),
##   x04_y19 (dbl), x04_y20 (dbl), x05_y01 (dbl), x05_y02 (dbl), x05_y03
##   (dbl), x05_y04 (dbl), x05_y05 (dbl), x05_y06 (dbl), x05_y07 (dbl),
##   x05_y08 (dbl), x05_y09 (dbl), x05_y10 (dbl), x05_y11 (dbl), x05_y12
##   (dbl), x05_y13 (dbl), x05_y14 (dbl), x05_y15 (dbl), x05_y16 (dbl),
##   x05_y17 (dbl), x05_y18 (dbl), x05_y19 (dbl), x05_y20 (dbl), x06_y01
##   (dbl), x06_y02 (dbl), x06_y03 (dbl), x06_y04 (dbl), x06_y05 (dbl),
##   x06_y06 (dbl), x06_y07 (dbl), x06_y08 (dbl), x06_y09 (dbl), x06_y10
##   (dbl), x06_y11 (dbl), x06_y12 (dbl), x06_y13 (dbl), x06_y14 (dbl),
##   x06_y15 (dbl), x06_y16 (dbl), x06_y17 (dbl), x06_y18 (dbl), x06_y19
##   (dbl), x06_y20 (dbl), x07_y01 (dbl), x07_y02 (dbl), x07_y03 (dbl),
##   x07_y04 (dbl), x07_y05 (dbl), x07_y06 (dbl), x07_y07 (dbl), x07_y08
##   (dbl), x07_y09 (dbl), x07_y10 (dbl), x07_y11 (dbl), x07_y12 (dbl),
##   x07_y13 (dbl), x07_y14 (dbl), x07_y15 (dbl), x07_y16 (dbl), x07_y17
##   (dbl), x07_y18 (dbl), x07_y19 (dbl), x07_y20 (dbl), x08_y01 (dbl),
##   x08_y02 (dbl), x08_y03 (dbl), x08_y04 (dbl), x08_y05 (dbl), x08_y06
##   (dbl), x08_y07 (dbl), x08_y08 (dbl), x08_y09 (dbl), x08_y10 (dbl),
##   x08_y11 (dbl), x08_y12 (dbl), x08_y14 (dbl), x08_y15 (dbl), x08_y16
##   (dbl), x08_y17 (dbl), x08_y18 (dbl), x08_y19 (dbl), x08_y20 (dbl),
##   x09_y01 (dbl), x09_y02 (dbl), x09_y03 (dbl), x09_y04 (dbl), x09_y05
##   (dbl), x09_y06 (dbl), x09_y07 (dbl), x09_y08 (dbl), x09_y09 (dbl),
##   x09_y10 (dbl), x09_y11 (dbl), x09_y12 (dbl), x09_y13 (dbl), x09_y14
##   (dbl), x09_y15 (dbl), x09_y16 (dbl), x09_y18 (dbl), x09_y19 (dbl),
##   x09_y20 (dbl), x10_y01 (dbl), x10_y02 (dbl), x10_y03 (dbl), x10_y04
##   (dbl), x10_y05 (dbl), x10_y06 (dbl), x10_y07 (dbl), x10_y08 (dbl),
##   x10_y09 (dbl), x10_y10 (dbl), x10_y11 (dbl), x10_y12 (dbl), x10_y13
##   (dbl), x10_y14 (dbl), x10_y15 (dbl), x10_y16 (dbl), x10_y17 (dbl),
##   x10_y18 (dbl), x10_y19 (dbl), x10_y20 (dbl), x11_y01 (dbl), x11_y02
##   (dbl), x11_y03 (dbl), x11_y04 (dbl), x11_y05 (dbl), x11_y06 (dbl),
##   x11_y07 (dbl), x11_y08 (dbl), x11_y09 (dbl), x11_y10 (dbl), x11_y11
##   (dbl), x11_y12 (dbl), x11_y13 (dbl), x11_y14 (dbl), x11_y15 (dbl),
##   x11_y16 (dbl), x11_y17 (dbl), x11_y18 (dbl), x11_y19 (dbl), x11_y20
##   (dbl), x12_y01 (dbl), x12_y02 (dbl), x12_y03 (dbl), x12_y04 (dbl),
##   x12_y05 (dbl), x12_y06 (dbl), x12_y07 (dbl), x12_y08 (dbl), x12_y09
##   (dbl), x12_y10 (dbl), x12_y11 (dbl), x12_y12 (dbl), x12_y13 (dbl),
##   x12_y14 (dbl), x12_y15 (dbl), x12_y16 (dbl), x12_y17 (dbl), x12_y18
##   (dbl), x12_y19 (dbl), x12_y20 (dbl), x13_y01 (dbl), x13_y02 (dbl),
##   x13_y03 (dbl), x13_y04 (dbl), x13_y05 (dbl), x13_y06 (dbl), x13_y07
##   (dbl), x13_y08 (dbl), x13_y09 (dbl), x13_y10 (dbl), x13_y11 (dbl),
##   x13_y12 (dbl), x13_y13 (dbl), x13_y14 (dbl), x13_y15 (dbl), x13_y16
##   (dbl), x13_y17 (dbl), x13_y18 (dbl), x13_y19 (dbl), x13_y20 (dbl),
##   x14_y01 (dbl), x14_y02 (dbl), x14_y03 (dbl), x14_y04 (dbl), x14_y05
##   (dbl), x14_y06 (dbl), x14_y08 (dbl), x14_y09 (dbl), x14_y10 (dbl),
##   x14_y11 (dbl), x14_y12 (dbl), x14_y13 (dbl), x14_y14 (dbl), x14_y17
##   (dbl), x14_y18 (dbl), x14_y19 (dbl), x14_y20 (dbl), x15_y01 (dbl),
##   x15_y02 (dbl), x15_y03 (dbl), x15_y04 (dbl), x15_y05 (dbl), x15_y06
##   (dbl), x15_y07 (dbl), x15_y08 (dbl), x15_y09 (dbl), x15_y10 (dbl),
##   x15_y11 (dbl), x15_y12 (dbl), x15_y13 (dbl), x15_y14 (dbl), x15_y15
##   (dbl), x15_y16 (dbl), x15_y17 (dbl), x15_y18 (dbl), x15_y19 (dbl),
##   x16_y01 (dbl), x16_y02 (dbl), x16_y03 (dbl), x16_y04 (dbl), x16_y05
##   (dbl), x16_y06 (dbl), x16_y07 (dbl), x16_y08 (dbl), x16_y10 (dbl),
##   x16_y11 (dbl), x16_y12 (dbl), x16_y13 (dbl), x16_y14 (dbl), x16_y15
##   (dbl), x16_y16 (dbl), x16_y17 (dbl), x16_y18 (dbl), x16_y19 (dbl),
##   x17_y01 (dbl), x17_y02 (dbl), x17_y03 (dbl), x17_y04 (dbl), x17_y05
##   (dbl), x17_y06 (dbl), x17_y07 (dbl), x17_y08 (dbl), x17_y09 (dbl),
##   x17_y10 (dbl), x17_y11 (dbl), x17_y12 (dbl), x17_y13 (dbl), x17_y14
##   (dbl), x17_y15 (dbl), x17_y16 (dbl), x17_y17 (dbl), x17_y18 (dbl),
##   x17_y19 (dbl), x18_y01 (dbl), x18_y02 (dbl), x18_y03 (dbl), x18_y04
##   (dbl), x18_y05 (dbl), x18_y06 (dbl), x18_y07 (dbl), x18_y08 (dbl),
##   x18_y09 (dbl), x18_y10 (dbl), x18_y11 (dbl), x18_y12 (dbl), x18_y13
##   (dbl), x18_y14 (dbl), x18_y15 (dbl), x18_y16 (dbl), x18_y17 (dbl),
##   x18_y18 (dbl), x18_y19 (dbl), x19_y01 (dbl), x19_y02 (dbl), x19_y03
##   (dbl), x19_y04 (dbl), x19_y05 (dbl), x19_y07 (dbl), x19_y08 (dbl),
##   x19_y09 (dbl), x19_y10 (dbl), x19_y11 (dbl), x19_y12 (dbl), x19_y13
##   (dbl), x19_y14 (dbl), x19_y15 (dbl), x19_y16 (dbl), x19_y17 (dbl),
##   x19_y18 (dbl), x19_y19 (dbl), x20_y01 (dbl), x20_y02 (dbl), x20_y03
##   (dbl), x20_y04 (dbl), x20_y05 (dbl), x20_y06 (dbl), x20_y07 (dbl),
##   x20_y08 (dbl), x20_y09 (dbl), x20_y10 (dbl), x20_y11 (dbl), x20_y12
##   (dbl), x20_y13 (dbl), x20_y14 (dbl), x20_y15 (dbl), x20_y16 (dbl),
##   x20_y17 (dbl), x20_y18 (dbl), x20_y19 (dbl)

Ajustar o modelo

  • modelo usado foi Random Forest
  • RF para não precisar fazer seleção de variáveis
  • acerto é de 86% das letras
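  • acerto aproximado de 40% das palavras completas (0,86^6 ≈ 0,40, pois cada captcha tem 6 caracteres, supondo erros independentes entre letras)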

# Run decodificar() on `arq` with `modelo` and print the returned string.
# `modelo` is not defined in the visible code; presumably it is the fitted
# Random Forest described above (TODO confirm where it is created/loaded).
res <- decodificar(arq, modelo)
## Loading required package: randomForest
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
print(res)
## [1] "9B2X1C"

# NOTE(review): every call below is textually identical to the one above, yet
# the recorded outputs differ and `arq` is never reassigned in the visible
# code. Presumably a new captcha is fetched between calls in code that is not
# shown here; confirm before reusing these snippets as-is.
res <- decodificar(arq, modelo)
print(res)
## [1] "YHUZI3"

res <- decodificar(arq, modelo)
print(res)
## [1] "YJD2AW"

res <- decodificar(arq, modelo)
print(res)
## [1] "QREXPT"

res <- decodificar(arq, modelo)
print(res)
## [1] "RBVIV6"

res <- decodificar(arq, modelo)
print(res)
## [1] "1VB3ZD"

res <- decodificar(arq, modelo)
print(res)
## [1] "O3M63Q"

res <- decodificar(arq, modelo)
print(res)
## [1] "Q39IPV"

res <- decodificar(arq, modelo)
print(res)
## [1] "YJ8SMD"

res <- decodificar(arq, modelo)
print(res)
## [1] "KPGMDR"

res <- decodificar(arq, modelo)
print(res)
## [1] "XIX676"

res <- decodificar(arq, modelo)
print(res)
## [1] "XP3B12"

res <- decodificar(arq, modelo)
print(res)
## [1] "R5TR12"

res <- decodificar(arq, modelo)
print(res)
## [1] "K9M73L"