# Extract-text-from-an-image.R

##############################1---Extract text from an image. Requires the 'tesseract' package.
library(magick)

## Warning: package 'magick' was built under R version 4.0.4

## Linking to ImageMagick 6.9.12.3
## Enabled features: cairo, freetype, fftw, ghostscript, heic, lcms, pango, raw, rsvg, webp
## Disabled features: fontconfig, x11

#install.packages("tesseract")
library(tesseract)

## Warning: package 'tesseract' was built under R version 4.0.5

# Read the sample image and run OCR on it once, keeping the recognised text
# in txt_1 for later use.
# No `if (require("tesseract"))` guard is used here: library(tesseract) above
# already stops the script when the package is missing, whereas a FALSE from
# require() would silently skip the assignment of `img` and make every later
# statement fail with "object 'img' not found".
img <- image_read("http://jeroen.github.io/images/testocr.png")
txt_1 <- image_ocr(img)
txt_1

## [1] "This is a lot of 12 point text to test the\nocr code and see if it works on all types\nof file format.\n\nThe quick brown dog jumped over the\nlazy fox. The quick brown dog jumped\nover the lazy fox. The quick brown dog\njumped over the lazy fox. The quick\nbrown dog jumped over the lazy fox.\n"

# Summary of the image object (format, size, colorspace, ...).
print(img)

##   format width height colorspace matte filesize density
## 1    PNG   640    480       sRGB  TRUE    23359   72x72

class(img)

## [1] "magick-image"

# The OCR result is a plain character vector.
class(txt_1)

## [1] "character"

###############################2----convert image to numerical (0-1) pixel matrix in R
#BiocManager::install("EBImage")
library(EBImage)
# Read the image with EBImage's readImage(). Despite the variable name, this
# is the same testocr.png page that section 1 ran OCR on.
example_cat_image <- readImage("http://jeroen.github.io/images/testocr.png")
# Auto-print the object: a summary followed by a small preview of the pixel
# data (see the recorded output below).
example_cat_image

## Image 
##   colorMode    : Color 
##   storage.mode : double 
##   dim          : 640 480 4 
##   frames.total : 4 
##   frames.render: 1 
## 
## imageData(object)[1:5,1:6,1]
##      [,1] [,2] [,3] [,4] [,5] [,6]
## [1,]    1    1    1    1    1    1
## [2,]    1    1    1    1    1    1
## [3,]    1    1    1    1    1    1
## [4,]    1    1    1    1    1    1
## [5,]    1    1    1    1    1    1

# show the original image
display(example_cat_image)

# width and height of the original image
dim(example_cat_image)[1:2]

## [1] 640 480

# scale to a specific width and height; 200 x 100 is 2:1 while the original
# 640 x 480 is 4:3, so this resize does not keep the aspect ratio
y <- resize(example_cat_image, w = 200, h = 100)
y

## Image 
##   colorMode    : Color 
##   storage.mode : double 
##   dim          : 200 100 4 
##   frames.total : 4 
##   frames.render: 1 
## 
## imageData(object)[1:5,1:6,1]
##      [,1] [,2] [,3] [,4] [,5] [,6]
## [1,]    1    1    1    1    1    1
## [2,]    1    1    1    1    1    1
## [3,]    1    1    1    1    1    1
## [4,]    1    1    1    1    1    1
## [5,]    1    1    1    1    1    1

# scale the width to one third of the original (640 / 3, reported as 213 in
# the output below); the height is determined automatically so that
# the aspect ratio is preserved. This overwrites the 200 x 100 result in y.
y <- resize(example_cat_image, dim(example_cat_image)[1]/3)
y

## Image 
##   colorMode    : Color 
##   storage.mode : double 
##   dim          : 213 160 4 
##   frames.total : 4 
##   frames.render: 1 
## 
## imageData(object)[1:5,1:6,1]
##      [,1] [,2] [,3] [,4] [,5] [,6]
## [1,]    1    1    1    1    1    1
## [2,]    1    1    1    1    1    1
## [3,]    1    1    1    1    1    1
## [4,]    1    1    1    1    1    1
## [5,]    1    1    1    1    1    1

# show the scaled image
display(y)

# extract the pixel array
z <- imageData(y)
dim(z)

## [1] 213 160   4

class(z)

## [1] "array"

# or, equivalently for the dimensions shown here, convert with as.array()
z <- as.array(y)
dim(z)

## [1] 213 160   4

# the result of as.array() is no longer an Image object
is.Image( as.array(y) )

## [1] FALSE

# histogram of the image's values (presumably pixel intensities per channel;
# see the EBImage documentation for the exact plot)
hist(y)

# smallest and largest value in the image: the data lie in [0, 1]
range(y) #[1] 0 1

## [1] 0 1

# short = TRUE prints only the summary header, without the pixel-data preview
print(y, short=TRUE)

## Image 
##   colorMode    : Color 
##   storage.mode : double 
##   dim          : 213 160 4 
##   frames.total : 4 
##   frames.render: 1

###############################3---Translate--stop work
#install.packages("RYoudaoTranslate")
library("RYoudaoTranslate")

## Warning: package 'RYoudaoTranslate' was built under R version 4.0.4

## Loading required package: RCurl

## Warning: package 'RCurl' was built under R version 4.0.4

## Loading required package: rjson

# Presumably Youdao API credentials. NOTE(review): in the code visible here
# neither variable is read; youdaoUrl() below embeds a different keyfrom/key
# pair directly in its URL string. Confirm whether these are still needed.
apikey = "498375134"
keyfrom = "JustForTestYouDao"
# Build the Youdao open-API request URL for a query.
#
# word: text to translate (character; coerced with as.character()).
# Returns: the request URL as a character string, with the query appended to
#   the fixed endpoint and the keyfrom/key pair hard-coded below.
youdaoUrl <- function(word) {
  # Percent-encode the query so that spaces, newlines, '&', '#', '=' and
  # non-ASCII characters (all plausible in OCR output such as txt_1) cannot
  # break or truncate the URL. Plain alphanumeric words pass through
  # unchanged, and input that already contains %XX escapes is left as-is
  # (URLencode's default repeated = FALSE).
  query <- utils::URLencode(enc2utf8(as.character(word)), reserved = TRUE)
  paste0(
    "http://fanyi.youdao.com/openapi.do?keyfrom=fy1991--421fy&key=282671603&type=data&doctype=json&version=1.1&q=",
    query
  )
}
# Translate one piece of text through the Youdao web API.
#
# word: text to translate; passed to youdaoUrl() to build the request.
# Returns: a single character string with the `value` entries of the first
#   element of the response's `web` list joined by "；", or NA_character_
#   when the response carries no `web` entries.
# NOTE(review): the response layout (a `web` list whose elements have a
#   `value` field) is taken from how this code indexes it; confirm against
#   the API documentation.
youdaoTranslate <- function(word) {
  # getURL() returns the raw response body (JSON text), not a URL.
  response <- getURL(youdaoUrl(word))
  obj <- fromJSON(response)
  web_entries <- obj$web
  # Without this guard, `obj$web[[1]]` raises "subscript out of bounds" when
  # no web translations come back, which aborts a whole sapply() run.
  if (length(web_entries) == 0) {
    return(NA_character_)
  }
  paste0(web_entries[[1]]$value, collapse = "；")
}

#sapply(txt_1,youdaoTranslate,simplify=TRUE)

# Extract-text-from-an-image.R

# liyix

# 2021-04-23