1. Cargar librerías

library(purrr)
library(tesseract)
library(magick)
## Linking to ImageMagick 6.9.12.93
## Enabled features: cairo, fontconfig, freetype, heic, lcms, pango, raw, rsvg, webp
## Disabled features: fftw, ghostscript, x11
library(officer)
library(pdftools)
## Using poppler version 23.04.0

2. Convertir las páginas del PDF a imágenes PNG

# Render every page of the PDF to a 600-dpi PNG. The files are written to the
# current working directory and their file names are returned.
png_files <- pdf_convert(
  "/Users/sebastianfajardo/Downloads/eso3.pdf",
  dpi = 600
)

# OCR each rendered page. The source document is in Spanish, so use the
# Spanish model here as well (ocr() falls back to English by default), and
# build the engine once instead of once per page.
spa_engine <- tesseract("spa")
pdf1 <- map(png_files, function(png) ocr(png, engine = spa_engine))
## Converting page 1 to eso3_1.png... done!
## Converting page 2 to eso3_2.png... done!
## Converting page 3 to eso3_3.png... done!

3. Generar las rutas a las imágenes

# Paths to the three page images (eso3_1.png ... eso3_3.png) located under
# /Users/sebastianfajardo, built from the directory and a file-name pattern.
image_paths <- file.path(
  "/Users/sebastianfajardo",
  sprintf("eso3_%d.png", seq_len(3))
)

4. Leer el texto de cada imagen

# Start from an empty Word document; one paragraph is appended per page image.
doc <- read_docx()

# pdf_convert() writes its PNGs to the working directory, which may not be the
# directory the hard-coded paths point to. Fail early with a clear message
# instead of an opaque image-reading error halfway through.
missing_pages <- image_paths[!file.exists(image_paths)]
if (length(missing_pages) > 0) {
  stop(
    "Page image(s) not found: ", paste(missing_pages, collapse = ", "),
    call. = FALSE
  )
}

# Build the Spanish OCR engine once and reuse it for every page.
spa_engine <- tesseract("spa")

# Read each page image, extract its text and append it to the document in
# page order, using the built-in "Normal" paragraph style.
for (image_path in image_paths) {
  page_image <- image_read(image_path)
  page_text <- ocr(page_image, engine = spa_engine)
  doc <- body_add_par(doc, page_text, style = "Normal")
}

5. Crear un documento uniendo los textos

# Destination of the generated Word file, then write the assembled document
# to disk (print() on an officer document saves it to `target`).
output_path <- file.path("/Users/sebastianfajardo/Downloads", "ocr_eso.docx")
print(doc, target = output_path)
LS0tCnRpdGxlOiAiT0NSIFBERiIKYXV0aG9yOiAiU2ViYXN0acOhbiBGYWphcmRvLSBBMDE0MTIwMzUiCmRhdGU6ICIyMDI0LTA4LTE0IgpvdXRwdXQ6IAogIGh0bWxfZG9jdW1lbnQ6CiAgICB0b2M6IFRSVUUKICAgIHRvY19mbG9hdDogVFJVRQogICAgY29kZV9kb3dubG9hZDogVFJVRQogICAgdGhlbWU6IGNvc21vCi0tLQoKIVtdKC9Vc2Vycy9zZWJhc3RpYW5mYWphcmRvL0Rvd25sb2Fkcy9lc28uZ2lmKQoKIyA8c3BhbiBzdHlsZT0iY29sb3I6IGJsdWU7Ij4xLiBDYXJnYXIgbGlicmVyaWFzPC9zcGFuPgpgYGB7cn0KbGlicmFyeShwdXJycikKbGlicmFyeSh0ZXNzZXJhY3QpCmxpYnJhcnkobWFnaWNrKQpsaWJyYXJ5KG9mZmljZXIpCmxpYnJhcnkocGRmdG9vbHMpCmBgYAojIDxzcGFuIHN0eWxlPSJjb2xvcjogYmx1ZTsiPjIuIENvbnZlcnRpciBsYXMgaW1hZ2VuZXMgYSBwbmc8L3NwYW4+CmBgYHtyfQpwZGYxPC1wZGZfY29udmVydCgiL1VzZXJzL3NlYmFzdGlhbmZhamFyZG8vRG93bmxvYWRzL2VzbzMucGRmIixkcGkgPSA2MDApICU+JSBtYXAob2NyKQpgYGAKIyA8c3BhbiBzdHlsZT0iY29sb3I6IGJsdWU7Ij4zLiBHZW5lcmFyIGxvcyBwYXRocyBhIGxhcyBpbWFnZW5lczwvc3Bhbj4KYGBge3J9CmltYWdlX3BhdGhzIDwtIGMoIi9Vc2Vycy9zZWJhc3RpYW5mYWphcmRvL2VzbzNfMS5wbmciLAogICAgICAgICAgICAgICAgICIvVXNlcnMvc2ViYXN0aWFuZmFqYXJkby9lc28zXzIucG5nIiwKICAgICAgICAgICAgICAgICAiL1VzZXJzL3NlYmFzdGlhbmZhamFyZG8vZXNvM18zLnBuZyIpCgpgYGAKCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiBibHVlOyI+NC4gTGVlciBlbCB0ZXh0byBkZSBjYWRhIGltYWdlbjwvc3Bhbj4KYGBge3J9Cgpkb2MgPC0gcmVhZF9kb2N4KCkKCgppbWFnZW4xIDwtIGltYWdlX3JlYWQoIi9Vc2Vycy9zZWJhc3RpYW5mYWphcmRvL2VzbzNfMS5wbmciKQp0ZXh0bzEgPC0gb2NyKGltYWdlbjEsIGVuZ2luZSA9IHRlc3NlcmFjdCgic3BhIikpCmRvYyA8LSBkb2MgJT4lIGJvZHlfYWRkX3Bhcih0ZXh0bzEsIHN0eWxlID0gIk5vcm1hbCIpCgoKaW1hZ2VuMiA8LSBpbWFnZV9yZWFkKCIvVXNlcnMvc2ViYXN0aWFuZmFqYXJkby9lc28zXzIucG5nIikKdGV4dG8yIDwtIG9jcihpbWFnZW4yLCBlbmdpbmUgPSB0ZXNzZXJhY3QoInNwYSIpKQpkb2MgPC0gZG9jICU+JSBib2R5X2FkZF9wYXIodGV4dG8yLCBzdHlsZSA9ICJOb3JtYWwiKQoKCmltYWdlbjMgPC0gaW1hZ2VfcmVhZCgiL1VzZXJzL3NlYmFzdGlhbmZhamFyZG8vZXNvM18zLnBuZyIpCnRleHRvMyA8LSBvY3IoaW1hZ2VuMywgZW5naW5lID0gdGVzc2VyYWN0KCJzcGEiKSkKZG9jIDwtIGRvYyAlPiUgYm9keV9hZGRfcGFyKHRleHRvMywgc3R5bGUgPSAiTm9ybWFsIikKCmBgYAoKIyA8c3BhbiBzdHlsZT0iY29sb3I6IGJsdWU7Ij41LiBDcmVhciB1biBkb2N1bWVudG8gdW5pZW5kbyBsb3MgdGV4dG9zPC9zcGFuPgpgYGB7cn0KCm91dHB1dF9wYXRoIDwt
ICIvVXNlcnMvc2ViYXN0aWFuZmFqYXJkby9Eb3dubG9hZHMvb2NyX2Vzby5kb2N4IgpwcmludChkb2MsIHRhcmdldCA9IG91dHB1dF9wYXRoKQpgYGAKCgoK