scrape_txt

#' Split text into its individual lines
#'
#' Reads `char` through an in-memory text connection and returns one element
#' per line.
#'
#' @param char Character vector; elements may contain embedded newlines.
#' @return A character vector with one element per line of `char`.
scrape_txt <- function(char) {
  con <- textConnection(char)
  # Close the connection explicitly; otherwise it stays open until the
  # garbage collector reclaims it and warns "closing unused connection".
  on.exit(close(con), add = TRUE)
  readLines(con)
}

text_to_df

#' Convert alternating Spanish/English vocabulary lines into a data frame
#'
#' Splits `txt` into lines, drops blank lines, and pairs the remaining lines:
#' lines in odd positions go to `span`, lines in even positions go to `eng`.
#'
#' Lines are split literally on newlines, so apostrophes, double quotes and
#' `#` inside an entry are kept as-is. Parsing the text with the defaults of
#' `read.table()` treats those as quote/comment characters, which produces
#' "EOF within quoted string" warnings and merged or truncated entries.
#'
#' @param txt Character vector holding the vocabulary text; elements may
#'   contain embedded newlines.
#' @return A data frame with character columns `eng` and `span`, one row per
#'   pair of lines.
#' @details Signals an error if `txt` is not character, contains no non-blank
#'   lines, or contains an odd number of non-blank lines.
text_to_df <- function(txt) {
  if (!is.character(txt)) {
    stop("`txt` must be a character vector.")
  }

  # split into lines (accepting both \n and \r\n line endings)
  v <- unlist(strsplit(txt, "\r?\n"), use.names = FALSE)

  # drop missing and blank lines; the content of kept lines is left untouched
  v <- v[!is.na(v) & nzchar(trimws(v))]

  # basic integrity checks
  if (length(v) == 0) {
    stop("Text contains no non-blank lines.")
  }
  if (length(v) %% 2 != 0) {
    stop("Text must contain an even number of lines (Spanish–English pairs).")
  }

  # split into odd/even positions
  is_even <- seq_along(v) %% 2 == 0

  # assemble data frame
  data.frame(
    eng  = v[is_even],
    span = v[!is_even],
    stringsAsFactors = FALSE
  )
}
# Gather every object whose name ends in "_VOCAB" into one named list,
# then parse each element into a data frame with text_to_df().
lst <- mget(x = ls(pattern = "_VOCAB$"))
all_df <- lapply(X = lst, FUN = text_to_df)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
## : EOF within quoted string
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
## : EOF within quoted string
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
## : EOF within quoted string
# Chapter titles, keyed by the name of the corresponding *_VOCAB object.
chapter_names <- c(
  "Chapter 1: Hello, how are you?",
  "Chapter 2: At university",
  "Chapter 3: The Family",
  "Chapter 4: Pastimes",
  "Chapter 5: The Holidays",
  "Chapter 6: Shopping!",
  "Chapter 7: The Daily Routine",
  "Chapter 8: Food",
  "Chapter 9: Celebrations",
  "Chapter 10: In the doctor’s office",
  "Chapter 11: Technology",
  "Chapter 12: Housing",
  "Chapter 13: Nature",
  "Chapter 14: In the City",
  "Chapter 15: Well-being"
)
names(chapter_names) <- c(
  "Ch1_VOCAB", "Ch2_VOCAB", "Ch3_VOCAB", "Ch4_VOCAB", "Ch5_VOCAB",
  "Ch6_VOCAB", "Ch7_VOCAB", "Ch8_VOCAB", "Ch9_VOCAB", "Ch10_VOCAB",
  "Ch11_VOCAB", "Ch12_VOCAB", "Ch13_VOCAB", "Ch14_VOCAB", "Ch15_VOCAB"
)

# Add the chapter title to each vocabulary table and write it to "<name>.csv"
# in the working directory.
# The call is made only for its side effect; invisible() keeps the list of
# NULLs returned by write.csv() from being printed at top level.
invisible(lapply(names(all_df), function(nm) {
  # Fail with a clear message instead of "subscript out of bounds" when a
  # vocabulary object has no matching entry in chapter_names.
  if (!nm %in% names(chapter_names)) {
    stop("No chapter name defined for '", nm, "'.", call. = FALSE)
  }

  df <- all_df[[nm]]

  df$chapter <- chapter_names[[nm]]  # same chapter title on every row

  write.csv(
    df,
    file = paste0(nm, ".csv"),
    row.names = FALSE
  )
}))
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
## 
## [[9]]
## NULL
## 
## [[10]]
## NULL
## 
## [[11]]
## NULL
## 
## [[12]]
## NULL
## 
## [[13]]
## NULL
## 
## [[14]]
## NULL
## 
## [[15]]
## NULL

Text-ready, readable format:

Otherwise we would require the ChatGPT code interpreter:

# Write each vocabulary table to "<name>.txt" in the working directory as
# plain "span — eng" lines under a header line.
# The call is made only for its side effect; invisible() keeps the list of
# NULLs returned by writeLines() from being printed at top level.
invisible(lapply(names(all_df), function(nm) {
  df <- all_df[[nm]]

  # one "span — eng" line per vocabulary pair
  lines <- paste0(df$span, " — ", df$eng)

  # add a header (optional, but nice for GPT)
  txt <- c(
    paste0("## ", nm),
    "",
    lines
  )

  writeLines(txt, con = paste0(nm, ".txt"))
}))
## [[1]]
## NULL
## 
## [[2]]
## NULL
## 
## [[3]]
## NULL
## 
## [[4]]
## NULL
## 
## [[5]]
## NULL
## 
## [[6]]
## NULL
## 
## [[7]]
## NULL
## 
## [[8]]
## NULL
## 
## [[9]]
## NULL
## 
## [[10]]
## NULL
## 
## [[11]]
## NULL
## 
## [[12]]
## NULL
## 
## [[13]]
## NULL
## 
## [[14]]
## NULL
## 
## [[15]]
## NULL