# Split text into its individual lines.
#
# Args:
#   char: Character vector; embedded newlines separate lines.
# Returns:
#   Character vector with one element per line of `char`.
scrape_txt <- function(char) {
  con <- textConnection(char)
  # textConnection() hands back an already-open connection, which readLines()
  # leaves open; close it explicitly (even on error) so it is not leaked.
  on.exit(close(con), add = TRUE)
  readLines(con)
}
# Convert a block of alternating Spanish/English lines into a data frame.
#
# Args:
#   txt: Character vector. Lines alternate Spanish (odd lines) and English
#        (even lines); elements may contain embedded newlines. Blank lines
#        are ignored.
# Returns:
#   A data.frame with character columns `eng` and `span`, one row per pair
#   (zero rows when `txt` holds no non-blank lines).
# Raises:
#   An error if `txt` is not character, or if the number of non-blank lines
#   is odd.
text_to_df <- function(txt) {
  if (!is.character(txt)) {
    stop("`txt` must be a character vector.", call. = FALSE)
  }
  # Split on line breaks directly instead of going through read.table():
  # read.table() treats ' and " as quote characters and # as a comment
  # marker, so lines containing apostrophes were merged or truncated
  # ("EOF within quoted string").
  v <- as.character(unlist(strsplit(txt, "\r?\n"), use.names = FALSE))
  # drop missing and blank lines (read.table() skipped blank lines too)
  v <- v[!is.na(v) & nzchar(trimws(v))]
  # basic integrity check
  if (length(v) %% 2 != 0) {
    stop(
      "Text must contain an even number of lines (Spanish–English pairs).",
      call. = FALSE
    )
  }
  # split into odd (Spanish) / even (English) positions; the modulo mask is
  # zero-length for empty input, unlike a recycled c(TRUE, FALSE) index
  pos <- seq_along(v)
  data.frame(
    eng = v[pos %% 2L == 0L],
    span = v[pos %% 2L == 1L],
    stringsAsFactors = FALSE
  )
}
# Gather every object whose name ends in "_VOCAB" into a named list, then
# run text_to_df() over each element (names are carried through by lapply).
lst <- mget(x = ls(pattern = "_VOCAB$"))
all_df <- lapply(X = lst, FUN = text_to_df)
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
## : EOF within quoted string
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
## : EOF within quoted string
## Warning in scan(file = file, what = what, sep = sep, quote = quote, dec = dec,
## : EOF within quoted string
# Chapter titles in chapter order; the names attached below are the
# matching vocabulary object names ("Ch1_VOCAB", ..., "Ch15_VOCAB").
chapter_names <- c(
  "Chapter 1: Hello, how are you?",
  "Chapter 2: At university",
  "Chapter 3: The Family",
  "Chapter 4: Pastimes",
  "Chapter 5: The Holidays",
  "Chapter 6: Shopping!",
  "Chapter 7: The Daily Routine",
  "Chapter 8: Food",
  "Chapter 9: Celebrations",
  "Chapter 10: In the doctor’s office",
  "Chapter 11: Technology",
  "Chapter 12: Housing",
  "Chapter 13: Nature",
  "Chapter 14: In the City",
  "Chapter 15: Well-being"
)
names(chapter_names) <- paste0("Ch", seq_along(chapter_names), "_VOCAB")
# Add the chapter title to each vocabulary table and write it to
# "<object name>.csv" in the working directory.
# lapply() is used only for its side effect here, so the result (a list of
# NULLs from write.csv()) is wrapped in invisible() to keep it from being
# auto-printed at top level.
invisible(lapply(names(all_df), function(nm) {
  if (!nm %in% names(chapter_names)) {
    stop("No chapter name defined for ", nm, call. = FALSE)
  }
  df <- all_df[[nm]]
  # rep_len() keeps the assignment valid even for a zero-row table
  df$chapter <- rep_len(chapter_names[[nm]], nrow(df))
  write.csv(
    df,
    file = paste0(nm, ".csv"),
    row.names = FALSE
  )
}))
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
##
## [[9]]
## NULL
##
## [[10]]
## NULL
##
## [[11]]
## NULL
##
## [[12]]
## NULL
##
## [[13]]
## NULL
##
## [[14]]
## NULL
##
## [[15]]
## NULL
Otherwise, we require the ChatGPT code interpreter:
# Write each vocabulary table as plain text to "<object name>.txt" in the
# working directory: a "## <object name>" header, a blank line, then one
# "Spanish — English" pair per line.
# lapply() is used only for its side effect, so the list of NULLs returned
# by writeLines() is wrapped in invisible() to keep it from being printed.
invisible(lapply(names(all_df), function(nm) {
  df <- all_df[[nm]]
  # sprintf() yields zero lines for a zero-row table, whereas paste0() would
  # emit a single bogus " — " line
  lines <- sprintf("%s — %s", df$span, df$eng)
  # add a header (optional, but nice for GPT)
  txt <- c(
    paste0("## ", nm),
    "",
    lines
  )
  writeLines(txt, con = paste0(nm, ".txt"))
}))
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
##
## [[5]]
## NULL
##
## [[6]]
## NULL
##
## [[7]]
## NULL
##
## [[8]]
## NULL
##
## [[9]]
## NULL
##
## [[10]]
## NULL
##
## [[11]]
## NULL
##
## [[12]]
## NULL
##
## [[13]]
## NULL
##
## [[14]]
## NULL
##
## [[15]]
## NULL