# Project-specific stop words, held in a one-column tibble (`word`) so they can
# be row-bound with the `stop_words` table. All entries are lower-case single
# tokens; the grouping comments below are for readability only.
custom_stops <- tibble(
  word = c(
    # School and observation vocabulary
    "teacher", "teachers", "student", "students", "classroom", "class",
    "lesson", "learning", "observed", "observation", "visit", "school",
    # Honorific titles
    "ms", "mr", "mrs", "dr",
    # Praise, courtesy and letter-style wording
    "clearly", "evident", "continue", "also",
    "well", "great", "good", "nice", "wonderful", "excellent", "amazing",
    "truly", "pleasure", "visiting", "thank", "allowing", "opportunity",
    "observe", "cultivated", "strong", "culture", "mutual", "respect",
    "always", "joy", "dear", "respectfully", "sincerely",
    # Generic verbs, modals and filler words
    "one", "make",
    "use", "able", "will", "would", "could", "may", "must", "need",
    "see", "look", "come", "go", "get", "give", "take", "keep", "let",
    "put", "set", "way", "time", "day", "year", "work", "working",
    "used", "using", "like", "even", "still", "already", "much", "many",
    # Generic adjectives
    "first", "last", "new", "old", "high", "low", "large", "small",
    "long", "little", "own", "right"
  )
)
# Combined stop-word table: the `stop_words` rows first, followed by the custom
# words tagged with lexicon = "custom" so their origin stays identifiable.
all_stops <- bind_rows(
  stop_words,
  mutate(custom_stops, lexicon = "custom")
)
# Build the token table: one row per lemmatised word, carrying the document
# metadata columns (doc_id, School2, Observer, month) alongside each token.
lwt_tokens <- lwt |>
  select(doc_id, School2, Observer, month, text_clean) |>
  unnest_tokens(word, text_clean) |>
  # Drop tokens that consist solely of digits.
  filter(!str_detect(word, "^[0-9]+$")) |>
  # First pass: remove surface forms that are listed stop words.
  anti_join(all_stops, by = "word") |>
  mutate(word = lemmatize_words(word)) |>
  # Second pass: lemmatisation can turn an unlisted inflection into a listed
  # stop word (e.g. "observing" -> "observe", "schools" -> "school"), so the
  # stop list has to be applied again to the lemmas.
  anti_join(all_stops, by = "word") |>
  # Discard lemmas of one or two characters.
  filter(nchar(word) > 2)