The data set below holds all of the possible Wordle solutions (2,315 as of 12/24/2022).
# Read the solution list from disk, convert it to a tibble, and give its
# single column the name "Solutions" before printing it.
wordlewordsdf <- read.csv('valid_solutions_BW.csv') %>%
  as_tibble()
names(wordlewordsdf) <- "Solutions"
wordlewordsdf
## # A tibble: 2,315 × 1
## Solutions
## <chr>
## 1 aback
## 2 abase
## 3 abate
## 4 abbey
## 5 abbot
## 6 abhor
## 7 abide
## 8 abled
## 9 abode
## 10 abort
## # … with 2,305 more rows
# Keep only the solutions whose letters are all different.
#
# Each word is split into single characters; `anyDuplicated()` returns 0 when
# no character repeats. The whole column is tested in one vectorised pass
# rather than appending to a vector inside a loop.
has_distinct_letters <- vapply(
  strsplit(wordlewordsdf$Solutions, split = ""),
  function(chars) anyDuplicated(chars) == 0L,
  logical(1)
)
unique_words <- wordlewordsdf$Solutions[has_distinct_letters]

# The surviving words become the candidate first guesses, stored in column W1.
uniquewordsdf <- wordlewordsdf %>%
  filter(Solutions %in% unique_words)
colnames(uniquewordsdf) <- "W1"
uniquewordsdf
## # A tibble: 1,566 × 1
## W1
## <chr>
## 1 abhor
## 2 abide
## 3 abled
## 4 abode
## 5 abort
## 6 about
## 7 above
## 8 abuse
## 9 acorn
## 10 acrid
## # … with 1,556 more rows
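Entropy calculation for unique words: each word's score (E1) is the sum, over its five letter positions, of the fraction of all solutions that contain that letter (in the same position or elsewhere).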
# Score every candidate first word and store the score in column E1.
#
# For each of the five letter positions of a word, the score adds
#   (# solutions with that letter in the same position)
# + (# solutions with that letter, but only in another position)
# divided by the number of solutions. Those two groups are disjoint and
# together are exactly "solutions that contain the letter", so the count
# depends only on the letter. It is therefore computed once per distinct
# letter and looked up, instead of re-filtering the full solution list twice
# for every letter of every word.
solutions <- wordlewordsdf$Solutions
# Derived from the data rather than hard-coded, so the score stays correct if
# the solution list changes size.
n_solutions <- nrow(wordlewordsdf)

# One character vector of length 5 per word (positions 1..5).
word_chars <- lapply(uniquewordsdf$W1, substring, first = 1:5, last = 1:5)

# Number of solutions containing each distinct letter. The letter is used as a
# regular expression, which for plain alphabetic letters is a literal match.
distinct_letters <- unique(unlist(word_chars))
letter_counts <- vapply(
  distinct_letters,
  function(letter) sum(grepl(letter, solutions)),
  integer(1),
  USE.NAMES = FALSE
)

# Sum the per-position fractions in position order (each term is divided
# before summing) so the floating-point result is reproducible per word.
word_value <- vapply(
  word_chars,
  function(chars) {
    sum(letter_counts[match(chars, distinct_letters)] / n_solutions)
  },
  numeric(1)
)

uniquewordsdf <- uniquewordsdf %>%
  mutate(E1 = word_value)
uniquewordsdf
## # A tibble: 1,566 × 2
## W1 E1
## <chr> <dbl>
## 1 abhor 1.32
## 2 abide 1.40
## 3 abled 1.40
## 4 abode 1.41
## 5 abort 1.45
## 6 about 1.28
## 7 above 1.32
## 8 abuse 1.43
## 9 acorn 1.48
## 10 acrid 1.39
## # … with 1,556 more rows
# For every first word W1, pick the second word W2: the highest-E1 word in the
# table that shares no letter with W1. E2 is that word's E1 score.
#
# Results are written into preallocated vectors. When no word qualifies the
# entry stays NA (both W2 and E2).
first_words <- uniquewordsdf$W1
first_scores <- uniquewordsdf$E1
n_words <- length(first_words)

word_pair <- rep(NA_character_, n_words)
word_pair_entropy <- rep(NA_real_, n_words)

for (i in seq_len(n_words)) {
  # Start with every word eligible, then rule out any word containing one of
  # the current word's letters. `fixed = TRUE` matches the letter literally.
  eligible <- rep(TRUE, n_words)
  for (letter in unlist(strsplit(first_words[i], split = ""))) {
    eligible <- eligible & !grepl(letter, first_words, fixed = TRUE)
  }

  # which.max() returns the first maximum, i.e. the same row a stable
  # descending sort on E1 would put first. It is empty when nothing is
  # eligible, in which case the NA defaults are kept.
  best <- which(eligible)[which.max(first_scores[eligible])]
  if (length(best) == 1L) {
    word_pair[i] <- first_words[best]
    word_pair_entropy[i] <- first_scores[best]
  }
}

uniquewordsdf <- uniquewordsdf %>%
  mutate(W2 = word_pair,
         E2 = word_pair_entropy)
uniquewordsdf
## # A tibble: 1,566 × 4
## W1 E1 W2 E2
## <chr> <dbl> <chr> <dbl>
## 1 abhor 1.32 islet 1.57
## 2 abide 1.40 snort 1.44
## 3 abled 1.40 intro 1.46
## 4 abode 1.41 shirt 1.36
## 5 abort 1.45 slice 1.48
## 6 about 1.28 liner 1.61
## 7 above 1.32 shirt 1.36
## 8 abuse 1.43 intro 1.46
## 9 acorn 1.48 islet 1.57
## 10 acrid 1.39 stole 1.58
## # … with 1,556 more rows
# For every (W1, W2) pair, pick the third word W3: the highest-E1 word in the
# table that shares no letter with either W1 or W2. E3 is that word's E1 score.
#
# Rows for which no third word exists (or which have no W2) get NA in both W3
# and E3. A real missing value is used for W3 instead of the string "NULL" so
# the column never contains a fake word.
all_words <- uniquewordsdf$W1
all_scores <- uniquewordsdf$E1
n_rows <- nrow(uniquewordsdf)

third_word <- rep(NA_character_, n_rows)
third_word_entropy <- rep(NA_real_, n_rows)

# seq_len() is safe when the table is empty, unlike 1:nrow(...).
for (row_num in seq_len(n_rows)) {
  used_words <- c(uniquewordsdf$W1[row_num], uniquewordsdf$W2[row_num])
  # Without a second word there is nothing to extend; keep the NA defaults.
  if (anyNA(used_words)) {
    next
  }

  # Rule out every word containing any letter already used by W1 or W2.
  # `fixed = TRUE` matches the letter literally.
  eligible <- rep(TRUE, n_rows)
  for (letter in unique(unlist(strsplit(used_words, split = "")))) {
    eligible <- eligible & !grepl(letter, all_words, fixed = TRUE)
  }

  # which.max() returns the first maximum, i.e. the same row a stable
  # descending sort on E1 would put first; it is empty when nothing is left.
  best <- which(eligible)[which.max(all_scores[eligible])]
  if (length(best) == 1L) {
    third_word[row_num] <- all_words[best]
    third_word_entropy[row_num] <- all_scores[best]
  }
}

uniquewordsdf <- uniquewordsdf %>%
  mutate(W3 = third_word,
         E3 = third_word_entropy)
# Rank the complete three-word openers.
# Rows without a third word (E3 is NA) are dropped. TotalEntropy is the sum of
# the three scores; WeightedEntropy adds each squared score divided by that
# total. The table is sorted from highest to lowest WeightedEntropy.
uniquewordsdf <- uniquewordsdf %>%
  filter(!is.na(E3)) %>%
  mutate(TotalEntropy = E1 + E2 + E3) %>%
  mutate(
    WeightedEntropy = (E1^2)/TotalEntropy + (E2^2)/TotalEntropy + (E3^2)/TotalEntropy
  ) %>%
  arrange(desc(WeightedEntropy))
uniquewordsdf
## # A tibble: 1,469 × 8
## W1 E1 W2 E2 W3 E3 TotalEntropy WeightedEntropy
## <chr> <dbl> <chr> <dbl> <chr> <dbl> <dbl> <dbl>
## 1 alert 1.78 scion 1.27 pudgy 0.816 3.86 1.41
## 2 alter 1.78 scion 1.27 pudgy 0.816 3.86 1.41
## 3 later 1.78 scion 1.27 pudgy 0.816 3.86 1.41
## 4 pudgy 0.816 alert 1.78 scion 1.27 3.86 1.41
## 5 scion 1.27 alert 1.78 pudgy 0.816 3.86 1.41
## 6 sonic 1.27 alert 1.78 pudgy 0.816 3.86 1.41
## 7 dumpy 0.816 alert 1.78 scion 1.27 3.86 1.41
## 8 noisy 1.25 alert 1.78 chump 0.833 3.87 1.40
## 9 chump 0.833 alert 1.78 noisy 1.25 3.87 1.40
## 10 jumpy 0.667 alert 1.78 scion 1.27 3.71 1.40
## # … with 1,459 more rows