The data set below holds all possible Wordle solutions (2,315 as of 12/24/2022).
# Load the solution list from disk as a tibble and label its column
# "Solutions", then print it.
wordlewordsdf <- setNames(
  as_tibble(read.csv("valid_solutions_BW.csv")),
  "Solutions"
)
wordlewordsdf
## # A tibble: 2,315 × 1
## Solutions
## <chr>
## 1 aback
## 2 abase
## 3 abate
## 4 abbey
## 5 abbot
## 6 abhor
## 7 abide
## 8 abled
## 9 abode
## 10 abort
## # … with 2,305 more rows
# Keep only the solutions whose letters are all different.
# Each word is split into single characters once; anyDuplicated() returns 0
# when no character repeats. This replaces growing `unique_words` with c()
# inside a loop.
letters_by_word <- strsplit(wordlewordsdf$Solutions, split = "")
has_distinct_letters <- vapply(
  letters_by_word,
  function(chars) anyDuplicated(chars) == 0L,
  logical(1)
)
unique_words <- wordlewordsdf$Solutions[has_distinct_letters]

uniquewordsdf <- wordlewordsdf %>%
  filter(Solutions %in% unique_words)
colnames(uniquewordsdf) <- "UniqueWords"
uniquewordsdf
## # A tibble: 1,566 × 1
## UniqueWords
## <chr>
## 1 abhor
## 2 abide
## 3 abled
## 4 abode
## 5 abort
## 6 about
## 7 above
## 8 abuse
## 9 acorn
## 10 acrid
## # … with 1,556 more rows
# Tally how often each letter occurs across all solutions (any position) and
# rank the letters by frequency: rank 1 is the most frequent letter, and tied
# letters share the average rank (rank()'s default).
# strsplit() is vectorised over the whole column, so no loop is needed.
all_letters <- unlist(strsplit(wordlewordsdf$Solutions, split = ""))

overall_ranks <- as.data.frame(table(all_letters))
colnames(overall_ranks) <- c("Letter", "Freq")
overall_ranks <- overall_ranks %>%
  mutate(Rank = rank(-Freq))
overall_ranks
## Letter Freq Rank
## 1 a 979 2
## 2 b 281 18
## 3 c 477 10
## 4 d 393 13
## 5 e 1233 1
## 6 f 230 19
## 7 g 311 17
## 8 h 389 14
## 9 i 671 7
## 10 j 27 26
## 11 k 210 20
## 12 l 719 6
## 13 m 316 16
## 14 n 575 9
## 15 o 754 4
## 16 p 367 15
## 17 q 29 25
## 18 r 899 3
## 19 s 669 8
## 20 t 729 5
## 21 u 467 11
## 22 v 153 22
## 23 w 195 21
## 24 x 37 24
## 25 y 425 12
## 26 z 40 23
# Score every distinct-letter word by the mean overall rank of its five
# letters (lower = made of more common letters).
# For each position, all words are looked up at once with match() instead of
# appending to vectors inside nested loops.
candidate_words <- uniquewordsdf$UniqueWords
overall_rank_by_pos <- lapply(1:5, function(pos) {
  letter_at_pos <- substr(candidate_words, pos, pos)
  overall_ranks$Rank[match(letter_at_pos, overall_ranks$Letter)]
})
# Element-wise sum of the five per-position rank vectors, then the mean.
word_avg_rank <- Reduce(`+`, overall_rank_by_pos) / 5

uniquewordsdf <- uniquewordsdf %>%
  mutate(Avg_Rank = word_avg_rank)
uniquewordsdf
## # A tibble: 1,566 × 2
## UniqueWords Avg_Rank
## <chr> <dbl>
## 1 abhor 8.2
## 2 abide 8.2
## 3 abled 8
## 4 abode 7.6
## 5 abort 6.4
## 6 about 8
## 7 above 9.4
## 8 abuse 8
## 9 acorn 5.6
## 10 acrid 7
## # … with 1,556 more rows
# For each of the five letter positions, count how many solutions have each
# letter of the alphabet in that position, then rank the letters within the
# position (rank 1 = most frequent; ties share the average rank).
# Result layout: Letter, five frequency columns, five rank columns.
position_counts <- vapply(
  1:5,
  function(pos) {
    letters_at_pos <- substr(wordlewordsdf$Solutions, pos, pos)
    # Fixing the factor levels keeps a zero count for letters that never
    # occur in this position.
    as.vector(table(factor(letters_at_pos, levels = letters)))
  },
  integer(26)
)
position_ranks <- apply(-position_counts, 2, rank)

column_ranks <- data.frame(
  letters,
  position_counts,
  position_ranks,
  stringsAsFactors = FALSE
)
colnames(column_ranks) <- c(
  "Letter",
  "Pos1_F", "Pos2_F", "Pos3_F", "Pos4_F", "Pos5_F",
  "Pos1_R", "Pos2_R", "Pos3_R", "Pos4_R", "Pos5_R"
)
column_ranks
## Letter Pos1_F Pos2_F Pos3_F Pos4_F Pos5_F Pos1_R Pos2_R Pos3_R Pos4_R Pos5_R
## 1 a 141 304 307 163 64 6.0 1.0 1.0 4.0 10.0
## 2 b 173 16 57 24 11 3.0 17.5 15.0 21.0 19.5
## 3 c 198 40 56 152 31 2.0 13.0 16.0 7.5 16.0
## 4 d 111 20 75 69 118 9.0 16.0 11.0 13.0 8.0
## 5 e 72 242 177 318 424 14.0 4.0 4.0 1.0 1.0
## 6 f 136 8 25 35 26 7.0 23.0 20.0 18.0 17.0
## 7 g 115 12 67 76 41 8.0 21.0 12.0 12.0 14.0
## 8 h 69 144 9 28 139 15.0 8.0 24.0 19.0 6.0
## 9 i 34 202 266 158 11 19.0 5.0 2.0 6.0 19.5
## 10 j 20 2 3 2 0 22.5 25.5 25.0 25.0 25.0
## 11 k 20 10 12 55 113 22.5 22.0 21.5 15.0 9.0
## 12 l 88 201 112 162 156 12.0 6.0 8.0 5.0 5.0
## 13 m 107 38 61 68 42 10.0 14.0 13.0 14.0 13.0
## 14 n 37 87 139 182 130 18.0 9.0 7.0 2.0 7.0
## 15 o 41 279 244 132 58 17.0 2.0 3.0 10.0 11.0
## 16 p 142 61 58 50 56 5.0 11.0 14.0 16.0 12.0
## 17 q 23 5 1 0 0 21.0 24.0 26.0 26.0 25.0
## 18 r 105 267 163 152 212 11.0 3.0 6.0 7.5 4.0
## 19 s 366 16 80 171 36 1.0 17.5 10.0 3.0 15.0
## 20 t 149 77 111 139 253 4.0 10.0 9.0 9.0 3.0
## 21 u 33 186 165 82 1 20.0 7.0 5.0 11.0 23.0
## 22 v 43 15 49 46 0 16.0 19.0 17.0 17.0 25.0
## 23 w 83 44 26 25 17 13.0 12.0 19.0 20.0 18.0
## 24 x 0 14 12 3 8 26.0 20.0 21.5 23.5 21.0
## 25 y 6 23 29 3 364 24.0 15.0 18.0 23.5 2.0
## 26 z 3 2 11 20 4 25.0 25.5 23.0 22.0 22.0
# Score every distinct-letter word by the mean position-specific rank of its
# letters: letter 1 is scored with the Pos1_R column, letter 2 with Pos2_R,
# and so on (lower = letters that are common in exactly those positions).
# Rank columns are addressed by name rather than by a numeric column offset,
# and each position is looked up for all words at once.
scored_words <- uniquewordsdf$UniqueWords
column_rank_by_pos <- lapply(1:5, function(pos) {
  letter_at_pos <- substr(scored_words, pos, pos)
  rank_column <- column_ranks[[paste0("Pos", pos, "_R")]]
  rank_column[match(letter_at_pos, column_ranks$Letter)]
})
# Element-wise sum of the five per-position rank vectors, then the mean.
word_rank <- Reduce(`+`, column_rank_by_pos) / 5

uniquewordsdf <- uniquewordsdf %>%
  mutate(Avg_Col_Rank = word_rank)
uniquewordsdf
## # A tibble: 1,566 × 3
## UniqueWords Avg_Rank Avg_Col_Rank
## <chr> <dbl> <dbl>
## 1 abhor 8.2 12.3
## 2 abide 8.2 7.9
## 3 abled 8 8.1
## 4 abode 7.6 8.1
## 5 abort 6.4 7.4
## 6 about 8 8.1
## 7 above 9.4 8.9
## 8 abuse 8 6.5
## 9 acorn 5.6 7.3
## 10 acrid 7 7.8
## # … with 1,556 more rows
# Composite score: the plain average of the overall-letter score and the
# position-specific score for each word.
uniquewordsdf <- mutate(
  uniquewordsdf,
  Composite_Rank = (Avg_Rank + Avg_Col_Rank) / 2
)
uniquewordsdf
## # A tibble: 1,566 × 4
## UniqueWords Avg_Rank Avg_Col_Rank Composite_Rank
## <chr> <dbl> <dbl> <dbl>
## 1 abhor 8.2 12.3 10.2
## 2 abide 8.2 7.9 8.05
## 3 abled 8 8.1 8.05
## 4 abode 7.6 8.1 7.85
## 5 abort 6.4 7.4 6.9
## 6 about 8 8.1 8.05
## 7 above 9.4 8.9 9.15
## 8 abuse 8 6.5 7.25
## 9 acorn 5.6 7.3 6.45
## 10 acrid 7 7.8 7.4
## # … with 1,556 more rows
# Choose three starting words with no letters in common, by lowest Avg_Rank.
# The filtering is done on a working copy: overwriting `uniquewordsdf` here
# would leave the later Avg_Col_Rank and Composite_Rank analyses with a pool
# that has already lost every word sharing a letter with these picks.
overall_pool <- uniquewordsdf
overall_pool %>%
  arrange(Avg_Rank)

startingword <- "alter"
# The character class "[alter]" matches any word containing at least one of
# the chosen word's letters; those words are removed from the pool.
overall_pool <- overall_pool %>%
  filter(!grepl(paste0("[", startingword, "]"), UniqueWords))
overall_pool %>%
  arrange(Avg_Rank)

secondword <- "sonic"
overall_pool <- overall_pool %>%
  filter(!grepl(paste0("[", secondword, "]"), UniqueWords))
overall_pool %>%
  arrange(Avg_Rank)
The three best starting words, based on lowest average overall letter rank, are, in order: alter, sonic, and dumpy.
# Choose three starting words with no letters in common, by lowest
# Avg_Col_Rank (position-specific letter rank).
# A working copy is filtered so `uniquewordsdf` itself is not modified.
column_pool <- uniquewordsdf
column_pool %>%
  arrange(Avg_Col_Rank)

startingword <- "crane"
# The character class "[crane]" matches any word containing at least one of
# the chosen word's letters; those words are removed from the pool.
column_pool <- column_pool %>%
  filter(!grepl(paste0("[", startingword, "]"), UniqueWords))
column_pool %>%
  arrange(Avg_Col_Rank)

secondword <- "foist"
column_pool <- column_pool %>%
  filter(!grepl(paste0("[", secondword, "]"), UniqueWords))
# The third pick is also ordered by Avg_Col_Rank, the metric this selection
# is based on (the final listing previously sorted by Avg_Rank).
column_pool %>%
  arrange(Avg_Col_Rank)
The three best starting words, based on lowest average column (position-specific) rank, are, in order: crane, foist, and lumpy.
# Choose three starting words with no letters in common, by lowest
# Composite_Rank (average of the overall and position-specific scores).
# A working copy is filtered so `uniquewordsdf` itself is not modified.
composite_pool <- uniquewordsdf
composite_pool %>%
  arrange(Composite_Rank)

startingword <- "crane"
# The character class "[crane]" matches any word containing at least one of
# the chosen word's letters; those words are removed from the pool.
composite_pool <- composite_pool %>%
  filter(!grepl(paste0("[", startingword, "]"), UniqueWords))
composite_pool %>%
  arrange(Composite_Rank)

secondword <- "foist"
composite_pool <- composite_pool %>%
  filter(!grepl(paste0("[", secondword, "]"), UniqueWords))
composite_pool %>%
  arrange(Composite_Rank)
The three best starting words, based on composite rank score, are, in order: crane, moist/foist, and bulky.