Data Set

The data set below holds all possible Wordle solutions (2,315 as of 12/24/2022).

# Read the solution list from disk, convert it to a tibble, and name its
# column "Solutions" in a single step.
wordlewordsdf <- setNames(
  as_tibble(read.csv("valid_solutions_BW.csv")),
  "Solutions"
)
wordlewordsdf
## # A tibble: 2,315 × 1
##    Solutions
##    <chr>    
##  1 aback    
##  2 abase    
##  3 abate    
##  4 abbey    
##  5 abbot    
##  6 abhor    
##  7 abide    
##  8 abled    
##  9 abode    
## 10 abort    
## # … with 2,305 more rows

Unique Words

# Keep only the solutions whose letters are all different.
#
# Each word is split into single characters and passed to `all_unique()`; the
# results are collected in one type-checked pass instead of appending to a
# vector inside a loop (which copies the vector on every append).
has_distinct_letters <- vapply(
  strsplit(wordlewordsdf$Solutions, split = ""),
  function(chars) all_unique(chars),
  logical(1)
)
unique_words <- wordlewordsdf$Solutions[has_distinct_letters]

# Candidate words live in a one-column tibble named W1 ("first word").
uniquewordsdf <- wordlewordsdf %>%
  filter(Solutions %in% unique_words)
colnames(uniquewordsdf) <- "W1"

uniquewordsdf
## # A tibble: 1,566 × 1
##    W1   
##    <chr>
##  1 abhor
##  2 abide
##  3 abled
##  4 abode
##  5 abort
##  6 about
##  7 above
##  8 abuse
##  9 acorn
## 10 acrid
## # … with 1,556 more rows

Best Starting Word - Entropy

Entropy calculation for unique words

# Score every candidate word (column E1).
#
# For each letter of the word, take the fraction of all solutions that contain
# that letter, then sum those fractions over the word's letters.
#
# Position does not matter for the count: a solution with the letter in the
# same slot and a solution with the letter in a different slot are both
# counted, and those two groups are disjoint, so together they are exactly
# "solutions containing the letter". The count is therefore computed once per
# distinct letter rather than once per word position.
solution_count <- nrow(wordlewordsdf)  # derived from the data, not hard-coded
candidate_letters <- strsplit(uniquewordsdf$W1, split = "")
distinct_letters <- unique(unlist(candidate_letters))

# Named integer vector: letter -> number of solutions containing it.
# fixed = TRUE matches the letter literally rather than as a regex.
letter_counts <- vapply(
  distinct_letters,
  function(letter) sum(grepl(letter, wordlewordsdf$Solutions, fixed = TRUE)),
  integer(1),
  USE.NAMES = TRUE
)

# Letters are summed in word order, one term per letter of the word.
word_value <- vapply(
  candidate_letters,
  function(chars) sum(letter_counts[chars] / solution_count),
  numeric(1),
  USE.NAMES = FALSE
)

uniquewordsdf <- uniquewordsdf %>%
  mutate(E1 = word_value)

uniquewordsdf
## # A tibble: 1,566 × 2
##    W1       E1
##    <chr> <dbl>
##  1 abhor  1.32
##  2 abide  1.40
##  3 abled  1.40
##  4 abode  1.41
##  5 abort  1.45
##  6 about  1.28
##  7 above  1.32
##  8 abuse  1.43
##  9 acorn  1.48
## 10 acrid  1.39
## # … with 1,556 more rows
# For every first word (W1), find the second word (W2): the candidate with the
# highest E1 that shares no letter with W1. E2 is that candidate's E1.
first_words <- uniquewordsdf$W1

# Preallocate the results; NA marks "no letter-disjoint candidate exists".
word_pair <- rep(NA_character_, length(first_words))
word_pair_entropy <- rep(NA_real_, length(first_words))

for (idx in seq_along(first_words)) {
  used_letters <- unlist(strsplit(first_words[idx], split = ""))

  # Successively drop every candidate containing one of the used letters.
  secondworddf <- Reduce(
    function(candidates, letter) filter(candidates, !grepl(letter, W1)),
    used_letters,
    uniquewordsdf
  )
  secondworddf <- secondworddf %>%
    arrange(desc(E1))

  # Taking element [1] of an empty column yields NA, so no explicit
  # empty-result branch is needed here.
  word_pair[idx] <- secondworddf$W1[1]
  word_pair_entropy[idx] <- secondworddf$E1[1]
}

uniquewordsdf <- uniquewordsdf %>%
  mutate(W2 = word_pair,
         E2 = word_pair_entropy)

uniquewordsdf
## # A tibble: 1,566 × 4
##    W1       E1 W2       E2
##    <chr> <dbl> <chr> <dbl>
##  1 abhor  1.32 islet  1.57
##  2 abide  1.40 snort  1.44
##  3 abled  1.40 intro  1.46
##  4 abode  1.41 shirt  1.36
##  5 abort  1.45 slice  1.48
##  6 about  1.28 liner  1.61
##  7 above  1.32 shirt  1.36
##  8 abuse  1.43 intro  1.46
##  9 acorn  1.48 islet  1.57
## 10 acrid  1.39 stole  1.58
## # … with 1,556 more rows
# For every (W1, W2) pair, find the third word (W3): the candidate with the
# highest E1 that shares no letter with either word. E3 is that candidate's E1.
row_count <- nrow(uniquewordsdf)

# Preallocate with the "nothing found" values: the placeholder string 'NULL'
# for the word and NA for its score. Rows that keep these defaults are removed
# by the is.na(E3) filter below.
third_word <- rep("NULL", row_count)
third_word_entropy <- rep(NA_real_, row_count)

# seq_len() yields an empty sequence for an empty table, whereas 1:nrow()
# would iterate over c(1, 0).
for (row_num in seq_len(row_count)) {
  # Letters of W1 followed by letters of W2.
  used_letters <- unlist(strsplit(
    c(uniquewordsdf$W1[row_num], uniquewordsdf$W2[row_num]),
    split = ""
  ))

  # Successively drop every candidate containing one of the used letters.
  thirdworddf <- Reduce(
    function(candidates, letter) filter(candidates, !grepl(letter, W1)),
    used_letters,
    uniquewordsdf
  )
  thirdworddf <- thirdworddf %>%
    arrange(desc(E1))

  if (nrow(thirdworddf) > 0) {
    third_word[row_num] <- thirdworddf$W1[1]
    third_word_entropy[row_num] <- thirdworddf$E1[1]
  }
}

uniquewordsdf <- uniquewordsdf %>%
  mutate(W3 = third_word,
         E3 = third_word_entropy)

# Keep only complete triples, then rank them.
# TotalEntropy is the plain sum of the three scores; WeightedEntropy sums each
# score squared divided by that total. The three terms are kept separate so
# the floating-point result (and hence the sort order) is unchanged.
uniquewordsdf <- uniquewordsdf %>%
  filter(!is.na(E3)) %>%
  mutate(TotalEntropy = E1 + E2 + E3,
         WeightedEntropy = (E1^2)/TotalEntropy + (E2^2)/TotalEntropy + (E3^2)/TotalEntropy) %>%
  arrange(desc(WeightedEntropy))

uniquewordsdf
## # A tibble: 1,469 × 8
##    W1       E1 W2       E2 W3       E3 TotalEntropy WeightedEntropy
##    <chr> <dbl> <chr> <dbl> <chr> <dbl>        <dbl>           <dbl>
##  1 alert 1.78  scion  1.27 pudgy 0.816         3.86            1.41
##  2 alter 1.78  scion  1.27 pudgy 0.816         3.86            1.41
##  3 later 1.78  scion  1.27 pudgy 0.816         3.86            1.41
##  4 pudgy 0.816 alert  1.78 scion 1.27          3.86            1.41
##  5 scion 1.27  alert  1.78 pudgy 0.816         3.86            1.41
##  6 sonic 1.27  alert  1.78 pudgy 0.816         3.86            1.41
##  7 dumpy 0.816 alert  1.78 scion 1.27          3.86            1.41
##  8 noisy 1.25  alert  1.78 chump 0.833         3.87            1.40
##  9 chump 0.833 alert  1.78 noisy 1.25          3.87            1.40
## 10 jumpy 0.667 alert  1.78 scion 1.27          3.71            1.40
## # … with 1,459 more rows