Data Set

The data set below holds all possible Wordle solutions (2,315 as of 12/24/2022).

# Read the solution list from disk and expose it as a one-column tibble.
wordlewordsdf <- read.csv('valid_solutions_BW.csv') %>%
  as_tibble()
# The rest of the analysis refers to the single column as "Solutions".
names(wordlewordsdf) <- "Solutions"
wordlewordsdf

## # A tibble: 2,315 × 1
##    Solutions
##    <chr>    
##  1 aback    
##  2 abase    
##  3 abate    
##  4 abbey    
##  5 abbot    
##  6 abhor    
##  7 abide    
##  8 abled    
##  9 abode    
## 10 abort    
## # … with 2,305 more rows

Unique Words

# Keep only the solutions in which no letter is repeated.
#
# strsplit() is vectorised over its input, so every word is split into single
# characters in one call; anyDuplicated() returns 0 when a character vector
# contains no repeated element. This avoids growing `unique_words` with c()
# inside a loop and needs nothing beyond base R for the uniqueness check.
letters_by_word <- strsplit(wordlewordsdf$Solutions, split = '')
has_distinct_letters <- vapply(
  letters_by_word,
  function(chars) anyDuplicated(chars) == 0L,
  logical(1)
)
unique_words <- wordlewordsdf$Solutions[has_distinct_letters]

uniquewordsdf <- wordlewordsdf %>%
  filter(Solutions %in% unique_words)
colnames(uniquewordsdf) <- 'UniqueWords'

uniquewordsdf

## # A tibble: 1,566 × 1
##    UniqueWords
##    <chr>      
##  1 abhor      
##  2 abide      
##  3 abled      
##  4 abode      
##  5 abort      
##  6 about      
##  7 above      
##  8 abuse      
##  9 acorn      
## 10 acrid      
## # … with 1,556 more rows

Overall Frequencies/Ranks

# Tally how often each letter occurs anywhere in the solution list and rank
# the letters by that frequency (rank 1 = most frequent).
#
# strsplit() already handles the whole column at once, so the letters are
# collected in a single call instead of appending to a vector word by word.
all_letters <- unlist(strsplit(wordlewordsdf$Solutions, split = ''))

# table() counts each distinct letter; as.data.frame() turns that into one row
# per letter (the letter column is a factor).
overall_ranks <- as.data.frame(table(all_letters))
colnames(overall_ranks) <- c('Letter', 'Freq')
# Negating Freq makes the highest count receive rank 1.
overall_ranks <- overall_ranks %>%
  mutate(Rank = rank(-Freq))

overall_ranks

##    Letter Freq Rank
## 1       a  979    2
## 2       b  281   18
## 3       c  477   10
## 4       d  393   13
## 5       e 1233    1
## 6       f  230   19
## 7       g  311   17
## 8       h  389   14
## 9       i  671    7
## 10      j   27   26
## 11      k  210   20
## 12      l  719    6
## 13      m  316   16
## 14      n  575    9
## 15      o  754    4
## 16      p  367   15
## 17      q   29   25
## 18      r  899    3
## 19      s  669    8
## 20      t  729    5
## 21      u  467   11
## 22      v  153   22
## 23      w  195   21
## 24      x   37   24
## 25      y  425   12
## 26      z   40   23

Ranking Unique Words by Overall Rank

# Score every unique-letter word by the mean overall rank of its five letters
# (lower is better).
#
# For each letter position the rank is looked up for all words at once with
# match(), and the per-position ranks are accumulated. This replaces the
# nested loop that grew two vectors with c() on every iteration.
# A letter that is absent from `overall_ranks` now yields NA for that word
# instead of being silently left out of the sum.
rank_total <- numeric(nrow(uniquewordsdf))

for (pos in 1:5) {
  letter_at_pos <- substr(uniquewordsdf$UniqueWords, pos, pos)
  # match() compares against the character labels of the factor column.
  rank_total <- rank_total +
    overall_ranks$Rank[match(letter_at_pos, overall_ranks$Letter)]
}

word_avg_rank <- rank_total / 5

uniquewordsdf <- uniquewordsdf %>%
  mutate(Avg_Rank = word_avg_rank)
uniquewordsdf

## # A tibble: 1,566 × 2
##    UniqueWords Avg_Rank
##    <chr>          <dbl>
##  1 abhor            8.2
##  2 abide            8.2
##  3 abled            8  
##  4 abode            7.6
##  5 abort            6.4
##  6 about            8  
##  7 above            9.4
##  8 abuse            8  
##  9 acorn            5.6
## 10 acrid            7  
## # … with 1,556 more rows

Column Frequencies/Ranks

# For each of the five letter positions, count how often every letter occurs
# in that position across all solutions, then rank the letters within the
# position. Columns 2-6 hold the counts and columns 7-11 the matching ranks
# (rank 1 = most frequent; tied letters share the average rank).
column_ranks <- data.frame(matrix(nrow = 26, ncol = 11))
column_ranks$X1 <- letters

for (pos in 1:5) {
  # Extract the letter at this position once per position rather than once per
  # letter, and count by exact equality instead of regex matching.
  letters_at_pos <- substr(wordlewordsdf$Solutions, pos, pos)
  # factor(levels = letters) keeps a zero count for letters that never occur
  # in this position and fixes the a-z row order.
  column_ranks[, pos + 1] <- as.vector(
    table(factor(letters_at_pos, levels = letters))
  )
  # Negating the counts makes the most frequent letter rank 1.
  column_ranks[, pos + 6] <- rank(-column_ranks[, pos + 1])
}

colnames(column_ranks) <- c(
  'Letter',
  'Pos1_F', 'Pos2_F', 'Pos3_F', 'Pos4_F', 'Pos5_F',
  'Pos1_R', 'Pos2_R', 'Pos3_R', 'Pos4_R', 'Pos5_R'
)

column_ranks

##    Letter Pos1_F Pos2_F Pos3_F Pos4_F Pos5_F Pos1_R Pos2_R Pos3_R Pos4_R Pos5_R
## 1       a    141    304    307    163     64    6.0    1.0    1.0    4.0   10.0
## 2       b    173     16     57     24     11    3.0   17.5   15.0   21.0   19.5
## 3       c    198     40     56    152     31    2.0   13.0   16.0    7.5   16.0
## 4       d    111     20     75     69    118    9.0   16.0   11.0   13.0    8.0
## 5       e     72    242    177    318    424   14.0    4.0    4.0    1.0    1.0
## 6       f    136      8     25     35     26    7.0   23.0   20.0   18.0   17.0
## 7       g    115     12     67     76     41    8.0   21.0   12.0   12.0   14.0
## 8       h     69    144      9     28    139   15.0    8.0   24.0   19.0    6.0
## 9       i     34    202    266    158     11   19.0    5.0    2.0    6.0   19.5
## 10      j     20      2      3      2      0   22.5   25.5   25.0   25.0   25.0
## 11      k     20     10     12     55    113   22.5   22.0   21.5   15.0    9.0
## 12      l     88    201    112    162    156   12.0    6.0    8.0    5.0    5.0
## 13      m    107     38     61     68     42   10.0   14.0   13.0   14.0   13.0
## 14      n     37     87    139    182    130   18.0    9.0    7.0    2.0    7.0
## 15      o     41    279    244    132     58   17.0    2.0    3.0   10.0   11.0
## 16      p    142     61     58     50     56    5.0   11.0   14.0   16.0   12.0
## 17      q     23      5      1      0      0   21.0   24.0   26.0   26.0   25.0
## 18      r    105    267    163    152    212   11.0    3.0    6.0    7.5    4.0
## 19      s    366     16     80    171     36    1.0   17.5   10.0    3.0   15.0
## 20      t    149     77    111    139    253    4.0   10.0    9.0    9.0    3.0
## 21      u     33    186    165     82      1   20.0    7.0    5.0   11.0   23.0
## 22      v     43     15     49     46      0   16.0   19.0   17.0   17.0   25.0
## 23      w     83     44     26     25     17   13.0   12.0   19.0   20.0   18.0
## 24      x      0     14     12      3      8   26.0   20.0   21.5   23.5   21.0
## 25      y      6     23     29      3    364   24.0   15.0   18.0   23.5    2.0
## 26      z      3      2     11     20      4   25.0   25.5   23.0   22.0   22.0

Ranking Unique Words by Column Rank

# Score every unique-letter word by the mean position-specific rank of its
# letters: the letter in position k is looked up in the rank column for
# position k (columns 7-11 of `column_ranks`). Lower is better.
#
# Each position is processed for all words at once with match(), replacing the
# nested loop that grew two vectors with c() on every iteration.
# A letter that is absent from `column_ranks` now yields NA for that word
# instead of being silently left out of the sum.
col_rank_total <- numeric(nrow(uniquewordsdf))

for (pos in 1:5) {
  letter_at_pos <- substr(uniquewordsdf$UniqueWords, pos, pos)
  letter_row <- match(letter_at_pos, column_ranks$Letter)
  col_rank_total <- col_rank_total + column_ranks[letter_row, pos + 6]
}

word_rank <- col_rank_total / 5

uniquewordsdf <- uniquewordsdf %>%
  mutate(Avg_Col_Rank = word_rank)

uniquewordsdf

## # A tibble: 1,566 × 3
##    UniqueWords Avg_Rank Avg_Col_Rank
##    <chr>          <dbl>        <dbl>
##  1 abhor            8.2         12.3
##  2 abide            8.2          7.9
##  3 abled            8            8.1
##  4 abode            7.6          8.1
##  5 abort            6.4          7.4
##  6 about            8            8.1
##  7 above            9.4          8.9
##  8 abuse            8            6.5
##  9 acorn            5.6          7.3
## 10 acrid            7            7.8
## # … with 1,556 more rows

Composite Rank

# Composite score: the plain average of the overall-rank score and the
# column-rank score for each word.
uniquewordsdf <- mutate(
  uniquewordsdf,
  Composite_Rank = (Avg_Rank + Avg_Col_Rank) / 2
)
uniquewordsdf

## # A tibble: 1,566 × 4
##    UniqueWords Avg_Rank Avg_Col_Rank Composite_Rank
##    <chr>          <dbl>        <dbl>          <dbl>
##  1 abhor            8.2         12.3          10.2 
##  2 abide            8.2          7.9           8.05
##  3 abled            8            8.1           8.05
##  4 abode            7.6          8.1           7.85
##  5 abort            6.4          7.4           6.9 
##  6 about            8            8.1           8.05
##  7 above            9.4          8.9           9.15
##  8 abuse            8            6.5           7.25
##  9 acorn            5.6          7.3           6.45
## 10 acrid            7            7.8           7.4 
## # … with 1,556 more rows

Best Starting Words by Overall Rank

# Pick three starting words by lowest Avg_Rank, each sharing no letter with
# the words chosen before it.
#
# The narrowing is done on a working copy. Overwriting `uniquewordsdf` here
# would permanently drop every word containing one of the chosen letters, so
# the sections that follow would no longer see the full candidate list.
uniquewordsdf %>%
  arrange(Avg_Rank)

startingword <- 'alter'
# '[alter]' is a regex character class matching any one of the word's letters;
# the words consist of plain lowercase letters, so no escaping is needed.
overall_candidates <- uniquewordsdf %>%
  filter(!grepl(paste0('[', startingword, ']'), UniqueWords))

overall_candidates %>%
  arrange(Avg_Rank)

secondword <- 'sonic'
overall_candidates <- overall_candidates %>%
  filter(!grepl(paste0('[', secondword, ']'), UniqueWords))

overall_candidates %>%
  arrange(Avg_Rank)

The best three starting words, based on lowest average overall rank, are, in order: alter, sonic, and dumpy.

Best Starting Words by Column Ranks

# Pick three starting words by lowest Avg_Col_Rank, each sharing no letter
# with the words chosen before it.
#
# The narrowing is done on a working copy so that `uniquewordsdf` is not
# reduced further for later sections.
uniquewordsdf %>%
  arrange(Avg_Col_Rank)

startingword <- 'crane'
# '[crane]' is a regex character class matching any one of the word's letters;
# the words consist of plain lowercase letters, so no escaping is needed.
column_candidates <- uniquewordsdf %>%
  filter(!grepl(paste0('[', startingword, ']'), UniqueWords))

column_candidates %>%
  arrange(Avg_Col_Rank)

secondword <- 'foist'
column_candidates <- column_candidates %>%
  filter(!grepl(paste0('[', secondword, ']'), UniqueWords))

# This section ranks by column score, so the final listing sorts by
# Avg_Col_Rank as well (it previously sorted by Avg_Rank).
# NOTE(review): re-check the third word quoted in the text against this
# listing, since the sort column has changed.
column_candidates %>%
  arrange(Avg_Col_Rank)

The three best starting words, based on lowest average column rank, are, in order: crane, foist, and lumpy.

Best Starting Words by Composite Rank Score

# Pick three starting words by lowest Composite_Rank, each sharing no letter
# with the words chosen before it.
#
# The narrowing is done on a working copy so that `uniquewordsdf` itself is
# left as it was when this section started.
uniquewordsdf %>%
  arrange(Composite_Rank)

startingword <- 'crane'
# '[crane]' is a regex character class matching any one of the word's letters;
# the words consist of plain lowercase letters, so no escaping is needed.
composite_candidates <- uniquewordsdf %>%
  filter(!grepl(paste0('[', startingword, ']'), UniqueWords))

composite_candidates %>%
  arrange(Composite_Rank)

secondword <- 'foist'
composite_candidates <- composite_candidates %>%
  filter(!grepl(paste0('[', secondword, ']'), UniqueWords))

composite_candidates %>%
  arrange(Composite_Rank)

The best three starting words, based on composite rank score, are, in order: crane, moist/foist (tied), and bulky.

RanksStartingWord

Jie Heng Yu

1/5/2023