All of the potential Wordle solutions can be found at https://static.nytimes.com/newsgraphics/2022/01/25/wordle-solver/assets/solutions.txt . I thought it’d be fun to perform a statistical analysis on letter frequency by position.

First, load our packages.

library(tidyverse)
library(tidytext)
library(ggthemes)

Then load the data.

# Fetch the published list of Wordle solutions (presumably one word per
# line). With `col_names = FALSE` the single column gets readr's default
# name, X1, which the rest of the analysis refers to.
solutions <- read_table(
  file = "https://static.nytimes.com/newsgraphics/2022/01/25/wordle-solver/assets/solutions.txt",
  col_names = FALSE
)

What are the most common letters in Wordle solutions?

# Break every solution into single-character tokens, tally each token
# (most common first), and render the counts as a narrow HTML table.
solutions %>%
  unnest_tokens(
    output = shingle,
    input = X1,
    token = "character_shingles",
    n = 1
  ) %>%
  count(shingle, sort = TRUE) %>%
  knitr::kable(
    format = "html",
    table.attr = "style='width:30%;'"
  )
shingle n
e 1230
a 975
r 897
o 753
t 729
l 716
i 670
s 668
n 573
c 475
u 466
y 424
d 393
h 387
p 365
m 316
g 310
b 280
f 229
k 210
w 194
v 152
z 40
x 37
q 29
j 27

Let’s plot that:

# Horizontal bar chart of overall letter counts, with the most frequent
# letter at the top.
solutions %>%
  unnest_tokens(shingle, X1, token = "character_shingles", n = 1) %>%
  count(shingle, sort = TRUE) %>%
  ggplot(aes(x = reorder(shingle, n), y = n)) +
  geom_col() +
  # coord_flip() only changes where each aesthetic is drawn; xlab() and
  # ylab() still label the x (letter) and y (count) aesthetics, so the
  # labels must be given in aesthetic order, not on-screen order.
  coord_flip() +
  theme_clean() +
  xlab("Letter") +
  ylab("Frequency") +
  ggtitle("Frequency of Each Letter in Wordle Solutions")


How often does each letter appear in each of the five positions?

 # Split each solution into one column per character position (one..five).
 # Columns are referenced by bare name inside mutate() rather than through
 # `solutions$X1`, so the step always operates on the data being piped in
 # and keeps working if a filter or grouping is added upstream.
 # NOTE(review): the result name `letters` masks base R's built-in `letters`
 # constant; it is kept because the plotting code that follows refers to it.
 letters <- solutions %>%
   mutate(
     one = str_sub(X1, start = 1, end = 1),
     two = str_sub(X1, start = 2, end = 2),
     three = str_sub(X1, start = 3, end = 3),
     four = str_sub(X1, start = 4, end = 4),
     five = str_sub(X1, start = 5, end = 5)
   )

# Horizontal bar chart of how often each letter occurs in the first position,
# with the most frequent letter at the top.
letters %>%
  count(one, sort = TRUE) %>%
  ggplot(aes(x = reorder(one, n), y = n)) +
  geom_col() +
  coord_flip() +
  theme_clean() +
  # Explicit labels; otherwise the axes show the raw aes() expressions.
  labs(x = "Letter", y = "Count") +
  ggtitle("Frequency of Letters in First Position")

 # Horizontal bar chart of how often each letter occurs in the second
 # position, with the most frequent letter at the top.
 letters %>%
   count(two, sort = TRUE) %>%
   ggplot(aes(x = reorder(two, n), y = n)) +
   geom_col() +
   coord_flip() +
   theme_clean() +
   # Explicit labels; otherwise the axes show the raw aes() expressions.
   labs(x = "Letter", y = "Count") +
   ggtitle("Frequency of Letters in Second Position")

 # Horizontal bar chart of how often each letter occurs in the third
 # position, with the most frequent letter at the top.
 letters %>%
   count(three, sort = TRUE) %>%
   ggplot(aes(x = reorder(three, n), y = n)) +
   geom_col() +
   coord_flip() +
   theme_clean() +
   # Explicit labels; otherwise the axes show the raw aes() expressions.
   labs(x = "Letter", y = "Count") +
   ggtitle("Frequency of Letters in Third Position")

 # Horizontal bar chart of how often each letter occurs in the fourth
 # position, with the most frequent letter at the top.
 letters %>%
   count(four, sort = TRUE) %>%
   ggplot(aes(x = reorder(four, n), y = n)) +
   geom_col() +
   coord_flip() +
   theme_clean() +
   # Explicit labels; otherwise the axes show the raw aes() expressions.
   labs(x = "Letter", y = "Count") +
   ggtitle("Frequency of Letters in Fourth Position")

 # Horizontal bar chart of how often each letter occurs in the fifth
 # position, with the most frequent letter at the top.
 letters %>%
   count(five, sort = TRUE) %>%
   ggplot(aes(x = reorder(five, n), y = n)) +
   geom_col() +
   coord_flip() +
   theme_clean() +
   # Explicit labels; otherwise the axes show the raw aes() expressions.
   labs(x = "Letter", y = "Count") +
   ggtitle("Frequency of Letters in Fifth Position")