Data Sets

# Load the list of words that are accepted as guesses but are never the answer,
# and label every row with the list it came from.
guess <- read.csv('valid_guesses.csv') %>%
  as_tibble() %>%
  mutate(Classif1 = 'Guess')

# Load the list of words that can be the answer, labelled the same way.
solution <- read.csv('valid_solutions.csv') %>%
  as_tibble() %>%
  mutate(Classif1 = 'Solution')

# Stack the two lists (guesses first) and rename the first column, which holds
# the word itself, to 'Input1'.
answers <- rbind(guess, solution)
names(answers)[1] <- 'Input1'
answers
## # A tibble: 12,972 × 2
##    Input1 Classif1
##    <chr>  <chr>   
##  1 aahed  Guess   
##  2 aalii  Guess   
##  3 aargh  Guess   
##  4 aarti  Guess   
##  5 abaca  Guess   
##  6 abaci  Guess   
##  7 abacs  Guess   
##  8 abaft  Guess   
##  9 abaka  Guess   
## 10 abamp  Guess   
## # … with 12,962 more rows

The combined table holds 10,657 guess-only words and 2,315 solution words, for a total of 12,972 valid inputs.


Unique Answers

# Keep only the words whose five letters are all different.
#
# Each word is split into single characters once, and base R's anyDuplicated()
# reports whether any character repeats (it returns 0 when none does). Building
# the flag vector with vapply() avoids growing a result with c() inside a loop,
# which copies the whole vector on every append.
has_distinct_letters <- vapply(
  strsplit(answers$Input1, split = ''),
  function(chars) anyDuplicated(chars) == 0L,
  logical(1)
)

# Character vector of the qualifying words, kept under its original name.
unique_answers <- answers$Input1[has_distinct_letters]

uniqueAnswersDF <- answers %>%
  filter(Input1 %in% unique_answers)
uniqueAnswersDF
## # A tibble: 8,322 × 2
##    Input1 Classif1
##    <chr>  <chr>   
##  1 abers  Guess   
##  2 abets  Guess   
##  3 abies  Guess   
##  4 abler  Guess   
##  5 ables  Guess   
##  6 ablet  Guess   
##  7 ablow  Guess   
##  8 abmho  Guess   
##  9 abohm  Guess   
## 10 aboil  Guess   
## # … with 8,312 more rows

Here "unique" means the word has no repeated letters: 6,756 such guess-only words and 1,566 such solution words, for a total of 8,322 inputs.


Greens Reference

# Greens reference table: one row per letter a-z and one column per board
# position (C1..C5). Each cell is the share of solution words that have that
# letter in that position, rounded to 5 decimal places.
#
# The denominator is taken from the data rather than hard-coded, so the table
# stays correct if the solution list changes.
n_solutions <- nrow(solution)

green_ref <- data.frame(Letter = letters)
for (posit in 1:5) {
  # Count every letter at this position in a single pass. Fixing the factor
  # levels to a-z keeps the rows in alphabetical order and leaves letters that
  # never occur at this position with a count of 0.
  letter_at_posit <- factor(substr(solution$word, posit, posit), levels = letters)
  letter_green <- tabulate(letter_at_posit, nbins = length(letters))
  green_ref[[paste0('C', posit)]] <- round(letter_green / n_solutions, digits = 5)
}

green_ref
##    Letter      C1      C2      C3      C4      C5
## 1       a 0.06091 0.13132 0.13261 0.07041 0.02765
## 2       b 0.07473 0.00691 0.02462 0.01037 0.00475
## 3       c 0.08553 0.01728 0.02419 0.06566 0.01339
## 4       d 0.04795 0.00864 0.03240 0.02981 0.05097
## 5       e 0.03110 0.10454 0.07646 0.13737 0.18315
## 6       f 0.05875 0.00346 0.01080 0.01512 0.01123
## 7       g 0.04968 0.00518 0.02894 0.03283 0.01771
## 8       h 0.02981 0.06220 0.00389 0.01210 0.06004
## 9       i 0.01469 0.08726 0.11490 0.06825 0.00475
## 10      j 0.00864 0.00086 0.00130 0.00086 0.00000
## 11      k 0.00864 0.00432 0.00518 0.02376 0.04881
## 12      l 0.03801 0.08683 0.04838 0.06998 0.06739
## 13      m 0.04622 0.01641 0.02635 0.02937 0.01814
## 14      n 0.01598 0.03758 0.06004 0.07862 0.05616
## 15      o 0.01771 0.12052 0.10540 0.05702 0.02505
## 16      p 0.06134 0.02635 0.02505 0.02160 0.02419
## 17      q 0.00994 0.00216 0.00043 0.00000 0.00000
## 18      r 0.04536 0.11533 0.07041 0.06566 0.09158
## 19      s 0.15810 0.00691 0.03456 0.07387 0.01555
## 20      t 0.06436 0.03326 0.04795 0.06004 0.10929
## 21      u 0.01425 0.08035 0.07127 0.03542 0.00043
## 22      v 0.01857 0.00648 0.02117 0.01987 0.00000
## 23      w 0.03585 0.01901 0.01123 0.01080 0.00734
## 24      x 0.00000 0.00605 0.00518 0.00130 0.00346
## 25      y 0.00259 0.00994 0.01253 0.00130 0.15724
## 26      z 0.00130 0.00086 0.00475 0.00864 0.00173

Probability Calculation

# Estimate, for every candidate starting word, the probability of getting at
# least one green letter.
#
# For each position the word's letter is looked up in green_ref to get the share
# of solutions with that letter in that position. The five shares are combined
# as 1 - prod(1 - p), i.e. the positions are treated as independent events.
# NOTE(review): that independence is an approximation, since the five letters
# belong to the same solution word; the exact value would be the share of
# solutions matching the input in at least one position.

# Numeric 26 x 5 matrix of the position columns (C1..C5) for fast lookup.
green_mat <- as.matrix(green_ref[, 2:6])

input_prob <- vapply(
  uniqueAnswersDF$Input1,
  function(input) {
    # Row of green_ref for each of the five characters (NA if not a listed letter).
    row_idx <- match(substring(input, 1:5, 1:5), green_ref$Letter)
    # Characters with no row in green_ref contribute nothing to the product.
    found <- !is.na(row_idx)
    letter_prob <- green_mat[cbind(row_idx[found], (1:5)[found])]
    1 - prod(1 - letter_prob)
  },
  numeric(1),
  USE.NAMES = FALSE
)

uniqueAnswersDF <- uniqueAnswersDF %>%
  mutate(P_green1 = input_prob)
uniqueAnswersDF
## # A tibble: 8,322 × 3
##    Input1 Classif1 P_green1
##    <chr>  <chr>       <dbl>
##  1 abers  Guess       0.208
##  2 abets  Guess       0.203
##  3 abies  Guess       0.299
##  4 abler  Guess       0.305
##  5 ables  Guess       0.246
##  6 ablet  Guess       0.318
##  7 ablow  Guess       0.169
##  8 abmho  Guess       0.125
##  9 abohm  Guess       0.191
## 10 aboil  Guess       0.275
## # … with 8,312 more rows

Best Starting Words

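P_green1 is the estimated probability of getting at least one green letter when you enter the corresponding Input1 as your starting word. It is computed from the per-position letter frequencies above, treating the five positions as independent.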

# Rank the candidate starting words from highest to lowest P_green1.
arrange(uniqueAnswersDF, desc(P_green1))
  1. saine, Classification = Guess, P_green1 = 0.513
  2. soare, Classification = Guess, P_green1 = 0.510
  3. saice, Classification = Guess, P_green1 = 0.506
  4. slane, Classification = Guess, P_green1 = 0.498
  5. slate, Classification = Solution, P_green1 = 0.488
  6. soily, Classification = Guess, P_green1 = 0.486
  7. soave, Classification = Guess, P_green1 = 0.486
  8. samey, Classification = Guess, P_green1 = 0.482
  9. sauce, Classification = Solution, P_green1 = 0.482
  10. slice, Classification = Solution, P_green1 = 0.481