# Load both word lists and label every row with the list it came from.
guess <- read.csv("valid_guesses.csv") %>%
  as_tibble() %>%
  mutate(Classif1 = "Guess")
solution <- read.csv("valid_solutions.csv") %>%
  as_tibble() %>%
  mutate(Classif1 = "Solution")

# Stack guesses on top of solutions; the first column (the word itself) is
# renamed to Input1 in the combined table only.
answers <- rbind(guess, solution)
names(answers)[1] <- "Input1"
answers
## # A tibble: 12,972 × 2
## Input1 Classif1
## <chr> <chr>
## 1 aahed Guess
## 2 aalii Guess
## 3 aargh Guess
## 4 aarti Guess
## 5 abaca Guess
## 6 abaci Guess
## 7 abacs Guess
## 8 abaft Guess
## 9 abaka Guess
## 10 abamp Guess
## # … with 12,962 more rows
The combined list contains 12,972 inputs: 10,657 guesses and 2,315 solutions.
# Keep only the words in which no letter occurs more than once.
# Every word is split into single characters and tested with all_unique().
# The test is applied to all words in one vapply() pass, which preallocates
# the logical result; the previous loop appended to unique_answers with c(),
# re-copying the whole vector on every hit.
letters_by_word <- strsplit(answers$Input1, split = "")
has_distinct_letters <- vapply(letters_by_word, all_unique, logical(1))
unique_answers <- answers$Input1[has_distinct_letters]

# Same membership filter as before, so the row order of answers is preserved.
uniqueAnswersDF <- answers %>%
  filter(Input1 %in% unique_answers)
uniqueAnswersDF
## # A tibble: 8,322 × 2
## Input1 Classif1
## <chr> <chr>
## 1 abers Guess
## 2 abets Guess
## 3 abies Guess
## 4 abler Guess
## 5 ables Guess
## 6 ablet Guess
## 7 ablow Guess
## 8 abmho Guess
## 9 abohm Guess
## 10 aboil Guess
## # … with 8,312 more rows
Of these, 8,322 inputs have no repeated letter: 6,756 guesses and 1,566 solutions.
# Build the green-letter reference table: one row per letter a-z and one
# column per position (C1..C5). Each cell is the share of solution words that
# have that letter in that position, rounded to 5 decimals.
#
# The denominator is taken from the data instead of a hard-coded 2315, so the
# table stays correct if the solutions file changes.
n_solutions <- nrow(solution)
stopifnot(n_solutions > 0)

green_ref <- data.frame(Letter = letters)
for (posit in 1:5) {
  letter_at_posit <- substr(solution$word, posit, posit)
  # Tabulate against all 26 letters so that a letter never seen in this
  # position still gets a count of 0 rather than being dropped.
  letter_counts <- table(factor(letter_at_posit, levels = letters))
  green_ref[[paste0("C", posit)]] <-
    round(as.vector(letter_counts) / n_solutions, digits = 5)
}
green_ref
## Letter C1 C2 C3 C4 C5
## 1 a 0.06091 0.13132 0.13261 0.07041 0.02765
## 2 b 0.07473 0.00691 0.02462 0.01037 0.00475
## 3 c 0.08553 0.01728 0.02419 0.06566 0.01339
## 4 d 0.04795 0.00864 0.03240 0.02981 0.05097
## 5 e 0.03110 0.10454 0.07646 0.13737 0.18315
## 6 f 0.05875 0.00346 0.01080 0.01512 0.01123
## 7 g 0.04968 0.00518 0.02894 0.03283 0.01771
## 8 h 0.02981 0.06220 0.00389 0.01210 0.06004
## 9 i 0.01469 0.08726 0.11490 0.06825 0.00475
## 10 j 0.00864 0.00086 0.00130 0.00086 0.00000
## 11 k 0.00864 0.00432 0.00518 0.02376 0.04881
## 12 l 0.03801 0.08683 0.04838 0.06998 0.06739
## 13 m 0.04622 0.01641 0.02635 0.02937 0.01814
## 14 n 0.01598 0.03758 0.06004 0.07862 0.05616
## 15 o 0.01771 0.12052 0.10540 0.05702 0.02505
## 16 p 0.06134 0.02635 0.02505 0.02160 0.02419
## 17 q 0.00994 0.00216 0.00043 0.00000 0.00000
## 18 r 0.04536 0.11533 0.07041 0.06566 0.09158
## 19 s 0.15810 0.00691 0.03456 0.07387 0.01555
## 20 t 0.06436 0.03326 0.04795 0.06004 0.10929
## 21 u 0.01425 0.08035 0.07127 0.03542 0.00043
## 22 v 0.01857 0.00648 0.02117 0.01987 0.00000
## 23 w 0.03585 0.01901 0.01123 0.01080 0.00734
## 24 x 0.00000 0.00605 0.00518 0.00130 0.00346
## 25 y 0.00259 0.00994 0.01253 0.00130 0.15724
## 26 z 0.00130 0.00086 0.00475 0.00864 0.00173
# For every candidate starting word, compute the chance of at least one green
# letter: 1 - prod(1 - p_i), where p_i is the green_ref share for the word's
# i-th letter in position i.
#
# vapply() preallocates the result; the previous nested loops grew both
# letter_prob and input_prob with c() on every iteration.
prob_by_posit <- as.matrix(green_ref[, 2:6])  # numeric columns C1..C5 only
input_prob <- vapply(uniqueAnswersDF$Input1, function(input) {
  letter_row <- match(substring(input, 1:5, 1:5), green_ref$Letter)
  # A character with no row in green_ref contributes nothing to the product,
  # matching the empty lookup the loop version produced for such characters.
  found <- !is.na(letter_row)
  letter_prob <- prob_by_posit[cbind(letter_row[found], which(found))]
  1 - prod(1 - letter_prob)
}, numeric(1), USE.NAMES = FALSE)

uniqueAnswersDF <- uniqueAnswersDF %>%
  mutate(P_green1 = input_prob)
uniqueAnswersDF
## # A tibble: 8,322 × 3
## Input1 Classif1 P_green1
## <chr> <chr> <dbl>
## 1 abers Guess 0.208
## 2 abets Guess 0.203
## 3 abies Guess 0.299
## 4 abler Guess 0.305
## 5 ables Guess 0.246
## 6 ablet Guess 0.318
## 7 ablow Guess 0.169
## 8 abmho Guess 0.125
## 9 abohm Guess 0.191
## 10 aboil Guess 0.275
## # … with 8,312 more rows
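P_green1 is the probability of getting at least one green letter when you enter the corresponding Input1 as your starting word. It is computed as 1 − ∏(1 − pᵢ), where pᵢ is the share of solutions that have the word's i-th letter in position i; this treats the five positions as independent.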
# Rank the candidate starting words from highest to lowest P_green1.
arrange(uniqueAnswersDF, desc(P_green1))
Guess, P_green1 = 0.513
Guess, P_green1 = 0.510
Guess, P_green1 = 0.506
Guess, P_green1 = 0.498
Solution, P_green1 = 0.488
Guess, P_green1 = 0.486
Guess, P_green1 = 0.486
Guess, P_green1 = 0.482
Solution, P_green1 = 0.482
Solution, P_green1 = 0.481