# Load the table of candidate first guesses from disk and show it.
# The printed output below shows two columns: Input1 (the word) and
# Classif1 (a label such as "Guess" or "Solution").
uniqueInputs <- read.csv('uniqueAnswersDF.csv') %>%
  as_tibble()
uniqueInputs
## # A tibble: 8,322 × 2
## Input1 Classif1
## <chr> <chr>
## 1 abers Guess
## 2 abets Guess
## 3 abies Guess
## 4 abler Guess
## 5 ables Guess
## 6 ablet Guess
## 7 ablow Guess
## 8 abmho Guess
## 9 abohm Guess
## 10 aboil Guess
## # … with 8,312 more rows
# Load the list of possible solutions and give every row a blank
# five-character feedback pattern ('-----') in a new Result column.
solutions <- read.csv('valid_solutions.csv') %>%
  as_tibble() %>%
  mutate(Result = '-----')
solutions
## # A tibble: 2,315 × 2
## word Result
## <chr> <chr>
## 1 aback -----
## 2 abase -----
## 3 abate -----
## 4 abbey -----
## 5 abbot -----
## 6 abhor -----
## 7 abide -----
## 8 abled -----
## 9 abode -----
## 10 abort -----
## # … with 2,305 more rows
Using the test word faith, we’ll score it against every possible Wordle solution. This gives us every outcome (feedback pattern) that faith can produce. We’ll then tabulate the frequency of each outcome: the frequency is the number of solutions that would remain in the solution set if that outcome occurred. With these frequencies, we can find the best starting word based on the expected reduction in the size of the solution set.
# Score one guess against every candidate solution using Wordle's rules.
#
# guess          a single word (character scalar)
# solutionWords  character vector of candidate solutions, each assumed to
#                have `wordLength` letters
# wordLength     number of letter positions to score (default 5)
#
# Returns a character vector with one feedback pattern per solution:
# 'G' = right letter in the right position, 'Y' = letter is in the solution
# but elsewhere, '-' = no match. A repeated guess letter is marked 'Y' at
# most as many times as the solution has copies of it that are not already
# matched green, scanning the guess from left to right.
scoreGuess <- function(guess, solutionWords, wordLength = 5L) {
  nWords <- length(solutionWords)
  slots <- seq_len(wordLength)
  guessLetters <- substring(guess, slots, slots)

  # One row per solution, one column per letter position.
  positions <- rep(slots, each = nWords)
  solutionLetters <- matrix(
    substring(rep(solutionWords, times = wordLength), positions, positions),
    nrow = nWords,
    ncol = wordLength
  )

  isGreen <- solutionLetters == guessLetters[col(solutionLetters)]
  feedback <- matrix('-', nrow = nWords, ncol = wordLength)
  feedback[isGreen] <- 'G'

  for (letter in unique(guessLetters)) {
    # Copies of this letter in each solution that no green has used up.
    unmatched <- rowSums(solutionLetters == letter & !isGreen)
    for (pos in which(guessLetters == letter)) {
      isYellow <- !isGreen[, pos] & unmatched > 0
      feedback[isYellow, pos] <- 'Y'
      unmatched <- unmatched - isYellow
    }
  }

  do.call(paste0, lapply(slots, function(pos) feedback[, pos]))
}

testingInput <- 'faith'
# Rebuild the whole Result column from scratch so the patterns never depend
# on whatever a previous run left behind in it.
solutions$Result <- scoreGuess(testingInput, solutions$word)
# Tabulate the feedback patterns stored in solutions$Result. Freq is the
# number of solutions that share a pattern, i.e. how many solutions would
# remain if that pattern came back.
#   Prob            share of all solutions that produce the pattern
#   Removed         minus the number of solutions the pattern rules out
#   Percent_removed Removed as a fraction of all solutions (so <= 0)
# The total is taken from the data rather than hard-coded.
nSolutions <- nrow(solutions)
testDf <- as.data.frame(table(solutions$Result)) %>%
  mutate(Prob = Freq / nSolutions,
         Removed = -(nSolutions - Freq),
         Percent_removed = Removed / nSolutions)
testDf
# Distribution of outcomes for the test word.
# Each pattern's Percent_removed is repeated once per solution that produces
# it, giving one entry per solution. rep() with a vector `times` does this in
# one step and also copes with an empty table.
possiblepercentremoved <- rep(testDf$Percent_removed, times = testDf$Freq)
hist(possiblepercentremoved)

# Total number of solutions that were scored (the pattern counts sum to it).
nScored <- sum(testDf$Freq)

# Expected number of solutions remaining after the guess.
nScored + mean(possiblepercentremoved * nScored)
# Central 95% range of the number remaining (2.5th and 97.5th percentiles).
# This is a percentile range of the outcomes, not a sampling confidence
# interval.
nScored + (as.vector(quantile(possiblepercentremoved, c(0.025, 0.975))) * nScored)
If we entered audio as the first word, we would expect to reduce the number of possible Wordle solutions to 183.7 words. 95% of the time, audio will reduce the number of possible Wordle solutions to between 4 and 435 words, inclusive.
If we entered faith as the first word, we would expect to reduce the number of possible Wordle solutions to 180.6 words. 95% of the time, faith will reduce the number of possible Wordle solutions to between 3 and 472 words, inclusive.
# Score one guess against every candidate solution using Wordle's rules.
#
# guess          a single word (character scalar)
# solutionWords  character vector of candidate solutions, each assumed to
#                have `wordLength` letters
# wordLength     number of letter positions to score (default 5)
#
# Returns a character vector with one feedback pattern per solution:
# 'G' = right letter in the right position, 'Y' = letter is in the solution
# but elsewhere, '-' = no match. A repeated guess letter is marked 'Y' at
# most as many times as the solution has copies of it that are not already
# matched green, scanning the guess from left to right.
scoreGuess <- function(guess, solutionWords, wordLength = 5L) {
  nWords <- length(solutionWords)
  slots <- seq_len(wordLength)
  guessLetters <- substring(guess, slots, slots)

  # One row per solution, one column per letter position.
  positions <- rep(slots, each = nWords)
  solutionLetters <- matrix(
    substring(rep(solutionWords, times = wordLength), positions, positions),
    nrow = nWords,
    ncol = wordLength
  )

  isGreen <- solutionLetters == guessLetters[col(solutionLetters)]
  feedback <- matrix('-', nrow = nWords, ncol = wordLength)
  feedback[isGreen] <- 'G'

  for (letter in unique(guessLetters)) {
    # Copies of this letter in each solution that no green has used up.
    unmatched <- rowSums(solutionLetters == letter & !isGreen)
    for (pos in which(guessLetters == letter)) {
      isYellow <- !isGreen[, pos] & unmatched > 0
      feedback[isYellow, pos] <- 'Y'
      unmatched <- unmatched - isYellow
    }
  }

  do.call(paste0, lapply(slots, function(pos) feedback[, pos]))
}

# For every candidate first guess, summarise how many solutions would remain
# after the feedback is revealed. Result vectors are preallocated instead of
# being grown with c() on each iteration.
nInputs <- nrow(uniqueInputs)
means <- numeric(nInputs)
medians <- numeric(nInputs)
lowerBounds <- numeric(nInputs)
upperBounds <- numeric(nInputs)

for (i in seq_len(nInputs)) {
  # Patterns are computed from scratch for each guess, so they do not depend
  # on whatever is currently stored in solutions$Result.
  patterns <- scoreGuess(uniqueInputs$Input1[i], solutions$word)

  # Number of solutions sharing each pattern.
  bucketSizes <- as.vector(table(patterns))
  # One entry per solution: the size of the bucket that solution lands in,
  # i.e. the number of solutions remaining if it were the answer.
  remaining <- rep(bucketSizes, times = bucketSizes)

  # Central 95% range of the number remaining (2.5th / 97.5th percentiles).
  bounds <- quantile(remaining, c(0.025, 0.975), names = FALSE)

  means[i] <- mean(remaining)
  medians[i] <- median(remaining)
  lowerBounds[i] <- bounds[1]
  upperBounds[i] <- bounds[2]
}

# Attach the summaries and rank the guesses; a smaller SumScore (median
# remaining plus width of the 95% range) sorts first.
uniqueInputs <- uniqueInputs %>%
  mutate(LowerBound = lowerBounds,
         UpperBound = upperBounds,
         BoundRange = UpperBound - LowerBound,
         Mean = means,
         Median = medians,
         SumScore = Median + BoundRange) %>%
  arrange(SumScore)
uniqueInputs
## # A tibble: 8,322 × 8
## Input1 Classif1 LowerBound UpperBound BoundRange Mean Median SumScore
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 raise Solution 3 168 165 61.0 43 208
## 2 arise Solution 4 168 164 63.7 51 215
## 3 soare Guess 3 183 180 62.3 42 222
## 4 raile Guess 3 173 170 61.3 54 224
## 5 reais Guess 3 168 165 71.6 60 225
## 6 ariel Guess 3 173 170 65.3 56 226
## 7 serai Guess 3 168 165 72.9 62 227
## 8 aesir Guess 4 168 164 69.9 64 228
## 9 aloes Guess 3 174 171 77.4 58 229
## 10 arose Solution 3 183 180 66.0 49 229
## # … with 8,312 more rows