From Mark Bradwin comes a fishy puzzle about state names:
Ohio is the only state whose name doesn’t share any letters with the word “mackerel.” It’s strange, but it’s true.
But that isn’t the only pairing of a state and a word you can say that about — it’s not even the only fish! Kentucky has “goldfish” to itself, Montana has “jellyfish” and Delaware has “monkfish,” just to name a few.
What is the longest “mackerel?” That is, what is the longest word that doesn’t share any letters with exactly one state? (If multiple “mackerels” are tied for being the longest, can you find them all?)
Extra credit: Which state has the most “mackerels?” That is, which state has the most words for which it is the only state without any letters in common with those words?
(For both the Riddler and the extra credit, please refer to Friend of the Riddler™ Peter Norvig’s word list.)
Nice. Finally we have something that I won’t feel bad about not being able to solve analytically. The only thing I’ll feel bad about is not writing efficient code.
library(stringr)
library(ggplot2)
library(ggrepel)
library(scales)
# Encode a word as a 26-element logical vector, one flag per letter of the
# alphabet (a-z, in order): TRUE when that letter occurs anywhere in
# `wordVector`, a character vector of single characters. Written as a
# one-argument function so it can be handed straight to lapply().
uniqueLetters <- function (wordVector) {
  firstPosition <- match(letters, wordVector)
  !is.na(firstPosition)
}
# TRUE when the two words have NO letter in common, FALSE as soon as they
# share at least one.
# Both inputs are the logical letter-presence vectors built by uniqueLetters();
# a shared letter is a position that is TRUE in both of them.
compareWords <- function (word1, word2) {
  inBoth <- word1 & word2
  !any(inBoth)
}
# Encode every candidate word as a 26-element logical vector (one flag per
# letter of the alphabet), named by the word itself.
words <- scan("words.txt", what = "char", skipNul = TRUE)
wordList <- lapply(str_split(words, ""), uniqueLetters)
names(wordList) <- words
# Same encoding for the state names, lower-cased so they line up with `letters`.
states <- tolower(scan("states.txt", what = "char", skipNul = TRUE))
statesList <- lapply(str_split(states, ""), uniqueLetters)
names(statesList) <- states
# Final list of mackerels, one entry per state. Every entry starts with an NA
# placeholder so that states without any mackerel still appear in the output;
# the code that reads mackerels.yaml back removes it with is.na() and
# subtracts one from the lengths.
mackerels <- rep(list(NA), length(states))
names(mackerels) <- states
# Stack the encodings into 0/1 matrices with one column per word / per state.
# Looking elements up by name inside a loop costs a linear scan per lookup,
# which is what made the original double loop so slow.
wordMatrix <- matrix(
  as.integer(unlist(wordList, use.names = FALSE)),
  nrow = length(letters)
)
stateMatrix <- matrix(
  as.integer(unlist(statesList, use.names = FALSE)),
  nrow = length(letters)
)
# sharedLetters[w, s] is the number of distinct letters that word w and
# state s have in common; zero means the pair is compatible.
sharedLetters <- crossprod(wordMatrix, stateMatrix)
noOverlap <- sharedLetters == 0
# A word is a mackerel only if exactly one state fits the requirement.
hasSingleState <- rowSums(noOverlap) == 1
for (stateIndex in seq_along(states)) {
  # Logical subsetting keeps the words in their original file order.
  found <- words[hasSingleState & noOverlap[, stateIndex]]
  if (length(found) > 0) {
    mackerels[[stateIndex]] <- c(mackerels[[stateIndex]], found)
  }
}
# save the output
yaml::write_yaml(mackerels, "mackerels.yaml")
This took somewhere between 10 and 15 minutes to run. That's not too bad, but rather than run it again I'll just import the saved list.
# Reload the saved results instead of recomputing them.
mackerels <- yaml::read_yaml("mackerels.yaml")
# Flatten to a single vector of words and drop the per-state NA placeholders.
allMackerels <- unlist(mackerels, use.names = FALSE)
allMackerels <- allMackerels[!is.na(allMackerels)]
allMackerelsLength <- str_length(allMackerels)
allMackerelsMax <- max(allMackerelsLength)
# Keep every word tied for the maximum length, not just the first one.
longestMackerels <- allMackerels[allMackerelsLength == allMackerelsMax]
print(paste0("The longest 'mackerels' are ", paste(longestMackerels, collapse = " and "), "."))
## [1] "The longest 'mackerels' are counterproductivenesses and hydrochlorofluorocarbon."
# Each entry still carries its NA placeholder, so the real number of mackerels
# is one less than the entry's length. Use the same corrected counts both to
# pick the winning state and to report its total.
mackerelCounts <- lengths(mackerels) - 1
print(paste(tools::toTitleCase(names(which.max(mackerelCounts))), "has the most 'mackerels' with", max(mackerelCounts), "mackerels."))
## [1] "Ohio has the most 'mackerels' with 11342 mackerels."
Now that we’ve answered the Riddler, let’s see if we can do something fun with the data. Does the number of unique letters in a state’s name affect the number of “mackerels” that state has?
mackerels <- yaml::read_yaml("mackerels.yaml")
# One row per state, built in a single vectorised step:
#   nMackerels - entry length minus one, because every entry carries an NA
#                placeholder in front of its words
#   nLetters   - number of distinct letters a-z in the state name
# Indexing `mackerels` with the character vector `states` (rather than a
# data-frame column) guarantees lookup by name even where data.frame() turns
# strings into factors.
mackerelDF <- data.frame(
  state = states,
  nMackerels = lengths(mackerels[states], use.names = FALSE) - 1,
  nLetters = vapply(
    str_split(states, ""),
    function(stateChars) sum(letters %in% stateChars),
    numeric(1)
  )
)
# Scatter of mackerel count against the number of unique letters in the state
# name, with a least-squares fit of y on 1/x drawn on top. The x/y aesthetics
# are declared once on the plot so both layers inherit them.
ggplot(mackerelDF, aes(x = nLetters, y = nMackerels)) +
  geom_point(alpha = 0.3, size = 4) +
  geom_smooth(method = "lm", formula = y ~ I(1 / x)) +
  scale_x_continuous(breaks = seq(0, 11, by = 1)) +
  # `labels` spelled out in full instead of relying on partial matching
  scale_y_continuous(breaks = seq(0, 12000, by = 2000), labels = number) +
  xlab("Unique Letters in State Name") +
  ylab("Number of \"Mackerels\"")
# Same model as the smoother above; the call is left exactly as written so the
# printed "Call:" line and coefficient names match the output recorded below.
summary(lm(mackerelDF$nMackerels ~ I(1/mackerelDF$nLetters)))
##
## Call:
## lm(formula = mackerelDF$nMackerels ~ I(1/mackerelDF$nLetters))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2823.3 -945.0 -142.7 628.7 6078.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3694.7 796.5 -4.639 2.73e-05 ***
## I(1/mackerelDF$nLetters) 26875.9 4439.7 6.054 2.08e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1679 on 48 degrees of freedom
## Multiple R-squared: 0.4329, Adjusted R-squared: 0.4211
## F-statistic: 36.65 on 1 and 48 DF, p-value: 2.077e-07
Not bad. What about GDP per capita?
# Bolt the GDP table onto the per-state frame column-wise, then drop the
# fourth column of the combined frame.
# NOTE(review): cbind() pairs rows purely by position, so this assumes gdp.csv
# lists the states in the same order as `states`, and that column 4 is the
# CSV's own state column - confirm against the file.
mackerelDF <- cbind(mackerelDF, read.csv("gdp.csv"))[, -4]
mackerelDF$gdp <- as.numeric(mackerelDF$gdp)
mackerelDF$gdpPerCapita <- as.numeric(mackerelDF$gdpPerCapita)
# States that get a text label in the first label layer; New York is handled
# by its own layer because its label text and offsets differ.
calloutStates <- c(
  "ohio", "alabama", "mississippi", "illinois", "connecticut", "michigan",
  "maine", "california", "vermont", "florida", "virginia", "alaska",
  "pennsylvania", "montana", "hawaii", "texas", "georgia"
)
# y is shifted by one so that states with zero mackerels survive the log scale.
ggplot(mackerelDF, aes(x = gdpPerCapita, y = nMackerels + 1, labels = state)) +
  geom_point(alpha = 0.5, stroke = 0, size = 5) +
  geom_text_repel(
    aes(label = ifelse(state %in% calloutStates, tools::toTitleCase(as.character(state)), '')),
    hjust = -0.2, vjust = -0.9
  ) +
  geom_text_repel(
    aes(label = ifelse(state %in% c("newyork"), "New York", '')),
    hjust = 0.8, vjust = -1.4
  ) +
  scale_x_continuous(breaks = seq(0, 100000, by = 10000), labels = dollar) +
  scale_y_log10(breaks = c(0, 10, 100, 500, 1000, 2000, 4000, 12000), labels = number) +
  labs(x = "GDP Per Capita", y = "Number of \"Mackerels\"") +
  theme(legend.position="none")
No.