# load libraries
library(knitr)
library(rmarkdown)
# read in the data
df <- read.csv("S:/ursa2/campbell/other2/wordle/wordle_20231104_summary.csv")
# print paged table
paged_table(df)CamWordleWorld
What in god’s sweet name have I done?
I’ve clearly got too much time on my hands, so I thought I’d do a little data exploration on our ongoing Wordle competition. I exported our CamWordleWorld chat to a text file, and then did some data cleanup to get a table of our daily scores. Here’s that table:
NA values represent puzzles that one (or more) of us didn’t report a score.
Individual bar graphs
If I remember correctly, at least some of us lost our overall score tallies at some point. But, the good news is, due to our diligent WhatsApp reporting of scores, I have all of the reported scores in this database. So, here are our individual bar graphs – sort of the equivalent of what you get after each game. Note that I treated “X”s as 7’s, and calculated averages (vertical dotted lines) accordingly:
# mickey's bar chart
par(mar = c(5,5,1,1), las = 1)
h <- hist(df$mickey, breaks = seq(0,7,1), xlim = c(0,7), xaxt = "n", xlab = "score",
main = "Mickey")
lab.ys <- ifelse(h$counts > 50, h$counts - 10, h$counts + 10)
text(x = seq(0.5,6.5), y = lab.ys, labels = h$counts)
axis(1, at = seq(0.5, 6.5), labels = seq(1,7))
abline(v = mean(df$mickey, na.rm = T), lty = 2, lwd = 3, col = "red")
legend("topright", legend = paste0("Mean = ", round(mean(df$mickey, na.rm = T), 2)),
lty = 2, lwd = 3, col = "red")# jon's bar chart
par(mar = c(5,5,1,1), las = 1)
h <- hist(df$jon, breaks = seq(0,7,1), xlim = c(0,7), xaxt = "n", xlab = "score",
main = "Jon")
lab.ys <- ifelse(h$counts > 50, h$counts - 10, h$counts + 10)
text(x = seq(0.5,6.5), y = lab.ys, labels = h$counts)
axis(1, at = seq(0.5, 6.5), labels = seq(1,7))
abline(v = mean(df$jon, na.rm = T), lty = 2, lwd = 3, col = "red")
legend("topright", legend = paste0("Mean = ", round(mean(df$jon, na.rm = T), 2)),
lty = 2, lwd = 3, col = "red")# dad's bar chart
par(mar = c(5,5,1,1), las = 1)
h <- hist(df$dad, breaks = seq(0,7,1), xlim = c(0,7), xaxt = "n", xlab = "score",
main = "Dad")
lab.ys <- ifelse(h$counts > 50, h$counts - 10, h$counts + 10)
text(x = seq(0.5,6.5), y = lab.ys, labels = h$counts)
axis(1, at = seq(0.5, 6.5), labels = seq(1,7))
abline(v = mean(df$dad, na.rm = T), lty = 2, lwd = 3, col = "red")
legend("topright", legend = paste0("Mean = ", round(mean(df$dad, na.rm = T), 2)),
lty = 2, lwd = 3, col = "red")So, Jon has the lowest average!
Individual game ranks
We can also explore how we rank at the individual game level (i.e., coming in 1st, 2nd, or 3rd on a per-game basis):
# get game-level ranks
df$mickey.rank <- apply(df[,2:4], 1, function(x) ifelse(is.na(x[1]), NA, floor(rank(x)[1])))
df$jon.rank <- apply(df[,2:4], 1, function(x) ifelse(is.na(x[2]), NA, floor(rank(x)[2])))
df$dad.rank <- apply(df[,2:4], 1, function(x) ifelse(is.na(x[3]), NA, floor(rank(x)[3])))
# mickey's bar chart
par(mar = c(5,5,1,1), las = 1)
h <- hist(df$mickey.rank, breaks = seq(0,3,1), xlim = c(0,3), xaxt = "n", xlab = "rank",
main = "Mickey")
lab.ys <- ifelse(h$counts > 50, h$counts - 10, h$counts + 10)
text(x = seq(0.5,2.5), y = lab.ys, labels = h$counts)
axis(1, at = seq(0.5, 2.5), labels = seq(1,3))
abline(v = mean(df$mickey.rank, na.rm = T), lty = 2, lwd = 3, col = "red")
legend("topright", legend = paste0("Mean = ", round(mean(df$mickey.rank, na.rm = T), 2)),
lty = 2, lwd = 3, col = "red")# jon's bar chart
par(mar = c(5,5,1,1), las = 1)
h <- hist(df$jon.rank, breaks = seq(0,3,1), xlim = c(0,3), xaxt = "n", xlab = "rank",
main = "Jon")
lab.ys <- ifelse(h$counts > 50, h$counts - 10, h$counts + 10)
text(x = seq(0.5,2.5), y = lab.ys, labels = h$counts)
axis(1, at = seq(0.5, 2.5), labels = seq(1,3))
abline(v = mean(df$jon.rank, na.rm = T), lty = 2, lwd = 3, col = "red")
legend("topright", legend = paste0("Mean = ", round(mean(df$jon.rank, na.rm = T), 2)),
lty = 2, lwd = 3, col = "red")# dad's bar chart
par(mar = c(5,5,1,1), las = 1)
h <- hist(df$dad.rank, breaks = seq(0,3,1), xlim = c(0,3), xaxt = "n", xlab = "rank",
main = "Dad")
lab.ys <- ifelse(h$counts > 50, h$counts - 10, h$counts + 10)
text(x = seq(0.5,2.5), y = lab.ys, labels = h$counts)
axis(1, at = seq(0.5, 2.5), labels = seq(1,3))
abline(v = mean(df$dad.rank, na.rm = T), lty = 2, lwd = 3, col = "red")
legend("topright", legend = paste0("Mean = ", round(mean(df$dad.rank, na.rm = T), 2)),
lty = 2, lwd = 3, col = "red")Based on these results, it looks like Mickey wins the most at the individual game level! :)
Scores over time
It feels like to me we individually have hot and cold streaks. I wanted to see if that was noticeable in the data. So, I’ll plot out lines representing average scores over time, on a “moving window” period of 7 days:
library(zoo)
Attaching package: 'zoo'
The following objects are masked from 'package:base':
as.Date, as.Date.numeric
# get weekly rolling means
df$mickey.mean.week <- rollmean(df$mickey, 7, align = "center", na.pad = T, na.rm = T)
df$jon.mean.week <- rollmean(df$jon, 7, align = "center", na.pad = T, na.rm = T)
df$dad.mean.week <- rollmean(df$dad, 7, align = "center", na.pad = T, na.rm = T)
# plot the data out
yrange <- range(df[,c("mickey.mean.week", "jon.mean.week", "dad.mean.week")],
na.rm = T)
par(mar = c(5,5,1,1), las = 1)
plot(mickey.mean.week ~ wordle, data = df, type = "l", lwd = 2, col = 2,
xlab = "Wordle", ylab = "7-day Mean Score", ylim = yrange)
lines(jon.mean.week ~ wordle, data = df, lwd = 2, col = 3)
lines(dad.mean.week ~ wordle, data = df, lwd = 2, col = 4)
legend("topright", legend = c("Mickey", "Jon", "Dad"), lwd = 2, col = c(2,3,4))…A little too noisy to see any major trends. Let’s try a larger temporal window (31 days) to get at monthly trends:
# get monthly rolling means
df$mickey.mean.month <- rollmean(df$mickey, 31, align = "center", na.pad = T, na.rm = T)
df$jon.mean.month <- rollmean(df$jon, 31, align = "center", na.pad = T, na.rm = T)
df$dad.mean.month <- rollmean(df$dad, 31, align = "center", na.pad = T, na.rm = T)
# plot the data out
yrange <- range(df[,c("mickey.mean.month", "jon.mean.month", "dad.mean.month")],
na.rm = T)
par(mar = c(5,5,1,1), las = 1)
plot(mickey.mean.month ~ wordle, data = df, type = "l", lwd = 2, col = 2,
xlab = "Wordle", ylab = "31-day Mean Score", ylim = yrange)
lines(jon.mean.month ~ wordle, data = df, lwd = 2, col = 3)
lines(dad.mean.month ~ wordle, data = df, lwd = 2, col = 4)
legend("topright", legend = c("Mickey", "Jon", "Dad"), lwd = 2, col = c(2,3,4))Pretty cool! Looks like we have all gone on hot and cold streaks over time.