Loading Libraries
library(car)
library(mosaic)
library(pander)
library(tidyverse)
library(readxl)
library(janitor)
library(lubridate)
Filtering the data by narrowing the date range to August onwards and
removing three outlier rows
# Read the raw CSV and normalise its column names with clean_names().
wordle_data <- read_csv("Problem_C_Data_Wordle.csv") %>%
  clean_names()

# Drop rows 154-359 by position, then rows 7, 32 and 109 of what remains.
# The second set of indices refers to the already-trimmed table, so the two
# removals must stay in this order.
wordle_data <- wordle_data[-(154:359), ]
wordle_data <- wordle_data[-c(7, 32, 109), ]

# Convert the date column with mdy() (month-day-year order) so it can be used
# as a Date predictor in the model.
wordle_data$date <- mdy(wordle_data$date)
Linear Regression Model
# Least-squares fit of number_of_reported_results on date; the fitted model is
# kept in word.lm because the diagnostic and prediction steps reuse it.
word.lm <- lm(
  number_of_reported_results ~ date,
  data = wordle_data
)

# Render the coefficient table and fit statistics with pander.
pander(summary(word.lm))
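|             | Estimate | Std. Error | t value | Pr(>\|t\|) |
|-------------|----------|------------|---------|------------|
| (Intercept) | 2004661  | 61357      | 32.67   | 1.488e-69  |
| date        | -102.5   | 3.182      | -32.2   | 9.936e-69  |

Fitting linear model: number_of_reported_results ~ date

| Observations | Residual Std. Error | R²     | Adjusted R² |
|--------------|---------------------|--------|-------------|
| 150          | 1715                | 0.8751 | 0.8742      |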
Assessing Linear Regression Validity
# Draw three diagnostic plots side by side: plot.lm panels 1 and 2 for the
# fitted model, followed by the residuals plotted in row order.
# par() returns the previous settings; they are restored at the end so the
# three-panel layout does not leak into later base-graphics plots.
old_par <- par(mfrow = c(1, 3))
plot(word.lm, which = 1:2)
plot(word.lm$residuals, ylab = "Residuals")
mtext("Residuals vs Order", side = 3)
par(old_par)

Prediction Interval
# Point prediction and prediction interval from word.lm for 2023-03-01,
# rendered with pander.
pander(
  predict(
    word.lm,
    newdata = data.frame(date = as.Date("2023-03-01")),
    interval = "prediction"
  )
)
Linear Regression Graph
# Scatter plot of reported results against date with the fitted straight line
# from a linear model overlaid (se = FALSE, so no confidence band is drawn).
ggplot(
  data = wordle_data,
  mapping = aes(x = date, y = number_of_reported_results)
) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x, se = FALSE) +
  theme_bw()
