This R code aims to predict future lottery numbers by analysing the
historical frequencies of winning numbers. The code loads lottery data,
reshapes it for analysis, and calculates the observed frequencies of
each winning number. Rather than assuming a specific parametric
probability distribution, the code builds an empirical distribution
directly from the observed frequencies. This distribution is then used to
generate predictions for the next set of winning numbers. The code
offers a practical way to make predictions using the available
historical data. Data: 19/11/1994 to 29/11/2023. See notes after code
chunk for further explanation.
install.packages("tidyverse")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.3'
## (as 'lib' is unspecified)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the historical draw results from the CSV file
file_path <- "uk_lottery_numbers_2.csv"
lottery_data <- read.csv(file = file_path)
# Inspect the column names and types before transforming anything
str(lottery_data)
## 'data.frame': 2915 obs. of 10 variables:
## $ DrawDate: chr "29 Nov 2023" "25 Nov 2023" "22 Nov 2023" "18 Nov 2023" ...
## $ X : int 29 25 22 18 15 11 8 4 1 28 ...
## $ X.1 : chr "Nov" "Nov" "Nov" "Nov" ...
## $ X.2 : int 2023 2023 2023 2023 2023 2023 2023 2023 2023 2023 ...
## $ Ball.1 : int 19 5 9 3 6 12 1 20 9 25 ...
## $ Ball.2 : int 31 9 14 11 7 26 7 37 15 28 ...
## $ Ball.3 : int 35 31 26 22 16 28 12 39 36 37 ...
## $ Ball.4 : int 38 34 49 23 33 34 17 52 37 38 ...
## $ Ball.5 : int 42 46 55 32 40 41 46 54 42 42 ...
## $ Ball.6 : int 49 48 56 47 55 42 53 57 43 50 ...
# Convert the DrawDate strings (e.g. "29 Nov 2023") to Date values.
# Note: %b matches abbreviated month names in the current locale, so this
# parse yields NA for every row if the session locale is not English.
lottery_data$DrawDate <- as.Date(lottery_data$DrawDate, format = "%d %b %Y")
# Reshape to long format: one row per drawn ball instead of one row per draw
lottery_data_long <- lottery_data %>%
  pivot_longer(
    cols = starts_with("Ball"),
    names_to = "Ball_Type",
    values_to = "Winning_Number"
  )
# Count how many times each number has been drawn across all draws and all
# ball positions (the draw date plays no part in this tally)
frequency_data <- lottery_data_long %>%
  count(Winning_Number, name = "Frequency")
# Display the result
print(frequency_data)
## # A tibble: 59 × 2
## Winning_Number Frequency
## <int> <int>
## 1 1 313
## 2 2 317
## 3 3 336
## 4 4 335
## 5 5 327
## 6 6 331
## 7 7 334
## 8 8 335
## 9 9 344
## 10 10 344
## # ℹ 49 more rows
# Bar chart of how often each number has been drawn (gray bars, blue outline)
ggplot(
  data = frequency_data,
  mapping = aes(x = as.factor(Winning_Number), y = Frequency)
) +
  geom_col(fill = "gray", colour = "blue", linewidth = 0.5) +
  labs(
    title = "Frequency of Winning Numbers",
    x = "Winning Number",
    y = "Frequency"
  ) +
  theme_minimal()

# Ball numbers that can be drawn; change the upper bound to match the lottery
ball_range <- seq_len(59)
# Function to predict the next n winning combinations using observed frequencies
#
# Arguments:
#   frequency_data  Data frame (or list) with a `Frequency` column of observed
#                   counts. When a `Winning_Number` column is also present, the
#                   counts are matched to `ball_range` by number, so row order
#                   does not matter and numbers absent from the table get
#                   probability 0. Without that column the counts are used
#                   positionally and must have one entry per element of
#                   `ball_range`.
#   n               Number of combinations to generate (default 10); 0 yields
#                   an empty list.
#   ball_range      Vector of ball numbers that can be drawn.
#   balls_per_draw  Number of distinct balls in one combination (default 6).
#
# Returns a list of length n; each element is a vector of `balls_per_draw`
# distinct values from `ball_range`, in the order they were sampled.
predict_next_n_winning_combinations <- function(frequency_data, n = 10, ball_range,
                                                balls_per_draw = 6) {
  counts <- frequency_data[["Frequency"]]
  if (is.null(counts)) {
    stop("`frequency_data` must contain a `Frequency` column.", call. = FALSE)
  }

  if ("Winning_Number" %in% names(frequency_data)) {
    # Pair each ball with its own count instead of relying on row position
    idx <- match(ball_range, frequency_data[["Winning_Number"]])
    counts <- ifelse(is.na(idx), 0, counts[idx])
  } else if (length(counts) != length(ball_range)) {
    stop(
      "`frequency_data$Frequency` must have one entry per element of `ball_range`.",
      call. = FALSE
    )
  }

  total <- sum(counts)
  if (!is.finite(total) || total <= 0) {
    stop("Observed frequencies must sum to a positive number.", call. = FALSE)
  }

  # Empirical distribution: computed once, it does not change between draws
  distribution <- counts / total

  # Without enough balls of positive probability the retry loop below could
  # never produce a duplicate-free combination
  if (sum(distribution > 0) < balls_per_draw) {
    stop(
      "Fewer than `balls_per_draw` balls have a positive observed frequency.",
      call. = FALSE
    )
  }

  predictions <- vector("list", length = n)
  # seq_len() (rather than 1:n) makes n = 0 return an empty list
  for (i in seq_len(n)) {
    repeat {
      # Draw with replacement and retry on duplicates; this rejection step
      # defines the sampling distribution, so it is kept as is
      balls <- sample(ball_range, balls_per_draw, replace = TRUE, prob = distribution)
      if (!any(duplicated(balls))) {
        break
      }
    }
    # Store the prediction
    predictions[[i]] <- balls
  }
  return(predictions)
}
# Predict the next 10 winning combinations using observed frequencies
next_10_winning_combinations <- predict_next_n_winning_combinations(
  frequency_data,
  n = 10,
  ball_range = ball_range
)
# Print the predicted winning combinations
cat("Predicted Winning Combinations using Observed Frequencies:\n")
## Predicted Winning Combinations using Observed Frequencies:
# Iterate over the returned list itself so the loop stays correct if the
# number of requested combinations above is changed
for (i in seq_along(next_10_winning_combinations)) {
  cat("Set", i, ": Balls -", next_10_winning_combinations[[i]], "\n")
}
## Set 1 : Balls - 40 10 12 16 45 13
## Set 2 : Balls - 46 6 50 38 48 54
## Set 3 : Balls - 42 2 41 38 1 32
## Set 4 : Balls - 12 35 33 42 26 34
## Set 5 : Balls - 45 40 17 29 33 42
## Set 6 : Balls - 38 37 17 44 9 36
## Set 7 : Balls - 35 21 39 11 43 10
## Set 8 : Balls - 36 13 20 16 18 6
## Set 9 : Balls - 8 27 18 15 4 6
## Set 10 : Balls - 41 24 53 11 30 26