Travelling Salesman Problem: Visit Each City Once In The Shortest
Route and Finish Where You Started.
pacman::p_load(pacman,geosphere,GA)
# Read the CSV file as a single column
cities_raw <- read.csv("UK_Cities.csv", header = TRUE, sep = ",")
# Split the single column into multiple columns
cities <- data.frame(do.call('rbind', strsplit(as.character(cities_raw$City.Latitude.Longitude.Population), ',', fixed = TRUE)))
# Rename the columns
colnames(cities) <- c("City", "Latitude", "Longitude", "Population")
# Convert Latitude, Longitude, and Population to numeric
cities$Latitude <- as.numeric(as.character(cities$Latitude))
cities$Longitude <- as.numeric(as.character(cities$Longitude))
cities$Population <- as.numeric(as.character(cities$Population))
# Verify the column names
print(names(cities))
## [1] "City" "Latitude" "Longitude" "Population"
# Calculate the distance matrix using longitude and latitude columns
distance_matrix <- distm(cities[, c("Longitude", "Latitude")])
# Nearest Neighbour Heuristic
nearest_neighbour_tsp <- function(distance_matrix) {
visited <- rep(FALSE, nrow(cities))
route <- integer(nrow(cities))
current_city <- 1
route[1] <- current_city
visited[current_city] <- TRUE
for (i in 2:nrow(cities)) {
distances <- distance_matrix[current_city, ]
distances[visited] <- Inf
next_city <- which.min(distances)
route[i] <- next_city
visited[next_city] <- TRUE
current_city <- next_city
}
# Return to start
route <- c(route, route[1])
total_distance <- sum(sapply(1:(length(route) - 1), function(i) {
distance_matrix[route[i], route[i + 1]]
}))
return(list(route = route, total_distance = total_distance))
}
# Genetic Algorithm
tsp_fitness <- function(route) {
total_distance <- sum(sapply(1:(length(route) - 1), function(i) {
distance_matrix[route[i], route[i + 1]]
})) + distance_matrix[route[length(route)], route[1]] # return to start
return(-total_distance)
}
genetic_algorithm_tsp <- function(distance_matrix) {
ga_tsp <- ga(
type = "permutation",
fitness = tsp_fitness,
lower = 1,
upper = nrow(cities),
popSize = 50,
maxiter = 500,
run = 100
)
best_route <- ga_tsp@solution[1, ]
best_route <- c(best_route, best_route[1])
best_distance <- -ga_tsp@fitnessValue
return(list(route = best_route, total_distance = best_distance))
}
# Run Nearest Neighbour Heuristic
nn_result <- nearest_neighbour_tsp(distance_matrix)
nn_route <- nn_result$route
nn_distance <- nn_result$total_distance
# Run Genetic Algorithm
ga_result <- genetic_algorithm_tsp(distance_matrix)
ga_route <- ga_result$route
ga_distance <- ga_result$total_distance
# Convert distances to kilometres and miles
nn_distance_km <- nn_distance / 1000
nn_distance_miles <- nn_distance / 1609.34
ga_distance_km <- ga_distance / 1000
ga_distance_miles <- ga_distance / 1609.34
# Print results
print(paste("Nearest Neighbour Route:", paste(cities$City[nn_route], collapse = " -> ")))
## [1] "Nearest Neighbour Route: London -> Woking -> Reading -> Oxford -> Swindon -> Bristol -> Cardiff -> Exeter -> Plymouth -> Southampton -> Portsmouth -> Brighton -> Crawley -> Southend-on-Sea -> Ipswich -> Norwich -> Cambridge -> Peterborough -> Northampton -> Milton Keynes -> Luton -> Coventry -> Birmingham -> Wolverhampton -> Stoke-on-Trent -> Stockport -> Manchester -> Oldham -> Huddersfield -> Bradford -> Leeds -> Barnsley -> Sheffield -> Derby -> Nottingham -> Leicester -> Warrington -> St Helens -> Liverpool -> Southport -> Blackpool -> York -> Hull -> Middlesbrough -> Sunderland -> Newcastle upon Tyne -> Edinburgh -> Glasgow -> Aberdeen -> London"
print(paste("Nearest Neighbour Total Distance (meters):", nn_distance))
## [1] "Nearest Neighbour Total Distance (meters): 3246987.78406202"
print(paste("Nearest Neighbour Total Distance (km):", nn_distance_km))
## [1] "Nearest Neighbour Total Distance (km): 3246.98778406202"
print(paste("Nearest Neighbour Total Distance (miles):", nn_distance_miles))
## [1] "Nearest Neighbour Total Distance (miles): 2017.58968525111"
print(paste("Genetic Algorithm Route:", paste(cities$City[ga_route], collapse = " -> ")))
## [1] "Genetic Algorithm Route: Reading -> Southampton -> London -> Nottingham -> Leicester -> Bradford -> Oldham -> Stoke-on-Trent -> Southport -> St Helens -> Stockport -> Peterborough -> Bristol -> Cardiff -> Portsmouth -> Brighton -> Woking -> Exeter -> Plymouth -> Oxford -> Luton -> Milton Keynes -> Swindon -> Wolverhampton -> Birmingham -> Northampton -> Coventry -> Middlesbrough -> Sunderland -> Glasgow -> Edinburgh -> Newcastle upon Tyne -> Blackpool -> Warrington -> Manchester -> Derby -> Cambridge -> Barnsley -> York -> Hull -> Sheffield -> Leeds -> Huddersfield -> Aberdeen -> Liverpool -> Norwich -> Ipswich -> Southend-on-Sea -> Crawley -> Reading"
print(paste("Genetic Algorithm Total Distance (meters):", ga_distance))
## [1] "Genetic Algorithm Total Distance (meters): 5434072.61363995"
print(paste("Genetic Algorithm Total Distance (km):", ga_distance_km))
## [1] "Genetic Algorithm Total Distance (km): 5434.07261363995"
print(paste("Genetic Algorithm Total Distance (miles):", ga_distance_miles))
## [1] "Genetic Algorithm Total Distance (miles): 3376.58457109122"
Code Explaination.
knitr::opts_chunk$set(echo = TRUE): This line is a configuration
setting for the knitr package, commonly used in R Markdown documents. It
instructs knitr to display the R code chunks within your document when
it is rendered. This is useful for making your analysis transparent and
reproducible.
pacman::p_load(…): The p_load function from the pacman package
simplifies package management. It installs and loads the necessary
packages (geosphere for distance calculations and GA for the genetic
algorithm) in one step.
read.csv(…): This reads the UK_Cities.csv file into a data frame
called cities_raw. It assumes that the file has a header row and that
the values are separated by commas.
Data Splitting and Conversion: The subsequent lines handle the
specific format of the CSV file, where all city details are in a single
column. The code splits this column into separate columns for “City,”
“Latitude,” “Longitude,” and “Population.” It also converts the numeric
columns (Latitude, Longitude, Population) to the appropriate data
type.
distm(…): This function from the geosphere package calculates a
distance matrix. Each entry in the matrix represents the geographical
distance between two cities. This matrix is essential for the TSP
algorithms.
Nearest Neighbour Heuristic.
This function implements the Nearest Neighbour heuristic for the
TSP:
It starts from the first city (current_city = 1).
It repeatedly finds the nearest unvisited city to the current city
and adds it to the route.
Genetic Algorithm.
tsp_fitness(route): This function evaluates the “fitness” of a given
route (permutation of cities). The fitness is the negative of the total
distance of the route (because the GA package aims to maximise the
fitness function).
genetic_algorithm_tsp(distance_matrix): This function applies a
genetic algorithm to find a good solution for the TSP:
It initialises a population of random routes.
It repeatedly evolves the population by selecting fitter routes,
applying crossover and mutation operators, and creating a new
generation.
This process continues until a termination condition is met (e.g., a
maximum number of iterations).
It returns the best route found.
It marks the visited city.
It continues until all cities are visited and returns to the
starting city.
The final part of the code runs the Nearest Neighbour and Genetic
Algorithm functions, converts the distances to kilometres and miles, and
prints the results (the routes and their total distances) for both
algorithms.