Before we begin, we load the packages required for data analysis, we set a seed so that the visualizations of our graphs will be consistent, and we establish a palette.
library(tidyverse)
library(sf)
library(spData)
library(rnaturalearth)
library(xml2)
library(igraph)
library(knitr)
library(markovchain)
set.seed(87)
palette <- c("#1b4079","#a71d31","#fbfaef","#ea8c55","#08a4bd")
Bird Buddy is a company that sells fancy bird feeders equipped with cameras. The feeders take photos of birds as they feed and identify the species automatically. Bird Buddy releases anonymized geospatial data monthly that includes the time each photo was taken and the species that was captured on camera. Using this data, we will be determining the best bird-watching paths for each region of the United States. The priorities that will dictate the best paths are:
Since we are working with geospatial data, we first define a function for transforming longitude and latitude coordinates to the matching state in the United States (US) and the District of Columbia (DC). We adapted the function from here.
lon_lat_to_state <- function(pointsDF,
states = spData::us_states,
name_col = "NAME") {
pts <- st_as_sf(pointsDF, coords = 1:2, crs = 4326)
states <- st_transform(states, crs = 3857)
pts <- st_transform(pts, crs = 3857)
state_names <- states[[name_col]]
ii <- as.integer(st_intersects(pts, states))
state_names[ii]
}
us_states_ne <- ne_states(country = "United States of America",
returnclass = "sf")
We aggregate the Bird Buddy data for all months from October 2022, the first month for which Bird Buddy released data, to February 2023. We then map the longitude and latitude coordinates to states. We have removed geospatial data that falls outside the US and DC.
read_vFiles <- function(v){
dfx <- read.csv(file=v, header=TRUE)
}
my_url0 <- "https://raw.githubusercontent.com/geedoubledee/data607_final_project/main/completed.txt"
completed <- readLines(my_url0)
if (length(completed) == 0){
vFiles <- list.files(pattern = "csv$")
bird_buddy_df <- as.data.frame(matrix(nrow = 0, ncol = 5))
cols <- c("lat", "lon", "timestamp", "species")
reorder <- c("lon", "lat", "timestamp", "species")
for (i in 1:length(vFiles)){
dfx <- read_vFiles(vFiles[i])
colnames(dfx) <- cols
dfx <- dfx[reorder]
state <- lon_lat_to_state(dfx,
states = us_states_ne,
name_col = "name")
dfx <- cbind(dfx, state)
bird_buddy_df <- rbind(bird_buddy_df, dfx)
}
write.csv(bird_buddy_df, "bird_buddy_df.csv", row.names=FALSE)
}
Next, we calculate the probability of seeing particular species in each state based on the number of times that species has been seen in that state divided by that state’s total count of bird sightings. We have removed species with probabilities of being seen of less than 0.001 from our analysis.
if (!"bird_buddy_df.csv" %in% completed){
total_by_state <- bird_buddy_df %>%
filter(!is.na(state)) %>%
group_by(state) %>%
summarize(total = n()) %>%
arrange(desc(total))
bird_buddy_df_minimal <- bird_buddy_df %>%
filter(!is.na(state)) %>%
group_by(state, species) %>%
summarize(total = n()) %>%
arrange(state, desc(total))
write.csv(bird_buddy_df_minimal, "bird_buddy_df_minimal.csv",
row.names=FALSE)
bird_buddy_df_analysis <- total_by_state %>%
left_join(bird_buddy_df_minimal, by=join_by(state),
relationship="one-to-many")
cols <- c("state", "state_total", "species", "species_total")
colnames(bird_buddy_df_analysis) <- cols
bird_buddy_df_analysis <- bird_buddy_df_analysis %>%
mutate(p = round(species_total / state_total, 4)) %>%
filter(p >= 0.001) %>%
arrange(state, desc(p))
write.csv(bird_buddy_df_analysis, "bird_buddy_df_analysis.csv",
row.names=FALSE)
}else{
my_url1 <- "https://raw.githubusercontent.com/geedoubledee/data607_final_project/main/bird_buddy_df_minimal.csv"
my_url2 <- "https://raw.githubusercontent.com/geedoubledee/data607_final_project/main/bird_buddy_df_analysis.csv"
bird_buddy_df_minimal <- read.csv(my_url1)
bird_buddy_df_analysis <- read.csv(my_url2)
}
Now that our bird sighting data is ready, we gather additional information regarding the states:
The two-letter state abbreviations were included with the geospatial data parsing packages.
cols <- c("name", "postal")
state_abbrev <- as.data.frame(us_states_ne[, colnames(us_states_ne) %in% cols])
rownames(state_abbrev) <- NULL
remove <- "geometry"
state_abbrev <- state_abbrev[, !colnames(state_abbrev) %in% remove]
cols <- c("state", "abbrev")
colnames(state_abbrev) <- cols
The state birds were listed on Wikipedia.
my_url3 <- "https://en.wikipedia.org/wiki/List_of_U.S._state_birds"
dat <- rvest::read_html(my_url3)
tbls <- rvest::html_nodes(dat, "table")
state_birds <- as.data.frame(rvest::html_table(tbls)[[1]])
state_birds <- state_birds[1:2]
cols <- c("state", "bird")
colnames(state_birds) <- cols
state_birds <- state_birds %>%
filter(state %in% unique(us_states_ne$name))
state_birds[39, 2] <- NA #fix PA
state_birds$bird <- trimws(tolower(str_replace_all(state_birds$bird, "\\(.*", "")))
The Bureau of Economic Analysis divides the US and DC into eight regions that are useful for our analysis.
my_url4 <- "https://raw.githubusercontent.com/geedoubledee/data607_final_project/main/BEA_State_Regions.txt"
state_regions <- as.data.frame(readLines(my_url4))
colnames(state_regions) <- "region_state"
state_regions <- state_regions %>%
separate_wider_delim(region_state, delim = ":", names = c("region", "state"))
state_regions$state <- str_replace_all(state_regions$state, " and", "")
state_regions <- state_regions %>%
separate_longer_delim(state, delim = ",") %>%
mutate(across(where(is.character), str_trim))
reorder <- c("state", "region")
state_regions <- state_regions[, reorder]
We then combine this additional state data into one data frame.
state_df <- state_abbrev %>%
left_join(state_regions, by = join_by(state)) %>%
left_join(state_birds, by = join_by(state)) %>%
arrange(state)
rm(state_abbrev)
rm(state_regions)
rm(state_birds)
To generate undirected graphs for the states in each region, we develop adjacency matrices for each region.
far_west <- state_df %>%
filter(region == "Far West")
great_lakes <- state_df %>%
filter(region == "Great Lakes")
mideast <- state_df %>%
filter(region == "Mideast")
new_england <- state_df %>%
filter(region == "New England")
plains <- state_df %>%
filter(region == "Plains")
rocky_mountain <- state_df %>%
filter(region == "Rocky Mountain")
southeast <- state_df %>%
filter(region == "Southeast")
southwest <- state_df %>%
filter(region == "Southwest")
far_west_adj <- as.matrix(rbind(c(0, 0, 0, 0, 0, 1),
c(0, 0, 1, 1, 1, 0),
c(0, 1, 0, 0, 0, 0),
c(0, 1, 0, 0, 1, 0),
c(0, 1, 0, 1, 0, 1),
c(1, 0, 0, 0, 1, 0)))
colnames(far_west_adj) <- far_west$abbrev
rownames(far_west_adj) <- far_west$abbrev
great_lakes_adj <- as.matrix(rbind(c(0, 1, 0, 0, 1),
c(1, 0, 1, 1, 0),
c(0, 1, 0, 1, 1),
c(0, 1, 1, 0, 0),
c(1, 0, 1, 0, 0)))
colnames(great_lakes_adj) <- great_lakes$abbrev
rownames(great_lakes_adj) <- great_lakes$abbrev
mideast_adj <- as.matrix(rbind(c(0, 0, 1, 1, 0, 1),
c(0, 0, 1, 0, 0, 0),
c(1, 1, 0, 0, 0, 1),
c(1, 0, 0, 0, 1, 1),
c(0, 0, 0, 1, 0, 1),
c(1, 0, 1, 1, 1, 0)))
colnames(mideast_adj) <- mideast$abbrev
rownames(mideast_adj) <- mideast$abbrev
new_england_adj <- as.matrix(rbind(c(0, 0, 1, 0, 1, 0),
c(0, 0, 0, 1, 0, 0),
c(1, 0, 0, 1, 1, 1),
c(0, 1, 1, 0, 0, 1),
c(1, 0, 1, 0, 0, 0),
c(0, 0, 1, 1, 0, 0)))
colnames(new_england_adj) <- new_england$abbrev
rownames(new_england_adj) <- new_england$abbrev
plains_adj <- as.matrix(rbind(c(0, 0, 1, 1, 1, 0, 1),
c(0, 0, 0, 1, 1, 0, 0),
c(1, 0, 0, 0, 0, 1, 1),
c(1, 1, 0, 0, 1, 0, 0),
c(1, 1, 0, 1, 0, 0, 1),
c(0, 0, 1, 0, 0, 0, 1),
c(1, 0, 1, 0, 1, 1, 0)))
colnames(plains_adj) <- plains$abbrev
rownames(plains_adj) <- plains$abbrev
rocky_mountain_adj <- as.matrix(rbind(c(0, 0, 0, 1, 1),
c(0, 0, 1, 1, 1),
c(0, 1, 0, 0, 1),
c(1, 1, 0, 0, 1),
c(1, 1, 1, 1, 0)))
colnames(rocky_mountain_adj) <- rocky_mountain$abbrev
rownames(rocky_mountain_adj) <- rocky_mountain$abbrev
southeast_adj <- as.matrix(rbind(c(0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0),
c(0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0),
c(1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0),
c(1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0),
c(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1),
c(0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0),
c(1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0),
c(0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0),
c(0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0),
c(1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0),
c(0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1),
c(0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0)))
colnames(southeast_adj) <- southeast$abbrev
rownames(southeast_adj) <- southeast$abbrev
southwest_adj <- as.matrix(rbind(c(0, 1, 0, 0),
c(1, 0, 1, 1),
c(0, 1, 0, 1),
c(0, 1, 1, 0)))
colnames(southwest_adj) <- southwest$abbrev
rownames(southwest_adj) <- southwest$abbrev
We then create graphs from the adjacency matrices and plot them.
far_west_g <- graph_from_adjacency_matrix(far_west_adj, mode = "undirected",
weighted = TRUE)
rocky_mountain_g <- graph_from_adjacency_matrix(rocky_mountain_adj,
mode = "undirected",
weighted = TRUE)
plains_g <- graph_from_adjacency_matrix(plains_adj, mode = "undirected",
weighted = TRUE)
great_lakes_g <- graph_from_adjacency_matrix(great_lakes_adj,
mode = "undirected",
weighted = TRUE)
par(mfrow = c(2, 2), mar = c(1.5, 0.5, 1.5, 0.5))
layout <- layout_nicely(far_west_g)
plot(far_west_g, layout = layout, vertex.color = palette[2],
edge.color = palette[2], vertex.label.color = palette[3],
vertex.size = 40,
main = "Far West")
layout <- layout_nicely(rocky_mountain_g)
plot(rocky_mountain_g, layout = layout, vertex.color = palette[4],
edge.color = palette[4], vertex.label.color = palette[3],
vertex.size = 40,
main = "Rocky Mountain")
layout <- layout_nicely(plains_g)
plot(plains_g, layout = layout, vertex.color = palette[4],
edge.color = palette[4], vertex.label.color = palette[3],
vertex.size = 40,
main = "Plains")
layout <- layout_nicely(great_lakes_g)
plot(great_lakes_g, layout = layout, vertex.color = palette[2],
edge.color = palette[2], vertex.label.color = palette[3],
vertex.size = 40,
main = "Great Lakes")
southwest_g <- graph_from_adjacency_matrix(southwest_adj, mode = "undirected",
weighted = TRUE)
southeast_g <- graph_from_adjacency_matrix(southeast_adj, mode = "undirected",
weighted = TRUE)
mideast_g <- graph_from_adjacency_matrix(mideast_adj, mode = "undirected",
weighted = TRUE)
new_england_g <- graph_from_adjacency_matrix(new_england_adj,
mode = "undirected",
weighted = TRUE)
par(mfrow = c(2, 2), mar = c(1.5, 0.5, 1.5, 0.5))
layout <- layout_nicely(southwest_g)
plot(southwest_g, layout = layout, vertex.color = palette[1],
edge.color = palette[1], vertex.label.color = palette[3],
vertex.size = 40,
main = "Southwest")
layout <- layout_nicely(southeast_g)
plot(southeast_g, layout = layout, vertex.color = palette[5],
edge.color = palette[5], vertex.label.color = palette[3],
vertex.size = 40,
main = "Southeast")
layout <- layout_nicely(mideast_g)
plot(mideast_g, layout = layout, vertex.color = palette[5],
edge.color = palette[5], vertex.label.color = palette[3],
vertex.size = 40,
main = "Mideast")
layout <- layout_nicely(new_england_g)
plot(new_england_g, layout = layout, vertex.color = palette[1],
edge.color = palette[1], vertex.label.color = palette[3],
vertex.size = 40,
main = "New England")
Now that every region is a graph of states as vertices and their connections as edges, we can find all the Hamiltonian paths within each graph. Hamiltonian paths visit each vertex of a graph exactly once. To find them, we first find all the simple paths, which are paths that do not include repeating vertices. Then we eliminate any simple path that does not visit all the vertices, leaving us with only Hamiltonian paths.
pathing_df <- bird_buddy_df_analysis
pathing_df$species <- tolower(pathing_df$species)
pathing_df <- pathing_df %>%
left_join(state_df, by = join_by(state)) %>%
mutate(p = ifelse(.$species == .$bird, round(p^(1/2), 4), p)) %>%
arrange(desc(p))
remove <- "bird"
pathing_df <- pathing_df[, !colnames(pathing_df) %in% remove]
regional_df_list <- list(far_west, great_lakes, mideast, new_england, plains, rocky_mountain, southeast, southwest)
regional_graph_list <- list(far_west_g, great_lakes_g, mideast_g, new_england_g, plains_g, rocky_mountain_g, southeast_g, southwest_g)
find_all_hamiltonian_paths <- function(df_list, graph_list){
hp_all_list <- vector(mode = "list", length = length(df_list))
for (i in 1:length(df_list)){
df <- df_list[[i]]
g <- graph_list[[i]]
hp_all <- as.data.frame(matrix(nrow = 0, ncol = nrow(df)))
for (j in 1:nrow(df)){
start = df[j, 2]
hp_regional <- all_simple_paths(g, from = start)
# turn the list of igraph objects into a list of ordinary vectors,
# turn those vectors into matrices, transpose them, and convert them
# to data frames before using list_rbind; that function only works if
# each element of the list is a dataframe or NULL
hp_regional <- list_rbind(map(sapply(hp_regional, as_ids),
function(x) as.data.frame(t(as.matrix(x)))))
# remove paths that don't visit all 12 vertices
hp_regional <- hp_regional[rowSums(is.na(hp_regional)) == 0, ]
if (ncol(hp_regional) < nrow(df)){
# means all vertices cannot be reached from that start without
# visiting at least one vertex twice
next
}else{
hp_all <- rbind(hp_all, hp_regional)
}
}
hp_all_list[i] <- list(hp_all)
}
hp_all_list
}
hp_all_list <- find_all_hamiltonian_paths(regional_df_list, regional_graph_list)
We apply weights to each edge of each graph. The weights applied are the inverse probability of seeing the most likely bird to be seen in that state, excepting any already seen birds from previously visited states in the region. By doing that, paths to new birds with higher chances of being seen are considered shorter. There is a bonus (i.e. reduced weight) for seeing a bird in a state where it is the state bird, calculated by taking the square root of the probability before we take the inverse of it. We avoid duplicate bird sightings along each path by removing already seen species from the possible sightings in subsequent states along each path.
We imagine we are always flying to each region, so that we can enter the graph from anywhere. With that in mind, there is essentially an unseen starting vertex connected to every state in each region. Once you account for this unseen starting vertex in each graph, there is always one fewer edge than there are vertices in it, as is proper.
Summing the determined weights for each path reveals the shortest or geodesic path for reach region.
apply_wts_to_hpaths <- function(hp_all_list){
wts_all_list <- vector(mode = "list", length = length(hp_all_list))
specs_all_list <- wts_all_list
shortest_all_list <- wts_all_list
for (i in 1:length(hp_all_list)){
hp_regional <- hp_all_list[[i]]
wts_df <- as.data.frame(matrix(0, nrow = nrow(hp_regional)),
ncol = ncol(hp_regional))
specs_df <- wts_df
for (j in 1:nrow(hp_regional)){
copy1 <- pathing_df
for (k in 1:ncol(hp_regional)){
st <- hp_regional[j, k]
st_bird <- state_df[state_df$abbrev == st, 4]
copy2 <- copy1
copy2 <- copy2 %>%
filter(abbrev == st)
spec <- copy2[1, 3]
p <- copy2[1, 5]
wts_df[j, k] <- 1 - p
specs_df[j, k] <- spec
copy1 <- copy1 %>%
filter(species != spec)
}
}
wts_all_list[i] <- list(wts_df)
specs_all_list[i] <- list(specs_df)
wts_df <- wts_df %>%
mutate(across(everything(), ~replace_na(.x, 1))) %>%
mutate(total = rowSums(.[1:ncol(wts_df)]))
specs_df <- specs_df %>%
mutate(across(everything(), ~replace_na(.x, "None")))
shortest <- which(wts_df$total == min(wts_df$total), arr.ind = TRUE)
if (length(shortest) > 1){
# means there is a tie
shortest <- shortest[1]
}
shortest_df <- rbind(hp_regional[shortest, ], specs_df[shortest, ],
wts_df[shortest, 1:(ncol(wts_df) - 1)])
shortest_df[3, ] <- 1 - as.numeric(shortest_df[3, ])
shortest_df <- as.data.frame(t(shortest_df))
rownames(shortest_df) <- NULL
cols <- c("abbrev", "sighting", "p")
colnames(shortest_df) <- cols
shortest_df <- shortest_df %>%
left_join(state_df, by = join_by(abbrev)) %>%
mutate(saw_state_bird = ifelse(.$sighting == .$bird, "Yes", "No"))
remove <- "bird"
shortest_df <- shortest_df[, !colnames(shortest_df) %in% remove]
shortest_all_list[i] <- list(shortest_df)
}
wts_specs_shortest_list <- list(wts_all_list, specs_all_list, shortest_all_list)
wts_specs_shortest_list
}
wts_specs_shortest_list <- apply_wts_to_hpaths(hp_all_list)
We have found the best bird-watching paths for all regions:
shortest_unpacked <- wts_specs_shortest_list[[3]]
for (i in 1:length(shortest_unpacked)){
df <- shortest_unpacked[[i]]
shortest_unpacked[[i]] <- cbind(order = rownames(df), df)
}
shortest_unpacked_df <- list_rbind(shortest_unpacked)
kable(shortest_unpacked_df[, -4], format = "simple")
order | abbrev | sighting | state | region | saw_state_bird |
---|---|---|---|---|---|
1 | AK | black-capped chickadee | Alaska | Far West | No |
2 | WA | dark-eyed junco | Washington | Far West | No |
3 | OR | house finch | Oregon | Far West | No |
4 | NV | mourning dove | Nevada | Far West | No |
5 | CA | california quail | California | Far West | Yes |
6 | HI | java sparrow | Hawaii | Far West | No |
1 | OH | northern cardinal | Ohio | Great Lakes | Yes |
2 | IN | house finch | Indiana | Great Lakes | No |
3 | IL | house sparrow | Illinois | Great Lakes | No |
4 | WI | purple finch | Wisconsin | Great Lakes | No |
5 | MI | tufted titmouse | Michigan | Great Lakes | No |
1 | DC | house sparrow | District of Columbia | Mideast | No |
2 | MD | northern cardinal | Maryland | Mideast | No |
3 | DE | house finch | Delaware | Mideast | No |
4 | NJ | purple finch | New Jersey | Mideast | No |
5 | NY | eastern bluebird | New York | Mideast | Yes |
6 | PA | american crow | Pennsylvania | Mideast | NA |
1 | RI | northern cardinal | Rhode Island | New England | No |
2 | CT | tufted titmouse | Connecticut | New England | No |
3 | MA | black-capped chickadee | Massachusetts | New England | Yes |
4 | VT | blue jay | Vermont | New England | No |
5 | NH | purple finch | New Hampshire | New England | Yes |
6 | ME | eastern bluebird | Maine | New England | No |
1 | MO | northern cardinal | Missouri | Plains | No |
2 | KS | house finch | Kansas | Plains | No |
3 | NE | house sparrow | Nebraska | Plains | No |
4 | SD | dark-eyed junco | South Dakota | Plains | No |
5 | ND | purple finch | North Dakota | Plains | No |
6 | MN | black-capped chickadee | Minnesota | Plains | No |
7 | IA | common starling | Iowa | Plains | No |
1 | CO | house finch | Colorado | Rocky Mountain | No |
2 | WY | eurasian collared dove | Wyoming | Rocky Mountain | No |
3 | MT | house sparrow | Montana | Rocky Mountain | No |
4 | ID | dark-eyed junco | Idaho | Rocky Mountain | No |
5 | UT | pine siskin | Utah | Rocky Mountain | No |
1 | KY | northern cardinal | Kentucky | Southeast | Yes |
2 | TN | house finch | Tennessee | Southeast | No |
3 | AR | northern mockingbird | Arkansas | Southeast | Yes |
4 | LA | house sparrow | Louisiana | Southeast | No |
5 | MS | american goldfinch | Mississippi | Southeast | No |
6 | AL | purple finch | Alabama | Southeast | No |
7 | FL | fish crow | Florida | Southeast | No |
8 | GA | tufted titmouse | Georgia | Southeast | No |
9 | SC | carolina wren | South Carolina | Southeast | Yes |
10 | NC | eastern bluebird | North Carolina | Southeast | No |
11 | VA | blue jay | Virginia | Southeast | No |
12 | WV | black-capped chickadee | West Virginia | Southeast | No |
1 | AZ | mourning dove | Arizona | Southwest | No |
2 | NM | house finch | New Mexico | Southwest | No |
3 | OK | northern cardinal | Oklahoma | Southwest | No |
4 | TX | white-winged dove | Texas | Southwest | No |
From our results, we will highlight the Southeast region, whose geodisic path results in seeing the most birds in the states where they are the state birds.
southeast_shortest <- shortest_unpacked[[7]]
southeast_shortest$p <- as.numeric(southeast_shortest$p)
kable(southeast_shortest[, -4], format = "simple")
order | abbrev | sighting | state | region | saw_state_bird |
---|---|---|---|---|---|
1 | KY | northern cardinal | Kentucky | Southeast | Yes |
2 | TN | house finch | Tennessee | Southeast | No |
3 | AR | northern mockingbird | Arkansas | Southeast | Yes |
4 | LA | house sparrow | Louisiana | Southeast | No |
5 | MS | american goldfinch | Mississippi | Southeast | No |
6 | AL | purple finch | Alabama | Southeast | No |
7 | FL | fish crow | Florida | Southeast | No |
8 | GA | tufted titmouse | Georgia | Southeast | No |
9 | SC | carolina wren | South Carolina | Southeast | Yes |
10 | NC | eastern bluebird | North Carolina | Southeast | No |
11 | VA | blue jay | Virginia | Southeast | No |
12 | WV | black-capped chickadee | West Virginia | Southeast | No |
To see how long our pretend bird-watching journey through the Southeast might take, we set up a transition matrix for moving along the Markov chain that represents our path. The last state in the path is the absorbing state, and we start from the unseen vertex outside the region that we mentioned earlier.
P <- matrix(0, nrow = (nrow(southeast_shortest) + 1),
ncol = (nrow(southeast_shortest) + 1))
rownames(P) <- c("XX", southeast_shortest$abbrev)
colnames(P) <- c("XX", southeast_shortest$abbrev)
diag(P) <- c(1 - southeast_shortest$p, 1)
j <- 2
for (i in 1:length(southeast_shortest$p)){
P[i, j] <- southeast_shortest[i, "p"]
j <- j + 1
}
to_LaTeX <- function(A){
rows <- apply(A, MARGIN=1, paste, collapse = " & ")
matrix_string <- paste(rows, collapse = " \\\\ ")
return(paste("\\begin{bmatrix}", matrix_string, "\\end{bmatrix}"))
}
mc_P <- new("markovchain", states = rownames(P), byrow = TRUE,
transitionMatrix = P, name = "Southeast")
mc_P_g <- as(mc_P, "igraph")
plot(simplify(mc_P_g), layout = layout.circle, vertex.color = palette[5],
edge.color = palette[5], vertex.label.color = palette[3],
vertex.size = 30, edge.arrow.size = 0.5,
main = "Southeast")
The transition probabilities in our transition matrix are the edge weights, and we will treat the mean steps required for absorption time as days.
P_rounded <- round(P, 2)
P_print <- to_LaTeX(P_rounded)
P=[0.390.610000000000000.80.20000000000000.880.120000000000000.90.10000000000000.930.070000000000000.950.050000000000000.930.070000000000000.920.080000000000000.850.150000000000000.930.070000000000000.970.030000000000000.950.050000000000001]
steps <- as.data.frame(meanAbsorptionTime(mc_P))
cols <- "steps"
colnames(steps) <- cols
steps <- round(steps, 2)
steps <- as.data.frame(t(steps))
kable(steps, format = "simple")
XX | KY | TN | AR | LA | MS | AL | FL | GA | SC | NC | VA | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
steps | 160.22 | 158.59 | 153.46 | 145.43 | 135.53 | 120.19 | 100.89 | 86.2 | 73.13 | 66.6 | 51.35 | 21.14 |
Starting from outside the region, it would take around 160 days to walk our path from Kentucky to West Virginia and see the birds we want to see.
Having identified the best bird-watching paths for all regions according to our priorities, there is still a lot of room for improvement. We could reconfigure the priorities so that if you have a better chance of seeing a new bird by staying in the state you’re in than by moving to a new state, you don’t move. This reconfiguration would probably require allowing paths that do not visit all states in the region, and/or allowing paths that revisit states.