This script will retrieve game-level stats from the data
API available from collegebasketballdata.com. It retrieves the last two
complete seasons’ worth of data for MTSU men’s basketball, but it could
be adjusted to get data for other teams and other seasons. The data will
end up in a data frame called all_games_raw.
To use the script, you will need to obtain a free API key by visiting collegebasketballdata.com, clicking on the “Get Your Free API Key” link, typing in a working e-mail address (my Gmail address worked better than my MTSU address), and clicking submit.
Once you receive your API key via the e-mail address you provided, copy and paste the API key into the script in place of the PASTE_YOUR_API_KEY_HERE text. Be sure you don’t accidentally delete the quote marks around the API key.
There are other kinds of data available from the API’s other endpoints. See the data API page for a description of each available endpoint.
I’m not sure what exact kind of data you are looking for, but this script should help you get started.
###############################################
# CollegeBasketballData.com — MTSU games, last + current season
# Auth: Authorization: Bearer <token> (required) [ref: official client]
# Endpoints used: /teams, /games [ref: Swagger UI]
###############################################
###############################################
# Install and load required packages
###############################################
# Packages the rest of the script calls into. Each one is attached; a
# package that cannot be attached is installed with install.packages()
# and then attached.
needed_packages <- c("httr", "jsonlite", "dplyr", "stringr", "tibble", "purrr")
for (pkg in needed_packages) {
  # require() returns TRUE when the package was attached successfully,
  # in which case there is nothing more to do for this package.
  if (require(pkg, character.only = TRUE)) {
    next
  }
  install.packages(pkg, dependencies = TRUE)
  library(pkg, character.only = TRUE)
}
###############################################
# Configuration
###############################################
# Root address of the API; request paths are appended to this.
base_url <- "https://api.collegebasketballdata.com"
# <<< PASTE YOUR BEARER TOKEN BETWEEN THE QUOTES (keep the quote marks) >>>
api_key <- "PASTE_YOUR_API_KEY_HERE"
# Seasons are identified by the year in which they end.
current_season <- 2025 # 2024–25 season
last_season <- 2024 # 2023–24 season
###############################################
# Low-level GET helper (Bearer token auth)
# Doc note: API expects Bearer token auth. [ref: official client]
###############################################
# Send an authenticated GET request to the API and parse the JSON reply.
#
# Args:
#   path:  endpoint path combined with the global `base_url`.
#   query: named list of query-string parameters (default: none).
#
# Returns:
#   The response body parsed by jsonlite::fromJSON(flatten = TRUE), or an
#   empty tibble when the response body is an empty string.
#
# Stops with an error on any HTTP status >= 400; 401 and 403 get their own
# messages, other failures include the status code and the response body.
# Reads the global `api_key` for the Bearer token.
cbdd_get <- function(path, query = list()) {
  request_url <- httr::modify_url(base_url, path = path, query = query)
  auth_header <- httr::add_headers(Authorization = paste("Bearer", api_key))
  response <- httr::GET(request_url, auth_header)
  code <- httr::status_code(response)

  if (code == 401) {
    stop(
      "401 Unauthorized. Confirm 'Authorization: Bearer <token>' and that your token is active."
    )
  }
  if (code == 403) {
    stop("403 Forbidden. Your key may lack access to this endpoint.")
  }
  if (code >= 400) {
    # Reading the error body is best-effort; fall back to an empty string.
    body_text <- tryCatch(
      httr::content(response, "text", encoding = "UTF-8"),
      error = function(e) ""
    )
    stop(sprintf("Request failed (%s). Body: %s", code, body_text))
  }

  payload <- httr::content(response, "text", encoding = "UTF-8")
  if (nzchar(payload)) {
    jsonlite::fromJSON(payload, flatten = TRUE)
  } else {
    tibble::tibble()
  }
}
###############################################
# Utility: serialize list-columns for CSV writing
###############################################
# Convert every list-column of a data frame into a character column of
# JSON strings so the result can be written as a flat CSV.
#
# Args:
#   df: a data frame; any other object is returned unchanged.
#
# Returns:
#   `df` with each list-column replaced by JSON text. Cells that are NULL
#   or zero-length lists become NA_character_.
serialize_list_cols <- function(df) {
  if (!is.data.frame(df)) {
    return(df)
  }

  # Encode a single cell of a list-column.
  encode_cell <- function(cell) {
    is_empty <- is.null(cell) || (is.list(cell) && length(cell) == 0)
    if (is_empty) {
      return(NA_character_)
    }
    jsonlite::toJSON(cell, auto_unbox = TRUE, null = "null")
  }

  dplyr::mutate(
    df,
    dplyr::across(
      where(is.list),
      function(column) purrr::map_chr(column, encode_cell)
    )
  )
}
###############################################
# 1) Resolve Middle Tennessee from /teams (defensive)
# Endpoint: /teams (Swagger docs)
###############################################
# Download the full team list and normalise its column names to lower case
# so the name-field lookup below does not depend on the API's casing.
teams <- cbdd_get("/teams")
teams_tbl <- tibble::as_tibble(teams)
names(teams_tbl) <- tolower(names(teams_tbl))

# Columns that might hold a team's name, in order of preference.
candidate_fields <- c(
  "name", "school", "team", "displayname",
  "shortname", "mascot", "nickname", "full_name"
)
existing_fields <- intersect(candidate_fields, names(teams_tbl))
if (length(existing_fields) == 0) {
  message("Available columns in /teams:")
  print(names(teams_tbl))
  stop("No expected name-like columns present; cannot resolve team.")
}

# Keep rows where any name-like column matches one of the patterns
# (case-insensitive), then take the first such row.
patterns <- c("middle tenn", "middle\\s+tennessee", "blue raiders")
mtsu_row <- dplyr::filter(
  teams_tbl,
  dplyr::if_any(
    dplyr::all_of(existing_fields),
    function(column) {
      stringr::str_detect(
        tolower(as.character(column)),
        paste(patterns, collapse = "|")
      )
    }
  )
)
mtsu_row <- dplyr::slice(mtsu_row, 1)

if (nrow(mtsu_row) == 0) {
  message("Could not auto-match Middle Tennessee. Sample name fields:")
  print(utils::head(teams_tbl[, existing_fields, drop = FALSE], 10))
  stop("Please set `team_filter_value` manually to the exact team string the API expects.")
}
# Choose a usable value to pass as the 'team' query.
# Walk the available name-like columns in candidate_fields order and take
# the first one whose value in the matched row is a non-missing, non-empty
# string.
priority <- intersect(candidate_fields, existing_fields)
team_filter_value <- NA_character_
for (fld in priority) {
  val <- as.character(mtsu_row[[fld]][1])
  # nzchar(NA) is TRUE, so missing values must be excluded explicitly;
  # otherwise an NA field would be accepted as the team name.
  if (length(val) == 1 && !is.na(val) && nzchar(val)) {
    team_filter_value <- val
    break
  }
}
# team_filter_value is still NA when no field qualified. Test with is.na():
# !nzchar(NA) is FALSE and would let the script continue with team = NA.
if (is.na(team_filter_value)) {
  stop("Matched team row, but no usable value extracted.")
}
message(sprintf("Resolved team value for /games query as: %s", team_filter_value))
###############################################
# 2) Pull /games for last season and current season
# Endpoint: /games (Swagger docs)
###############################################
# Request one season of games for one team from the /games endpoint.
#
# Args:
#   team_name:   value sent as the `team` query parameter.
#   season_year: value sent as the `season` query parameter.
#
# Returns:
#   Whatever cbdd_get() returns for that request.
get_team_games <- function(team_name, season_year) {
  params <- list(team = team_name, season = season_year)
  cbdd_get("/games", query = params)
}
# Download each configured season's games for the resolved team.
games_last <- get_team_games(
  team_name = team_filter_value,
  season_year = last_season
)
games_current <- get_team_games(
  team_name = team_filter_value,
  season_year = current_season
)
###############################################
# 3) Combine, make CSV-safe, preview, and save
###############################################
# Stack both seasons into one data frame.
all_games_raw <- dplyr::bind_rows(games_last, games_current)

# Build the output file names from the configured seasons so they stay
# accurate when last_season / current_season are changed. With the default
# 2024 / 2025 configuration these are the same names the script has always
# written.
file_stem <- sprintf("mtsu_games_%s_%s", last_season, current_season)
raw_json_file <- paste0(file_stem, "_raw.json")
outfile <- paste0(file_stem, ".csv")

# Save raw JSON (nested columns are written as-is)
jsonlite::write_json(
  all_games_raw,
  raw_json_file,
  pretty = FALSE,
  auto_unbox = TRUE
)

# CSV-safe version (list-cols serialized to JSON strings)
all_games_csv <- serialize_list_cols(all_games_raw)

# Preview a few common columns if present (case-insensitive); when none of
# them exist, fall back to showing every column.
preview_candidates <- c(
  "gameid", "game_id",
  "startdate", "start_date",
  "hometeam", "home_team",
  "homescore", "home_score",
  "awayteam", "away_team",
  "awayscore", "away_score",
  "status", "season"
)
sel <- names(all_games_csv)[tolower(names(all_games_csv)) %in% preview_candidates]
preview_cols <- if (length(sel) > 0) sel else names(all_games_csv)
cat("\nPreview of MTSU games (last + current season):\n")
print(utils::head(all_games_csv[, preview_cols, drop = FALSE], 20))

# Write CSV
utils::write.csv(all_games_csv, outfile, row.names = FALSE)
cat(
  sprintf(
    "\nSaved %d rows to '%s'\nAlso saved raw JSON to '%s'\n",
    nrow(all_games_csv),
    outfile,
    raw_json_file
  )
)