library(pacman)
p_load(FITfileR)
p_load(tmap)
p_load(tmaptools)
p_load(dplyr)
p_load(tibble)
p_load(data.table)
p_load(purrr)
p_load(sf)
# FITfile_extraction_upload
This script allows extracting geodata and relevant meta-data from FIT files obtained through a Strava bulk download.
Libraries
Function: Process a single FIT file
This function extracts variables from the messages “records” and “session” of a fit file and binds them together into one df.
Crucial variables are required: if any of them is missing from the "records" message, the file is omitted. Optional variables are included when they are present; a file that lacks them is still processed.
Crucial and optional variables are defined by the expressions `crucial_vars <- c("timestamp", "position_lat", "position_long")` and `optional_vars <- c("enhanced_altitude", "gps_accuracy", "altitude", "grade", "distance", "cadence", "speed", "enhanced_speed", "ascent", "descent")`.
In my case, some files couldn’t be read by FITfileR. These will be omitted by the function.
#' Process a single FIT file
#'
#' Reads one FIT file, extracts the per-record geodata and tags every row with
#' the sport metadata taken from the first "session" message of the file.
#'
#' @param file_path Path to the FIT file.
#' @return A data frame with the crucial columns (`timestamp`, `position_lat`,
#'   `position_long`), whichever optional columns are present,
#'   `sequence_number`, `session_id`, `sport`, `sub_sport` and `file_name`;
#'   or `NULL` (after a warning) when the file cannot be read, its records
#'   cannot be processed, a crucial column is missing, or it has no usable
#'   session data.
process_fit_file <- function(file_path) {
  file_name <- basename(file_path)

  # Define crucial and optional variables
  crucial_vars <- c("timestamp", "position_lat", "position_long")
  optional_vars <- c(
    "enhanced_altitude", "gps_accuracy", "altitude", "grade", "distance",
    "cadence", "speed", "enhanced_speed", "ascent", "descent"
  )

  # Every fallible step hands back the condition object instead of raising,
  # so each failure can be turned into "warn and skip this file" right here.
  fit <- tryCatch(readFitFile(file_path), error = function(e) e)
  if (inherits(fit, "error")) {
    warning(paste("Error reading file:", file_name, ":", conditionMessage(fit)))
    return(NULL)
  }

  record_data <- tryCatch(records(fit), error = function(e) e)
  if (inherits(record_data, "error")) {
    warning(paste("Error processing records in:", file_name))
    return(NULL)
  }

  # The records arrive either as one data frame or as a list that has to be
  # flattened; both shapes are normalised to a single table.
  if (is.data.frame(record_data)) {
    record_shape <- "df"
    record_table <- record_data
  } else if (is.list(record_data)) {
    record_shape <- "list"
    record_table <- tryCatch(
      # fill = TRUE pads columns that are absent from some list elements
      tibble::as_tibble(data.table::rbindlist(record_data, fill = TRUE)),
      error = function(e) e
    )
    if (inherits(record_table, "error")) {
      # Return from the function itself: falling through with an empty
      # result would crash in the mutate() call further down.
      warning(paste("Error processing records in:", file_name))
      return(NULL)
    }
  } else {
    warning(paste("Skipping file (invalid record data):", file_name))
    return(NULL)
  }

  # A file without position/time information is useless for mapping
  missing_vars <- setdiff(crucial_vars, names(record_table))
  if (length(missing_vars) > 0) {
    warning(paste(
      paste0(
        "Skipping file (missing crucial variables in record_data ",
        record_shape, ":"
      ),
      paste(missing_vars, collapse = ", "),
      "):",
      file_name
    ))
    return(NULL)
  }

  geo_data <- record_table %>%
    select(any_of(c(crucial_vars, optional_vars))) %>%
    mutate(sequence_number = row_number())

  # Extract session data; a missing session message or missing columns must
  # skip the file rather than abort the caller.
  session <- tryCatch(
    getMessagesByType(fit, message_type = "session") %>%
      as.data.frame() %>%
      select(timestamp, sport, sub_sport) %>%
      rename(session_id = timestamp),
    error = function(e) e
  )
  if (inherits(session, "error")) {
    warning(paste(
      "Skipping file (unreadable session data):", file_name, ":",
      conditionMessage(session)
    ))
    return(NULL)
  }
  if (nrow(session) == 0) {
    warning(paste("Skipping file (no session data):", file_name))
    return(NULL)
  }
  session <- session[1, ] # Only the first session row is used

  # Attach the session metadata and the source file to every record row
  geo_data %>%
    mutate(
      session_id = session$session_id,
      sport = session$sport,
      sub_sport = session$sub_sport,
      file_name = file_name
    )
}
# Single file extraction
fit_processed <- process_fit_file("Data/10011315142.fit")
# Function: Process a folder with FIT files
This function takes a folder of FIT files as input, extracts the crucial and optional variables defined above from each file, and binds the results together into one df.
#' Process a folder of FIT files
#'
#' Runs `process_fit_file()` on every file in `folder_path` whose name ends in
#' `.fit` (matched case-insensitively, so `.FIT` is picked up too) and
#' row-binds the per-file results.
#'
#' @param folder_path Path to the folder that holds the FIT files.
#' @return One data frame with the rows of all successfully processed files,
#'   or `NULL` (after a warning) when no file yielded a result.
process_fit_folder <- function(folder_path) {
  fit_files <- list.files(
    folder_path,
    pattern = "\\.fit$",
    full.names = TRUE,
    ignore.case = TRUE
  )

  results <- lapply(fit_files, function(file) {
    message(paste("Processing:", basename(file))) # Print progress
    # One bad file must not abort the whole batch: turn any unexpected error
    # into a warning and treat the file as skipped.
    tryCatch(
      process_fit_file(file),
      error = function(e) {
        warning(paste(
          "Skipping file (unexpected error):", basename(file), ":",
          conditionMessage(e)
        ))
        NULL
      }
    )
  })

  # Remove NULL results (skipped files)
  results <- Filter(Negate(is.null), results)
  if (length(results) == 0) {
    warning("No valid FIT files processed.")
    return(NULL)
  }

  # Combine all data frames into one
  dplyr::bind_rows(results)
}
# Apply to folder
# Process every FIT file found in the relative "Data/" folder; fit_data is
# NULL when process_fit_folder() ends up with no processed file.
fit_data <- process_fit_folder("Data/")