FITfile_extraction_upload

This script extracts geodata and relevant metadata from FIT files obtained through a Strava bulk download.

Libraries

library(pacman)
p_load(FITfileR)
p_load(tmap)
p_load(tmaptools)
p_load(dplyr)
p_load(tibble)
p_load(data.table)
p_load(purrr)
p_load(sf)

Function: Process a single FIT file

This function extracts variables from the “records” and “session” messages of a FIT file and combines them into one data frame.

Crucial variables are required: if any of them is missing from the “records” message, the file is omitted. Optional variables are included when they are present; a file that lacks them is still processed.

Crucial and optional variables are defined inside the function by the expressions `crucial_vars <- c("timestamp", "position_lat", "position_long")` and `optional_vars <- c("enhanced_altitude", "gps_accuracy", "altitude", "grade", "distance", "cadence", "speed", "enhanced_speed", "ascent", "descent")`.

In my case, some files couldn’t be read by FITfileR. These will be omitted by the function.

process_fit_file <- function(file_path) {
  # Read one FIT file and return its record data annotated with session
  # metadata.
  #
  # Args:
  #   file_path: Path to a single FIT file.
  #
  # Returns:
  #   A data frame holding the crucial columns (timestamp, position_lat,
  #   position_long), whichever optional columns exist in the file, and the
  #   added columns sequence_number, session_id, sport, sub_sport and
  #   file_name. Returns NULL, after emitting a warning, when the file is
  #   skipped because:
  #     * the file cannot be read,
  #     * its records cannot be extracted or flattened,
  #     * a crucial column is missing, or
  #     * session data is absent or cannot be extracted.
  #   The function never throws for these cases, so one bad file cannot
  #   abort a batch run.
  source_file <- basename(file_path)

  # Define crucial and optional variables
  crucial_vars <- c("timestamp", "position_lat", "position_long")
  optional_vars <- c(
    "enhanced_altitude", "gps_accuracy", "altitude", "grade", "distance",
    "cadence", "speed", "enhanced_speed", "ascent", "descent"
  )

  # A handler's return value only becomes the value of tryCatch(); it does
  # not leave this function. Every tryCatch() below is therefore followed by
  # an explicit check that performs the actual early return.
  fit <- tryCatch(
    readFitFile(file_path),
    error = function(e) {
      warning(paste("Error reading file:", source_file, ":", conditionMessage(e)))
      NULL
    }
  )
  if (is.null(fit)) {
    return(NULL)  # Skip this file
  }

  # Extract geolocation data safely. The condition object is used as the
  # failure sentinel so that a genuine NULL result still reaches the
  # "invalid record data" branch below.
  record_data <- tryCatch(
    records(fit),
    error = function(e) {
      warning(paste("Error processing records in:", source_file))
      e
    }
  )
  if (inherits(record_data, "condition")) {
    return(NULL)  # Skip this file
  }

  # Normalise the records to a single table. A data frame is used as is; a
  # plain list is row-bound, filling columns absent from some elements.
  if (is.data.frame(record_data)) {
    record_kind <- "df"
    record_table <- record_data
  } else if (is.list(record_data)) {
    record_kind <- "list"
    record_table <- tryCatch(
      record_data %>%
        data.table::rbindlist(fill = TRUE) %>%
        tibble::as_tibble(),
      error = function(e) {
        warning(paste("Error processing records in:", source_file))
        NULL
      }
    )
    if (is.null(record_table)) {
      return(NULL)  # Skip this file
    }
  } else {
    warning(paste("Skipping file (invalid record data):", source_file))
    return(NULL)
  }

  # All crucial variables must be present, otherwise the file is skipped
  missing_vars <- setdiff(crucial_vars, names(record_table))
  if (length(missing_vars) > 0) {
    warning(paste(
      paste0(
        "Skipping file (missing crucial variables in record_data ",
        record_kind, ":"
      ),
      paste(missing_vars, collapse = ", "),
      "):",
      source_file
    ))
    return(NULL)  # Skip this file
  }

  # Extract session data. Guarded because a missing session message or a
  # missing timestamp/sport/sub_sport column would otherwise throw.
  session <- tryCatch(
    getMessagesByType(fit, message_type = "session") %>%
      as.data.frame() %>%
      select(timestamp, sport, sub_sport) %>%
      rename(session_id = timestamp),
    error = function(e) {
      warning(paste(
        "Skipping file (session data could not be extracted):",
        source_file, ":", conditionMessage(e)
      ))
      NULL
    }
  )
  if (is.null(session)) {
    return(NULL)  # Skip this file
  }
  if (nrow(session) == 0) {
    warning(paste("Skipping file (no session data):", source_file))
    return(NULL)
  }
  session <- session[1, ]  # Only the first session row is used

  # Keep the relevant columns and attach the per-file metadata to every row
  record_table %>%
    select(any_of(c(crucial_vars, optional_vars))) %>%
    mutate(
      sequence_number = row_number(),
      session_id = session$session_id,
      sport = session$sport,
      sub_sport = session$sub_sport,
      file_name = source_file  # Track source file
    )
}

Single file extraction

fit_processed <- process_fit_file(file_path = "Data/10011315142.fit")  # one activity file

Function: Process a folder with FIT files

This function takes a folder of FIT files as input, extracts the crucial and optional variables defined above from each file, and binds the results together into one data frame.

process_fit_folder <- function(folder_path) {
  # Process every FIT file in a folder and stack the results.
  #
  # Args:
  #   folder_path: Directory to search (non-recursively) for files whose
  #     names end in ".fit".
  #
  # Returns:
  #   One data frame with the rows of all successfully processed files, or
  #   NULL (with a warning) when no file yields any data.
  fit_files <- list.files(folder_path, pattern = "\\.fit$", full.names = TRUE)

  results <- lapply(fit_files, function(file) {
    message(paste("Processing:", basename(file)))  # Print progress
    # Isolate failures per file: an unexpected error in one file is turned
    # into a warning so the files already processed are not lost.
    tryCatch(
      process_fit_file(file),
      error = function(e) {
        warning(paste(
          "Skipping file (unexpected error):",
          basename(file), ":", conditionMessage(e)
        ))
        NULL
      }
    )
  })

  # Remove NULL results (skipped files)
  results <- Filter(Negate(is.null), results)

  if (length(results) == 0) {
    warning("No valid FIT files processed.")
    return(NULL)
  }

  # Combine all data frames into one
  dplyr::bind_rows(results)
}

Apply to folder

fit_data <- process_fit_folder(folder_path = "Data/")  # all FIT files in Data/