A Different Perspective on the Website’s Race Results

This mostly compares the Time (gun time) and Net Time (chip time) positions.

Environment prep

The packages tidyverse and plotly are loaded, the former supplying a vast collection of useful tools for processing data, and the latter providing some plot dynamics for revealing additional data and user interaction.

library(tidyverse)
library(plotly)

Tidy and Wrangle or Wrangle and Tidy

Data were downloaded manually from https://www.racetecresults.com/results.aspx?CId=16247&RId=10055. As the first step after download, the Unix utility iconv was run in a MINGW64 terminal to convert the downloaded file from UTF-16LE to UTF-8, the standard encoding. Files encoded as UTF-16LE often cause processing to fail in R.

    iconv -f utf-16le -t utf-8 Export.csv > Exportutf8.csv

Expand the data with derived columns for analysis and presentation.

# Load the race export (the UTF-8 copy produced from the UTF-16LE download).
tenmiler <- readr::read_csv("Exportutf8.csv", show_col_types = FALSE)

# Pace as published in the export, parsed from its "%M:%S" text.
tenmiler$reportedpace <- hms::as_hms(as.POSIXct(tenmiler$Pace, format = "%M:%S"))

# Net (chip) pace: net time divided by 10 (presumably the 10 miles of the
# course, so this is time per mile).
tenmiler$netpace <- hms::as_hms(tenmiler$`Net Time` / 10)

# Average speed in miles per hour: 10 miles over the net time in hours.
# `Net Time` is already a duration (it is divided arithmetically above), so
# its length in seconds is read directly instead of being re-parsed as text.
net_seconds <- as.numeric(tenmiler$`Net Time`, units = "secs")
tenmiler$mph <- round(10 / net_seconds * 3600, digits = 1)

#
#  Runners I know, tagged and designated for identifying visually in plots
#
#  `mi gente` flags the known race numbers; miGender is the Gender value
#  prefixed with "mi" for those runners and the plain Gender otherwise.
#
tenmiler <- tenmiler %>%
  dplyr::mutate(
    `mi gente` = `Race No` %in% c(26, 57, 70, 71),
    miGender = ifelse(`mi gente`, paste0("mi", Gender), Gender)
  )

#
# Create a new finish order index for net time (chip time).
#
# The runners are sorted by Net Time and numbered 1..n in that order; the
# running row id is the net-time position. rowid_to_column() produces the
# index as an integer directly, so no character-to-integer conversion is
# needed afterwards. Runners with equal net times still receive distinct,
# consecutive positions (whatever order the sort leaves them in).
#
tenmiler_ntposorder <- tenmiler %>%
  dplyr::select(`Net Time`, `Race No`) %>%
  dplyr::arrange(`Net Time`) %>%
  tibble::rowid_to_column("nettimepos")

# Attach the new position to the main tibble, matching runners on Race No.
tenmiler <- tenmiler %>%
  dplyr::left_join(
    dplyr::select(tenmiler_ntposorder, `Race No`, nettimepos),
    by = "Race No"
  )


# Compare the reported position/time with the net-time (chip) equivalents:
#   poschanges - TRUE when the two positions differ
#   posdiff    - net-time position minus reported position
#   timediff   - net time minus reported time
# The comparison columns are then grouped together right after Pos.
tenmiler <- tenmiler %>%
  dplyr::mutate(
    poschanges = Pos != nettimepos,
    posdiff    = nettimepos - Pos,
    timediff   = `Net Time` - Time
  ) %>%
  dplyr::relocate(
    nettimepos, poschanges, posdiff, Time, `Net Time`, timediff,
    .after = Pos
  )

#
#  Build the multi-line label shown in the plotly pop-up for each runner:
#  one line per attribute, joined with newlines.
#
tenmiler <- tenmiler %>%
  dplyr::mutate(
    attribs = paste(
      Name,
      Category,
      paste("Position", Pos),
      paste("Net Time Pos", nettimepos),
      paste("Pace:", Pace),
      paste("Net time", `Net Time`, "H:M:S"),
      paste("Time", Time, "H:M:S"),
      paste(mph, "mph"),
      sep = "\n"
    )
  )

# Show the full enriched table, then only the rows of the tagged runners.
tenmiler
dplyr::filter(tenmiler, `mi gente`)

Pace by Time (Gun) Position for Gender

# Interactive scatter of net pace against the reported finishing position
# (Pos). Colour distinguishes gender (with tagged runners in their own
# groups); shape and size single out the tagged runners. The pop-up shows
# the pre-built attribs label.
(
  tenmiler %>%
    ggplot(aes(
      x = Pos,
      y = netpace,
      colour = miGender,
      shape = `mi gente`,
      size = `mi gente`
    )) +
    geom_point(aes(text = attribs), alpha = 0.5) +
    theme_minimal()
) %>%
  ggplotly(tooltip = "attribs")

Pace by Net Time (Chip) Position for Gender

# Interactive scatter of net pace against the net-time position
# (nettimepos). Colour distinguishes gender (with tagged runners in their
# own groups); shape and size single out the tagged runners. The pop-up
# shows the pre-built attribs label.
(
  tenmiler %>%
    ggplot(aes(
      x = nettimepos,
      y = netpace,
      colour = miGender,
      shape = `mi gente`,
      size = `mi gente`
    )) +
    geom_point(aes(text = attribs), alpha = 0.5) +
    theme_minimal()
) %>%
  ggplotly(tooltip = "attribs")

Density Distribution of Net Time

  # Overlaid, lightly filled density curves of net time, one per gender.
  tenmiler %>%
    ggplot(aes(x = `Net Time`, fill = Gender, colour = Gender)) +
    geom_density(alpha = 0.1)

Distribution of Net Time at Five Minute Intervals

# Overlapping (not stacked) histograms of net time per gender, using
# bins of 300 units on the Net Time axis (five minutes, in seconds).
ggplot(tenmiler, aes(x = `Net Time`, fill = Gender)) +
  geom_histogram(binwidth = 300, position = "identity", alpha = 0.5)