MNL Data Prep

This R Markdown document demonstrates how to expand a crash-level dataset so that each original crash record is repeated three times, corresponding to the three severity categories:

The workflow includes:

  1. creating a small example dataset,
  2. expanding each row three times,
  3. generating severity indicator variables,
  4. providing an alternative base R approach, and
  5. reading from a CSV file and exporting the expanded output.

Load Required Packages

library(dplyr)
library(tidyr)
library(DT)
library(tidytable)

Example 1: Small Illustrative Dataset

# Create a small example dataset
df <- data.frame(
  Crash_ID = c(1,2,3,4,5,6,7,8,9,10,25),
  Cluster = c(1,1,3,1,3,3,3,3,3,3,3),
  Crash_Sev = c("BC","BC","O","O","BC","BC","BC","BC","BC","BC","KA"),
  stringsAsFactors = FALSE
)

# Display original data
df
##    Crash_ID Cluster Crash_Sev
## 1         1       1        BC
## 2         2       1        BC
## 3         3       3         O
## 4         4       1         O
## 5         5       3        BC
## 6         6       3        BC
## 7         7       3        BC
## 8         8       3        BC
## 9         9       3        BC
## 10       10       3        BC
## 11       25       3        KA
# Convert crash severity to numeric category
# O = 1, BC = 2, KA = 3
df <- df %>%
  mutate(
    Crash_Sev_Cat = case_when(
      Crash_Sev == "O"  ~ 1,
      Crash_Sev == "BC" ~ 2,
      Crash_Sev == "KA" ~ 3,
      TRUE ~ NA_real_
    )
  )

# Repeat each crash record 3 times

##################################################
####################### METHOD 01
##################################################
df2 <- df %>%
  crossing(CrSev = 1:3) %>%
  mutate(
    # Indicator for whether the candidate severity matches the observed severity
    Severity = if_else(CrSev == Crash_Sev_Cat, 1, 0),

    # Dummy variables for the three severity levels
    InO  = if_else(CrSev == 1, 1, 0),
    InBC = if_else(CrSev == 2, 1, 0),
    InKA = if_else(CrSev == 3, 1, 0)
  ) %>%
  arrange(Crash_ID, CrSev)

# Display expanded data
print(df2, n = Inf)
## # A tidytable: 33 × 9
##    Crash_ID Cluster Crash_Sev Crash_Sev_Cat CrSev Severity   InO  InBC  InKA
##       <dbl>   <dbl> <chr>             <dbl> <int>    <dbl> <dbl> <dbl> <dbl>
##  1        1       1 BC                    2     1        0     1     0     0
##  2        1       1 BC                    2     2        1     0     1     0
##  3        1       1 BC                    2     3        0     0     0     1
##  4        2       1 BC                    2     1        0     1     0     0
##  5        2       1 BC                    2     2        1     0     1     0
##  6        2       1 BC                    2     3        0     0     0     1
##  7        3       3 O                     1     1        1     1     0     0
##  8        3       3 O                     1     2        0     0     1     0
##  9        3       3 O                     1     3        0     0     0     1
## 10        4       1 O                     1     1        1     1     0     0
## 11        4       1 O                     1     2        0     0     1     0
## 12        4       1 O                     1     3        0     0     0     1
## 13        5       3 BC                    2     1        0     1     0     0
## 14        5       3 BC                    2     2        1     0     1     0
## 15        5       3 BC                    2     3        0     0     0     1
## 16        6       3 BC                    2     1        0     1     0     0
## 17        6       3 BC                    2     2        1     0     1     0
## 18        6       3 BC                    2     3        0     0     0     1
## 19        7       3 BC                    2     1        0     1     0     0
## 20        7       3 BC                    2     2        1     0     1     0
## 21        7       3 BC                    2     3        0     0     0     1
## 22        8       3 BC                    2     1        0     1     0     0
## 23        8       3 BC                    2     2        1     0     1     0
## 24        8       3 BC                    2     3        0     0     0     1
## 25        9       3 BC                    2     1        0     1     0     0
## 26        9       3 BC                    2     2        1     0     1     0
## 27        9       3 BC                    2     3        0     0     0     1
## 28       10       3 BC                    2     1        0     1     0     0
## 29       10       3 BC                    2     2        1     0     1     0
## 30       10       3 BC                    2     3        0     0     0     1
## 31       25       3 KA                    3     1        0     1     0     0
## 32       25       3 KA                    3     2        0     0     1     0
## 33       25       3 KA                    3     3        1     0     0     1
##################################################
####################### METHOD 02
##################################################

df2 <- df %>%
  mutate(
    Crash_Sev_Cat = dplyr::case_when(
      Crash_Sev == "O"  ~ 1,
      Crash_Sev == "BC" ~ 2,
      Crash_Sev == "KA" ~ 3,
      TRUE ~ NA_real_
    )
  ) %>%
  tidyr::crossing(CrSev = 1:3) %>%
  mutate(
    Severity = dplyr::if_else(CrSev == Crash_Sev_Cat, 1, 0),
    InO  = dplyr::if_else(CrSev == 1, 1, 0),
    InBC = dplyr::if_else(CrSev == 2, 1, 0),
    InKA = dplyr::if_else(CrSev == 3, 1, 0)
  ) %>%
  arrange(Crash_ID, CrSev)
df2
## # A tidytable: 33 × 9
##    Crash_ID Cluster Crash_Sev Crash_Sev_Cat CrSev Severity   InO  InBC  InKA
##       <dbl>   <dbl> <chr>             <dbl> <int>    <dbl> <dbl> <dbl> <dbl>
##  1        1       1 BC                    2     1        0     1     0     0
##  2        1       1 BC                    2     2        1     0     1     0
##  3        1       1 BC                    2     3        0     0     0     1
##  4        2       1 BC                    2     1        0     1     0     0
##  5        2       1 BC                    2     2        1     0     1     0
##  6        2       1 BC                    2     3        0     0     0     1
##  7        3       3 O                     1     1        1     1     0     0
##  8        3       3 O                     1     2        0     0     1     0
##  9        3       3 O                     1     3        0     0     0     1
## 10        4       1 O                     1     1        1     1     0     0
## # ℹ 23 more rows
print(df2, n = Inf)
## # A tidytable: 33 × 9
##    Crash_ID Cluster Crash_Sev Crash_Sev_Cat CrSev Severity   InO  InBC  InKA
##       <dbl>   <dbl> <chr>             <dbl> <int>    <dbl> <dbl> <dbl> <dbl>
##  1        1       1 BC                    2     1        0     1     0     0
##  2        1       1 BC                    2     2        1     0     1     0
##  3        1       1 BC                    2     3        0     0     0     1
##  4        2       1 BC                    2     1        0     1     0     0
##  5        2       1 BC                    2     2        1     0     1     0
##  6        2       1 BC                    2     3        0     0     0     1
##  7        3       3 O                     1     1        1     1     0     0
##  8        3       3 O                     1     2        0     0     1     0
##  9        3       3 O                     1     3        0     0     0     1
## 10        4       1 O                     1     1        1     1     0     0
## 11        4       1 O                     1     2        0     0     1     0
## 12        4       1 O                     1     3        0     0     0     1
## 13        5       3 BC                    2     1        0     1     0     0
## 14        5       3 BC                    2     2        1     0     1     0
## 15        5       3 BC                    2     3        0     0     0     1
## 16        6       3 BC                    2     1        0     1     0     0
## 17        6       3 BC                    2     2        1     0     1     0
## 18        6       3 BC                    2     3        0     0     0     1
## 19        7       3 BC                    2     1        0     1     0     0
## 20        7       3 BC                    2     2        1     0     1     0
## 21        7       3 BC                    2     3        0     0     0     1
## 22        8       3 BC                    2     1        0     1     0     0
## 23        8       3 BC                    2     2        1     0     1     0
## 24        8       3 BC                    2     3        0     0     0     1
## 25        9       3 BC                    2     1        0     1     0     0
## 26        9       3 BC                    2     2        1     0     1     0
## 27        9       3 BC                    2     3        0     0     0     1
## 28       10       3 BC                    2     1        0     1     0     0
## 29       10       3 BC                    2     2        1     0     1     0
## 30       10       3 BC                    2     3        0     0     0     1
## 31       25       3 KA                    3     1        0     1     0     0
## 32       25       3 KA                    3     2        0     0     1     0
## 33       25       3 KA                    3     3        1     0     0     1
##################################################
####################### METHOD 03
##################################################

df$Crash_Sev_Cat <- c(O = 1, BC = 2, KA = 3)[df$Crash_Sev]

df2 <- df[rep(seq_len(nrow(df)), each = 3), ]
df2$CrSev <- rep(1:3, times = nrow(df))
df2$Severity <- ifelse(df2$CrSev == df2$Crash_Sev_Cat, 1, 0)
df2$InO  <- ifelse(df2$CrSev == 1, 1, 0)
df2$InBC <- ifelse(df2$CrSev == 2, 1, 0)
df2$InKA <- ifelse(df2$CrSev == 3, 1, 0)

row.names(df2) <- NULL
print(df2, n = Inf)
## # A tidytable: 33 × 9
##    Crash_ID Cluster Crash_Sev Crash_Sev_Cat CrSev Severity   InO  InBC  InKA
##       <dbl>   <dbl> <chr>             <dbl> <int>    <dbl> <dbl> <dbl> <dbl>
##  1        1       1 BC                    2     1        0     1     0     0
##  2        1       1 BC                    2     2        1     0     1     0
##  3        1       1 BC                    2     3        0     0     0     1
##  4        2       1 BC                    2     1        0     1     0     0
##  5        2       1 BC                    2     2        1     0     1     0
##  6        2       1 BC                    2     3        0     0     0     1
##  7        3       3 O                     1     1        1     1     0     0
##  8        3       3 O                     1     2        0     0     1     0
##  9        3       3 O                     1     3        0     0     0     1
## 10        4       1 O                     1     1        1     1     0     0
## 11        4       1 O                     1     2        0     0     1     0
## 12        4       1 O                     1     3        0     0     0     1
## 13        5       3 BC                    2     1        0     1     0     0
## 14        5       3 BC                    2     2        1     0     1     0
## 15        5       3 BC                    2     3        0     0     0     1
## 16        6       3 BC                    2     1        0     1     0     0
## 17        6       3 BC                    2     2        1     0     1     0
## 18        6       3 BC                    2     3        0     0     0     1
## 19        7       3 BC                    2     1        0     1     0     0
## 20        7       3 BC                    2     2        1     0     1     0
## 21        7       3 BC                    2     3        0     0     0     1
## 22        8       3 BC                    2     1        0     1     0     0
## 23        8       3 BC                    2     2        1     0     1     0
## 24        8       3 BC                    2     3        0     0     0     1
## 25        9       3 BC                    2     1        0     1     0     0
## 26        9       3 BC                    2     2        1     0     1     0
## 27        9       3 BC                    2     3        0     0     0     1
## 28       10       3 BC                    2     1        0     1     0     0
## 29       10       3 BC                    2     2        1     0     1     0
## 30       10       3 BC                    2     3        0     0     0     1
## 31       25       3 KA                    3     1        0     1     0     0
## 32       25       3 KA                    3     2        0     0     1     0
## 33       25       3 KA                    3     3        1     0     0     1

Example 2: Any Dataset

# Read CSV file
# Replace "Moto4.csv" with your file name 
df_csv <- read.csv("Moto4.csv", stringsAsFactors = FALSE)


datatable(
  df_csv, extensions = 'Buttons', options = list(
    dom = 'Bfrtip',
    buttons = c('csv', 'excel')
  )
)
# Display first few rows
head(df_csv)
##          CRASH_VEH_NUM1 SpLim Align Eject Access Road Age Int Injury
## 1 LA14_1310110334237182     0     0     1      0    0   0   0      3
## 2 LA14_1310200250243592     0     0     1      0    0   0   1      3
## 3 LA14_1401011516425592     1     0     0      0    1   0   0      3
## 4 LA14_1401030944055782     0     0     0      1    0   0   1      3
## 5 LA14_1401031918472811     0     0     0      0    0   0   1      3
## 6 LA14_1401050857473642     0     0     1      0    1   0   1      3
# Considering your dataset already has a numeric severity column such as Injury:
# 1 = O, 2 = BC, 3 = KA

df_expanded <- df_csv %>%
  crossing(CrSev = 1:3) %>%
  mutate(
    # Severity equals 1 when CrSev matches the observed severity category
    Severity = if_else(CrSev == Injury, 1, 0),
    
    # Severity dummy variables
    InO  = if_else(CrSev == 1, 1, 0),
    InBC = if_else(CrSev == 2, 1, 0),
    InKA = if_else(CrSev == 3, 1, 0)
  ) %>%
  arrange(CRASH_VEH_NUM1, CrSev)

# Display first few rows of expanded data
print(df_expanded, n = 100, width = Inf)
## # A tidytable: 35,799 × 14
##     CRASH_VEH_NUM1        SpLim Align Eject Access  Road   Age   Int Injury
##     <chr>                 <int> <int> <int>  <int> <int> <int> <int>  <int>
##   1 LA14_1310110334237182     0     0     1      0     0     0     0      3
##   2 LA14_1310110334237182     0     0     1      0     0     0     0      3
##   3 LA14_1310110334237182     0     0     1      0     0     0     0      3
##   4 LA14_1310200250243592     0     0     1      0     0     0     1      3
##   5 LA14_1310200250243592     0     0     1      0     0     0     1      3
##   6 LA14_1310200250243592     0     0     1      0     0     0     1      3
##   7 LA14_1401011516425592     1     0     0      0     1     0     0      3
##   8 LA14_1401011516425592     1     0     0      0     1     0     0      3
##   9 LA14_1401011516425592     1     0     0      0     1     0     0      3
##  10 LA14_1401030944055782     0     0     0      1     0     0     1      3
##  11 LA14_1401030944055782     0     0     0      1     0     0     1      3
##  12 LA14_1401030944055782     0     0     0      1     0     0     1      3
##  13 LA14_1401031918472811     0     0     0      0     0     0     1      3
##  14 LA14_1401031918472811     0     0     0      0     0     0     1      3
##  15 LA14_1401031918472811     0     0     0      0     0     0     1      3
##  16 LA14_1401050857473642     0     0     1      0     1     0     1      3
##  17 LA14_1401050857473642     0     0     1      0     1     0     1      3
##  18 LA14_1401050857473642     0     0     1      0     1     0     1      3
##  19 LA14_1401051443077561     0     0     0      0     1     1     1      3
##  20 LA14_1401051443077561     0     0     0      0     1     1     1      3
##  21 LA14_1401051443077561     0     0     0      0     1     1     1      3
##  22 LA14_1401071126026871     1     0     1      1     0     0     0      2
##  23 LA14_1401071126026871     1     0     1      1     0     0     0      2
##  24 LA14_1401071126026871     1     0     1      1     0     0     0      2
##  25 LA14_1401071719483421     1     0     0      0     1     0     0      2
##  26 LA14_1401071719483421     1     0     0      0     1     0     0      2
##  27 LA14_1401071719483421     1     0     0      0     1     0     0      2
##  28 LA14_1401081314590931     0     0     1      1     0     1     1      3
##  29 LA14_1401081314590931     0     0     1      1     0     1     1      3
##  30 LA14_1401081314590931     0     0     1      1     0     1     1      3
##  31 LA14_1401081924345451     0     0     1      0     1     1     0      3
##  32 LA14_1401081924345451     0     0     1      0     1     1     0      3
##  33 LA14_1401081924345451     0     0     1      0     1     1     0      3
##  34 LA14_1401091542368501     1     1     0      0     0     0     0      2
##  35 LA14_1401091542368501     1     1     0      0     0     0     0      2
##  36 LA14_1401091542368501     1     1     0      0     0     0     0      2
##  37 LA14_1401091927136482     1     0     0      0     1     0     0      2
##  38 LA14_1401091927136482     1     0     0      0     1     0     0      2
##  39 LA14_1401091927136482     1     0     0      0     1     0     0      2
##  40 LA14_1401101206117652     0     0     1      0     0     0     1      3
##  41 LA14_1401101206117652     0     0     1      0     0     0     1      3
##  42 LA14_1401101206117652     0     0     1      0     0     0     1      3
##  43 LA14_1401101601337942     0     0     1      0     1     0     1      3
##  44 LA14_1401101601337942     0     0     1      0     1     0     1      3
##  45 LA14_1401101601337942     0     0     1      0     1     0     1      3
##  46 LA14_1401110941031251     0     0     1      0     0     0     1      3
##  47 LA14_1401110941031251     0     0     1      0     0     0     1      3
##  48 LA14_1401110941031251     0     0     1      0     0     0     1      3
##  49 LA14_1401111708118541     0     1     0      1     0     1     0      2
##  50 LA14_1401111708118541     0     1     0      1     0     1     0      2
##  51 LA14_1401111708118541     0     1     0      1     0     1     0      2
##  52 LA14_1401112117480001     0     0     1      0     1     0     1      2
##  53 LA14_1401112117480001     0     0     1      0     1     0     1      2
##  54 LA14_1401112117480001     0     0     1      0     1     0     1      2
##  55 LA14_1401121341001201     0     1     0      0     1     0     0      3
##  56 LA14_1401121341001201     0     1     0      0     1     0     0      3
##  57 LA14_1401121341001201     0     1     0      0     1     0     0      3
##  58 LA14_1401121704511402     0     0     0      0     1     0     1      2
##  59 LA14_1401121704511402     0     0     0      0     1     0     1      2
##  60 LA14_1401121704511402     0     0     0      0     1     0     1      2
##  61 LA14_1401122029344591     0     0     0      1     0     0     0      3
##  62 LA14_1401122029344591     0     0     0      1     0     0     0      3
##  63 LA14_1401122029344591     0     0     0      1     0     0     0      3
##  64 LA14_1401122121426961     0     1     1      0     1     1     0      3
##  65 LA14_1401122121426961     0     1     1      0     1     1     0      3
##  66 LA14_1401122121426961     0     1     1      0     1     1     0      3
##  67 LA14_1401130431114861     0     1     0      0     0     0     0      3
##  68 LA14_1401130431114861     0     1     0      0     0     0     0      3
##  69 LA14_1401130431114861     0     1     0      0     0     0     0      3
##  70 LA14_1401130717385311     0     0     0      0     0     0     0      3
##  71 LA14_1401130717385311     0     0     0      0     0     0     0      3
##  72 LA14_1401130717385311     0     0     0      0     0     0     0      3
##  73 LA14_1401131034096441     0     0     1      0     1     0     0      1
##  74 LA14_1401131034096441     0     0     1      0     1     0     0      1
##  75 LA14_1401131034096441     0     0     1      0     1     0     0      1
##  76 LA14_1401140826105372     0     0     0      1     0     0     1      2
##  77 LA14_1401140826105372     0     0     0      1     0     0     1      2
##  78 LA14_1401140826105372     0     0     0      1     0     0     1      2
##  79 LA14_1401141209356441     1     0     0      0     1     0     0      2
##  80 LA14_1401141209356441     1     0     0      0     1     0     0      2
##  81 LA14_1401141209356441     1     0     0      0     1     0     0      2
##  82 LA14_1401141703461091     0     0     0      1     0     1     1      2
##  83 LA14_1401141703461091     0     0     0      1     0     1     1      2
##  84 LA14_1401141703461091     0     0     0      1     0     1     1      2
##  85 LA14_1401142228121521     1     0     0      0     1     0     1      1
##  86 LA14_1401142228121521     1     0     0      0     1     0     1      1
##  87 LA14_1401142228121521     1     0     0      0     1     0     1      1
##  88 LA14_1401161929477541     1     1     0      0     1     0     0      3
##  89 LA14_1401161929477541     1     1     0      0     1     0     0      3
##  90 LA14_1401161929477541     1     1     0      0     1     0     0      3
##  91 LA14_1401171256051901     0     1     0      0     1     0     0      2
##  92 LA14_1401171256051901     0     1     0      0     1     0     0      2
##  93 LA14_1401171256051901     0     1     0      0     1     0     0      2
##  94 LA14_1401171512395931     0     0     1      0     0     1     0      2
##  95 LA14_1401171512395931     0     0     1      0     0     1     0      2
##  96 LA14_1401171512395931     0     0     1      0     0     1     0      2
##  97 LA14_1401171810153751     1     0     0      0     0     0     0      3
##  98 LA14_1401171810153751     1     0     0      0     0     0     0      3
##  99 LA14_1401171810153751     1     0     0      0     0     0     0      3
## 100 LA14_1401171911460001     0     0     1      0     0     0     0      2
##     CrSev Severity   InO  InBC  InKA
##     <int>    <dbl> <dbl> <dbl> <dbl>
##   1     1        0     1     0     0
##   2     2        0     0     1     0
##   3     3        1     0     0     1
##   4     1        0     1     0     0
##   5     2        0     0     1     0
##   6     3        1     0     0     1
##   7     1        0     1     0     0
##   8     2        0     0     1     0
##   9     3        1     0     0     1
##  10     1        0     1     0     0
##  11     2        0     0     1     0
##  12     3        1     0     0     1
##  13     1        0     1     0     0
##  14     2        0     0     1     0
##  15     3        1     0     0     1
##  16     1        0     1     0     0
##  17     2        0     0     1     0
##  18     3        1     0     0     1
##  19     1        0     1     0     0
##  20     2        0     0     1     0
##  21     3        1     0     0     1
##  22     1        0     1     0     0
##  23     2        1     0     1     0
##  24     3        0     0     0     1
##  25     1        0     1     0     0
##  26     2        1     0     1     0
##  27     3        0     0     0     1
##  28     1        0     1     0     0
##  29     2        0     0     1     0
##  30     3        1     0     0     1
##  31     1        0     1     0     0
##  32     2        0     0     1     0
##  33     3        1     0     0     1
##  34     1        0     1     0     0
##  35     2        1     0     1     0
##  36     3        0     0     0     1
##  37     1        0     1     0     0
##  38     2        1     0     1     0
##  39     3        0     0     0     1
##  40     1        0     1     0     0
##  41     2        0     0     1     0
##  42     3        1     0     0     1
##  43     1        0     1     0     0
##  44     2        0     0     1     0
##  45     3        1     0     0     1
##  46     1        0     1     0     0
##  47     2        0     0     1     0
##  48     3        1     0     0     1
##  49     1        0     1     0     0
##  50     2        1     0     1     0
##  51     3        0     0     0     1
##  52     1        0     1     0     0
##  53     2        1     0     1     0
##  54     3        0     0     0     1
##  55     1        0     1     0     0
##  56     2        0     0     1     0
##  57     3        1     0     0     1
##  58     1        0     1     0     0
##  59     2        1     0     1     0
##  60     3        0     0     0     1
##  61     1        0     1     0     0
##  62     2        0     0     1     0
##  63     3        1     0     0     1
##  64     1        0     1     0     0
##  65     2        0     0     1     0
##  66     3        1     0     0     1
##  67     1        0     1     0     0
##  68     2        0     0     1     0
##  69     3        1     0     0     1
##  70     1        0     1     0     0
##  71     2        0     0     1     0
##  72     3        1     0     0     1
##  73     1        1     1     0     0
##  74     2        0     0     1     0
##  75     3        0     0     0     1
##  76     1        0     1     0     0
##  77     2        1     0     1     0
##  78     3        0     0     0     1
##  79     1        0     1     0     0
##  80     2        1     0     1     0
##  81     3        0     0     0     1
##  82     1        0     1     0     0
##  83     2        1     0     1     0
##  84     3        0     0     0     1
##  85     1        1     1     0     0
##  86     2        0     0     1     0
##  87     3        0     0     0     1
##  88     1        0     1     0     0
##  89     2        0     0     1     0
##  90     3        1     0     0     1
##  91     1        0     1     0     0
##  92     2        1     0     1     0
##  93     3        0     0     0     1
##  94     1        0     1     0     0
##  95     2        1     0     1     0
##  96     3        0     0     0     1
##  97     1        0     1     0     0
##  98     2        0     0     1     0
##  99     3        1     0     0     1
## 100     1        0     1     0     0
## # ℹ 35,699 more rows
## write.csv(df_expanded, "df_expanded.csv")