library(readxl)
library(dplyr)
library(stringr)
library(forcats)
library(DT)
library(writexl)
library(tidyr)
data_dir <- "D:/DAS_CODE/Tools/data"
setwd(data_dir)
dat <- read_excel("Ped_Bike_Esc_2016_2024_All.xlsx", sheet = "Ped_Lim")
dim(dat)
## [1] 60741 81
names(dat)
## [1] "STATENAME" "YEAR" "CRASH_NUM1"
## [4] "CRASH_VEH_NUM1" "CRASH_VehPer_NUM1" "PER_TYPNAME"
## [7] "INJ_SEVNAME" "LATITUDENAME" "LONGITUDNAME"
## [10] "VE_TOTAL" "FATALS" "DRUNK_DR"
## [13] "HIT_RUNNAME_2" "WRK_ZONENAME" "REL_ROADNAME"
## [16] "WEATHER1NAME" "LGT_CONDNAME" "BODY_TYPNAME"
## [19] "RUR_URBNAME" "FUNC_SYSNAME" "PBAGENAME"
## [22] "PBSEXNAME" "PEDCTYPENAME" "PEDPOSNAME"
## [25] "MOTMANNAME" "PEDLEGNAME" "PEDSNRNAME"
## [28] "PEDCGPNAME" "INJ_SEVNAMEDr" "AGENAMEDr"
## [31] "SEXNAMEDr" "DRINKINGNAMEDr" "ROUTENAME"
## [34] "TYP_INTNAME" "DRINKINGNAME" "ALC_RESNAME"
## [37] "DRUGSNAME" "DRUGRES1NAME" "WORK_INJNAME"
## [40] "RACENAME" "LOCATIONNAME" "NUMOCCS_2"
## [43] "MAKENAME_2" "MAK_MODNAME_2" "MOD_YEAR_2"
## [46] "HAZ_INVNAME_2" "BUS_USENAME_2" "EMER_USENAME_2"
## [49] "TRAV_SPNAME_2" "UNDERIDENAME_2" "ROLLOVERNAME_2"
## [52] "DEFORMEDNAME_2" "VTRAFWAYNAME_2" "VNUM_LANNAME_2"
## [55] "VSPD_LIMNAME_2" "VALIGNNAME_2" "VPROFILENAME_2"
## [58] "VPAVETYPNAME_2" "VSURCONDNAME_2" "P_CRASH1NAME_2"
## [61] "ACC_TYPENAME_2" "L_STATUSNAME_2" "L_TYPENAME_2"
## [64] "CDL_STATNAME_2" "MDRDSTRDNAME_2" "DRIMPAIRNAME_2"
## [67] "MDRMANAVNAME_2" "MVISOBSCNAME_2" "INJ_SEVNAME_2"
## [70] "ALC_RESNAME_2" "DRUGSNAME_2" "COUNTYNAME"
## [73] "CITYNAME" "DAY" "MONTHNAME"
## [76] "DAY_WEEKNAME" "HOUR" "HOURNAME"
## [79] "MAN_COLLNAME" "WEATHERNAME" "SCH_BUSNAME"
dat1 <- dat[, c(1, 2, 3, 39, 8:32, 46:67, 76)]
dim(dat1)
## [1] 60741 52
names(dat1)
## [1] "STATENAME" "YEAR" "CRASH_NUM1" "WORK_INJNAME"
## [5] "LATITUDENAME" "LONGITUDNAME" "VE_TOTAL" "FATALS"
## [9] "DRUNK_DR" "HIT_RUNNAME_2" "WRK_ZONENAME" "REL_ROADNAME"
## [13] "WEATHER1NAME" "LGT_CONDNAME" "BODY_TYPNAME" "RUR_URBNAME"
## [17] "FUNC_SYSNAME" "PBAGENAME" "PBSEXNAME" "PEDCTYPENAME"
## [21] "PEDPOSNAME" "MOTMANNAME" "PEDLEGNAME" "PEDSNRNAME"
## [25] "PEDCGPNAME" "INJ_SEVNAMEDr" "AGENAMEDr" "SEXNAMEDr"
## [29] "DRINKINGNAMEDr" "HAZ_INVNAME_2" "BUS_USENAME_2" "EMER_USENAME_2"
## [33] "TRAV_SPNAME_2" "UNDERIDENAME_2" "ROLLOVERNAME_2" "DEFORMEDNAME_2"
## [37] "VTRAFWAYNAME_2" "VNUM_LANNAME_2" "VSPD_LIMNAME_2" "VALIGNNAME_2"
## [41] "VPROFILENAME_2" "VPAVETYPNAME_2" "VSURCONDNAME_2" "P_CRASH1NAME_2"
## [45] "ACC_TYPENAME_2" "L_STATUSNAME_2" "L_TYPENAME_2" "CDL_STATNAME_2"
## [49] "MDRDSTRDNAME_2" "DRIMPAIRNAME_2" "MDRMANAVNAME_2" "DAY_WEEKNAME"
datatable(
head(dat1, 25),
options = list(scrollX = TRUE, pageLength = 10),
caption = "Selected variables"
)
dat1 <- dat1 %>%
mutate(
age_num = as.numeric(str_extract(PBAGENAME, "\\d+")),
Ped_Age = case_when(
age_num <= 4 ~ "0-4",
age_num <= 9 ~ "5-9",
age_num <= 14 ~ "10-14",
age_num <= 20 ~ "15-20",
age_num <= 24 ~ "21-24",
age_num <= 34 ~ "25-34",
age_num <= 44 ~ "35-44",
age_num <= 54 ~ "45-54",
age_num <= 64 ~ "55-64",
age_num <= 74 ~ "65-74",
age_num >= 75 ~ "75+",
TRUE ~ "Unknown"
),
age_num1 = as.numeric(str_extract(AGENAMEDr, "\\d+")),
Dr_Age = case_when(
age_num1 <= 4 ~ "0-4",
age_num1 <= 9 ~ "5-9",
age_num1 <= 14 ~ "10-14",
age_num1 <= 20 ~ "15-20",
age_num1 <= 24 ~ "21-24",
age_num1 <= 34 ~ "25-34",
age_num1 <= 44 ~ "35-44",
age_num1 <= 54 ~ "45-54",
age_num1 <= 64 ~ "55-64",
age_num1 <= 74 ~ "65-74",
age_num1 >= 75 ~ "75+",
TRUE ~ "Unknown"
)
)
table(dat1$Ped_Age, useNA = "ifany")
##
## 0-4 10-14 15-20 21-24 25-34 35-44 45-54 5-9 55-64 65-74
## 552 667 2581 3077 9797 10098 9599 468 11021 7015
## 75+ Unknown
## 5130 736
table(dat1$Dr_Age, useNA = "ifany")
##
## 0-4 10-14 15-20 21-24 25-34 35-44 45-54 5-9 55-64 65-74
## 8 33 4178 5333 11846 9211 8105 9 7215 3924
## 75+ Unknown
## 1886 8993
dat1 <- dat1 %>%
mutate(
Day_Type = case_when(
DAY_WEEKNAME %in% c("Saturday", "Sunday") ~ "Weekend",
!is.na(DAY_WEEKNAME) ~ "Weekday",
TRUE ~ "Unknown"
)
)
table(dat1$Day_Type, useNA = "ifany")
##
## Weekday Weekend
## 42325 18416
dat2 <- dat1 %>%
mutate(
Veh_Type = case_when(
str_detect(BODY_TYPNAME, regex("sedan|coupe|hatchback|convertible|station wagon|automobile", ignore_case = TRUE)) ~ "Car",
str_detect(BODY_TYPNAME, regex("pickup|utility|light truck|cab chassis based", ignore_case = TRUE)) ~ "Light_Truck",
str_detect(BODY_TYPNAME, regex("truck-tractor|single-unit|heavy truck|medium/heavy|construction equipment|farm equipment|motorhome", ignore_case = TRUE)) ~ "Large_Truck",
str_detect(BODY_TYPNAME, regex("bus", ignore_case = TRUE)) ~ "Bus",
str_detect(BODY_TYPNAME, regex("van|minivan|step-van|walk-in van", ignore_case = TRUE)) ~ "Van",
str_detect(BODY_TYPNAME, regex("motorcycle|moped|scooter|motored cycle", ignore_case = TRUE)) ~ "Motorcycle",
str_detect(BODY_TYPNAME, regex("ATV|ATC|golf cart|off-highway|recreational|three-wheel|go-cart|fork-lift|street sweeper", ignore_case = TRUE)) ~ "Off_Road_Other",
is.na(BODY_TYPNAME) |
BODY_TYPNAME %in% c("NA", "Not Reported", "Unknown body type") |
str_detect(BODY_TYPNAME, regex("unknown|not reported", ignore_case = TRUE)) ~ "Unknown",
TRUE ~ "Off_Road_Other"
)
)
table(dat2$Veh_Type, useNA = "ifany")
##
## Bus Car Large_Truck Light_Truck Motorcycle
## 1205 24241 3394 22579 368
## Off_Road_Other Unknown Van
## 30 7036 1888
names(dat2)
## [1] "STATENAME" "YEAR" "CRASH_NUM1" "WORK_INJNAME"
## [5] "LATITUDENAME" "LONGITUDNAME" "VE_TOTAL" "FATALS"
## [9] "DRUNK_DR" "HIT_RUNNAME_2" "WRK_ZONENAME" "REL_ROADNAME"
## [13] "WEATHER1NAME" "LGT_CONDNAME" "BODY_TYPNAME" "RUR_URBNAME"
## [17] "FUNC_SYSNAME" "PBAGENAME" "PBSEXNAME" "PEDCTYPENAME"
## [21] "PEDPOSNAME" "MOTMANNAME" "PEDLEGNAME" "PEDSNRNAME"
## [25] "PEDCGPNAME" "INJ_SEVNAMEDr" "AGENAMEDr" "SEXNAMEDr"
## [29] "DRINKINGNAMEDr" "HAZ_INVNAME_2" "BUS_USENAME_2" "EMER_USENAME_2"
## [33] "TRAV_SPNAME_2" "UNDERIDENAME_2" "ROLLOVERNAME_2" "DEFORMEDNAME_2"
## [37] "VTRAFWAYNAME_2" "VNUM_LANNAME_2" "VSPD_LIMNAME_2" "VALIGNNAME_2"
## [41] "VPROFILENAME_2" "VPAVETYPNAME_2" "VSURCONDNAME_2" "P_CRASH1NAME_2"
## [45] "ACC_TYPENAME_2" "L_STATUSNAME_2" "L_TYPENAME_2" "CDL_STATNAME_2"
## [49] "MDRDSTRDNAME_2" "DRIMPAIRNAME_2" "MDRMANAVNAME_2" "DAY_WEEKNAME"
## [53] "age_num" "Ped_Age" "age_num1" "Dr_Age"
## [57] "Day_Type" "Veh_Type"
dat2 <- dat2[, -c(18, 27, 52, 53, 55, 15)]
dim(dat2)
## [1] 60741 52
names(dat2)
## [1] "STATENAME" "YEAR" "CRASH_NUM1" "WORK_INJNAME"
## [5] "LATITUDENAME" "LONGITUDNAME" "VE_TOTAL" "FATALS"
## [9] "DRUNK_DR" "HIT_RUNNAME_2" "WRK_ZONENAME" "REL_ROADNAME"
## [13] "WEATHER1NAME" "LGT_CONDNAME" "RUR_URBNAME" "FUNC_SYSNAME"
## [17] "PBSEXNAME" "PEDCTYPENAME" "PEDPOSNAME" "MOTMANNAME"
## [21] "PEDLEGNAME" "PEDSNRNAME" "PEDCGPNAME" "INJ_SEVNAMEDr"
## [25] "SEXNAMEDr" "DRINKINGNAMEDr" "HAZ_INVNAME_2" "BUS_USENAME_2"
## [29] "EMER_USENAME_2" "TRAV_SPNAME_2" "UNDERIDENAME_2" "ROLLOVERNAME_2"
## [33] "DEFORMEDNAME_2" "VTRAFWAYNAME_2" "VNUM_LANNAME_2" "VSPD_LIMNAME_2"
## [37] "VALIGNNAME_2" "VPROFILENAME_2" "VPAVETYPNAME_2" "VSURCONDNAME_2"
## [41] "P_CRASH1NAME_2" "ACC_TYPENAME_2" "L_STATUSNAME_2" "L_TYPENAME_2"
## [45] "CDL_STATNAME_2" "MDRDSTRDNAME_2" "DRIMPAIRNAME_2" "MDRMANAVNAME_2"
## [49] "Ped_Age" "Dr_Age" "Day_Type" "Veh_Type"
dat2 <- dat2 %>%
mutate(
spd_num = as.numeric(str_extract(VSPD_LIMNAME_2, "\\d+")),
PSL = case_when(
spd_num <= 25 ~ "25_or_less",
spd_num <= 35 ~ "30_35",
spd_num <= 45 ~ "40_45",
spd_num <= 55 ~ "50_55",
spd_num >= 60 ~ "60_plus",
TRUE ~ "Unknown"
)
)
table(dat2$PSL, useNA = "ifany")
##
## 25_or_less 30_35 40_45 50_55 60_plus Unknown
## 5427 14474 18063 9752 8789 4236
dat3 <- dat2[, -c(53)]
dim(dat3)
## [1] 60741 53
dat3 <- dat3 %>%
mutate(
across(
where(~ is.character(.) | is.factor(.)),
~ case_when(
is.na(.) ~ "Not Reported",
str_trim(as.character(.)) %in% c(
"NA", "N/A", "Unknown", "Reported as Unknown",
" Reported as Unknown",
"Reported as Unknown if Distracted ",
"Reported as Unknown if Impaired",
"Unknown body type", "Not Reported"
) ~ "Not Reported",
str_detect(as.character(.), regex("^unknown|not reported$", ignore_case = TRUE)) ~ "Not Reported",
TRUE ~ as.character(.)
)
)
)
cat_summary <- dat3 %>%
summarise(across(
where(~ is.character(.) | is.factor(.)),
~ n_distinct(., na.rm = TRUE)
)) %>%
pivot_longer(
everything(),
names_to = "variable",
values_to = "n_categories"
) %>%
arrange(desc(n_categories))
cat_summary
## # A tibble: 49 × 2
## variable n_categories
## <chr> <int>
## 1 CRASH_NUM1 59912
## 2 LONGITUDNAME 59162
## 3 LATITUDENAME 58976
## 4 PEDSNRNAME 179
## 5 TRAV_SPNAME_2 123
## 6 ACC_TYPENAME_2 76
## 7 PEDCTYPENAME 60
## 8 STATENAME 51
## 9 MDRDSTRDNAME_2 27
## 10 PEDCGPNAME 21
## # ℹ 39 more rows
datatable(
cat_summary,
options = list(pageLength = 25),
caption = "Number of categories after lumping"
)
keep_cols <- c("CRASH_NUM1", "Ped_Age", "Dr_Age", "Veh_Type", "LATITUDENAME", "LONGITUDNAME", "STATENAME")
dat3_lumped <- dat3 %>%
mutate(
across(
.cols = setdiff(
names(select(., where(~ is.character(.) | is.factor(.)))),
keep_cols
),
.fns = ~ if (n_distinct(., na.rm = TRUE) > 5) {
fct_lump_n(as.factor(.), n = 5, other_level = "Other")
} else {
as.factor(.)
}
)
)
dim(dat3_lumped)
## [1] 60741 53
category_summary <- data.frame(
Variable = names(dat3_lumped),
Categories = sapply(dat3_lumped, function(x) n_distinct(x, na.rm = FALSE))
) %>%
arrange(desc(Categories))
datatable(
category_summary,
options = list(pageLength = 25),
caption = "Number of categories after lumping"
)
freq_tables <- lapply(
dat3_lumped %>% select(-all_of(keep_cols)),
table,
useNA = "ifany"
)
freq_tables
## $YEAR
##
## 2016 2017 2018 2019 2020 2021 2022 2023 2024
## 6080 6075 6374 6272 6565 7388 7593 7314 7080
##
## $WORK_INJNAME
##
## No Not Reported Redacted Yes
## 53105 6460 133 1043
##
## $VE_TOTAL
##
## 1 2 3 4 5 6 7 8 9 10 11 12 14
## 52229 6160 1645 452 155 48 23 16 4 2 1 3 1
## 130
## 2
##
## $FATALS
##
## 1 2 3 4 5 6 8 20
## 58940 1531 188 57 6 9 8 2
##
## $DRUNK_DR
##
## 0 1 2 <NA>
## 29063 2289 14 29375
##
## $HIT_RUNNAME_2
##
## No Not Reported Yes
## 46422 988 13331
##
## $WRK_ZONENAME
##
## Construction Maintenance None
## 705 106 59455
## Utility Work Zone, Type Unknown
## 33 442
##
## $REL_ROADNAME
##
## Not Reported On Roadside On Roadway On Shoulder
## 295 1928 55468 1956
## Outside Trafficway Other
## 320 774
##
## $WEATHER1NAME
##
## Clear Cloudy Fog, Smog, Smoke Not Reported
## 17338 3484 274 37373
## Rain Other
## 2044 228
##
## $LGT_CONDNAME
##
## Dark - Lighted Dark - Not Lighted Dawn Daylight
## 23917 21172 1014 12078
## Dusk Other
## 1186 1374
##
## $RUR_URBNAME
##
## Not Reported Rural
## 163 10237
## Trafficway Not in State Inventory Urban
## 326 50015
##
## $FUNC_SYSNAME
##
## Interstate Local
## 7231 6119
## Major Collector Minor Arterial
## 5856 14137
## Principal Arterial - Other Other
## 17820 9578
##
## $PBSEXNAME
##
## Female Male Not Reported
## 17933 42482 326
##
## $PEDCTYPENAME
##
## Crossing an Expressway
## 3168
## Disabled Vehicle-Related
## 2693
## Not At Intersection - Other / Unknown
## 5356
## Pedestrian Failed to Yield
## 19768
## Walking/Running Along Roadway With Traffic - From Behind
## 4768
## Other
## 24988
##
## $PEDPOSNAME
##
## Crosswalk Area
## 7700
## Intersection Area
## 2518
## Other/Unknown
## 1802
## Paved Shoulder / Bicycle Lane / Parking Lane
## 2543
## Travel Lane
## 43644
## Other
## 2534
##
## $MOTMANNAME
##
## Left Turn Not Applicable Not Reported Right Turn
## 1983 45795 359 798
## Straight through
## 11806
##
## $PEDLEGNAME
##
## Farside Nearside Not Applicable Not Reported
## 7608 5600 45836 1697
##
## $PEDSNRNAME
##
## Motorist traveling straight through - Crash Occurred on Far Side of Intersection / Pedestrian outside crosswalk area, other
## 456
## Motorist traveling straight through - Crash Occurred on Far Side of Intersection / Pedestrian within crosswalk area, traveled from motorist’s left
## 365
## Motorist traveling straight through - Crash Occurred on Near (Approach) Side of Intersection / Pedestrian within crosswalk area, traveled from motorist’s left
## 364
## Not Applicable
## 46065
## Not Reported
## 1555
## Other
## 11936
##
## $PEDCGPNAME
##
## Crossing Roadway - Vehicle Not Turning
## 22317
## Other / Unknown - Insufficient Details
## 7457
## Pedestrian in Roadway - Circumstances Unknown
## 4374
## Unusual Circumstances
## 5886
## Walking/Running Along Roadway
## 8272
## Other
## 12435
##
## $INJ_SEVNAMEDr
##
## No Apparent Injury (O) Not Reported
## 48413 6826
## Possible Injury (C) Suspected Minor Injury (B)
## 2473 1943
## Suspected Serious Injury (A) Other
## 607 479
##
## $SEXNAMEDr
##
## Female Male Not Reported
## 14830 37170 8741
##
## $DRINKINGNAMEDr
##
## No (Alcohol Not Involved) Not Reported Yes (Alcohol Involved)
## 35594 21161 3986
##
## $HAZ_INVNAME_2
##
## No Not Reported Yes
## 59732 936 73
##
## $BUS_USENAME_2
##
## Charter/Tour Not a Bus Not Reported School
## 39 54506 5804 104
## Transit/ Commuter Other
## 241 47
##
## $EMER_USENAME_2
##
## Emergency Operation, Emergency Warning Equipment in Use
## 57
## Emergency Operation, Emergency Warning Equipment in Use Unknown
## 43
## Non-Emergency, Non-Transport
## 53
## Not Applicable
## 58212
## Not Reported
## 2351
## Other
## 25
##
## $TRAV_SPNAME_2
##
## 035 MPH 040 MPH 045 MPH 055 MPH Not Reported Other
## 2333 2527 3414 1903 38258 12306
##
## $UNDERIDENAME_2
##
## No Underride or Override Noted
## 30840
## Not Reported
## 29877
## Overriding a Motor Vehicle In-Transport
## 4
## Overriding a Motor Vehicle Not In-Transport
## 7
## Underriding a Motor Vehicle In-Transport, Underride, Compartment Intrusion Unknown
## 3
## Underriding a Motor Vehicle Not In-Transport, Underride, Compartment Intrusion Unknown
## 3
## Other
## 7
##
## $ROLLOVERNAME_2
##
## No Rollover Not Applicable
## 59261 125
## Not Reported Rollover
## 936 155
## Rollover, Tripped by Object/Vehicle Other
## 227 37
##
## $DEFORMEDNAME_2
##
## Damage Reported, Extent Unknown Disabling Damage
## 4028 13269
## Functional Damage Minor Damage
## 15432 12081
## Not Reported Other
## 12483 3448
##
## $VTRAFWAYNAME_2
##
## Not Reported
## 1599
## Two-Way, Divided, Positive Median Barrier
## 9525
## Two-Way, Divided, Unprotected Median
## 13168
## Two-Way, Not Divided
## 24466
## Two-Way, Not Divided With a Continuous Left-Turn Lane
## 7725
## Other
## 4258
##
## $VNUM_LANNAME_2
##
## Five lanes Four lanes Not Reported Three lanes Two lanes Other
## 8514 9016 2029 10293 26179 4710
##
## $VSPD_LIMNAME_2
##
## 25 MPH 35 MPH 40 MPH 45 MPH 55 MPH Other
## 4874 9993 7268 10795 7212 20599
##
## $VALIGNNAME_2
##
## Curve - Left Curve Left Curve Right Not Reported Straight Other
## 708 927 944 2682 53922 1558
##
## $VPROFILENAME_2
##
## Downhill Grade, Unknown Slope Level
## 1837 3695 44259
## Not Reported Uphill Other
## 7903 1580 1467
##
## $VPAVETYPNAME_2
##
## Blacktop, Bituminous, or Asphalt Concrete
## 36107 4603
## Non-Trafficway or Driveway Access Not Reported
## 457 19416
## Slag, Gravel or Stone Other
## 69 89
##
## $VSURCONDNAME_2
##
## Dry Ice/Frost
## 50381 236
## Non-Trafficway or Driveway Access Not Reported
## 457 1828
## Wet Other
## 7490 349
##
## $P_CRASH1NAME_2
##
## Going Straight Negotiating a Curve Not Reported Turning Left
## 48441 3146 2633 2355
## Turning Right Other
## 982 3184
##
## $ACC_TYPENAME_2
##
## A1-Single Driver-Right Roadside Departure-Drive Off Road
## 1004
## B6-Single Driver-Left Roadside Departure-Drive Off Road
## 407
## C13-Single Driver-Forward Impact-Pedestrian/ Animal
## 47624
## M98-Other Crash Type
## 884
## Not Reported
## 8136
## Other
## 2686
##
## $L_STATUSNAME_2
##
## Expired Not licensed Not Reported Suspended Valid Other
## 636 2501 9226 2399 45437 542
##
## $L_TYPENAME_2
##
## Full Driver License Intermediate Driver License
## 47828 721
## Learner's Permit Not Licensed
## 213 2501
## Not Reported Other
## 9218 260
##
## $CDL_STATNAME_2
##
## Disqualified No (CDL) Not Reported Suspended Valid Other
## 149 45875 9248 131 5014 324
##
## $MDRDSTRDNAME_2
##
## Distraction/Inattention
## 276
## Inattention (Inattentive), Details Unknown
## 533
## Not Distracted
## 11548
## Not Reported
## 45743
## Reported as Unknown if Distracted
## 1372
## Other
## 1269
##
## $DRIMPAIRNAME_2
##
## Asleep or Fatigued
## 305
## Ill, Blackout
## 253
## None/Apparently Normal
## 27034
## Not Reported
## 28584
## Under the Influence of Alcohol, Drugs or Medication
## 3971
## Other
## 594
##
## $MDRMANAVNAME_2
##
## Driver Did Not Maneuver to Avoid
## 7082
## Motor Vehicle
## 160
## Not Reported
## 50283
## Pedestrian, Pedalcyclist or Other Non-Motorist
## 2952
## Phantom/Non-Contact Motor Vehicle
## 179
## Other
## 85
##
## $Day_Type
##
## Weekday Weekend
## 42325 18416
##
## $PSL
##
## 25_or_less 30_35 40_45 50_55 60_plus Other
## 5427 14474 18063 9752 8789 4236
keep_names <- c("CRASH_NUM1")
old_names <- names(dat3_lumped)
new_names <- ifelse(
old_names %in% keep_names,
old_names,
str_to_title(str_sub(str_replace_all(old_names, "_", ""), 1, 4))
)
new_names <- make.unique(new_names, sep = "")
name_key <- data.frame(
old_name = old_names,
new_name = new_names
)
names(dat3_lumped) <- new_names
names(dat3_lumped)
## [1] "Stat" "Year" "CRASH_NUM1" "Work" "Lati"
## [6] "Long" "Veto" "Fata" "Drun" "Hitr"
## [11] "Wrkz" "Relr" "Weat" "Lgtc" "Ruru"
## [16] "Func" "Pbse" "Pedc" "Pedp" "Motm"
## [21] "Pedl" "Peds" "Pedc1" "Injs" "Sexn"
## [26] "Drin" "Hazi" "Busu" "Emer" "Trav"
## [31] "Unde" "Roll" "Defo" "Vtra" "Vnum"
## [36] "Vspd" "Vali" "Vpro" "Vpav" "Vsur"
## [41] "Pcra" "Acct" "Lsta" "Ltyp" "Cdls"
## [46] "Mdrd" "Drim" "Mdrm" "Peda" "Drag"
## [51] "Dayt" "Veht" "Psl"
datatable(
name_key,
options = list(pageLength = 25, scrollX = TRUE),
caption = "Variable name key"
)
datatable(
head(dat3_lumped, 50),
options = list(scrollX = TRUE, pageLength = 10),
caption = "Final processed dataset"
)
write.csv(dat3_lumped, "Ped01.csv", row.names = FALSE)
write_xlsx(dat3_lumped, "Ped01.xlsx")
write.csv(name_key, "Ped01_variable_name_key.csv", row.names = FALSE)
write_xlsx(name_key, "Ped01_variable_name_key.xlsx")
write.csv(category_summary, "Ped01_category_summary.csv", row.names = FALSE)
write_xlsx(category_summary, "Ped01_category_summary.xlsx")
The following files are created in the working directory:
Ped01.csvPed01.xlsxPed01_variable_name_key.csvPed01_variable_name_key.xlsxPed01_category_summary.csvPed01_category_summary.xlsx