# NOTE(review): hard-coded, machine-specific working directory. The relative
# workbook path passed to read_excel() further down is resolved against it, so
# the script only runs where this exact folder exists.
setwd("C:/Users/mvx13/OneDrive - Texas State University/Hackathon_Rohit/Papers/Ped_byVehType")
library(readxl)
## Warning: package 'readxl' was built under R version 4.2.2
library(data.table)
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'vip' was built under R version 4.2.3
##
## Attaching package: 'vip'
## The following object is masked from 'package:utils':
##
## vi
## Warning: package 'xgboost' was built under R version 4.2.3
##
## Attaching package: 'xgboost'
## The following object is masked from 'package:dplyr':
##
## slice
## Warning: package 'ggplot2' was built under R version 4.2.3
library(readxl)
# Read the "Main" worksheet of the crash workbook and list its columns.
dat1 <- read_excel(
  path = "LA_17_21_PedwithOtherVarFin.xlsx",
  sheet = "Main"
)
colnames(dat1)
## [1] "CRASH_NUM1" "PED_NUM" "CRASH_PED_NUM1" "Ped_Impair"
## [5] "Ped_Act" "Ped_Act1" "Ped_Age" "Ped_BAC"
## [9] "Ped_Cl_Low" "Ped_Cl_Up" "Ped_Cond" "Ped_Cond1"
## [13] "Ped_Inj" "Ped_State" "Ped_Gen" "Ped_Rac"
## [17] "Cited" "Year" "Seve" "DrInj"
## [21] "DOW" "Season" "Access" "Align"
## [25] "PriContrib" "HAR" "Lighting" "Loc"
## [29] "CrashType" "NumVeh" "RoadCond" "RoadRel"
## [33] "RoadType" "TrkBus" "Weather" "HwyType"
## [37] "NumOcc" "MostHarm" "DrAlcDrug" "DrAge"
## [41] "DrCond" "DrDistract" "DrRace" "DrGen"
## [45] "MovReason" "NumOccV" "PSL" "PriorMov"
## [49] "TCD" "TCDCond" "VehCond" "VehLight"
## [53] "VehType" "Violation" "VisObs" "City"
## [57] "Parish" "Int" "ESTAlc" "Ped"
## [61] "RwD" "LaneDepart" "Latitude" "Longitude"
# Keep the identifier columns, the year/injury columns, the recoded pedestrian
# attributes, and the contiguous run of crash/driver/vehicle columns from DOW
# through VisObs (same columns, same order as the positional selection).
dat2 <- dat1 %>%
  dplyr::select(
    CRASH_NUM1, PED_NUM, CRASH_PED_NUM1,
    Year, DrInj, Seve,
    Ped_Act1, Ped_Age, Ped_Cond1, Ped_Inj, Ped_Gen, Ped_Rac,
    DOW:VisObs
  )
colnames(dat2)
## [1] "CRASH_NUM1" "PED_NUM" "CRASH_PED_NUM1" "Year"
## [5] "DrInj" "Seve" "Ped_Act1" "Ped_Age"
## [9] "Ped_Cond1" "Ped_Inj" "Ped_Gen" "Ped_Rac"
## [13] "DOW" "Season" "Access" "Align"
## [17] "PriContrib" "HAR" "Lighting" "Loc"
## [21] "CrashType" "NumVeh" "RoadCond" "RoadRel"
## [25] "RoadType" "TrkBus" "Weather" "HwyType"
## [29] "NumOcc" "MostHarm" "DrAlcDrug" "DrAge"
## [33] "DrCond" "DrDistract" "DrRace" "DrGen"
## [37] "MovReason" "NumOccV" "PSL" "PriorMov"
## [41] "TCD" "TCDCond" "VehCond" "VehLight"
## [45] "VehType" "Violation" "VisObs"
# Convert every character column to a factor so summary() reports level counts.
# across(where(...)) replaces the superseded mutate_if().
dat2 <- dat2 %>%
  mutate(across(where(is.character), as.factor))
summary(dat2)
## CRASH_NUM1 PED_NUM CRASH_PED_NUM1
## LA17_170309132154541: 23 Min. : 1.000 LA17_1405131936324181: 1
## LA18_20180039626 : 9 1st Qu.: 1.000 LA17_17-0020881 : 1
## LA17_170712141541690: 8 Median : 1.000 LA17_17000092101 : 1
## LA1920190012302 : 6 Mean : 1.115 LA17_17000102501 : 1
## LA17_20170012722 : 5 3rd Qu.: 1.000 LA17_1701021529437921: 1
## LA17_6246316 : 5 Max. :23.000 LA17_1701021937334951: 1
## (Other) :8157 (Other) :8207
## Year DrInj Seve
## Min. :2017 A: 17 A: 951
## 1st Qu.:2018 B: 102 B:3043
## Median :2019 C: 456 C:2719
## Mean :2019 K: 5 K: 780
## 3rd Qu.:2020 O:7633 O: 720
## Max. :2021
##
## Ped_Act1 Ped_Age
## Crossing, Entering Road At Intersection :1882 25-45 years :2848
## Crossing, Entering Road Not At Intersection:1778 46-65 years :2071
## Other :1159 Children : 686
## Walking In Road - With Traffic : 932 Infant : 39
## Unknown : 775 Older than 65 years: 559
## Not In Roadway : 653 Unknown : 396
## (Other) :1034 Young :1614
## Ped_Cond1 Ped_Inj Ped_Gen Ped_Rac DOW
## Normal :3324 A: 874 F :2816 B :4365 Weekday:6022
## Unknown :2316 B:2970 M :5160 I : 4 Weekend:2191
## Inattentive :1612 C:2780 Unknown: 237 O : 260
## Alc Impaired: 381 K: 734 Unknown: 272
## Other : 246 O: 855 W :3312
## Distracted : 181
## (Other) : 153
## Season Access Align
## Autumn:2280 Full Control : 344 Curve : 243
## Spring:2034 No Control :7052 Dip : 15
## Summer:1700 Other : 105 Hillcrest: 46
## Winter:2199 Partial Control: 712 On Grade : 97
## Other : 56
## Straight :7756
##
## PriContrib HAR Lighting
## Violations :4223 Min. :0.0000 Dark - Cont St Lts :2191
## Pedestrian :2095 1st Qu.:0.0000 Dark - No St Lts :1222
## Move Prior to Crash:1223 Median :0.0000 Dark - St Lts at Int: 539
## Dr Condition : 264 Mean :0.2809 Daylight :3940
## Lighting : 105 3rd Qu.:1.0000 Dusk/Dawn : 239
## Vision Obscure : 100 Max. :1.0000 Other : 82
## (Other) : 203
## Loc CrashType NumVeh
## Business Cont :2087 NC WMV :5597 Multiple: 266
## Business Residen :2813 Other : 999 Single :7312
## Manufac/Industrial: 148 Right Angle: 528 Two : 635
## Open Country : 411 Rear End : 343
## Other : 131 Sideswipe : 307
## Residential :2515 Left Turn : 228
## School : 108 (Other) : 211
## RoadCond RoadRel
## No Abnormalities :7740 Beyond ROW : 24
## Other : 201 Median : 18
## Construction : 56 On Roadway :7339
## Water On Roadway : 49 Other : 189
## Previous Crash : 48 Shoulder : 391
## Object In Roadway: 38 Shoulder Left : 90
## (Other) : 81 Shoulder Right: 162
## RoadType TrkBus Weather
## One-Way Road :1025 Min. :0.0000 Clear :6467
## Other : 155 1st Qu.:0.0000 Cloudy :1064
## Two-Way Road Div. :1834 Median :0.0000 Fog/Smoke : 66
## Two-Way Road Undiv. :4944 Mean :0.0347 Other : 84
## Two-Way Road With Barrier: 255 3rd Qu.:0.0000 Rain : 524
## Max. :1.0000 Sleet/Hail/Snow: 8
##
## HwyType NumOcc MostHarm
## City Street:4102 Multiple: 860 Other :5940
## Interstate : 331 NA : 1 MV in Transport :1872
## Other : 53 Single :6267 Unknown : 203
## Parish Road:1140 Two :1085 Parked Motor Vehicle: 144
## State Hwy :1682 Animal : 20
## Toll Road : 3 Ditch : 14
## U.S. Hwy : 902 (Other) : 20
## DrAlcDrug DrAge
## Alcohol And Drugs Present: 84 25-45 years :2614
## Alcohol Present : 281 46-65 years :1776
## Drugs Present : 35 Older than 65 years: 778
## Neither Alc/Drugs :5439 Unknown :1807
## Unknown :2374 Young :1238
##
##
## DrCond DrDistract DrRace
## Normal :4084 Cell Phone : 50 Afri-Amcn:3152
## Unknown :2156 Not Distracted :4492 Caucasian:3119
## Inattentive :1237 Other Electronic Device: 11 Other :1942
## Distracted : 224 Other Inside Vehicle : 120
## Other : 199 Other Outside Vehicle : 258
## Alc (Impaired): 198 Unknown :3282
## (Other) : 115
## DrGen MovReason NumOccV
## F :2632 Normal Movement :3771 Multiple: 413
## M :3891 Unknown :1753 Single :7008
## Unknown:1690 Driver Violation :1358 Two : 792
## To Avoid Pedestrian: 411
## Driver Condition : 316
## Other : 308
## (Other) : 296
## PSL PriorMov
## 25 MPH or Lower :3129 Changing Lanes On Multi-Lane Road: 74
## 30-45 MPH :3938 Making Left Turn : 725
## 50-60 MPH : 949 Other :1534
## 65-70 MPH : 188 Other Or Unknown : 736
## 75 MPH and Above: 9 Proceeding Straight Ahead :4864
## Slowing To Stop : 94
## Stopped : 186
## TCD TCDCond
## No Control :1950 Defective Markings: 20
## White Dashed Line :1797 Functioning :5686
## Yellow No Passing Line: 790 No Controls :1763
## Yellow Dashed Line : 745 Not Functioning : 17
## Stop Sign : 737 Obscured : 6
## Green Signal On : 634 Unknown : 721
## (Other) :1560
## VehCond VehLight
## Defective : 76 Daytime Running Lights: 508
## No Defects Observed:5881 Headlights Off :1364
## Other : 136 Headlights On :3398
## Unknown :2120 Unknown :2943
##
##
##
## VehType Violation
## Car :3516 No Violations :3572
## Light Truck/Van :2023 Other :2465
## SUV :1829 Unknown :1280
## Other : 516 Failure to Yield : 579
## Medium/Large Truck: 201 Disregarded Traffic Control: 128
## Motorcycle : 56 Improper Backing : 96
## (Other) : 72 (Other) : 93
## VisObs
## Moving Vehicles : 94
## No Obscurements :5337
## Other : 482
## Rain/Snow Winshield: 155
## Unknown :2145
##
##
##
## Bicycle Bus Car Emergency Vehicle
## 11 20 3516 16
## Farm Equipment Light Truck/Van Medium/Large Truck Motor Home
## 1 2023 201 4
## Motorcycle Other School Bus SUV
## 56 516 20 1829
# Restrict the data to crashes whose vehicle type is a light truck/van or SUV.
suv1 <- dat2 %>%
  dplyr::filter(VehType %in% c("Light Truck/Van", "SUV"))
dim(suv1)
## [1] 3852 47
# Modelling frame: pedestrian injury (Ped_Inj) first, followed by the 35
# candidate predictors, in the order used for the short names defined below.
suv2 <- suv1 %>%
  dplyr::select(
    Ped_Inj, Ped_Gen, CrashType, Ped_Age, Ped_Act1, Ped_Cond1,
    HwyType, Violation, TCD, DrAge, Loc, PriorMov,
    PSL, Lighting, Season, DrCond, VisObs, RoadType,
    DrRace, VehLight, Ped_Rac, MostHarm, Weather, NumOcc,
    RoadRel, VehType, RoadCond, DrGen, Access, TCDCond,
    VehCond, NumVeh, DOW, Align, HAR, TrkBus
  )
# Original column names and their three-letter replacements. The two vectors
# are parallel: element i of `newnames` is the short name for element i of
# `oldnames`.
oldnames <- c(
  "Ped_Inj", "Ped_Gen", "CrashType", "Ped_Age", "Ped_Act1", "Ped_Cond1",
  "HwyType", "Violation", "TCD", "DrAge", "Loc", "PriorMov",
  "PSL", "Lighting", "Season", "DrCond", "VisObs", "RoadType",
  "DrRace", "VehLight", "Ped_Rac", "MostHarm", "Weather", "NumOcc",
  "RoadRel", "VehType", "RoadCond", "DrGen", "Access", "TCDCond",
  "VehCond", "NumVeh", "DOW", "Align", "HAR", "TrkBus"
)
newnames <- c(
  "PIn", "PGn", "CTy", "PAg", "PAc", "PCn",
  "HTy", "Vln", "TCD", "DAg", "Loc", "PMv",
  "PSL", "Lgh", "Ssn", "DCn", "VsO", "RTy",
  "DRc", "VLg", "PRc", "MHr", "Wth", "NOc",
  "RRl", "VTy", "RCn", "DGn", "Acs", "TCn",
  "VCn", "NVh", "DOW", "Aln", "HAR", "TBs"
)

# A positional pairing is only meaningful when both vectors have the same
# length and contain no repeated names; fail loudly otherwise instead of
# silently producing NA or ambiguous rows.
stopifnot(
  length(oldnames) == length(newnames),
  anyDuplicated(oldnames) == 0L,
  anyDuplicated(newnames) == 0L
)

# `on` / `nn` are kept because they are ordinary top-level objects that other
# parts of the script may inspect; seq_along() replaces the unsafe 1:nrow().
on <- data.frame(oldnames = oldnames, varid = seq_along(oldnames))
nn <- data.frame(newnames = newnames, varid = seq_along(newnames))

# Old-name / id / new-name lookup table, built directly rather than through a
# join on the synthetic `varid` key.
onn <- data.frame(
  oldnames = oldnames,
  varid = seq_along(oldnames),
  newnames = newnames
)
head(onn)
## oldnames varid newnames
## 1 Ped_Inj 1 PIn
## 2 Ped_Gen 2 PGn
## 3 CrashType 3 CTy
## 4 Ped_Age 4 PAg
## 5 Ped_Act1 5 PAc
## 6 Ped_Cond1 6 PCn
# Rename the modelling columns to their short names. The lookup is a named
# vector (new name = old name), so each column is matched by name rather than
# by position, and all_of() avoids the deprecated use of a bare external
# vector in a tidyselect context.
suv2 <- suv2 %>%
  rename(all_of(setNames(oldnames, newnames)))
library(xgboost)
# Fit a GBM
# PIn is a factor whose levels sort alphabetically (A, B, C, K, O). Used as a
# squared-error regression label, its integer codes would rank O above K and K
# above A, which is not a severity ordering. Encode it explicitly instead.
# NOTE(review): the order below assumes the conventional KABCO injury scale
# (O = none < C < B < A < K = fatal) -- confirm against the data dictionary.
severity_levels <- c("O", "C", "B", "A", "K")
pin_score <- match(as.character(suv2$PIn), severity_levels) - 1L
# Any level outside the scale (or a missing value) would become NA; stop early.
stopifnot(!anyNA(pin_score))

set.seed(102) # for reproducibility
bst <- xgboost(
  data = data.matrix(subset(suv2, select = -PIn)),
  label = pin_score,
  objective = "reg:squarederror",
  nrounds = 100,
  max_depth = 5,
  eta = 0.3,
  verbose = 0 # suppress printing
)
# Importance rankings depend on the label encoding, so any importance output or
# variable short-list produced with a different label should be regenerated.
library(vip)
vi_bst <- xgb.importance(model = bst)
vi(bst)
## # A tibble: 35 × 2
## Variable Importance
## <chr> <dbl>
## 1 PGn 0.118
## 2 CTy 0.0739
## 3 PAg 0.0668
## 4 TCD 0.0582
## 5 PCn 0.0524
## 6 PAc 0.0509
## 7 HTy 0.0426
## 8 Lgh 0.0382
## 9 Ssn 0.0373
## 10 Vln 0.0359
## # ℹ 25 more rows
library(ggplot2)
# Variable-importance bar chart for the fitted booster (up to 42 features).
vip(bst, num_features = 42) +
  theme_bw(base_size = 14)

# Keep the response (PIn) and the 21 predictors retained for the next steps.
suv3 <- suv2 %>%
  dplyr::select(
    PIn, PGn, CTy, PAg, PAc, PCn, HTy,
    Vln, TCD, DAg, Loc, PMv, PSL, Lgh,
    Ssn, DCn, VsO, RTy, DRc, VLg,
    PRc, MHr
  )
dim(suv3)
## [1] 3852 22
# Ensure any remaining character columns are factors before lumping levels.
# across(where(...)) replaces the superseded mutate_if().
suv4 <- suv3 %>%
  mutate(across(where(is.character), as.factor))
library(forcats)
## Warning: package 'forcats' was built under R version 4.2.3
library(dplyr)
library(forcats)
# Predictor columns whose levels are collapsed below (everything except PIn).
factor_columns <- c(
  "PGn", "CTy", "PAg", "PAc", "PCn", "HTy",
  "Vln", "TCD", "DAg", "Loc", "PMv", "PSL", "Lgh",
  "Ssn", "DCn", "VsO", "RTy", "DRc", "VLg",
  "PRc", "MHr"
)

# Keep the two most frequent levels of each predictor and pool the rest into
# "other". all_of() selects by the external character vector without the
# tidyselect deprecation warning, and the lambda passes the extra arguments
# explicitly instead of through across()'s deprecated `...`.
# The pooled level is lower-case "other"; it is distinct from any pre-existing
# "Other" level, so both can appear for the same variable.
suv4a <- suv4 %>%
  mutate(across(
    all_of(factor_columns),
    ~ fct_lump_n(.x, n = 2, other_level = "other")
  ))
# Row identifier used later as the join key; seq_len() stays correct even for
# an empty frame, where 1:nrow() would yield c(1, 0).
suv4a$id <- seq_len(nrow(suv4a))
dim(suv4a)
## [1] 3852 23
library(data.table)
library(mltools)
## Warning: package 'mltools' was built under R version 4.2.3
# One-hot encode every predictor; the response PIn is dropped by name first so
# it is not expanded into indicator columns.
suv4b <- one_hot(as.data.table(dplyr::select(suv4a, -PIn)))
# Re-stamp the row id so it matches the key carried by suv4a.
suv4b$id <- seq_len(nrow(suv4b))
dim(suv4b)
# Re-attach the response to the indicator columns. Columns are picked by name
# rather than by position (1 and 23), so the join still selects PIn and id if
# the column layout of suv4a changes.
suv4c <- left_join(suv4a[, c("PIn", "id")], suv4b, by = "id")
head(suv4c)
## # A tibble: 6 × 65
## PIn id PGn_F PGn_M PGn_other `CTy_NC WMV` CTy_Other CTy_other
## <fct> <int> <int> <int> <int> <int> <int> <int>
## 1 C 1 0 1 0 0 1 0
## 2 C 2 0 1 0 1 0 0
## 3 O 3 0 1 0 0 0 1
## 4 B 4 0 1 0 0 0 1
## 5 C 5 0 1 0 0 0 1
## 6 B 6 1 0 0 0 0 1
## # ℹ 57 more variables: `PAg_25-45 years` <int>, `PAg_46-65 years` <int>,
## # PAg_other <int>, `PAc_Crossing, Entering Road At Intersection` <int>,
## # `PAc_Crossing, Entering Road Not At Intersection` <int>, PAc_other <int>,
## # PCn_Normal <int>, PCn_Unknown <int>, PCn_other <int>,
## # `HTy_City Street` <int>, `HTy_State Hwy` <int>, HTy_other <int>,
## # `Vln_No Violations` <int>, Vln_Other <int>, Vln_other <int>,
## # `TCD_No Control` <int>, `TCD_White Dashed Line` <int>, TCD_other <int>, …
## names(suv4c) = gsub(pattern = "`*`", replacement = "", x = names(suv4c))
## head(suv4c)
# List the full (unabbreviated) indicator column names.
colnames(suv4c)
## [1] "PIn"
## [2] "id"
## [3] "PGn_F"
## [4] "PGn_M"
## [5] "PGn_other"
## [6] "CTy_NC WMV"
## [7] "CTy_Other"
## [8] "CTy_other"
## [9] "PAg_25-45 years"
## [10] "PAg_46-65 years"
## [11] "PAg_other"
## [12] "PAc_Crossing, Entering Road At Intersection"
## [13] "PAc_Crossing, Entering Road Not At Intersection"
## [14] "PAc_other"
## [15] "PCn_Normal"
## [16] "PCn_Unknown"
## [17] "PCn_other"
## [18] "HTy_City Street"
## [19] "HTy_State Hwy"
## [20] "HTy_other"
## [21] "Vln_No Violations"
## [22] "Vln_Other"
## [23] "Vln_other"
## [24] "TCD_No Control"
## [25] "TCD_White Dashed Line"
## [26] "TCD_other"
## [27] "DAg_25-45 years"
## [28] "DAg_46-65 years"
## [29] "DAg_other"
## [30] "Loc_Business Residen"
## [31] "Loc_Residential"
## [32] "Loc_other"
## [33] "PMv_Other"
## [34] "PMv_Proceeding Straight Ahead"
## [35] "PMv_other"
## [36] "PSL_25 MPH or Lower"
## [37] "PSL_30-45 MPH"
## [38] "PSL_other"
## [39] "Lgh_Dark - Cont St Lts"
## [40] "Lgh_Daylight"
## [41] "Lgh_other"
## [42] "Ssn_Autumn"
## [43] "Ssn_Winter"
## [44] "Ssn_other"
## [45] "DCn_Normal"
## [46] "DCn_Unknown"
## [47] "DCn_other"
## [48] "VsO_No Obscurements"
## [49] "VsO_Unknown"
## [50] "VsO_other"
## [51] "RTy_Two-Way Road Div."
## [52] "RTy_Two-Way Road Undiv."
## [53] "RTy_other"
## [54] "DRc_Afri-Amcn"
## [55] "DRc_Caucasian"
## [56] "DRc_other"
## [57] "VLg_Headlights On"
## [58] "VLg_Unknown"
## [59] "VLg_other"
## [60] "PRc_B"
## [61] "PRc_W"
## [62] "PRc_other"
## [63] "MHr_MV in Transport"
## [64] "MHr_Other"
## [65] "MHr_other"
# Character vector of the full column names, labelled with the abbreviation
# abbreviate() generates for each of them (abbreviation = full name).
suv4d <- names(suv4c)
names(suv4d) <- abbreviate(suv4d)
names(suv4d)
## [1] "PIn" "id" "PG_F" "PG_M" "PGn_" "CT_W" "CT_O" "CTy_"
## [9] "PA_2y" "PA_4y" "PAg_" "PERAI" "PERNAI" "PAc_" "PC_N" "PC_U"
## [17] "PCn_" "HT_S" "HT_H" "HTy_" "V_NV" "Vl_O" "Vln_" "TCDC"
## [25] "TCDL" "TCD_" "DA_2y" "DA_4y" "DAg_" "L_BR" "Lc_R" "Lc_t"
## [33] "PM_O" "PMSA" "PMv_" "PMoL" "PSLM" "PSL_" "L-CSL" "Lg_D"
## [41] "Lgh_" "Ss_A" "Ss_W" "Ssn_" "DC_N" "DC_U" "DCn_" "VO_O"
## [49] "VO_U" "VsO_" "RTRD" "RTRU" "RTy_" "DR_A" "DR_C" "DRc_"
## [57] "VL_O" "VL_U" "VLg_" "PR_B" "PR_W" "PRc_" "MHiT" "MH_O"
## [65] "MHr_"
# Apply the abbreviated names. suv4d is already a lookup of the form
# new name = old name, so rename(all_of()) matches each column by name and
# avoids passing a bare external vector to a tidyselect argument.
suv4e <- suv4c %>%
  rename(all_of(suv4d))
head(suv4e)
## # A tibble: 6 × 65
## PIn id PG_F PG_M PGn_ CT_W CT_O CTy_ PA_2y PA_4y PAg_ PERAI PERNAI
## <fct> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 C 1 0 1 0 0 1 0 0 0 1 0 1
## 2 C 2 0 1 0 1 0 0 0 0 1 0 1
## 3 O 3 0 1 0 0 0 1 0 0 1 0 0
## 4 B 4 0 1 0 0 0 1 0 1 0 0 0
## 5 C 5 0 1 0 0 0 1 1 0 0 0 0
## 6 B 6 1 0 0 0 0 1 1 0 0 0 0
## # ℹ 52 more variables: PAc_ <int>, PC_N <int>, PC_U <int>, PCn_ <int>,
## # HT_S <int>, HT_H <int>, HTy_ <int>, V_NV <int>, Vl_O <int>, Vln_ <int>,
## # TCDC <int>, TCDL <int>, TCD_ <int>, DA_2y <int>, DA_4y <int>, DAg_ <int>,
## # L_BR <int>, Lc_R <int>, Lc_t <int>, PM_O <int>, PMSA <int>, PMv_ <int>,
## # PMoL <int>, PSLM <int>, PSL_ <int>, `L-CSL` <int>, Lg_D <int>, Lgh_ <int>,
## # Ss_A <int>, Ss_W <int>, Ssn_ <int>, DC_N <int>, DC_U <int>, DCn_ <int>,
## # VO_O <int>, VO_U <int>, VsO_ <int>, RTRD <int>, RTRU <int>, RTy_ <int>, …
## PIn id PG_F PG_M PGn_
## A: 391 Min. : 1.0 Min. :0.0000 Min. :0.0000 Min. :0.000
## B:1331 1st Qu.: 963.8 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## C:1348 Median :1926.5 Median :0.0000 Median :1.0000 Median :0.000
## K: 380 Mean :1926.5 Mean :0.3494 Mean :0.6236 Mean :0.027
## O: 402 3rd Qu.:2889.2 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.000
## Max. :3852.0 Max. :1.0000 Max. :1.0000 Max. :1.000
## CT_W CT_O CTy_ PA_2y
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.6882 Mean :0.1197 Mean :0.1921 Mean :0.3528
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## PA_4y PAg_ PERAI PERNAI
## Min. :0.0000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.000 Median :0.0000 Median :0.0000
## Mean :0.2552 Mean :0.392 Mean :0.2323 Mean :0.2126
## 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.000 Max. :1.0000 Max. :1.0000
## PAc_ PC_N PC_U PCn_
## Min. :0.000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.555 Mean :0.4123 Mean :0.2721 Mean :0.3157
## 3rd Qu.:1.000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## HT_S HT_H HTy_ V_NV
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.4592 Mean :0.2214 Mean :0.3193 Mean :0.4792
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Vl_O Vln_ TCDC TCDL
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.2892 Mean :0.2316 Mean :0.2251 Mean :0.2251
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## TCD_ DA_2y DA_4y DAg_
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.5498 Mean :0.3313 Mean :0.2747 Mean :0.3941
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## L_BR Lc_R Lc_t PM_O
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.000
## Mean :0.3424 Mean :0.2934 Mean :0.3642 Mean :0.197
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.000
## PMSA PMv_ PMoL PSLM
## Min. :0.0000 Min. :0.00 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.00 1st Qu.:0.0000 1st Qu.:0.0000
## Median :1.0000 Median :0.00 Median :0.0000 Median :0.0000
## Mean :0.5929 Mean :0.21 Mean :0.3614 Mean :0.4844
## 3rd Qu.:1.0000 3rd Qu.:0.00 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.00 Max. :1.0000 Max. :1.0000
## PSL_ L-CSL Lg_D Lgh_
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1542 Mean :0.2378 Mean :0.4995 Mean :0.2627
## 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## Ss_A Ss_W Ssn_ DC_N
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.2809 Mean :0.2684 Mean :0.4507 Mean :0.5449
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## DC_U DCn_ VO_O VO_U
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :1.0000 Median :0.0000
## Mean :0.2022 Mean :0.2529 Mean :0.7147 Mean :0.1999
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## VsO_ RTRD RTRU RTy_
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :1.0000 Median :0.0000
## Mean :0.08541 Mean :0.2253 Mean :0.6077 Mean :0.1669
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## DR_A DR_C DRc_ VL_O
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.3432 Mean :0.4821 Mean :0.1747 Mean :0.4463
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## VL_U VLg_ PR_B PR_W
## Min. :0.0000 Min. :0.0000 Min. :0.000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :1.000 Median :0.0000
## Mean :0.2985 Mean :0.2552 Mean :0.507 Mean :0.4356
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.000 Max. :1.0000
## PRc_ MHiT MH_O MHr_
## Min. :0.00000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :1.0000 Median :0.00000
## Mean :0.05737 Mean :0.2227 Mean :0.7357 Mean :0.04154
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.0000 Max. :1.00000
# Lookup table pairing each full column name with its abbreviation.
# as.data.frame() names the single column after the expression it was given,
# which is why the headers read "names(suv4c)" and "names(suv4d)".
on <- as.data.frame(names(suv4c))
on$varid <- seq_len(nrow(on))
nn <- as.data.frame(names(suv4d))
nn$varid <- seq_len(nrow(nn))
# Both frames list the same columns in the same order, so the pairing is
# positional; refuse to continue if the lengths ever disagree.
stopifnot(nrow(on) == nrow(nn))
onn <- cbind(on, nn["names(suv4d)"])
head(onn)
## names(suv4c) varid names(suv4d)
## 1 PIn 1 PIn
## 2 id 2 id
## 3 PGn_F 3 PG_F
## 4 PGn_M 4 PG_M
## 5 PGn_other 5 PGn_
## 6 CTy_NC WMV 6 CT_W