library(summarytools)
## Warning: package 'summarytools' was built under R version 3.4.3
# Make the 2010 crash CSV folder the working directory so the relative file
# names passed to fread() in this script resolve against it.
# NOTE(review): this is an absolute path on an external volume, so the script
# presumably only runs on a machine with that drive mounted -- consider a
# configurable data directory instead.
setwd("/Volumes/Seagate Backup Plus Drive/DataSets/LA_Data/Crash/csv/2010")
library(data.table)
## Warning: package 'data.table' was built under R version 3.4.2
# Read the DOT table, treating "NA", empty, single-space and "NaN" fields as
# missing, then print its row and column counts.
dat1 <- fread("2010DOT_TB.csv", na.strings=c("NA", "", " ","NaN"))
dim(dat1)
[1] 147831 40
# Per-column summary of dat1, rendered as a grid-style table with the graph
# column switched off.
dfSummary(
  dat1,
  style = "grid",
  plain.ascii = FALSE,
  graph.col = FALSE
)
## Warning in as.POSIXlt.POSIXct(Sys.time()): unknown timezone 'zone/tz/2017c.
## 1.0/zoneinfo/America/Chicago'
dat1
N: 147831
No | Variable | Stats / Values | Freqs (% of Valid) | Valid | Missing |
---|---|---|---|---|---|
1 |
crash_num [character] |
|
1 (0.0%) |
147830 (100%) |
1 (0%) |
2 |
CRASH_NUM1 [character] |
|
1 (0.0%) |
147831 (100%) |
0 (0%) |
3 |
dotd_crash_num [integer] |
mean (sd) : 110519842.9 (152445720.79) |
147656 distinct val. |
147805 (99.98%) |
26 (0.02%) |
4 |
hwy_type_cd [character] |
|
14339 ( 9.8%) |
146469 (99.08%) |
1362 (0.92%) |
5 |
pri_hwy_num [integer] |
mean (sd) : 233.83 (649.78) |
974 distinct val. |
146959 (99.41%) |
872 (0.59%) |
6 |
bypass_cd [character] |
|
2207 (41.1%) |
5373 (3.63%) |
142458 (96.37%) |
7 |
milepost [numeric] |
mean (sd) : 44.5 (150.96) |
18223 distinct val. |
147831 (100%) |
0 (0%) |
8 |
stl_route [character] |
|
7420 ( 8.4%) |
88392 (59.79%) |
59439 (40.21%) |
9 |
adt [integer] |
mean (sd) : 28670.78 (30035.63) |
725 distinct val. |
88392 (59.79%) |
59439 (40.21%) |
10 |
control_section [character] |
|
1666 ( 1.9%) |
87559 (59.23%) |
60272 (40.77%) |
11 |
dotd_district [integer] |
mean (sd) : 20.7 (26.39) |
2 : 41730 (28.2%) |
147816 (99.99%) |
15 (0.01%) |
12 |
functional_class [integer] |
mean (sd) : 12.19 (4.26) |
13 distinct val. |
88392 (59.79%) |
59439 (40.21%) |
13 |
highway_class [integer] |
mean (sd) : 5.52 (2.19) |
1 : 12467 (14.4%) |
86618 (58.59%) |
61213 (41.41%) |
14 |
logmile [numeric] |
mean (sd) : 5.27 (5.62) |
3054 distinct val. |
87559 (59.23%) |
60272 (40.77%) |
15 |
logmile_from [numeric] |
mean (sd) : 4.61 (5.49) |
1625 distinct val. |
88392 (59.79%) |
59439 (40.21%) |
16 |
logmile_to [numeric] |
mean (sd) : 5.86 (5.84) |
1761 distinct val. |
88392 (59.79%) |
59439 (40.21%) |
17 |
latitude [numeric] |
mean (sd) : 30.52 (2.69) |
65064 distinct val. |
117341 (79.38%) |
30490 (20.62%) |
18 |
longitude [numeric] |
mean (sd) : -91.4 (3.45) |
71682 distinct val. |
117292 (79.34%) |
30539 (20.66%) |
19 |
median_width [integer] |
mean (sd) : 27.39 (56.27) |
75 distinct val. |
88392 (59.79%) |
59439 (40.21%) |
20 |
milepoint [numeric] |
mean (sd) : 64.64 (84.82) |
17804 distinct val. |
86618 (58.59%) |
61213 (41.41%) |
21 |
milepost_from [numeric] |
mean (sd) : 70.58 (113.85) |
4501 distinct val. |
88392 (59.79%) |
59439 (40.21%) |
22 |
milepost_to [numeric] |
mean (sd) : 72.2 (113.81) |
4652 distinct val. |
88392 (59.79%) |
59439 (40.21%) |
23 |
num_lanes [integer] |
mean (sd) : 3.48 (1.43) |
9 distinct val. |
88392 (59.79%) |
59439 (40.21%) |
24 |
parish_cd [integer] |
mean (sd) : 28.9 (15.57) |
65 distinct val. |
146980 (99.42%) |
851 (0.58%) |
25 |
pavement_type [integer] |
mean (sd) : 61.44 (9.98) |
6 distinct val. |
88392 (59.79%) |
59439 (40.21%) |
26 |
pavement_width [integer] |
mean (sd) : 41.97 (18.35) |
73 distinct val. |
88392 (59.79%) |
59439 (40.21%) |
27 |
section_length [numeric] |
mean (sd) : 1.77 (1.91) |
849 distinct val. |
88392 (59.79%) |
59439 (40.21%) |
28 |
hwy_type_flag [integer] |
mean (sd) : 0 (0.04) |
2 distinct val. |
147831 (100%) |
0 (0%) |
29 |
hwy_num_flag [integer] |
mean (sd) : 0 (0.04) |
2 distinct val. |
147831 (100%) |
0 (0%) |
30 |
bypass_flag [integer] |
mean (sd) : 0.04 (0.19) |
2 distinct val. |
147831 (100%) |
0 (0%) |
31 |
milepost_flag [integer] |
mean (sd) : 2.8 (2.44) |
3 distinct val. |
147831 (100%) |
0 (0%) |
32 |
direction [character] |
|
84850 (97.0%) |
87460 (59.16%) |
60371 (40.84%) |
33 |
section_oid [integer] |
mean (sd) : 289416670.63 (279838654.09) |
7396 distinct val. |
86618 (58.59%) |
61213 (41.41%) |
34 |
type_acc [character] |
|
112572 (76.1%) |
147831 (100%) |
0 (0%) |
35 |
urban_area [integer] |
mean (sd) : 2.85 (1.22) |
5 distinct val. |
88392 (59.79%) |
59439 (40.21%) |
36 |
intersection [integer] |
mean (sd) : 0.36 (0.48) |
2 distinct val. |
147831 (100%) |
0 (0%) |
37 |
milepoint_from [numeric] |
mean (sd) : 63.74 (84.85) |
4418 distinct val. |
86618 (58.59%) |
61213 (41.41%) |
38 |
milepoint_to [numeric] |
mean (sd) : 65.55 (84.79) |
4601 distinct val. |
86618 (58.59%) |
61213 (41.41%) |
39 |
lrs_id [character] |
|
639 ( 1.1%) |
57916 (39.18%) |
89915 (60.82%) |
40 |
lrs_logmile [numeric] |
mean (sd) : 5.22 (5.64) |
2826 distinct val. |
57916 (39.18%) |
89915 (60.82%) |
# Read the 2010 crash table, treating "NA", empty, single-space and "NaN"
# fields as missing.
# Column 124 (HWY_BYPASS) contains values such as "1138-2" that fread's type
# sampling does not see, so it is pinned to character up front. Without this
# fread bumps the column type mid-read and coerces already-parsed values back
# to character, which may not be lossless (e.g. "00" becoming "0").
# The warning recorded below comes from a run made before the class was pinned.
dat2 <- fread(
  "2010CRASH_TB.csv",
  na.strings = c("NA", "", " ", "NaN"),
  colClasses = list(character = "HWY_BYPASS")
)
## Warning in fread("2010CRASH_TB.csv", na.strings = c("NA", "", " ", "NaN")):
## Bumped column 124 to type character on data row 1958, field contains
## '1138-2'. Coercing previously read values in this column from logical,
## integer or numeric back to character which may not be lossless; e.g., if
## '00' and '000' occurred before they will now be just '0', and there may
## be inconsistencies with treatment of ',,' and ',NA,' too (if they occurred
## in this column before the bump). If this matters please rerun and set
## 'colClasses' to 'character' for this column. Please note that column type
## detection uses a sample of 1,000 rows (100 rows at 10 points) so hopefully
## this message should be very rare. If reporting to datatable-help, please
## rerun and include the output from verbose=TRUE.
Read 94.7% of 147869 rows Read 147869 rows and 154 (of 154) columns from 0.092 GB file in 00:00:03
# Print the row and column counts of the crash table.
dim(dat2)
[1] 147869 154
# Per-column summary of dat2, rendered as a grid-style table with the graph
# column switched off.
dfSummary(
  dat2,
  style = "grid",
  plain.ascii = FALSE,
  graph.col = FALSE
)
dat2
N: 147869
No | Variable | Stats / Values | Freqs (% of Valid) | Valid | Missing |
---|---|---|---|---|---|
1 |
CRASH_NUM [character] |
|
1 (0.0%) |
147868 (100%) |
1 (0%) |
2 |
CRASH_NUM1 [character] |
|
1 (0.0%) |
147869 (100%) |
0 (0%) |
3 |
ISDS_CODE [character] |
|
29554 (88.1%) |
33538 (22.68%) |
114331 (77.32%) |
4 |
CRASH_DATE [integer] |
mean (sd) : 40359.83 (105.3) |
403 distinct val. |
147869 (100%) |
0 (0%) |
5 |
CRASH_TIME [numeric] |
mean (sd) : 367.54 (3.03) |
1450 distinct val. |
147869 (100%) |
0 (0%) |
6 |
CR_MONTH [integer] |
mean (sd) : 6.5 (3.43) |
12 distinct val. |
147869 (100%) |
0 (0%) |
7 |
CR_HOUR [integer] |
mean (sd) : 14.22 (5.32) |
24 distinct val. |
147869 (100%) |
0 (0%) |
8 |
LAT [numeric] |
mean (sd) : 18.8 (15.56) |
64838 distinct val. |
147869 (100%) |
0 (0%) |
9 |
LONG [numeric] |
mean (sd) : 55.31 (44.55) |
66165 distinct val. |
147869 (100%) |
0 (0%) |
10 |
ACCESS_CNTL_CD [character] |
|
120461 (81.7%) |
147449 (99.72%) |
420 (0.28%) |
11 |
ALCOHOL [logical] |
141322 (95.6%) |
147869 (100%) |
0 (0%) |
|
12 |
ALIGNMENT_CD [character] |
|
125978 (85.3%) |
147678 (99.87%) |
191 (0.13%) |
13 |
AMBULANCE [logical] |
117366 (79.4%) |
147869 (100%) |
0 (0%) |
|
14 |
AMB_SERVICE [character] |
|
6705 (22.3%) |
30025 (20.31%) |
117844 (79.69%) |
15 |
BYPASS_CD [character] |
|
2308 (33.4%) |
6902 (4.67%) |
140967 (95.33%) |
16 |
CITY_CD [integer] |
mean (sd) : 2.62 (4.47) |
28 distinct val. |
147869 (100%) |
0 (0%) |
17 |
PRI_CONTRIB_FAC_CD [character] |
|
111589 (75.8%) |
147320 (99.63%) |
549 (0.37%) |
18 |
SEC_CONTRIB_FAC_CD [character] |
|
95330 (72.8%) |
130918 (88.54%) |
16951 (11.46%) |
19 |
DAY_OF_WK [character] |
|
25981 (17.6%) |
147869 (100%) |
0 (0%) |
20 |
DIST_ZONE [character] |
|
2907 ( 3.0%) |
97407 (65.87%) |
50462 (34.13%) |
21 |
PUB_PROP_DAM [logical] |
142923 (96.7%) |
147869 (100%) |
0 (0%) |
|
22 |
RR_TRAIN_INV [logical] |
147762 (99.9%) |
147869 (100%) |
0 (0%) |
|
23 |
HIT_AND_RUN [logical] |
131116 (88.7%) |
147869 (100%) |
0 (0%) |
|
24 |
DRUGS [logical] |
146094 (98.8%) |
147869 (100%) |
0 (0%) |
|
25 |
FIRE_DEPT [character] |
|
637 ( 5.0%) |
12721 (8.6%) |
135148 (91.4%) |
26 |
INTER_ROAD [character] |
|
973 ( 0.7%) |
139404 (94.28%) |
8465 (5.72%) |
27 |
INVEST_AGENCY_CD [character] |
|
34271 (23.2%) |
147770 (99.93%) |
99 (0.07%) |
28 |
INVEST_COMP [logical] |
12180 ( 8.2%) |
147869 (100%) |
0 (0%) |
|
29 |
INVEST_OFFICER [character] |
|
358 ( 0.2%) |
144591 (97.78%) |
3278 (2.22%) |
30 |
LIGHTING_CD [character] |
|
106781 (72.6%) |
147160 (99.52%) |
709 (0.48%) |
31 |
LOC_TYPE_CD [character] |
|
3577 ( 2.4%) |
147555 (99.79%) |
314 (0.21%) |
32 |
MAN_COLL_CD [character] |
|
50541 (35.0%) |
144326 (97.6%) |
3543 (2.4%) |
33 |
MILEPOST [numeric] |
mean (sd) : 44.92 (1321.71) |
7494 distinct val. |
147868 (100%) |
1 (0%) |
34 |
NUM_DRI_IK [integer] |
mean (sd) : 0.31 (0.57) |
7 distinct val. |
147869 (100%) |
0 (0%) |
35 |
NUM_DRI_INJ [integer] |
mean (sd) : 0.3 (0.57) |
7 distinct val. |
147869 (100%) |
0 (0%) |
36 |
NUM_DRI_KIL [integer] |
mean (sd) : 0 (0.06) |
4 distinct val. |
147869 (100%) |
0 (0%) |
37 |
NUM_OCC_IK [integer] |
mean (sd) : 0.15 (0.63) |
27 distinct val. |
147869 (100%) |
0 (0%) |
38 |
NUM_OCC_INJ [integer] |
mean (sd) : 0.15 (0.63) |
27 distinct val. |
147869 (100%) |
0 (0%) |
39 |
NUM_OCC_KIL [integer] |
mean (sd) : 0 (0.04) |
4 distinct val. |
147869 (100%) |
0 (0%) |
40 |
NUM_PED_IK [integer] |
mean (sd) : 0.01 (0.1) |
7 distinct val. |
147869 (100%) |
0 (0%) |
41 |
NUM_PED_INJ [integer] |
mean (sd) : 0.01 (0.1) |
7 distinct val. |
147869 (100%) |
0 (0%) |
42 |
NUM_PED_KIL [integer] |
mean (sd) : 0 (0.02) |
2 distinct val. |
147869 (100%) |
0 (0%) |
43 |
NUM_TOT_IK [integer] |
mean (sd) : 0.47 (0.98) |
30 distinct val. |
147869 (100%) |
0 (0%) |
44 |
NUM_TOT_INJ [integer] |
mean (sd) : 0.46 (0.97) |
30 distinct val. |
147869 (100%) |
0 (0%) |
45 |
NUM_TOT_KIL [integer] |
mean (sd) : 0 (0.08) |
5 distinct val. |
147869 (100%) |
0 (0%) |
46 |
NUM_VEH [integer] |
mean (sd) : 1.89 (0.53) |
11 distinct val. |
147869 (100%) |
0 (0%) |
47 |
PARISH_CD [integer] |
mean (sd) : 28.91 (15.55) |
65 distinct val. |
147869 (100%) |
0 (0%) |
48 |
PRI_HWY_NUM [integer] |
mean (sd) : 251.95 (696.56) |
1007 distinct val. |
147848 (99.99%) |
21 (0.01%) |
49 |
PRI_ROAD_DIR [character] |
|
5944 (24.4%) |
24342 (16.46%) |
123527 (83.54%) |
50 |
PRI_ROAD_NUM [numeric] |
mean (sd) : 841.02 (2602.6) |
3525 distinct val. |
147726 (99.9%) |
143 (0.1%) |
51 |
PRI_ROAD_NAME [character] |
|
2337 ( 1.7%) |
140546 (95.05%) |
7323 (4.95%) |
52 |
PRI_ROAD_TYPE [character] |
|
13587 (17.4%) |
77932 (52.7%) |
69937 (47.3%) |
53 |
PRI_DIST [numeric] |
mean (sd) : 57.98 (2658.1) |
1500 distinct val. |
145701 (98.53%) |
2168 (1.47%) |
54 |
PRI_DIR [character] |
|
20 ( 0.0%) |
94606 (63.98%) |
53263 (36.02%) |
55 |
REPORT_NUM [character] |
|
1870 (2.4%) |
78694 (53.22%) |
69175 (46.78%) |
56 |
RESCUE_UNIT [logical] |
137252 (92.8%) |
147869 (100%) |
0 (0%) |
|
57 |
ROAD_COND_CD [character] |
|
140027 (94.8%) |
147665 (99.86%) |
204 (0.14%) |
58 |
ROAD_REL_CD [character] |
|
125560 (85.0%) |
147667 (99.86%) |
202 (0.14%) |
59 |
ROAD_TYPE_CD [character] |
|
14849 (10.1%) |
147650 (99.85%) |
219 (0.15%) |
60 |
RPT_ORIGIN [character] |
|
34034 (23.0%) |
147869 (100%) |
0 (0%) |
61 |
SEC_DIST [numeric] |
mean (sd) : 11 (139.73) |
1069 distinct val. |
142226 (96.18%) |
5643 (3.82%) |
62 |
SEC_DIR [character] |
|
1 ( 0.0%) |
23922 (16.18%) |
123947 (83.82%) |
63 |
SEC_INTERSECTION [logical] |
145954 (98.7%) |
147869 (100%) |
0 (0%) |
|
64 |
SEC_INTERSECTION_NOT [logical] |
100657 (68.1%) |
147869 (100%) |
0 (0%) |
|
65 |
INTERSECTION [logical] |
94758 (64.1%) |
147869 (100%) |
0 (0%) |
|
66 |
INTERSECTION_NOT [logical] |
60411 (40.8%) |
147869 (100%) |
0 (0%) |
|
67 |
SEC_INTER_ROAD [character] |
|
352 ( 1.3%) |
26288 (17.78%) |
121581 (82.22%) |
68 |
SEVERITY_CD [character] |
|
636 ( 0.4%) |
147869 (100%) |
0 (0%) |
69 |
SURF_COND_CD [character] |
|
126463 (85.6%) |
147734 (99.91%) |
135 (0.09%) |
70 |
SURF_TYPE_CD [character] |
|
88378 (61.4%) |
143875 (97.3%) |
3994 (2.7%) |
71 |
TIME_AMB_ARR [numeric] |
mean (sd) : 367.09 (2.71) |
1448 distinct val. |
147867 (100%) |
2 (0%) |
72 |
TIME_AMB_ARR_HOSP [numeric] |
mean (sd) : 367.06 (2.14) |
1444 distinct val. |
147864 (100%) |
5 (0%) |
73 |
TIME_AMB_CALLED [numeric] |
mean (sd) : 367.09 (2.87) |
1449 distinct val. |
147868 (100%) |
1 (0%) |
74 |
TIME_AMB_DEPART [numeric] |
mean (sd) : 367.09 (2.71) |
1447 distinct val. |
147867 (100%) |
2 (0%) |
75 |
TIME_RESCUE_CALLED [numeric] |
mean (sd) : 367.03 (2.14) |
1424 distinct val. |
147864 (100%) |
5 (0%) |
76 |
TIME_RESCUE_ARR [numeric] |
mean (sd) : 367.03 (1.91) |
1429 distinct val. |
147863 (100%) |
6 (0%) |
77 |
TIME_LANES_OPEN [numeric] |
mean (sd) : 367.53 (3.03) |
1450 distinct val. |
147869 (100%) |
0 (0%) |
78 |
TIME_POLICE_NOTE [numeric] |
mean (sd) : 367.54 (3.03) |
1450 distinct val. |
147869 (100%) |
0 (0%) |
79 |
TIME_POLICE_ARR [numeric] |
mean (sd) : 367.55 (3.03) |
1450 distinct val. |
147869 (100%) |
0 (0%) |
80 |
TRK_BUS_INV [logical] |
143096 (96.8%) |
147869 (100%) |
0 (0%) |
|
81 |
TROOP [character] |
|
10455 (20.8%) |
50242 (33.98%) |
97627 (66.02%) |
82 |
WEATHER_CD [character] |
|
108915 (73.7%) |
147705 (99.89%) |
164 (0.11%) |
83 |
CODE [integer] |
mean (sd) : 2892.64 (1554.51) |
315 distinct val. |
147869 (100%) |
0 (0%) |
84 |
EST_ALCOHOL [integer] |
mean (sd) : 0.06 (0.24) |
3 distinct val. |
147869 (100%) |
0 (0%) |
85 |
WHO_ENTERED [character] |
|
9202 ( 6.6%) |
138885 (93.92%) |
8984 (6.08%) |
86 |
DATE_CHANGED [numeric] |
mean (sd) : 40405.49 (299.53) |
487 distinct val. |
147869 (100%) |
0 (0%) |
87 |
PROCESS_DATE [integer] |
mean (sd) : 40061.16 (3816.58) |
708 distinct val. |
147869 (100%) |
0 (0%) |
88 |
CITY [character] |
|
16315 (16.8%) |
96857 (65.5%) |
51012 (34.5%) |
89 |
CONST_MAINT_ZN [logical] |
146012 (98.7%) |
147869 (100%) |
0 (0%) |
|
90 |
FATALITY [logical] |
147213 (99.6%) |
147869 (100%) |
0 (0%) |
|
91 |
HWY_TYPE_CD [character] |
|
15039 (10.3%) |
146385 (99%) |
1484 (1%) |
92 |
INJURY [logical] |
120323 (81.4%) |
147869 (100%) |
0 (0%) |
|
93 |
INTER_ROAD_DIR [character] |
|
4425 (25.3%) |
17514 (11.84%) |
130355 (88.16%) |
94 |
INTER_ROAD_NAME [character] |
|
1159 ( 0.8%) |
144868 (97.97%) |
3001 (2.03%) |
95 |
INTER_ROAD_NUM [numeric] |
mean (sd) : 232.87 (1207.18) |
961 distinct val. |
142279 (96.22%) |
5590 (3.78%) |
96 |
INTER_ROAD_TYPE [character] |
|
15650 (20.8%) |
75048 (50.75%) |
72821 (49.25%) |
97 |
INVEST_AGENCY_NAME [character] |
|
34030 (23.0%) |
147802 (99.95%) |
67 (0.05%) |
98 |
INVEST_OFFICER_BADGENUM [character] |
|
424 ( 0.3%) |
145578 (98.45%) |
2291 (1.55%) |
99 |
PARISH [character] |
|
20336 (13.8%) |
147869 (100%) |
0 (0%) |
100 |
PEDESTRIAN [logical] |
146942 (99.4%) |
147869 (100%) |
0 (0%) |
|
101 |
PHOTOS_MADE [logical] |
128838 (87.1%) |
147869 (100%) |
0 (0%) |
|
102 |
PRI_ROAD [character] |
|
1348 ( 1.0%) |
134924 (91.25%) |
12945 (8.75%) |
103 |
QUADRANT_NE [logical] |
147385 (99.7%) |
147869 (100%) |
0 (0%) |
|
104 |
QUADRANT_NW [logical] |
147250 (99.6%) |
147869 (100%) |
0 (0%) |
|
105 |
QUADRANT_SE [logical] |
147430 (99.7%) |
147869 (100%) |
0 (0%) |
|
106 |
QUADRANT_SW [logical] |
147378 (99.7%) |
147869 (100%) |
0 (0%) |
|
107 |
REPORT_NUM_2 [character] |
|
1250 (1.5%) |
85552 (57.86%) |
62317 (42.14%) |
108 |
SEC_INTER_ROAD_DIR [character] |
|
1004 (33.8%) |
2975 (2.01%) |
144894 (97.99%) |
109 |
SEC_INTER_ROAD_NAME [character] |
|
407 ( 2.1%) |
19467 (13.17%) |
128402 (86.83%) |
110 |
SEC_INTER_ROAD_NUM [numeric] |
mean (sd) : 31.01 (363.09) |
443 distinct val. |
142167 (96.14%) |
5702 (3.86%) |
111 |
SEC_INTER_ROAD_TYPE [character] |
|
2147 (19.2%) |
11177 (7.56%) |
136692 (92.44%) |
112 |
SERVICE_ROAD_N [logical] |
147500 (99.8%) |
147869 (100%) |
0 (0%) |
|
113 |
SERVICE_ROAD_S [logical] |
147496 (99.8%) |
147869 (100%) |
0 (0%) |
|
114 |
SERVICE_ROAD_E [logical] |
147717 (99.9%) |
147869 (100%) |
0 (0%) |
|
115 |
SERVICE_ROAD_W [logical] |
147730 (99.9%) |
147869 (100%) |
0 (0%) |
|
116 |
SERVICE_ROAD [logical] |
All NA’s |
0 (0%) |
147869 (100%) |
|
117 |
PRI_MEASUREMENT_MILES [logical] |
104686 (70.8%) |
147869 (100%) |
0 (0%) |
|
118 |
PRI_MEASUREMENT_FEET [logical] |
94718 (64.1%) |
147869 (100%) |
0 (0%) |
|
119 |
SEC_MEASUREMENT_FEET [logical] |
143858 (97.3%) |
147869 (100%) |
0 (0%) |
|
120 |
SEC_MEASUREMENT_MILES [logical] |
127553 (86.3%) |
147869 (100%) |
0 (0%) |
|
121 |
DATE_ENTERED [numeric] |
mean (sd) : 40327.63 (1662.23) |
492 distinct val. |
147869 (100%) |
0 (0%) |
122 |
DISTRICT [integer] |
mean (sd) : 20.76 (26.41) |
2 : 41608 (28.1%) |
147853 (99.99%) |
16 (0.01%) |
123 |
CRASH_YEAR [integer] |
mean (sd) : 2010 (0.03) |
2004 : 1 ( 0.0%) |
147869 (100%) |
0 (0%) |
124 |
HWY_BYPASS [character] |
|
230 (10.7%) |
2146 (1.45%) |
145723 (98.55%) |
125 |
STATE_RPT_NUM [integer] |
mean (sd) : 0 (0) |
1 distinct val. |
6 (0%) |
147863 (100%) |
126 |
REV_NUM [integer] |
mean (sd) : 0.08 (0.33) |
9 distinct val. |
147869 (100%) |
0 (0%) |
127 |
CURR_STAT [logical] |
147869 (100.0%) |
147869 (100%) |
0 (0%) |
|
128 |
REV_STAT [integer] |
mean (sd) : 0.01 (0.12) |
6 distinct val. |
146724 (99.23%) |
1145 (0.77%) |
129 |
GLOB_STAT [logical] |
147869 (100.0%) |
147869 (100%) |
0 (0%) |
|
130 |
OUT [character] |
|
86156 (60.6%) |
142067 (96.08%) |
5802 (3.92%) |
131 |
PAGE_OWNER [character] |
|
438 (24.5%) |
1789 (1.21%) |
146080 (98.79%) |
132 |
REV_DATE [integer] |
mean (sd) : 10445.77 (17366.44) |
411 distinct val. |
142155 (96.14%) |
5714 (3.86%) |
133 |
PAGE_OWNER_APPROVER [character] |
|
438 (24.5%) |
1789 (1.21%) |
146080 (98.79%) |
134 |
VERSION_ORIGINAL [character] |
|
605 ( 0.7%) |
85556 (57.86%) |
62313 (42.14%) |
135 |
VERSION_CURRENT [character] |
|
427 ( 0.5%) |
88189 (59.64%) |
59680 (40.36%) |
136 |
num_tot_inj_b_veh [integer] |
mean (sd) : 0.01 (0.11) |
6 distinct val. |
147705 (99.89%) |
164 (0.11%) |
137 |
num_tot_inj_c_veh [integer] |
mean (sd) : 0.08 (0.35) |
11 distinct val. |
147705 (99.89%) |
164 (0.11%) |
138 |
num_tot_inj_d_veh [integer] |
mean (sd) : 0.37 (0.88) |
29 distinct val. |
147705 (99.89%) |
164 (0.11%) |
139 |
num_tot_inj_b_ped [integer] |
mean (sd) : 0 (0.03) |
4 distinct val. |
147869 (100%) |
0 (0%) |
140 |
num_tot_inj_c_ped [integer] |
mean (sd) : 0 (0.06) |
4 distinct val. |
147869 (100%) |
0 (0%) |
141 |
num_tot_inj_d_ped [integer] |
mean (sd) : 0 (0.06) |
5 distinct val. |
147869 (100%) |
0 (0%) |
142 |
num_tot_inj_b [integer] |
mean (sd) : 0.01 (0.11) |
6 distinct val. |
147705 (99.89%) |
164 (0.11%) |
143 |
num_tot_inj_c [integer] |
mean (sd) : 0.08 (0.35) |
11 distinct val. |
147705 (99.89%) |
164 (0.11%) |
144 |
num_tot_inj_d [integer] |
mean (sd) : 0.37 (0.88) |
29 distinct val. |
147705 (99.89%) |
164 (0.11%) |
145 |
num_occ [integer] |
mean (sd) : 2.72 (1.88) |
56 distinct val. |
147705 (99.89%) |
164 (0.11%) |
146 |
num_occ_no_seatb [integer] |
mean (sd) : 0.1 (1.13) |
52 distinct val. |
147705 (99.89%) |
164 (0.11%) |
147 |
violation [integer] |
mean (sd) : 1 (0.37) |
7 distinct val. |
147705 (99.89%) |
164 (0.11%) |
148 |
speed [integer] |
mean (sd) : 0 (0) |
1 distinct val. |
6 (0%) |
147863 (100%) |
149 |
num_hospitalized_veh [integer] |
mean (sd) : 0.22 (0.65) |
18 distinct val. |
147705 (99.89%) |
164 (0.11%) |
150 |
num_hospitalized_ped [integer] |
mean (sd) : 0.01 (0.08) |
5 distinct val. |
147869 (100%) |
0 (0%) |
151 |
num_tot_hospitalized [integer] |
mean (sd) : 0.23 (0.66) |
18 distinct val. |
147705 (99.89%) |
164 (0.11%) |
152 |
agressive [integer] |
mean (sd) : 0.7 (0.48) |
6 distinct val. |
147705 (99.89%) |
164 (0.11%) |
153 |
distraction [integer] |
mean (sd) : 0.58 (0.52) |
6 distinct val. |
147705 (99.89%) |
164 (0.11%) |
154 |
HSRG_DATE [integer] |
mean (sd) : 40407.52 (117.45) |
462 distinct val. |
147869 (100%) |
0 (0%) |
library(DataExplorer)
## Warning: package 'DataExplorer' was built under R version 3.4.3
# Plot the missing-value profile of each table, then bar charts for dat2.
# The messages recorded below list the dat2 columns that plot_bar ignored for
# having more than 50 categories.
plot_missing(dat1)
plot_missing(dat2)
plot_bar(dat2)
## 27 columns ignored with more than 50 categories.
## CRASH_NUM: 147869 categories
## CRASH_NUM1: 147869 categories
## ISDS_CODE: 86 categories
## AMB_SERVICE: 5608 categories
## DIST_ZONE: 4319 categories
## FIRE_DEPT: 4507 categories
## INTER_ROAD: 41269 categories
## INVEST_OFFICER: 19042 categories
## PRI_ROAD_NAME: 20189 categories
## PRI_ROAD_TYPE: 208 categories
## REPORT_NUM: 70243 categories
## RPT_ORIGIN: 161 categories
## SEC_INTER_ROAD: 9527 categories
## WHO_ENTERED: 4608 categories
## CITY: 554 categories
## INTER_ROAD_NAME: 25868 categories
## INTER_ROAD_TYPE: 220 categories
## INVEST_AGENCY_NAME: 1497 categories
## INVEST_OFFICER_BADGENUM: 6251 categories
## PARISH: 65 categories
## PRI_ROAD: 40175 categories
## REPORT_NUM_2: 79934 categories
## SEC_INTER_ROAD_NAME: 5023 categories
## SEC_INTER_ROAD_TYPE: 92 categories
## HWY_BYPASS: 219 categories
## PAGE_OWNER: 568 categories
## PAGE_OWNER_APPROVER: 320 categories
# Correlation plot for dat2; `use = "pairwise.complete.obs"` is forwarded to
# cor(), as the warning recorded below shows.
# NOTE(review): that run warns "the standard deviation is zero". The dfSummary
# output for dat2 lists columns with a single distinct value (e.g.
# STATE_RPT_NUM, speed), which are a likely cause -- consider dropping
# constant columns before plotting; confirm against the data.
plot_correlation(dat2, use = "pairwise.complete.obs")
## 28 features with more than 20 categories ignored!
## CRASH_NUM: 147869 categories
## CRASH_NUM1: 147869 categories
## ISDS_CODE: 86 categories
## AMB_SERVICE: 5608 categories
## BYPASS_CD: 43 categories
## DIST_ZONE: 4319 categories
## FIRE_DEPT: 4507 categories
## INTER_ROAD: 41269 categories
## INVEST_OFFICER: 19042 categories
## PRI_ROAD_NAME: 20189 categories
## PRI_ROAD_TYPE: 208 categories
## REPORT_NUM: 70243 categories
## RPT_ORIGIN: 161 categories
## SEC_INTER_ROAD: 9527 categories
## WHO_ENTERED: 4608 categories
## CITY: 554 categories
## INTER_ROAD_NAME: 25868 categories
## INTER_ROAD_TYPE: 220 categories
## INVEST_AGENCY_NAME: 1497 categories
## INVEST_OFFICER_BADGENUM: 6251 categories
## PARISH: 65 categories
## PRI_ROAD: 40175 categories
## REPORT_NUM_2: 79934 categories
## SEC_INTER_ROAD_NAME: 5023 categories
## SEC_INTER_ROAD_TYPE: 92 categories
## HWY_BYPASS: 219 categories
## PAGE_OWNER: 568 categories
## PAGE_OWNER_APPROVER: 320 categories
## Warning in cor(final_data, ...): the standard deviation is zero