##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(forcats)
# Read the input CSV from the project folder.
# NOTE(review): the working directory is a hard-coded, machine-specific
# absolute path; the script only runs where this folder exists.
setwd("C:/Users/subas/Syncplicity/MyProjects_IMP/NCHRP 03-134")
dat <- read.csv("New_Cat_Master_Date1.csv")

# Restrict the data to the 24 columns of interest.
dat1 <- dat[c(
  "STATE", "RDWY_ALIGN", "LIGHT_COND", "V1_VHCL_BDY_TYP", "RDWY_GRADE",
  "RD_SRFC_COND", "LOC_WTHN_ZONE", "WRK_PRSNT", "ALC_DRUG_INV",
  "V1_VHCL_MOVE", "EVNT_WTHR_COND", "WRK_ZONE_TYP", "D1_DR_COND",
  "TRAF_CTRL", "FRST_HARM_EVNT", "V1_FRST_DR_ACTN", "MED_TYPE",
  "D1_VISN_OBST", "V1_SCND_DR_ACTN", "SUR_TYPE", "FUN_CLASS",
  "SHLD_TYPE_R", "SHLD_TYPE_L", "ACC_SID_RD"
)]
library(SmartEDA)
## Registered S3 method overwritten by 'GGally':
## method from
## +.gg ggplot2
# Build a summary table of dat1 with SmartEDA::ExpCTable (no target
# variable) and display it as a DT table.
# `per` is spelled out as FALSE: `F` is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved constant.
tab <- ExpCTable(
  dat1,
  Target = NULL,
  clim = 5,
  nlim = 15,
  round = 2,
  bin = NULL,
  per = FALSE
)
library(DT)
datatable(tab)
##
## Curve Other straight
## 3518 427 14308
library(forcats)
# Collapse rare categories: for each of the three columns below, keep the
# 3 most frequent levels and pool the remaining levels into "Other".
dat2 <- dat1 %>%
  mutate(
    RDWY_GRADE = fct_lump(RDWY_GRADE, n = 3),
    V1_VHCL_MOVE = fct_lump(V1_VHCL_MOVE, n = 3),
    V1_FRST_DR_ACTN = fct_lump(V1_FRST_DR_ACTN, n = 3)
  )

# Level counts after lumping, one table per lumped column.
table(dat2$RDWY_GRADE)
##
## Grade Hillcrest/Uphill/Downhill Level
## 2690 1458 13905
## Other
## 200
# This call was missing although its recorded output (below) was present.
table(dat2$V1_VHCL_MOVE)
##
## Straight Ahead Unknown Other
## 4724 12484 1045
table(dat2$V1_FRST_DR_ACTN)
##
## No Contributing Action Unknown Violation
## 6259 2654 6266
## Other
## 3074
# Horizontal bar charts of V1_FRST_DR_ACTN with levels ordered by frequency,
# drawn once from the original data and once from the lumped data.
# Both plots share the same layers, so they are defined a single time.
bar_layers <- list(
  geom_bar(),
  coord_flip(),
  theme_bw(base_size = 16)
)

### before
ggplot(dat1, aes(x = fct_infreq(V1_FRST_DR_ACTN))) + bar_layers

### after
ggplot(dat2, aes(x = fct_infreq(V1_FRST_DR_ACTN))) + bar_layers
