forcats

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(forcats)
# Switch to the project folder so the relative CSV name below resolves.
# NOTE(review): this is an absolute, machine-specific path; the script only
# runs where this folder exists -- consider a project-relative path.
setwd("C:/Users/subas/Syncplicity/MyProjects_IMP/NCHRP 03-134")

# Complete master table as stored on disk.
dat <- read.csv("New_Cat_Master_Date1.csv")

# Keep only the columns that are summarised and lumped further down.
dat1 <- dat[c(
  "STATE", "RDWY_ALIGN", "LIGHT_COND", "V1_VHCL_BDY_TYP",
  "RDWY_GRADE", "RD_SRFC_COND", "LOC_WTHN_ZONE", "WRK_PRSNT",
  "ALC_DRUG_INV", "V1_VHCL_MOVE", "EVNT_WTHR_COND", "WRK_ZONE_TYP",
  "D1_DR_COND", "TRAF_CTRL", "FRST_HARM_EVNT", "V1_FRST_DR_ACTN",
  "MED_TYPE", "D1_VISN_OBST", "V1_SCND_DR_ACTN", "SUR_TYPE",
  "FUN_CLASS", "SHLD_TYPE_R", "SHLD_TYPE_L", "ACC_SID_RD"
)]

library(SmartEDA)
## Registered S3 method overwritten by 'GGally':
##   method from   
##   +.gg   ggplot2
# Build the SmartEDA summary table for dat1 with no target variable.
# `per = FALSE` is written out in full: `F` is an ordinary variable that can
# be reassigned, whereas `FALSE` is a reserved word.
tab <- ExpCTable(
  dat1,
  Target = NULL,
  clim = 5,
  nlim = 15,
  round = 2,
  bin = NULL,
  per = FALSE
)
library(DT)
# Show the summary as a DT table widget.
datatable(tab)
# Level counts for roadway alignment before any lumping.
table(dat1$RDWY_ALIGN)
## 
##   Curve     Other straight 
##     3518      427    14308
library(forcats)
# Lump the less frequent levels of three columns with fct_lump(n = 3); the
# lumped levels are pooled into "Other". All other columns of dat1 are
# carried over unchanged.
dat2 <- mutate(
  dat1,
  RDWY_GRADE = fct_lump(RDWY_GRADE, n = 3),
  V1_VHCL_MOVE = fct_lump(V1_VHCL_MOVE, n = 3),
  V1_FRST_DR_ACTN = fct_lump(V1_FRST_DR_ACTN, n = 3)
)

# Level counts after lumping: roadway grade.
table(dat2[["RDWY_GRADE"]])
## 
##                     Grade Hillcrest/Uphill/Downhill                     Level 
##                      2690                      1458                     13905 
##                     Other 
##                       200
# Level counts after lumping: movement of vehicle 1.
table(dat2[["V1_VHCL_MOVE"]])
## 
## Straight Ahead        Unknown          Other 
##           4724          12484           1045
# Level counts after lumping: first driver action of vehicle 1.
table(dat2[["V1_FRST_DR_ACTN"]])
## 
## No Contributing Action                Unknown              Violation 
##                   6259                   2654                   6266 
##                  Other 
##                   3074
### Before lumping: bar chart of V1_FRST_DR_ACTN from dat1, levels ordered
### by frequency, drawn with flipped axes.
ggplot(data = dat1, mapping = aes(x = fct_infreq(V1_FRST_DR_ACTN))) +
  geom_bar() +
  theme_bw(base_size = 16) +
  coord_flip()

### After lumping: the same chart drawn from dat2, where the lumped levels
### appear as "Other".
ggplot(data = dat2, mapping = aes(x = fct_infreq(V1_FRST_DR_ACTN))) +
  geom_bar() +
  theme_bw(base_size = 16) +
  coord_flip()