# NOTE(review): absolute, machine-specific path. It is kept because the
# relative file names passed to read.csv() in this script are resolved against
# this working directory.
setwd("/Users/subasishdas1/Desktop/wwc")
list.files()
## [1] "a1.csv"               "as.html"              "as.Rmd"              
## [4] "m1.csv"               "WWC_2008_13_Fin1.csv"
# Read the main data set. stringsAsFactors = TRUE is spelled out because the
# default changed to FALSE in R 4.0.0; the MCA fitted on `am` works on
# categorical (factor) columns, so the columns are kept as factors regardless
# of the R version in use.
am <- read.csv("WWC_2008_13_Fin1.csv", stringsAsFactors = TRUE)
dim(am)
## [1] 2651   20
head(am)
##       Hour Access_Control      Alignment Contributing_Factor
## 1     Peak     No Control Straight-Level          Violations
## 2 Off-peak     No Control Straight-Level          Violations
## 3     Peak     No Control Straight-Level          Violations
## 4 Off-peak     No Control Straight-Level          Violations
## 5     Peak     No Control    Curve-Level          Violations
## 6 Off-peak     No Control Straight-Level          Violations
##                         Lighting     Locality   Collision
## 1                       Daylight     Business    Rear End
## 2 Dark - Continuous Street Light Residential        Other
## 3                       Daylight     Business     Head-On
## 4 Dark - Continuous Street Light Residential    Sideswipe
## 5 Dark - Continuous Street Light     Business     Head-On
## 6                       Daylight     Business Right Angle
##                                  Road_Type  Severity             Weather
## 1 Two-Way Road With No Physical Separation No Injury               Clear
## 2 Two-Way Road With No Physical Separation No Injury Rain/Fog/Sleet/Snow
## 3  Two-Way Road With A Physical Separation Complaint Rain/Fog/Sleet/Snow
## 4 Two-Way Road With No Physical Separation No Injury              Cloudy
## 5  Two-Way Road With A Physical Separation No Injury Rain/Fog/Sleet/Snow
## 6 Two-Way Road With No Physical Separation Complaint              Cloudy
##            Roadway     Damage Dr_Age Dr_Condition Dr_Gender
## 1 Street/Lane/Road     Front   45-54   Distracted      Male
## 2            Drive    Unknown  15-24      Unknown   Unknown
## 3 Street/Lane/Road     Front   25-34   Distracted      Male
## 4 Street/Lane/Road Front Left  15-24   Distracted      Male
## 5            Other     Front   15-24   Distracted    Female
## 6            Drive       None  25-34       Normal      Male
##             Reason    Traffic_Control    Traffic_Condition    Violation
## 1 Driver Violation              Other          No Controls Driver Error
## 2            Other              Other          No Controls Driver Error
## 3 Driver Violation              Other Controls Functioning Driver Error
## 4 Driver Violation Yellow Dashed Line Controls Functioning Driver Error
## 5 Driver Condition              Other          No Controls Driver Error
## 6 Driver Violation              Other          No Controls        Other
##     Posted_Speed
## 1      21-30 mph
## 2 20 mph or less
## 3      51-60 mph
## 4      21-30 mph
## 5 20 mph or less
## 6      41-50 mph
library(FactoMineR)
library(ggplot2)
library(ggrepel)

# Number of distinct categories in each column of `am`, as a named integer
# vector (names are the column names).
# vapply() iterates over the data frame's columns directly, so the frame is not
# coerced to a matrix as apply() would do, and the result type is fixed to one
# integer per column.
cats <- vapply(am, function(x) nlevels(as.factor(x)), integer(1))
cats
##                Hour      Access_Control           Alignment 
##                   2                   4                   7 
## Contributing_Factor            Lighting            Locality 
##                   4                   6                   3 
##           Collision           Road_Type            Severity 
##                   8                   5                   5 
##             Weather             Roadway              Damage 
##                   4                   7                  11 
##              Dr_Age        Dr_Condition           Dr_Gender 
##                   7                   7                   3 
##              Reason     Traffic_Control   Traffic_Condition 
##                   5                   6                   5 
##           Violation        Posted_Speed 
##                   6                   7
# Fit a multiple correspondence analysis on all columns of `am`.
# graph = FALSE asks MCA() not to draw its own default plots.
mca1 <- MCA(am, graph = FALSE)

# Category coordinates from the fit, plus a `Variable` column naming the source
# variable of each category: every name in `cats` is repeated once per
# category counted for it.
mca1_vars_df <- data.frame(
  mca1$var$coord,
  Variable = rep(names(cats), times = cats)
)
library(RColorBrewer)


# Load the coordinate table from disk; this overwrites any existing
# `mca1_vars_df` in the session.
# NOTE(review): presumably m1.csv is an exported/edited copy of the MCA
# category coordinates - confirm where the file comes from.
mca1_vars_df <- read.csv("m1.csv")

# Use the first column as row names; the plots below take their labels from
# rownames(mca1_vars_df).
rownames(mca1_vars_df) <- mca1_vars_df[[1]]

# Labelled scatter of the first two MCA dimensions, restricted to
# [-0.75, 0.75] on both axes.
# colour and size are constants, so they are set outside aes(). Inside aes()
# they are treated as data: "red" becomes a one-level discrete variable drawn
# in the default palette colour (not red), and the size goes through the size
# scale instead of being used as given.
# The scale limits drop rows that fall outside the window, which is why
# ggplot2 warns about removed rows.
ggplot(data = mca1_vars_df, aes(x = Dim.1, y = Dim.2)) +
  geom_point(colour = "red", size = 3) +
  geom_text_repel(aes(label = rownames(mca1_vars_df)), size = 4, force = 4) +
  scale_y_continuous(limits = c(-0.75, 0.75)) +
  scale_x_continuous(limits = c(-0.75, 0.75)) +
  theme_bw() +
  theme(legend.position = "none")
## Warning: Removed 34 rows containing missing values (geom_point).
## Warning: Removed 34 rows containing missing values (geom_text_repel).

# Zoomed view: Dim.1 in [-0.1, 0.75], Dim.2 in [0, 0.25].
# colour and size are constants, so they are set outside aes(); inside aes()
# they would be mapped as data (default palette colour instead of red, size
# passed through the size scale).
# Rows outside the scale limits are dropped, hence the "Removed rows" warnings.
ggplot(data = mca1_vars_df, aes(x = Dim.1, y = Dim.2)) +
  geom_point(colour = "red", size = 4) +
  geom_text_repel(aes(label = rownames(mca1_vars_df)), size = 5, force = 0.9) +
  scale_y_continuous(limits = c(0, 0.25)) +
  scale_x_continuous(limits = c(-0.1, 0.75)) +
  theme_bw() +
  theme(legend.position = "none")
## Warning: Removed 98 rows containing missing values (geom_point).
## Warning: Removed 98 rows containing missing values (geom_text_repel).

# Zoomed view: Dim.1 in [-0.8, 0.1], Dim.2 in [0, 0.5].
# colour and size are constants, so they are set outside aes(); inside aes()
# they would be mapped as data (default palette colour instead of red, size
# passed through the size scale).
# Rows outside the scale limits are dropped, hence the "Removed rows" warnings.
ggplot(data = mca1_vars_df, aes(x = Dim.1, y = Dim.2)) +
  geom_point(colour = "red", size = 4) +
  geom_text_repel(aes(label = rownames(mca1_vars_df)), size = 5, force = 0.9) +
  scale_y_continuous(limits = c(0, 0.5)) +
  scale_x_continuous(limits = c(-0.8, 0.1)) +
  theme_bw() +
  theme(legend.position = "none")
## Warning: Removed 90 rows containing missing values (geom_point).
## Warning: Removed 90 rows containing missing values (geom_text_repel).

# Zoomed view: Dim.1 in [-1, 0.1], Dim.2 in [-0.4, 0].
# colour and size are constants, so they are set outside aes(); inside aes()
# they would be mapped as data (default palette colour instead of red, size
# passed through the size scale).
# Rows outside the scale limits are dropped, hence the "Removed rows" warnings.
ggplot(data = mca1_vars_df, aes(x = Dim.1, y = Dim.2)) +
  geom_point(colour = "red", size = 4) +
  geom_text_repel(aes(label = rownames(mca1_vars_df)), size = 5, force = 0.9) +
  scale_y_continuous(limits = c(-0.4, 0)) +
  scale_x_continuous(limits = c(-1, 0.1)) +
  theme_bw() +
  theme(legend.position = "none")
## Warning: Removed 82 rows containing missing values (geom_point).
## Warning: Removed 82 rows containing missing values (geom_text_repel).

# Zoomed view: Dim.1 in [-0.15, 0.85], Dim.2 in [-0.4, 0].
# colour and size are constants, so they are set outside aes(); inside aes()
# they would be mapped as data (default palette colour instead of red, size
# passed through the size scale).
# Rows outside the scale limits are dropped, hence the "Removed rows" warnings.
ggplot(data = mca1_vars_df, aes(x = Dim.1, y = Dim.2)) +
  geom_point(colour = "red", size = 4) +
  geom_text_repel(aes(label = rownames(mca1_vars_df)), size = 5, force = 0.9) +
  scale_y_continuous(limits = c(-0.4, 0)) +
  scale_x_continuous(limits = c(-0.15, 0.85)) +
  theme_bw() +
  theme(legend.position = "none")
## Warning: Removed 92 rows containing missing values (geom_point).
## Warning: Removed 92 rows containing missing values (geom_text_repel).

### Conventional
# Plain-text version of the lower-right zoom: row names are drawn directly at
# their (Dim.1, Dim.2) position with geom_text(), over grey reference lines
# through the origin. Rows outside the scale limits are dropped.
ggplot(
  data = mca1_vars_df,
  mapping = aes(x = Dim.1, y = Dim.2, label = rownames(mca1_vars_df))
) +
  geom_hline(yintercept = 0, colour = "gray70") +
  geom_vline(xintercept = 0, colour = "gray70") +
  geom_text(size = 5) +
  scale_x_continuous(limits = c(-0.15, 0.85)) +
  scale_y_continuous(limits = c(-0.4, 0)) +
  theme_bw() +
  ggtitle("MCA plot of variables using R package FactoMineR")
## Warning: Removed 92 rows containing missing values (geom_text).