# Load the crash records into `am` and take a first look at them.
# NOTE(review): the absolute path is specific to the original author's
# machine. It is kept because the relative file reads in this script
# ("WWC_2008_13_Fin1.csv", "m1.csv") resolve against this working directory.
setwd("/Users/subasishdas1/Desktop/wwc")
list.files()
## [1] "a1.csv" "as.html" "as.Rmd"
## [4] "m1.csv" "WWC_2008_13_Fin1.csv"
# Ask for factors explicitly: since R 4.0.0 read.csv() leaves text columns as
# character by default, while the level counts and MCA() call further down
# work on categorical (factor) columns.
am <- read.csv("WWC_2008_13_Fin1.csv", stringsAsFactors = TRUE)
dim(am)
## [1] 2651 20
head(am)
## Hour Access_Control Alignment Contributing_Factor
## 1 Peak No Control Straight-Level Violations
## 2 Off-peak No Control Straight-Level Violations
## 3 Peak No Control Straight-Level Violations
## 4 Off-peak No Control Straight-Level Violations
## 5 Peak No Control Curve-Level Violations
## 6 Off-peak No Control Straight-Level Violations
## Lighting Locality Collision
## 1 Daylight Business Rear End
## 2 Dark - Continuous Street Light Residential Other
## 3 Daylight Business Head-On
## 4 Dark - Continuous Street Light Residential Sideswipe
## 5 Dark - Continuous Street Light Business Head-On
## 6 Daylight Business Right Angle
## Road_Type Severity Weather
## 1 Two-Way Road With No Physical Separation No Injury Clear
## 2 Two-Way Road With No Physical Separation No Injury Rain/Fog/Sleet/Snow
## 3 Two-Way Road With A Physical Separation Complaint Rain/Fog/Sleet/Snow
## 4 Two-Way Road With No Physical Separation No Injury Cloudy
## 5 Two-Way Road With A Physical Separation No Injury Rain/Fog/Sleet/Snow
## 6 Two-Way Road With No Physical Separation Complaint Cloudy
## Roadway Damage Dr_Age Dr_Condition Dr_Gender
## 1 Street/Lane/Road Front 45-54 Distracted Male
## 2 Drive Unknown 15-24 Unknown Unknown
## 3 Street/Lane/Road Front 25-34 Distracted Male
## 4 Street/Lane/Road Front Left 15-24 Distracted Male
## 5 Other Front 15-24 Distracted Female
## 6 Drive None 25-34 Normal Male
## Reason Traffic_Control Traffic_Condition Violation
## 1 Driver Violation Other No Controls Driver Error
## 2 Other Other No Controls Driver Error
## 3 Driver Violation Other Controls Functioning Driver Error
## 4 Driver Violation Yellow Dashed Line Controls Functioning Driver Error
## 5 Driver Condition Other No Controls Driver Error
## 6 Driver Violation Other No Controls Other
## Posted_Speed
## 1 21-30 mph
## 2 20 mph or less
## 3 51-60 mph
## 4 21-30 mph
## 5 20 mph or less
## 6 41-50 mph
library(FactoMineR)
library(ggplot2)
library(ggrepel)
# Number of observed categories in each column of `am`, returned as an
# integer vector named after the columns.
# vapply() iterates over the columns directly and fixes the result type;
# apply() would first coerce the whole data frame to a character matrix.
# factor(x) keeps only the values that actually occur, which matches what
# the matrix-based count produced.
cats <- vapply(am, function(x) nlevels(factor(x)), integer(1))
cats
## Hour Access_Control Alignment
## 2 4 7
## Contributing_Factor Lighting Locality
## 4 6 3
## Collision Road_Type Severity
## 8 5 5
## Weather Roadway Damage
## 4 7 11
## Dr_Age Dr_Condition Dr_Gender
## 7 7 3
## Reason Traffic_Control Traffic_Condition
## 5 6 5
## Violation Posted_Speed
## 6 7
# Run the multiple correspondence analysis without drawing FactoMineR's
# default plots, then pair each category coordinate row with the name of the
# variable it came from (each name repeated once per category of that
# variable).
# NOTE(review): mca1_vars_df is reassigned from "m1.csv" later in this
# script, so this version only lives until that read.
mca1 <- MCA(am, graph = FALSE)
mca1_vars_df <- data.frame(
  mca1$var$coord,
  Variable = rep(names(cats), times = cats)
)
library(RColorBrewer)
# Replace the coordinate table with the contents of m1.csv and use its first
# column as the row names; that column stays in the data frame as well.
# The plots below take their labels from these row names.
mca1_vars_df <- read.csv("m1.csv")
row.names(mca1_vars_df) <- mca1_vars_df[[1]]
# Scatter of Dim.1 against Dim.2 from mca1_vars_df, each point labelled with
# its row name (labels repelled so they do not overlap).
# Both axes are limited to [-0.75, 0.75]; rows outside those limits are
# dropped by the scales, which ggplot2 reports as "Removed ... rows".
ggplot(data = mca1_vars_df) +
  # Colour and size are constants, so they are set outside aes(). Inside
  # aes() they are treated as data and mapped through the default scales,
  # which does not give red points of the requested size.
  geom_point(aes(x = Dim.1, y = Dim.2), color = "red", size = 3) +
  geom_text_repel(
    aes(x = Dim.1, y = Dim.2, label = rownames(mca1_vars_df)),
    size = 4, force = 4
  ) +
  scale_y_continuous(limits = c(-0.75, 0.75)) +
  scale_x_continuous(limits = c(-0.75, 0.75)) +
  theme_bw() +
  theme(legend.position = "none")
## Warning: Removed 34 rows containing missing values (geom_point).
## Warning: Removed 34 rows containing missing values (geom_text_repel).

# Zoomed scatter of Dim.1 against Dim.2 from mca1_vars_df, labelled by row
# name: x limited to [-0.1, 0.75], y limited to [0, 0.25].
# Rows outside those limits are dropped by the scales, which ggplot2 reports
# as "Removed ... rows".
ggplot(data = mca1_vars_df) +
  # Colour and size are constants, so they are set outside aes(). Inside
  # aes() they are treated as data and mapped through the default scales,
  # which does not give red points of the requested size.
  geom_point(aes(x = Dim.1, y = Dim.2), color = "red", size = 4) +
  geom_text_repel(
    aes(x = Dim.1, y = Dim.2, label = rownames(mca1_vars_df)),
    size = 5, force = 0.9
  ) +
  scale_y_continuous(limits = c(0, 0.25)) +
  scale_x_continuous(limits = c(-0.1, 0.75)) +
  theme_bw() +
  theme(legend.position = "none")
## Warning: Removed 98 rows containing missing values (geom_point).
## Warning: Removed 98 rows containing missing values (geom_text_repel).

# Zoomed scatter of Dim.1 against Dim.2 from mca1_vars_df, labelled by row
# name: x limited to [-0.8, 0.1], y limited to [0, 0.5].
# Rows outside those limits are dropped by the scales, which ggplot2 reports
# as "Removed ... rows".
ggplot(data = mca1_vars_df) +
  # Colour and size are constants, so they are set outside aes(). Inside
  # aes() they are treated as data and mapped through the default scales,
  # which does not give red points of the requested size.
  geom_point(aes(x = Dim.1, y = Dim.2), color = "red", size = 4) +
  geom_text_repel(
    aes(x = Dim.1, y = Dim.2, label = rownames(mca1_vars_df)),
    size = 5, force = 0.9
  ) +
  scale_y_continuous(limits = c(0, 0.5)) +
  scale_x_continuous(limits = c(-0.8, 0.1)) +
  theme_bw() +
  theme(legend.position = "none")
## Warning: Removed 90 rows containing missing values (geom_point).
## Warning: Removed 90 rows containing missing values (geom_text_repel).

# Zoomed scatter of Dim.1 against Dim.2 from mca1_vars_df, labelled by row
# name: x limited to [-1, 0.1], y limited to [-0.4, 0].
# Rows outside those limits are dropped by the scales, which ggplot2 reports
# as "Removed ... rows".
ggplot(data = mca1_vars_df) +
  # Colour and size are constants, so they are set outside aes(). Inside
  # aes() they are treated as data and mapped through the default scales,
  # which does not give red points of the requested size.
  geom_point(aes(x = Dim.1, y = Dim.2), color = "red", size = 4) +
  geom_text_repel(
    aes(x = Dim.1, y = Dim.2, label = rownames(mca1_vars_df)),
    size = 5, force = 0.9
  ) +
  scale_y_continuous(limits = c(-0.4, 0)) +
  scale_x_continuous(limits = c(-1, 0.1)) +
  theme_bw() +
  theme(legend.position = "none")
## Warning: Removed 82 rows containing missing values (geom_point).
## Warning: Removed 82 rows containing missing values (geom_text_repel).

# Zoomed scatter of Dim.1 against Dim.2 from mca1_vars_df, labelled by row
# name: x limited to [-0.15, 0.85], y limited to [-0.4, 0].
# Rows outside those limits are dropped by the scales, which ggplot2 reports
# as "Removed ... rows".
ggplot(data = mca1_vars_df) +
  # Colour and size are constants, so they are set outside aes(). Inside
  # aes() they are treated as data and mapped through the default scales,
  # which does not give red points of the requested size.
  geom_point(aes(x = Dim.1, y = Dim.2), color = "red", size = 4) +
  geom_text_repel(
    aes(x = Dim.1, y = Dim.2, label = rownames(mca1_vars_df)),
    size = 5, force = 0.9
  ) +
  scale_y_continuous(limits = c(-0.4, 0)) +
  scale_x_continuous(limits = c(-0.15, 0.85)) +
  theme_bw() +
  theme(legend.position = "none")
## Warning: Removed 92 rows containing missing values (geom_point).
## Warning: Removed 92 rows containing missing values (geom_text_repel).

### Conventional
# Text-only plot: every row name of mca1_vars_df is drawn at its
# (Dim.1, Dim.2) position, with grey horizontal and vertical reference lines
# through zero. x is limited to [-0.15, 0.85] and y to [-0.4, 0]; rows
# outside those limits are dropped by the scales.
ggplot(
  data = mca1_vars_df,
  mapping = aes(x = Dim.1, y = Dim.2, label = rownames(mca1_vars_df))
) +
  geom_hline(yintercept = 0, colour = "gray70") +
  geom_vline(xintercept = 0, colour = "gray70") +
  geom_text(size = 5) +
  scale_x_continuous(limits = c(-0.15, 0.85)) +
  scale_y_continuous(limits = c(-0.4, 0)) +
  theme_bw() +
  ggtitle("MCA plot of variables using R package FactoMineR")
## Warning: Removed 92 rows containing missing values (geom_text).
