Loading Libraries
library(tidyverse)
library(dplyr)
library(tidyr)
library(stats)
library(ggplot2)
library(readxl)
Read Data and Data Summarization
setwd("C:/Users/Sadia Bhuiyan Shampa/Desktop/CE 7393")
df_1 <- read_excel("df_1.xlsx") #The Structure of Data Frame
str(df_1)
## tibble [1,295 × 19] (S3: tbl_df/tbl/data.frame)
## $ Wthr_Cond_ID : chr [1:1295] "Clear" "Clear" "Clear" "Clear" ...
## $ Light_Cond_ID : chr [1:1295] "Dark, not lighted" "Dark, not lighted" "Daylight" "Daylight" ...
## $ Road_Type_ID : chr [1:1295] "2 lane, 2 way" "2 lane, 2 way" "2 lane, 2 way" "2 lane, 2 way" ...
## $ Road_Algn_ID : chr [1:1295] "Straight, level" "Straight, level" "Straight, level" "Straight, level" ...
## $ SurfDry : num [1:1295] 1 1 1 1 1 1 1 1 1 1 ...
## $ Traffic_Cntl_ID : chr [1:1295] "Marked lanes" "Center stripe/divider" "Marked lanes" "Center stripe/divider" ...
## $ Harm_Evnt_ID : chr [1:1295] "Motor vehicle in transport" "Motor vehicle in transport" "Motor vehicle in transport" "Fixed object" ...
## $ Intrsct_Relat_ID : chr [1:1295] "Non intersection" "Non intersection" "Intersection" "Non intersection" ...
## $ FHE_Collsn_ID : chr [1:1295] "Sd both going straight-rear end" "Sd both going straight-rear end" "Other" "Omv vehicle going straight" ...
## $ Road_Part_Adj_ID : chr [1:1295] "Main/proper lane" "Main/proper lane" "Main/proper lane" "Main/proper lane" ...
## $ Road_Cls_ID : chr [1:1295] "Farm to market" "Us & state highways" "Farm to market" "Us & state highways" ...
## $ Pop_Group_ID : chr [1:1295] "10,000 - 24,999 pop" "Rural" "Other" "Rural" ...
## $ Crash_Speed_LimitCat: chr [1:1295] "30-40 mph" "65-70 mph" "45-60 mph" "65-70 mph" ...
## $ Veh_Body_Styl_ID : chr [1:1295] "Farm equipment" "Farm equipment" "Farm equipment" "Farm equipment" ...
## $ Prsn_Ethnicity_ID : chr [1:1295] "White" "White" "White" "White" ...
## $ GenMale : num [1:1295] 1 1 1 1 1 1 1 1 1 1 ...
## $ TrafVol : num [1:1295] 7654 13770 11470 16972 413 ...
## $ Prsn_Age : chr [1:1295] "25-54 years" "25-54 years" "Other" "25-54 years" ...
## $ Prsn_Injry_Sev_ID : chr [1:1295] "O" "O" "O" "O" ...
## Wthr_Cond_ID Light_Cond_ID Road_Type_ID Road_Algn_ID
## Length:1295 Length:1295 Length:1295 Length:1295
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## SurfDry Traffic_Cntl_ID Harm_Evnt_ID Intrsct_Relat_ID
## Min. :0.0000 Length:1295 Length:1295 Length:1295
## 1st Qu.:1.0000 Class :character Class :character Class :character
## Median :1.0000 Mode :character Mode :character Mode :character
## Mean :0.9143
## 3rd Qu.:1.0000
## Max. :1.0000
## FHE_Collsn_ID Road_Part_Adj_ID Road_Cls_ID Pop_Group_ID
## Length:1295 Length:1295 Length:1295 Length:1295
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Crash_Speed_LimitCat Veh_Body_Styl_ID Prsn_Ethnicity_ID GenMale
## Length:1295 Length:1295 Length:1295 Min. :0.0000
## Class :character Class :character Class :character 1st Qu.:1.0000
## Mode :character Mode :character Mode :character Median :1.0000
## Mean :0.8842
## 3rd Qu.:1.0000
## Max. :1.0000
## TrafVol Prsn_Age Prsn_Injry_Sev_ID
## Min. : 215 Length:1295 Length:1295
## 1st Qu.: 6765 Class :character Class :character
## Median :14170 Mode :character Mode :character
## Mean :14417
## 3rd Qu.:22010
## Max. :28970
Data Visualization
Bar Plot
Bar plot of Crash_Speed_LimitCat
barplot(table(df_1$Crash_Speed_LimitCat), main="Bar plot of Crash_Speed_LimitCat",
xlab="Category of Data among 1295 Entries", ylab="Frequency",
col = rgb(0.2,0.5,0.8),
ylim=c(0,600))

Bar plot of Road_Cls_ID
frequency_table<-table(df_1$Road_Cls_ID)
colors<-c("burlywood", "lightblue", "lightgreen", "salmon", "gold")
barplot(frequency_table,
main="Bar plot of Road_Cls_ID",
xlab="", ylab="Frequency",
col= c("burlywood", "turquoise", "lightgreen", "salmon", "gold"),
ylim=c(0,500), las=2)

Bar plot of Wthr_Cond_ID
##
## Clear Cloudy Fog Other Rain
## 1084 146 10 10 45
barplot(table(df_1$Wthr_Cond_ID), main="Bar plot of Weather Condition ID",
xlab="Category of Data among 1295 Entries", ylab="Frequency",
col="burlywood",
ylim=c(0,1300))

Bar plot of Surface Condition
frequency_table <- table(df_1$SurfDry)
names(frequency_table) <- gsub("0", "Wet", names(frequency_table))
names(frequency_table) <- gsub("1", "Dry", names(frequency_table))
frequency_table
## Wet Dry
## 111 1184
barplot(frequency_table,
main = "Road Surface Condition", xlab = "Surface Type",
ylab = "Frequency",
col = c("limegreen", "maroon"),
ylim = c(0,1200))

Density Plot
Density Plot of Crash Speed Limit Category
table(df_1$Crash_Speed_LimitCat)
##
## > 70 mph 30-40 mph 45-60 mph 65-70 mph Other
## 203 336 471 223 62
ggplot(df_1, aes(x = factor(Crash_Speed_LimitCat),fill=Crash_Speed_LimitCat)) +
geom_density(alpha = 0.5) + scale_fill_manual(values=c("darkcyan","ivory", "brown", "chocolate", "seashell4"))+
labs(title = "Density Plot of Crash Speed Limit Category",x = "Crash Speed Limit",y = "Density") +theme_minimal()

Density Plot of Traffic Volume
ggplot(df_1, aes(x = TrafVol)) +
geom_density(alpha = 0.5, fill = "orange2") +
labs(title = "Density Plot of Traffic Volume",
x = "Traffic Volume",
y = "Density") +
theme_minimal()

Box Plot
Box plot of Traffic Volume by Road Class ID
library(ggplot2)
ggplot(df_1, aes(x = factor(Road_Cls_ID), y = TrafVol, fill = factor(Road_Cls_ID))) +
geom_boxplot(outlier.colour = "red", alpha = 0.5) +
labs(title = "Box Plot of Traffic Volume by Road Class ID",
x = "Road Class ID",
y = "Traffic Volume") +
scale_fill_manual(values = c("maroon", "magenta", "lightsalmon", "indianred4", "seagreen")) +
theme_minimal()

Boxplot of Traffic Volume by Road Type
ggplot(df_1, aes(x = TrafVol, y = factor(Road_Type_ID))) +
geom_boxplot(fill = c("peru", "green", "saddlebrown", "tan1", "turquoise"), outlier.colour = "red", alpha = 0.5) +
labs(title = "Box Plot of Traffic Volume by Road Type",
x = "Traffic Volume",
y = "Road Type") +
theme_minimal()

Histogram
Histogram of Traffic Volume
hist(df_1$TrafVol, xlab = "Traffic Volume", col = "turquoise",
main = "Histogram of Traffic Volume",
ylim = c(0,130))

Heat Map
Heat Map of FHE Collision and Road Class
# creating Contingency Table
con_tab_1 <- xtabs(~ FHE_Collsn_ID + Road_Cls_ID, data = df_1)
print(con_tab_1)
## Road_Cls_ID
## FHE_Collsn_ID City street County road Farm to market Other
## Omv vehicle going straight 58 52 59 25
## Other 101 81 91 32
## Sd both going straight-rear end 23 17 110 15
## Sd both going straight-sideswipe 24 16 35 15
## Sd one straight-one left turn 15 25 79 2
## Road_Cls_ID
## FHE_Collsn_ID Us & state highways
## Omv vehicle going straight 65
## Other 92
## Sd both going straight-rear end 147
## Sd both going straight-sideswipe 69
## Sd one straight-one left turn 47
# Converting to dataframe
con_df1 <- as.data.frame(con_tab_1)
# Heat Map
ggplot(con_df1, aes(x = FHE_Collsn_ID, y = Road_Cls_ID, fill = Freq)) +
geom_tile(alpha = 0.5) + # Set transparency here
scale_fill_gradient(low = "goldenrod", high = "royalblue") +
labs(title = "Heat Map of Road Class vs FHE Collision Type",
x = "FHE Collision Type",
y = "Road Class",
fill = "Frequency") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 20, hjust = 1))

Heat Map of Road Class and Crash Speed Limit
# Creating Contingency Table
con_tab_2 <- xtabs(~ Crash_Speed_LimitCat + Road_Cls_ID, data = df_1)
print(con_tab_2)
## Road_Cls_ID
## Crash_Speed_LimitCat City street County road Farm to market Other
## > 70 mph 0 1 43 12
## 30-40 mph 171 104 14 7
## 45-60 mph 26 78 234 37
## 65-70 mph 0 3 81 6
## Other 24 5 2 27
## Road_Cls_ID
## Crash_Speed_LimitCat Us & state highways
## > 70 mph 147
## 30-40 mph 40
## 45-60 mph 96
## 65-70 mph 133
## Other 4
# Converting to dataframe
con_df2 <- as.data.frame(con_tab_2)
# Heat Map
ggplot(con_df2, aes(x = Crash_Speed_LimitCat, y = Road_Cls_ID, fill = Freq)) +
geom_tile(alpha = 0.5) + # Set transparency here
scale_fill_gradient(low = "maroon", high = "royalblue") +
labs(title = "Heat Map of Road Class vs Crash Speed Limit",
x = "Crash Speed Limit (mph)",
y = "Road Class",
fill = "Frequency") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 20, hjust = 1))

Violin Plot
Violin Plot of Traffic Volume by Road Class
set.seed(42)
# Calculating mean traffic volume by road class
mean_values <- df_1 %>%
group_by(Road_Cls_ID) %>%
summarize(mean_TrafVol = mean(TrafVol, na.rm = TRUE))
# Creating the violin plot with a boxplot and jittered points
ggplot(df_1, aes(x = factor(Road_Cls_ID), y = TrafVol, fill = factor(Road_Cls_ID))) +
geom_violin(trim = FALSE, fill = "goldenrod", color = "red", alpha = 0.8) +
geom_boxplot(width = 0.1, color = "seagreen", outlier.shape = NA, alpha = 0.6) + # Boxplot inside the violin
geom_jitter(width = 0.1, size = 1, alpha = 0.2, color = "blue") +
geom_point(data = mean_values, aes(x = factor(Road_Cls_ID), y = mean_TrafVol),
color = "black", size = 3, position = position_dodge(width = 0.75)) +
labs(title = "Violin Plot of Traffic Volume by Road Class",
x = "Road Class",
y = "Traffic Volume") +
theme_minimal()
