Loading Libraries
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(tidyr)
library(stats)
library(ggplot2)
library(readxl)
# Point the session at the course folder so the workbook can be opened by
# its bare file name.
# NOTE(review): this absolute, user-specific path only exists on one machine;
# consider a project-relative path instead of setwd().
setwd("C:/Users/Sadia Bhuiyan Shampa/Desktop/CE 7393")

# Read the workbook into `df_1`; its structure and summary are shown below.
df_1 <- readxl::read_excel(path = "df_1.xlsx")
Read Data and Data Summarization
## tibble [1,295 × 19] (S3: tbl_df/tbl/data.frame)
## $ Wthr_Cond_ID : chr [1:1295] "Clear" "Clear" "Clear" "Clear" ...
## $ Light_Cond_ID : chr [1:1295] "Dark, not lighted" "Dark, not lighted" "Daylight" "Daylight" ...
## $ Road_Type_ID : chr [1:1295] "2 lane, 2 way" "2 lane, 2 way" "2 lane, 2 way" "2 lane, 2 way" ...
## $ Road_Algn_ID : chr [1:1295] "Straight, level" "Straight, level" "Straight, level" "Straight, level" ...
## $ SurfDry : num [1:1295] 1 1 1 1 1 1 1 1 1 1 ...
## $ Traffic_Cntl_ID : chr [1:1295] "Marked lanes" "Center stripe/divider" "Marked lanes" "Center stripe/divider" ...
## $ Harm_Evnt_ID : chr [1:1295] "Motor vehicle in transport" "Motor vehicle in transport" "Motor vehicle in transport" "Fixed object" ...
## $ Intrsct_Relat_ID : chr [1:1295] "Non intersection" "Non intersection" "Intersection" "Non intersection" ...
## $ FHE_Collsn_ID : chr [1:1295] "Sd both going straight-rear end" "Sd both going straight-rear end" "Other" "Omv vehicle going straight" ...
## $ Road_Part_Adj_ID : chr [1:1295] "Main/proper lane" "Main/proper lane" "Main/proper lane" "Main/proper lane" ...
## $ Road_Cls_ID : chr [1:1295] "Farm to market" "Us & state highways" "Farm to market" "Us & state highways" ...
## $ Pop_Group_ID : chr [1:1295] "10,000 - 24,999 pop" "Rural" "Other" "Rural" ...
## $ Crash_Speed_LimitCat: chr [1:1295] "30-40 mph" "65-70 mph" "45-60 mph" "65-70 mph" ...
## $ Veh_Body_Styl_ID : chr [1:1295] "Farm equipment" "Farm equipment" "Farm equipment" "Farm equipment" ...
## $ Prsn_Ethnicity_ID : chr [1:1295] "White" "White" "White" "White" ...
## $ GenMale : num [1:1295] 1 1 1 1 1 1 1 1 1 1 ...
## $ TrafVol : num [1:1295] 7654 13770 11470 16972 413 ...
## $ Prsn_Age : chr [1:1295] "25-54 years" "25-54 years" "Other" "25-54 years" ...
## $ Prsn_Injry_Sev_ID : chr [1:1295] "O" "O" "O" "O" ...
## Wthr_Cond_ID Light_Cond_ID Road_Type_ID Road_Algn_ID
## Length:1295 Length:1295 Length:1295 Length:1295
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## SurfDry Traffic_Cntl_ID Harm_Evnt_ID Intrsct_Relat_ID
## Min. :0.0000 Length:1295 Length:1295 Length:1295
## 1st Qu.:1.0000 Class :character Class :character Class :character
## Median :1.0000 Mode :character Mode :character Mode :character
## Mean :0.9143
## 3rd Qu.:1.0000
## Max. :1.0000
## FHE_Collsn_ID Road_Part_Adj_ID Road_Cls_ID Pop_Group_ID
## Length:1295 Length:1295 Length:1295 Length:1295
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Crash_Speed_LimitCat Veh_Body_Styl_ID Prsn_Ethnicity_ID GenMale
## Length:1295 Length:1295 Length:1295 Min. :0.0000
## Class :character Class :character Class :character 1st Qu.:1.0000
## Mode :character Mode :character Mode :character Median :1.0000
## Mean :0.8842
## 3rd Qu.:1.0000
## Max. :1.0000
## TrafVol Prsn_Age Prsn_Injry_Sev_ID
## Min. : 215 Length:1295 Length:1295
## 1st Qu.: 6765 Class :character Class :character
## Median :14170 Mode :character Mode :character
## Mean :14417
## 3rd Qu.:22010
## Max. :28970
Data Visualization
Bar plot of Crash_Speed_LimitCat
# Count the rows in each speed-limit category and draw the counts as bars.
barplot(
  height = table(df_1[["Crash_Speed_LimitCat"]]),
  col = rgb(red = 0.2, green = 0.5, blue = 0.8),
  ylim = c(0, 600),
  main = "Bar plot of Crash_Speed_LimitCat",
  xlab = "Category of Data among 1295 Entries",
  ylab = "Frequency"
)

Bar plot of Road_Cls_ID
# Tabulate how many rows fall under each road class.
frequency_table <- table(df_1[["Road_Cls_ID"]])

# NOTE(review): `colors` is assigned here, but the barplot() call below passes
# its own literal palette (with "turquoise" in place of "lightblue").
colors <- c("burlywood", "lightblue", "lightgreen", "salmon", "gold")

# las = 2 draws the axis labels perpendicular to their axis.
barplot(
  height = frequency_table,
  col = c("burlywood", "turquoise", "lightgreen", "salmon", "gold"),
  las = 2,
  ylim = c(0, 500),
  main = "Bar plot of Road_Cls_ID",
  xlab = "",
  ylab = "Frequency"
)

Bar plot of Wthr_Cond_ID
##
## Clear Cloudy Fog Other Rain
## 1084 146 10 10 45
# Draw one bar per weather condition, with bar height equal to its row count.
barplot(
  height = table(df_1[["Wthr_Cond_ID"]]),
  col = "burlywood",
  ylim = c(0, 1300),
  main = "Bar plot of Weather Condition ID",
  xlab = "Category of Data among 1295 Entries",
  ylab = "Frequency"
)

Bar plot of Surface Condition
# Tabulate the dry-surface indicator with readable category labels.
#
# SurfDry is a 0/1 numeric column: 0 is labelled "Wet" and 1 is labelled
# "Dry". Recoding through factor() with explicit levels matches each code
# exactly (rather than by substring replacement on the table names) and keeps
# both categories in the table, in a fixed Wet/Dry order, even when one of
# them has no rows. That fixed order matters because the bar colours in the
# barplot() call that follows are assigned by position.
frequency_table <- table(
  factor(df_1$SurfDry, levels = c(0, 1), labels = c("Wet", "Dry"))
)

# Print the counts.
frequency_table
## Wet Dry
## 111 1184
# Plot the surface-condition counts held in `frequency_table`; the two
# colours are applied to the bars in table order.
barplot(
  height = frequency_table,
  col = c("limegreen", "maroon"),
  ylim = c(0, 1200),
  main = "Road Surface Condition",
  xlab = "Surface Type",
  ylab = "Frequency"
)

Density Plot of Crash Speed Limit Category
# Show the number of rows in each speed-limit category.
table(df_1[["Crash_Speed_LimitCat"]])
##
## > 70 mph 30-40 mph 45-60 mph 65-70 mph Other
## 203 336 471 223 62
# Draw one translucent density curve per speed-limit category, filled with a
# manually chosen colour for each category.
# NOTE(review): x is a categorical factor here, so the density is estimated
# over category positions rather than a continuous measurement - confirm that
# a bar chart of counts was not the intended figure.
ggplot(
  data = df_1,
  mapping = aes(x = factor(Crash_Speed_LimitCat), fill = Crash_Speed_LimitCat)
) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(
    values = c("darkcyan", "ivory", "brown", "chocolate", "seashell4")
  ) +
  labs(
    title = "Density Plot of Crash Speed Limit Category",
    x = "Crash Speed Limit",
    y = "Density"
  ) +
  theme_minimal()

Density Plot of Traffic Volume
# Kernel density estimate of traffic volume, drawn as a half-transparent
# orange area.
ggplot(data = df_1, mapping = aes(x = TrafVol)) +
  geom_density(fill = "orange2", alpha = 0.5) +
  labs(
    title = "Density Plot of Traffic Volume",
    x = "Traffic Volume",
    y = "Density"
  ) +
  theme_minimal()

Boxplot of Traffic Volume by Road Class ID
# ggplot2 is attached again here; it was already loaded at the top of the
# script, so this call has no further effect.
library(ggplot2)

# One box per road class showing the spread of traffic volume. Each box gets
# its own fill colour from the manual palette, and outlying points are red.
ggplot(
  data = df_1,
  mapping = aes(
    x = factor(Road_Cls_ID),
    y = TrafVol,
    fill = factor(Road_Cls_ID)
  )
) +
  geom_boxplot(alpha = 0.5, outlier.colour = "red") +
  labs(
    title = "Box Plot of Traffic Volume by Road Class ID",
    x = "Road Class ID",
    y = "Traffic Volume"
  ) +
  scale_fill_manual(
    values = c("maroon", "magenta", "lightsalmon", "indianred4", "seagreen")
  ) +
  theme_minimal()

Boxplot of Traffic Volume by Road Type
# Horizontal box plot of traffic volume for each road type.
#
# The fill colour is mapped to the road type inside aes() and resolved through
# scale_fill_manual(), the same pattern used for the road-class box plot.
# Passing the colour vector directly as a geom_boxplot() parameter only works
# when the number of boxes happens to equal the length of the vector; mapping
# the aesthetic ties each colour to a factor level instead. Colours are still
# assigned in factor-level order, so the boxes keep the same fills.
# show.legend = FALSE keeps the figure free of a fill legend, as before.
ggplot(
  df_1,
  aes(x = TrafVol, y = factor(Road_Type_ID), fill = factor(Road_Type_ID))
) +
  geom_boxplot(outlier.colour = "red", alpha = 0.5, show.legend = FALSE) +
  scale_fill_manual(
    values = c("peru", "green", "saddlebrown", "tan1", "turquoise")
  ) +
  labs(
    title = "Box Plot of Traffic Volume by Road Type",
    x = "Traffic Volume",
    y = "Road Type"
  ) +
  theme_minimal()

Histogram of Traffic Volume
# Histogram of traffic volume using hist()'s default binning, with the
# frequency axis fixed to the range 0-130.
hist(
  x = df_1[["TrafVol"]],
  col = "turquoise",
  ylim = c(0, 130),
  main = "Histogram of Traffic Volume",
  xlab = "Traffic Volume"
)
