HW1_Sadia

Loading Libraries

library(tidyverse)
library(dplyr)
library(tidyr)
library(stats)
library(ggplot2)
library(readxl)

Read Data and Data Summarization

setwd("C:/Users/Sadia Bhuiyan Shampa/Desktop/CE 7393")
df_1 <- read_excel("df_1.xlsx")      #The Structure of Data Frame
str(df_1)
## tibble [1,295 × 19] (S3: tbl_df/tbl/data.frame)
##  $ Wthr_Cond_ID        : chr [1:1295] "Clear" "Clear" "Clear" "Clear" ...
##  $ Light_Cond_ID       : chr [1:1295] "Dark, not lighted" "Dark, not lighted" "Daylight" "Daylight" ...
##  $ Road_Type_ID        : chr [1:1295] "2 lane, 2 way" "2 lane, 2 way" "2 lane, 2 way" "2 lane, 2 way" ...
##  $ Road_Algn_ID        : chr [1:1295] "Straight, level" "Straight, level" "Straight, level" "Straight, level" ...
##  $ SurfDry             : num [1:1295] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Traffic_Cntl_ID     : chr [1:1295] "Marked lanes" "Center stripe/divider" "Marked lanes" "Center stripe/divider" ...
##  $ Harm_Evnt_ID        : chr [1:1295] "Motor vehicle in transport" "Motor vehicle in transport" "Motor vehicle in transport" "Fixed object" ...
##  $ Intrsct_Relat_ID    : chr [1:1295] "Non intersection" "Non intersection" "Intersection" "Non intersection" ...
##  $ FHE_Collsn_ID       : chr [1:1295] "Sd both going straight-rear end" "Sd both going straight-rear end" "Other" "Omv vehicle going straight" ...
##  $ Road_Part_Adj_ID    : chr [1:1295] "Main/proper lane" "Main/proper lane" "Main/proper lane" "Main/proper lane" ...
##  $ Road_Cls_ID         : chr [1:1295] "Farm to market" "Us & state highways" "Farm to market" "Us & state highways" ...
##  $ Pop_Group_ID        : chr [1:1295] "10,000 - 24,999 pop" "Rural" "Other" "Rural" ...
##  $ Crash_Speed_LimitCat: chr [1:1295] "30-40 mph" "65-70 mph" "45-60 mph" "65-70 mph" ...
##  $ Veh_Body_Styl_ID    : chr [1:1295] "Farm equipment" "Farm equipment" "Farm equipment" "Farm equipment" ...
##  $ Prsn_Ethnicity_ID   : chr [1:1295] "White" "White" "White" "White" ...
##  $ GenMale             : num [1:1295] 1 1 1 1 1 1 1 1 1 1 ...
##  $ TrafVol             : num [1:1295] 7654 13770 11470 16972 413 ...
##  $ Prsn_Age            : chr [1:1295] "25-54 years" "25-54 years" "Other" "25-54 years" ...
##  $ Prsn_Injry_Sev_ID   : chr [1:1295] "O" "O" "O" "O" ...
summary(df_1)
##  Wthr_Cond_ID       Light_Cond_ID      Road_Type_ID       Road_Algn_ID      
##  Length:1295        Length:1295        Length:1295        Length:1295       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     SurfDry       Traffic_Cntl_ID    Harm_Evnt_ID       Intrsct_Relat_ID  
##  Min.   :0.0000   Length:1295        Length:1295        Length:1295       
##  1st Qu.:1.0000   Class :character   Class :character   Class :character  
##  Median :1.0000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :0.9143                                                           
##  3rd Qu.:1.0000                                                           
##  Max.   :1.0000                                                           
##  FHE_Collsn_ID      Road_Part_Adj_ID   Road_Cls_ID        Pop_Group_ID      
##  Length:1295        Length:1295        Length:1295        Length:1295       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  Crash_Speed_LimitCat Veh_Body_Styl_ID   Prsn_Ethnicity_ID     GenMale      
##  Length:1295          Length:1295        Length:1295        Min.   :0.0000  
##  Class :character     Class :character   Class :character   1st Qu.:1.0000  
##  Mode  :character     Mode  :character   Mode  :character   Median :1.0000  
##                                                             Mean   :0.8842  
##                                                             3rd Qu.:1.0000  
##                                                             Max.   :1.0000  
##     TrafVol        Prsn_Age         Prsn_Injry_Sev_ID 
##  Min.   :  215   Length:1295        Length:1295       
##  1st Qu.: 6765   Class :character   Class :character  
##  Median :14170   Mode  :character   Mode  :character  
##  Mean   :14417                                        
##  3rd Qu.:22010                                        
##  Max.   :28970

Data Visualization

Bar Plot

Bar plot of Crash_Speed_LimitCat

barplot(table(df_1$Crash_Speed_LimitCat), main="Bar plot of Crash_Speed_LimitCat", 
       xlab="Category of Data among 1295 Entries", ylab="Frequency", 
       col = rgb(0.2,0.5,0.8),
       ylim=c(0,600))

Bar plot of Road_Cls_ID

frequency_table<-table(df_1$Road_Cls_ID)
colors<-c("burlywood", "lightblue", "lightgreen", "salmon", "gold")
barplot(frequency_table, 
        main="Bar plot of Road_Cls_ID",
        xlab="", ylab="Frequency",
        col= c("burlywood", "turquoise", "lightgreen", "salmon", "gold"),
        ylim=c(0,500), las=2)

Bar plot of Wthr_Cond_ID

table(df_1$Wthr_Cond_ID)
## 
##  Clear Cloudy    Fog  Other   Rain 
##   1084    146     10     10     45
barplot(table(df_1$Wthr_Cond_ID), main="Bar plot of Weather Condition ID",
        xlab="Category of Data among 1295 Entries", ylab="Frequency",
        col="burlywood",
        ylim=c(0,1300))

Bar plot of Surface Condition

frequency_table <- table(df_1$SurfDry)
names(frequency_table) <- gsub("0", "Wet", names(frequency_table))
names(frequency_table) <- gsub("1", "Dry", names(frequency_table))
frequency_table
##  Wet  Dry 
##  111 1184
barplot(frequency_table,  
        main = "Road Surface Condition", xlab = "Surface Type",
        ylab  = "Frequency",  
        col = c("limegreen", "maroon"), 
        ylim = c(0,1200))

Density Plot

Density Plot of Crash Speed Limit Category

table(df_1$Crash_Speed_LimitCat)
## 
##  > 70 mph 30-40 mph 45-60 mph 65-70 mph     Other 
##       203       336       471       223        62
ggplot(df_1, aes(x = factor(Crash_Speed_LimitCat),fill=Crash_Speed_LimitCat)) + 
geom_density(alpha = 0.5) + scale_fill_manual(values=c("darkcyan","ivory", "brown", "chocolate", "seashell4"))+
labs(title = "Density Plot of Crash Speed Limit Category",x = "Crash Speed Limit",y = "Density") +theme_minimal()

Density Plot of Traffic Volume

ggplot(df_1, aes(x = TrafVol)) + 
  geom_density(alpha = 0.5, fill = "orange2") +
  labs(title = "Density Plot of Traffic Volume",
       x = "Traffic Volume",
       y = "Density") +
  theme_minimal()

Box Plot

Box plot of Traffic Volume by Road Class ID

library(ggplot2)

ggplot(df_1, aes(x = factor(Road_Cls_ID), y = TrafVol, fill = factor(Road_Cls_ID))) +
  geom_boxplot(outlier.colour = "red", alpha = 0.5) +
  labs(title = "Box Plot of Traffic Volume by Road Class ID",
       x = "Road Class ID",
       y = "Traffic Volume") +
  scale_fill_manual(values = c("maroon", "magenta", "lightsalmon", "indianred4", "seagreen")) +
  theme_minimal()

Boxplot of Traffic Volume by Road Type

ggplot(df_1, aes(x = TrafVol, y = factor(Road_Type_ID))) +
  geom_boxplot(fill = c("peru", "green", "saddlebrown", "tan1", "turquoise"), outlier.colour = "red", alpha = 0.5) +
  labs(title = "Box Plot of Traffic Volume by Road Type",
       x = "Traffic Volume",
       y = "Road Type") +
  theme_minimal()

Histogram

Histogram of Traffic Volume

hist(df_1$TrafVol, xlab = "Traffic Volume", col = "turquoise", 
     main = "Histogram of Traffic Volume",
     ylim = c(0,130))

Heat Map

Heat Map of FHE Collision and Road Class

# creating Contingency Table
con_tab_1 <- xtabs(~ FHE_Collsn_ID + Road_Cls_ID, data = df_1)
print(con_tab_1)
##                                   Road_Cls_ID
## FHE_Collsn_ID                      City street County road Farm to market Other
##   Omv vehicle going straight                58          52             59    25
##   Other                                    101          81             91    32
##   Sd both going straight-rear end           23          17            110    15
##   Sd both going straight-sideswipe          24          16             35    15
##   Sd one straight-one left turn             15          25             79     2
##                                   Road_Cls_ID
## FHE_Collsn_ID                      Us & state highways
##   Omv vehicle going straight                        65
##   Other                                             92
##   Sd both going straight-rear end                  147
##   Sd both going straight-sideswipe                  69
##   Sd one straight-one left turn                     47
# Converting to dataframe
con_df1 <- as.data.frame(con_tab_1)

# Heat Map
ggplot(con_df1, aes(x = FHE_Collsn_ID, y = Road_Cls_ID, fill = Freq)) +
  geom_tile(alpha = 0.5) +  # Set transparency here
  scale_fill_gradient(low = "goldenrod", high = "royalblue") +
  labs(title = "Heat Map of Road Class vs FHE Collision Type",
       x = "FHE Collision Type",
       y = "Road Class",
       fill = "Frequency") +
  theme_minimal() + 
  theme(axis.text.x = element_text(angle = 20, hjust = 1))

Heat Map of Road Class and Crash Speed Limit

# Creating Contingency Table
con_tab_2 <- xtabs(~ Crash_Speed_LimitCat + Road_Cls_ID, data = df_1)
print(con_tab_2)
##                     Road_Cls_ID
## Crash_Speed_LimitCat City street County road Farm to market Other
##            > 70 mph            0           1             43    12
##            30-40 mph         171         104             14     7
##            45-60 mph          26          78            234    37
##            65-70 mph           0           3             81     6
##            Other              24           5              2    27
##                     Road_Cls_ID
## Crash_Speed_LimitCat Us & state highways
##            > 70 mph                  147
##            30-40 mph                  40
##            45-60 mph                  96
##            65-70 mph                 133
##            Other                       4
# Converting to dataframe
con_df2 <- as.data.frame(con_tab_2)

# Heat Map
ggplot(con_df2, aes(x = Crash_Speed_LimitCat, y = Road_Cls_ID, fill = Freq)) +
  geom_tile(alpha = 0.5) +  # Set transparency here
  scale_fill_gradient(low = "maroon", high = "royalblue") +
  labs(title = "Heat Map of Road Class vs Crash Speed Limit",
       x = "Crash Speed Limit (mph)",
       y = "Road Class",
       fill = "Frequency") +
  theme_minimal() + 
  theme(axis.text.x = element_text(angle = 20, hjust = 1))

Violin Plot

Violin Plot of Traffic Volume by Road Class

set.seed(42)

# Calculating mean traffic volume by road class
mean_values <- df_1 %>%
  group_by(Road_Cls_ID) %>%
  summarize(mean_TrafVol = mean(TrafVol, na.rm = TRUE))

# Creating the violin plot with a boxplot and jittered points
ggplot(df_1, aes(x = factor(Road_Cls_ID), y = TrafVol, fill = factor(Road_Cls_ID))) +
  geom_violin(trim = FALSE, fill = "goldenrod", color = "red", alpha = 0.8) +
  geom_boxplot(width = 0.1, color = "seagreen", outlier.shape = NA, alpha = 0.6) +  # Boxplot inside the violin
  geom_jitter(width = 0.1, size = 1, alpha = 0.2, color = "blue") + 
  geom_point(data = mean_values, aes(x = factor(Road_Cls_ID), y = mean_TrafVol), 
             color = "black", size = 3, position = position_dodge(width = 0.75)) +
  labs(title = "Violin Plot of Traffic Volume by Road Class",
       x = "Road Class",
       y = "Traffic Volume") +
  theme_minimal()