Homework 1 (Syed Aaqib Javed)

# Set the working directory; the relative file names used below
# ("HW1_Data.xlsx", "HW1DS.xlsx") are resolved against it.
# NOTE(review): this is an absolute, machine-specific path - the script will
# fail on any other computer unless this line is adjusted.
setwd("C:/Users/sonia/OneDrive - Texas State University (1)/My file/course/Fall 24/Artificial Inteligence in Civil Engineering/HW1")

library(readxl)
library(readr)
library(ggplot2)
library(gridExtra)
library(DT)
library(dplyr)
library(ggstatsplot)

Data Type

# Read the homework workbook into HW1, then report its rows x columns.
HW1 <- read_excel(path = "HW1_Data.xlsx")
dim(HW1)
## [1] 1295   19
# List the column names of the loaded table.
names(HW1)
##  [1] "Wthr_Cond_ID"         "Light_Cond_ID"        "Road_Type_ID"        
##  [4] "Road_Algn_ID"         "SurfDry"              "Traffic_Cntl_ID"     
##  [7] "Harm_Evnt_ID"         "Intrsct_Relat_ID"     "FHE_Collsn_ID"       
## [10] "Road_Part_Adj_ID"     "Road_Cls_ID"          "Pop_Group_ID"        
## [13] "Crash_Speed_LimitCat" "Veh_Body_Styl_ID"     "Prsn_Ethnicity_ID"   
## [16] "GenMale"              "TrafVol"              "Prsn_Age"            
## [19] "Prsn_Injry_Sev_ID"
# Preview the first six rows.
head(HW1)
## # A tibble: 6 × 19
##   Wthr_Cond_ID Light_Cond_ID   Road_Type_ID Road_Algn_ID SurfDry Traffic_Cntl_ID
##   <chr>        <chr>           <chr>        <chr>          <dbl> <chr>          
## 1 Clear        Dark, not ligh… 2 lane, 2 w… Straight, l…       1 Marked lanes   
## 2 Clear        Dark, not ligh… 2 lane, 2 w… Straight, l…       1 Center stripe/…
## 3 Clear        Daylight        2 lane, 2 w… Straight, l…       1 Marked lanes   
## 4 Clear        Daylight        2 lane, 2 w… Straight, l…       1 Center stripe/…
## 5 Clear        Dark, not ligh… 2 lane, 2 w… Straight, g…       1 None           
## 6 Clear        Daylight        Unknown      Straight, l…       1 None           
## # ℹ 13 more variables: Harm_Evnt_ID <chr>, Intrsct_Relat_ID <chr>,
## #   FHE_Collsn_ID <chr>, Road_Part_Adj_ID <chr>, Road_Cls_ID <chr>,
## #   Pop_Group_ID <chr>, Crash_Speed_LimitCat <chr>, Veh_Body_Styl_ID <chr>,
## #   Prsn_Ethnicity_ID <chr>, GenMale <dbl>, TrafVol <dbl>, Prsn_Age <chr>,
## #   Prsn_Injry_Sev_ID <chr>
# Show the storage type and leading values of every column.
str(HW1)
## tibble [1,295 × 19] (S3: tbl_df/tbl/data.frame)
##  $ Wthr_Cond_ID        : chr [1:1295] "Clear" "Clear" "Clear" "Clear" ...
##  $ Light_Cond_ID       : chr [1:1295] "Dark, not lighted" "Dark, not lighted" "Daylight" "Daylight" ...
##  $ Road_Type_ID        : chr [1:1295] "2 lane, 2 way" "2 lane, 2 way" "2 lane, 2 way" "2 lane, 2 way" ...
##  $ Road_Algn_ID        : chr [1:1295] "Straight, level" "Straight, level" "Straight, level" "Straight, level" ...
##  $ SurfDry             : num [1:1295] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Traffic_Cntl_ID     : chr [1:1295] "Marked lanes" "Center stripe/divider" "Marked lanes" "Center stripe/divider" ...
##  $ Harm_Evnt_ID        : chr [1:1295] "Motor vehicle in transport" "Motor vehicle in transport" "Motor vehicle in transport" "Fixed object" ...
##  $ Intrsct_Relat_ID    : chr [1:1295] "Non intersection" "Non intersection" "Intersection" "Non intersection" ...
##  $ FHE_Collsn_ID       : chr [1:1295] "Sd both going straight-rear end" "Sd both going straight-rear end" "Other" "Omv vehicle going straight" ...
##  $ Road_Part_Adj_ID    : chr [1:1295] "Main/proper lane" "Main/proper lane" "Main/proper lane" "Main/proper lane" ...
##  $ Road_Cls_ID         : chr [1:1295] "Farm to market" "Us & state highways" "Farm to market" "Us & state highways" ...
##  $ Pop_Group_ID        : chr [1:1295] "10,000 - 24,999 pop" "Rural" "Other" "Rural" ...
##  $ Crash_Speed_LimitCat: chr [1:1295] "30-40 mph" "65-70 mph" "45-60 mph" "65-70 mph" ...
##  $ Veh_Body_Styl_ID    : chr [1:1295] "Farm equipment" "Farm equipment" "Farm equipment" "Farm equipment" ...
##  $ Prsn_Ethnicity_ID   : chr [1:1295] "White" "White" "White" "White" ...
##  $ GenMale             : num [1:1295] 1 1 1 1 1 1 1 1 1 1 ...
##  $ TrafVol             : num [1:1295] 4919 5109 6003 15761 1649 ...
##  $ Prsn_Age            : chr [1:1295] "25-54 years" "25-54 years" "Other" "25-54 years" ...
##  $ Prsn_Injry_Sev_ID   : chr [1:1295] "O" "O" "O" "O" ...

Descriptive Analysis

library(compareGroups)
# Descriptive table of all other columns, grouped by vehicle body style.
# Column names used in the formula should not contain spaces.
# NOTE(review): Veh_Body_Styl_ID has a single level in this data set, so the
# table has one group column and p.overall prints as "." for every row.
res1 <- compareGroups(
  Veh_Body_Styl_ID ~ .,
  data = HW1,
  max.ylev = 50,
  max.xlev = 50,
  ref = 1
)
res2 <- createTable(res1, show.ratio = TRUE)
res2
## 
## --------Summary descriptives table by 'Veh_Body_Styl_ID'---------
## 
## _____________________________________________________________ 
##                                      Farm equipment p.overall 
##                                          N=1295               
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯ 
## Wthr_Cond_ID:                                           .     
##     Clear                             1084 (83.7%)            
##     Cloudy                            146 (11.3%)             
##     Fog                                10 (0.77%)             
##     Other                              10 (0.77%)             
##     Rain                               45 (3.47%)             
## Light_Cond_ID:                                          .     
##     Dark, lighted                      42 (3.24%)             
##     Dark, not lighted                 154 (11.9%)             
##     Daylight                          1040 (80.3%)            
##     Dusk                               39 (3.01%)             
##     Other                              20 (1.54%)             
## Road_Type_ID:                                           .     
##     2 lane, 2 way                     551 (42.5%)             
##     4 or more lanes, divided          185 (14.3%)             
##     4 or more lanes, undivided         96 (7.41%)             
##     Other                              2 (0.15%)              
##     Unknown                           461 (35.6%)             
## Road_Algn_ID:                                           .     
##     Curve, level                       62 (4.79%)             
##     Other                              44 (3.40%)             
##     Straight, grade                   135 (10.4%)             
##     Straight, hillcrest                47 (3.63%)             
##     Straight, level                   1007 (77.8%)            
## SurfDry                               0.91 (0.28)       .     
## Traffic_Cntl_ID:                                        .     
##     Center stripe/divider             306 (23.6%)             
##     Marked lanes                      389 (30.0%)             
##     No passing zone                    94 (7.26%)             
##     None                              296 (22.9%)             
##     Other                             210 (16.2%)             
## Harm_Evnt_ID:                                           .     
##     Fixed object                      144 (11.1%)             
##     Motor vehicle in transport        1006 (77.7%)            
##     Other                              32 (2.47%)             
##     Overturned                         26 (2.01%)             
##     Parked car                         87 (6.72%)             
## Intrsct_Relat_ID:                                       .     
##     Driveway access                   145 (11.2%)             
##     Intersection                      170 (13.1%)             
##     Intersection related              112 (8.65%)             
##     Non intersection                  868 (67.0%)             
## FHE_Collsn_ID:                                          .     
##     Omv vehicle going straight        259 (20.0%)             
##     Other                             397 (30.7%)             
##     Sd both going straight-rear end   312 (24.1%)             
##     Sd both going straight-sideswipe  159 (12.3%)             
##     Sd one straight-one left turn     168 (13.0%)             
## Road_Part_Adj_ID:                                       .     
##     Exit/off ramp                      4 (0.31%)              
##     Main/proper lane                  1225 (94.6%)            
##     Other                              4 (0.31%)              
##     Other (explain in narrative)       30 (2.32%)             
##     Unknown                            32 (2.47%)             
## Road_Cls_ID:                                            .     
##     City street                       221 (17.1%)             
##     County road                       191 (14.7%)             
##     Farm to market                    374 (28.9%)             
##     Other                              89 (6.87%)             
##     Us & state highways               420 (32.4%)             
## Pop_Group_ID:                                           .     
##     10,000 - 24,999 pop                63 (4.86%)             
##     250,000 pop. And over             117 (9.03%)             
##     Other                             193 (14.9%)             
##     Rural                             866 (66.9%)             
##     Town under 2,499 pop.              56 (4.32%)             
## Crash_Speed_LimitCat:                                   .     
##     > 70 mph                          203 (15.7%)             
##     30-40 mph                         336 (25.9%)             
##     45-60 mph                         471 (36.4%)             
##     65-70 mph                         223 (17.2%)             
##     Other                              62 (4.79%)             
## Prsn_Ethnicity_ID:                                      .     
##     Black                              63 (4.86%)             
##     Hispanic                          422 (32.6%)             
##     Other                              39 (3.01%)             
##     Unknown                            83 (6.41%)             
##     White                             688 (53.1%)             
## GenMale                               0.88 (0.32)       .     
## TrafVol                               14502 (8222)      .     
## Prsn_Age:                                               .     
##     15-24 years                       188 (14.5%)             
##     25-54 years                       572 (44.2%)             
##     55-64 years                       232 (17.9%)             
##     65-74 years                       118 (9.11%)             
##     Other                             185 (14.3%)             
## Prsn_Injry_Sev_ID:                                      .     
##     BC                                120 (9.27%)             
##     KA                                 50 (3.86%)             
##     O                                 1125 (86.9%)            
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Write the descriptive table to an Excel file in the working directory.
export2xls(res2, file='HW1DS.xlsx')

library(skimr)
# Per-column summary of HW1 (missing values, unique counts, numeric quantiles);
# the rendered result follows below.
skim(HW1)
Data summary
Name HW1
Number of rows 1295
Number of columns 19
_______________________
Column type frequency:
character 16
numeric 3
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
Wthr_Cond_ID 0 1 3 6 0 5 0
Light_Cond_ID 0 1 4 17 0 5 0
Road_Type_ID 0 1 5 26 0 5 0
Road_Algn_ID 0 1 5 19 0 5 0
Traffic_Cntl_ID 0 1 4 21 0 5 0
Harm_Evnt_ID 0 1 5 26 0 5 0
Intrsct_Relat_ID 0 1 12 20 0 4 0
FHE_Collsn_ID 0 1 5 32 0 5 0
Road_Part_Adj_ID 0 1 5 28 0 5 0
Road_Cls_ID 0 1 5 19 0 5 0
Pop_Group_ID 0 1 5 21 0 5 0
Crash_Speed_LimitCat 0 1 5 9 0 5 0
Veh_Body_Styl_ID 0 1 14 14 0 1 0
Prsn_Ethnicity_ID 0 1 5 8 0 5 0
Prsn_Age 0 1 5 11 0 5 0
Prsn_Injry_Sev_ID 0 1 1 2 0 3 0

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SurfDry 0 1 0.91 0.28 0 1 1 1 1 ▁▁▁▁▇
GenMale 0 1 0.88 0.32 0 1 1 1 1 ▁▁▁▁▇
TrafVol 0 1 14501.90 8222.31 243 7336 14822 21460 28996 ▇▇▇▇▇

Data Visualizations

# Emit a one-line introduction to the plots that follow.
print("The data visualizations for the variables are shown below.")
## [1] "The data visualizations for the variables are shown below."

Traffic Volume Histogram

# Histogram of traffic volume (TrafVol) with 5,000-wide bins
ggplot(data = HW1, mapping = aes(TrafVol)) +
  geom_histogram(
    binwidth = 5000,
    colour = "black",
    fill = "lightgreen",
    alpha = 0.7
  ) +
  labs(
    title = "Distribution of Traffic Volume",
    x = "Traffic Volume",
    y = "Count"
  ) +
  theme_minimal() +
  theme(
    # Centred title, size-14 axis titles, size-12 tick labels
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12)
  )

Weather, Lighting Condition and Road Type

# Bar charts of weather condition, light condition and road type.
# The three panels share one look: centred title, size-12 axis titles,
# size-10 tick labels (x labels rotated 60 degrees) and no legend, because the
# fill colour only repeats the x-axis category.
panel_theme <- theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 60, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Weather condition
plot1 <- ggplot(HW1, aes(Wthr_Cond_ID, fill = Wthr_Cond_ID)) +
  geom_bar(width = 0.6) +
  labs(title = "Weather Condition", x = "Weather Condition", y = "Frequency") +
  panel_theme

# Light condition
plot2 <- ggplot(HW1, aes(Light_Cond_ID, fill = Light_Cond_ID)) +
  geom_bar(width = 0.6) +
  labs(title = "Light Condition", x = "Light Condition", y = "Frequency") +
  panel_theme

# Road type
plot3 <- ggplot(HW1, aes(Road_Type_ID, fill = Road_Type_ID)) +
  geom_bar(width = 0.6) +
  labs(title = "Road Type", x = "Road Type", y = "Frequency") +
  panel_theme

# Lay the three charts out in a single row
grid.arrange(plot1, plot2, plot3, ncol = 3)

Road Alignment, Traffic Control and Harmful Event

# Bar charts of road alignment, traffic control and harmful event.
# Shared look: centred title, size-12 axis titles, size-10 tick labels with
# the x labels rotated 45 degrees.
panel_theme <- theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Road alignment: blue bars, y-axis fixed to 0-1200
plot1 <- ggplot(HW1, aes(Road_Algn_ID)) +
  geom_bar(fill = "dodgerblue", width = 0.6) +
  labs(title = "Road Alignment", x = "Road Alignment", y = "Frequency") +
  scale_y_continuous(limits = c(0, 1200)) +
  panel_theme

# Traffic control: green bars, y-axis fixed to 0-500
plot2 <- ggplot(HW1, aes(Traffic_Cntl_ID)) +
  geom_bar(fill = "seagreen", width = 0.6) +
  labs(title = "Traffic Control", x = "Traffic Control", y = "Frequency") +
  scale_y_continuous(limits = c(0, 500)) +
  panel_theme

# Harmful event: coral bars, y-axis fixed to 0-1200
plot3 <- ggplot(HW1, aes(Harm_Evnt_ID)) +
  geom_bar(fill = "coral", width = 0.6) +
  labs(title = "Harmful Event", x = "Harmful Event", y = "Frequency") +
  scale_y_continuous(limits = c(0, 1200)) +
  panel_theme

# Lay the three charts out in a single row
grid.arrange(plot1, plot2, plot3, ncol = 3)

Intersection Relation, First Harmful Event and Road Part Adjacent

# Bar charts of intersection relation, first harmful event collision type and
# adjacent road part.
# Shared look: centred size-14 title, size-12 axis titles, size-10 tick labels
# with the x labels rotated 45 degrees.
panel_theme <- theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Intersection relation: y-axis fixed to 0-1000
plot1 <- ggplot(HW1, aes(Intrsct_Relat_ID)) +
  geom_bar(fill = "steelblue", width = 0.6) +
  labs(
    title = "Intersection Relation",
    x = "Intersection Relation",
    y = "Frequency"
  ) +
  scale_y_continuous(limits = c(0, 1000)) +
  panel_theme

# First harmful event collision: y-axis fixed to 0-500
plot2 <- ggplot(HW1, aes(FHE_Collsn_ID)) +
  geom_bar(fill = "seagreen", width = 0.6) +
  labs(
    title = "First Harmful Event Collision",
    x = "Collision Type",
    y = "Frequency"
  ) +
  scale_y_continuous(limits = c(0, 500)) +
  panel_theme

# Adjacent road part: counts are drawn on a log10 y-axis
plot3 <- ggplot(HW1, aes(Road_Part_Adj_ID)) +
  geom_bar(fill = "coral", width = 0.6) +
  labs(
    title = "Road Part Adjacent",
    x = "Road Part Adjacent",
    y = "Frequency (Log Scale)"
  ) +
  scale_y_log10() +
  panel_theme

# Lay the three charts out in a single row
grid.arrange(plot1, plot2, plot3, ncol = 3)

Road Class, Population Group and Speed Limit

# Bar charts of road class, population group and crash speed limit category.
# Shared look: centred size-14 title, size-12 axis titles, size-10 tick labels
# with the x labels rotated 45 degrees.
panel_theme <- theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Road class: y-axis fixed to 0-500
plot1 <- ggplot(HW1, aes(Road_Cls_ID)) +
  geom_bar(fill = "steelblue", width = 0.6) +
  labs(title = "Road Class", x = "Road Class", y = "Frequency") +
  scale_y_continuous(limits = c(0, 500)) +
  panel_theme

# Population group: y-axis fixed to 0-1000
plot2 <- ggplot(HW1, aes(Pop_Group_ID)) +
  geom_bar(fill = "seagreen", width = 0.6) +
  labs(title = "Population Group", x = "Population Group", y = "Frequency") +
  scale_y_continuous(limits = c(0, 1000)) +
  panel_theme

# Crash speed limit category: y-axis fixed to 0-500
plot3 <- ggplot(HW1, aes(Crash_Speed_LimitCat)) +
  geom_bar(fill = "coral", width = 0.6) +
  labs(
    title = "Crash Speed Limit ",
    x = "Speed Limit Category",
    y = "Frequency"
  ) +
  scale_y_continuous(limits = c(0, 500)) +
  panel_theme

# Lay the three charts out in a single row
grid.arrange(plot1, plot2, plot3, ncol = 3)

Person Ethnicity, Person Age and Person Injury Severity

# Create a vertical bar plot for Person Ethnicity
# The y-axis limit must exceed the tallest bar: the White category holds 688
# records, and ggplot2 removes any bar that extends past the scale limits, so
# the previous upper limit of 500 made that bar disappear from the chart.
plot1 <- ggplot(HW1, aes(x = Prsn_Ethnicity_ID)) + 
  geom_bar(width = 0.6, fill = "steelblue") +  
  labs(title = "Person Ethnicity", x = "Ethnicity", y = "Frequency") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14), 
    axis.title.x = element_text(size = 12), 
    axis.title.y = element_text(size = 12), 
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1), 
    axis.text.y = element_text(size = 10)
  ) +
  ylim(0, 700)

# Bar charts of person age group and injury severity, followed by the layout
# of the whole ethnicity / age / severity row.
# Shared look: centred size-14 title, size-12 axis titles, size-10 tick labels
# with the x labels rotated 45 degrees.
panel_theme <- theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Person age: y-axis fixed to 0-600
plot2 <- ggplot(HW1, aes(Prsn_Age)) +
  geom_bar(fill = "seagreen", width = 0.6) +
  labs(title = "Person Age", x = "Age", y = "Frequency") +
  scale_y_continuous(limits = c(0, 600)) +
  panel_theme

# Person injury severity: title broken over two lines, y-axis fixed to 0-1250
plot3 <- ggplot(HW1, aes(Prsn_Injry_Sev_ID)) +
  geom_bar(fill = "coral", width = 0.6) +
  labs(
    title = "Person Injury\nSeverity",
    x = "Injury Severity",
    y = "Frequency"
  ) +
  scale_y_continuous(limits = c(0, 1250)) +
  panel_theme

# Lay the three charts out in a single row
grid.arrange(plot1, plot2, plot3, ncol = 3)

Gender and Surface Condition

# Bar charts of the two 0/1 indicator columns, GenMale and SurfDry.
# Shared look: centred size-14 title, size-12 axis titles, size-10 tick labels.
panel_theme <- theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  )

# Gender: the indicator is shown as a factor, with 0/1 relabelled Female/Male
plot1 <- ggplot(HW1, aes(as.factor(GenMale))) +
  geom_bar(fill = "steelblue") +
  scale_x_discrete(breaks = c("0", "1"), labels = c("Female", "Male")) +
  labs(
    title = "Gender",
    x = "Gender (Male = 1, Female = 0)",
    y = "Frequency"
  ) +
  panel_theme

# Surface condition: 0/1 relabelled Wet/Dry
plot2 <- ggplot(HW1, aes(as.factor(SurfDry))) +
  geom_bar(fill = "seagreen") +
  scale_x_discrete(breaks = c("0", "1"), labels = c("Wet", "Dry")) +
  labs(
    title = "Surface Condition",
    x = "Surface Condition (Dry = 1, Wet = 0)",
    y = "Frequency"
  ) +
  panel_theme

# Lay the two charts out next to each other
grid.arrange(plot1, plot2, ncol = 2)

Visual Exploration of Variable Relationships

# Plot 1: traffic volume against person age group, with one panel per
# combination of weather condition (rows) and light condition (columns)
ggplot(data = HW1, mapping = aes(Prsn_Age, TrafVol)) +
  geom_point(colour = "blue", alpha = 0.7) +
  facet_grid(rows = vars(Wthr_Cond_ID), cols = vars(Light_Cond_ID)) +
  scale_y_continuous(
    breaks = seq(0, 30000, by = 5000),
    limits = c(0, 30000)
  ) +
  labs(
    title = "Traffic Volume vs. Person Age",
    x = "Person Age",
    y = "Traffic Volume"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    strip.text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Plot 2: traffic volume against first harmful event collision type, with one
# panel per combination of intersection relation (rows) and road class (columns)
ggplot(data = HW1, mapping = aes(FHE_Collsn_ID, TrafVol)) +
  geom_point(colour = "red", alpha = 0.7) +
  facet_grid(rows = vars(Intrsct_Relat_ID), cols = vars(Road_Cls_ID)) +
  scale_y_continuous(
    breaks = seq(0, 30000, by = 5000),
    limits = c(0, 30000)
  ) +
  labs(
    title = "Traffic Volume vs. First Harmful Event Collision",
    x = "First Harmful Event Collision",
    y = "Traffic Volume"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    strip.text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Plot 3: Histogram of Traffic Volume Faceted by Injury Severity and Road Type
# Bins are anchored at 0 (boundary = 0) so the six 5,000-wide bins tile the
# 0-30,000 axis range exactly. With the default bin placement the bins are
# centred on multiples of 5,000, so the outermost bars extend past the x-axis
# limits and ggplot2 drops them, hiding the lowest and highest traffic volumes.
ggplot(HW1, aes(x = TrafVol)) +
  geom_histogram(
    binwidth = 5000,
    boundary = 0,
    fill = "grey",
    color = "black"
  ) +
  facet_grid(Prsn_Injry_Sev_ID ~ Road_Type_ID) +
  scale_x_continuous(limits = c(0, 30000), breaks = seq(0, 30000, 5000)) +
  labs(
    title = "Distribution of Traffic Volume by Injury Severity and Road Type",
    x = "Traffic Volume",
    y = "Count"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 10),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Plot 4: Scatter Plot of Traffic Volume vs. Person Age with color indicating Injury Severity
# Colours are named by severity level (BC, KA, O) so each level keeps its
# colour even if the set or order of levels present in the data changes; an
# unnamed vector would be assigned purely by level order.
ggplot(HW1, aes(x = Prsn_Age, y = TrafVol, color = Prsn_Injry_Sev_ID)) +
  geom_point(alpha = 0.7) +
  scale_color_manual(values = c(BC = "blue", KA = "red", O = "green")) +
  labs(
    title = "Traffic Volume vs. Person Age by Injury Severity",
    x = "Person Age",
    y = "Traffic Volume",
    color = "Injury Severity"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Plot 5: one traffic-volume box per weather condition
ggplot(data = HW1, mapping = aes(as.factor(Wthr_Cond_ID), TrafVol)) +
  geom_boxplot(fill = "lightblue") +
  labs(
    title = "Traffic Volume by Weather Condition",
    x = "Weather Condition",
    y = "Traffic Volume"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Cross-tabulate speed limit category against road type and flatten the result
# to long form; the columns come out as Var1 (speed limit), Var2 (road type)
# and Freq (count).
speed_by_road <- table(HW1[["Crash_Speed_LimitCat"]], HW1[["Road_Type_ID"]])
heatmap_data <- as.data.frame(speed_by_road)

# Plot 6: tile heatmap of the counts, shaded from light to dark green
ggplot(data = heatmap_data, mapping = aes(x = Var1, y = Var2, fill = Freq)) +
  geom_tile() +
  scale_fill_gradient(high = "darkgreen", low = "lightgreen") +
  labs(
    title = "Heatmap: Crash Speed Limit vs Road Type",
    x = "Crash Speed Limit",
    y = "Road Type",
    fill = "Count"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Plot 7: ggstatsplot between-groups comparison of traffic volume across the
# injury severity levels (parametric test, pairwise comparisons requested),
# restyled with a black-and-white theme and a centred title.

ggbetweenstats(
  data = HW1,
  x = Prsn_Injry_Sev_ID,
  y = TrafVol,
  type = "parametric",
  pairwise.comparisons = TRUE,
  palette = "Dark2"
) +
  theme_bw(base_size = 16) +
  labs(
    title = "Injury Severity Vs Traffic Volume",
    x = "Injury Severity",
    y = "Traffic Volume"
  ) +
  theme(
    # Extra bottom margin separates the title from the plot body
    plot.title = element_text(hjust = 0.5, margin = margin(b = 20))
  )