# Make the homework folder the working directory so that the relative file
# names used later in this script ("HW1_Data.xlsx", "HW1DS.xlsx") resolve.
# NOTE(review): this is an absolute, user-specific OneDrive path; it will fail
# on any other machine and must be edited there before running the script.
setwd("C:/Users/sonia/OneDrive - Texas State University (1)/My file/course/Fall 24/Artificial Inteligence in Civil Engineering/HW1")
library(readxl)
library(readr)
library(ggplot2)
library(gridExtra)
library(DT)
library(dplyr)
library(ggstatsplot)
Data Type
# Read the crash records from the Excel workbook in the working directory,
# then report the table size as (rows, columns).
HW1 <- read_excel(path = "HW1_Data.xlsx")
dim(HW1)
## [1] 1295 19
## [1] "Wthr_Cond_ID" "Light_Cond_ID" "Road_Type_ID"
## [4] "Road_Algn_ID" "SurfDry" "Traffic_Cntl_ID"
## [7] "Harm_Evnt_ID" "Intrsct_Relat_ID" "FHE_Collsn_ID"
## [10] "Road_Part_Adj_ID" "Road_Cls_ID" "Pop_Group_ID"
## [13] "Crash_Speed_LimitCat" "Veh_Body_Styl_ID" "Prsn_Ethnicity_ID"
## [16] "GenMale" "TrafVol" "Prsn_Age"
## [19] "Prsn_Injry_Sev_ID"
## # A tibble: 6 × 19
## Wthr_Cond_ID Light_Cond_ID Road_Type_ID Road_Algn_ID SurfDry Traffic_Cntl_ID
## <chr> <chr> <chr> <chr> <dbl> <chr>
## 1 Clear Dark, not ligh… 2 lane, 2 w… Straight, l… 1 Marked lanes
## 2 Clear Dark, not ligh… 2 lane, 2 w… Straight, l… 1 Center stripe/…
## 3 Clear Daylight 2 lane, 2 w… Straight, l… 1 Marked lanes
## 4 Clear Daylight 2 lane, 2 w… Straight, l… 1 Center stripe/…
## 5 Clear Dark, not ligh… 2 lane, 2 w… Straight, g… 1 None
## 6 Clear Daylight Unknown Straight, l… 1 None
## # ℹ 13 more variables: Harm_Evnt_ID <chr>, Intrsct_Relat_ID <chr>,
## # FHE_Collsn_ID <chr>, Road_Part_Adj_ID <chr>, Road_Cls_ID <chr>,
## # Pop_Group_ID <chr>, Crash_Speed_LimitCat <chr>, Veh_Body_Styl_ID <chr>,
## # Prsn_Ethnicity_ID <chr>, GenMale <dbl>, TrafVol <dbl>, Prsn_Age <chr>,
## # Prsn_Injry_Sev_ID <chr>
## tibble [1,295 × 19] (S3: tbl_df/tbl/data.frame)
## $ Wthr_Cond_ID : chr [1:1295] "Clear" "Clear" "Clear" "Clear" ...
## $ Light_Cond_ID : chr [1:1295] "Dark, not lighted" "Dark, not lighted" "Daylight" "Daylight" ...
## $ Road_Type_ID : chr [1:1295] "2 lane, 2 way" "2 lane, 2 way" "2 lane, 2 way" "2 lane, 2 way" ...
## $ Road_Algn_ID : chr [1:1295] "Straight, level" "Straight, level" "Straight, level" "Straight, level" ...
## $ SurfDry : num [1:1295] 1 1 1 1 1 1 1 1 1 1 ...
## $ Traffic_Cntl_ID : chr [1:1295] "Marked lanes" "Center stripe/divider" "Marked lanes" "Center stripe/divider" ...
## $ Harm_Evnt_ID : chr [1:1295] "Motor vehicle in transport" "Motor vehicle in transport" "Motor vehicle in transport" "Fixed object" ...
## $ Intrsct_Relat_ID : chr [1:1295] "Non intersection" "Non intersection" "Intersection" "Non intersection" ...
## $ FHE_Collsn_ID : chr [1:1295] "Sd both going straight-rear end" "Sd both going straight-rear end" "Other" "Omv vehicle going straight" ...
## $ Road_Part_Adj_ID : chr [1:1295] "Main/proper lane" "Main/proper lane" "Main/proper lane" "Main/proper lane" ...
## $ Road_Cls_ID : chr [1:1295] "Farm to market" "Us & state highways" "Farm to market" "Us & state highways" ...
## $ Pop_Group_ID : chr [1:1295] "10,000 - 24,999 pop" "Rural" "Other" "Rural" ...
## $ Crash_Speed_LimitCat: chr [1:1295] "30-40 mph" "65-70 mph" "45-60 mph" "65-70 mph" ...
## $ Veh_Body_Styl_ID : chr [1:1295] "Farm equipment" "Farm equipment" "Farm equipment" "Farm equipment" ...
## $ Prsn_Ethnicity_ID : chr [1:1295] "White" "White" "White" "White" ...
## $ GenMale : num [1:1295] 1 1 1 1 1 1 1 1 1 1 ...
## $ TrafVol : num [1:1295] 4919 5109 6003 15761 1649 ...
## $ Prsn_Age : chr [1:1295] "25-54 years" "25-54 years" "Other" "25-54 years" ...
## $ Prsn_Injry_Sev_ID : chr [1:1295] "O" "O" "O" "O" ...
Descriptive Analysis
library(compareGroups)
# Describe every other column of HW1 grouped by vehicle body style.
# Both level caps are set to 50 so no categorical variable is excluded for
# having too many levels. Column names must not contain spaces for the
# formula interface to work without quoting.
res1 <- compareGroups(
  Veh_Body_Styl_ID ~ .,
  data = HW1,
  ref = 1,
  max.xlev = 50,
  max.ylev = 50
)
# Turn the comparison object into a printable descriptive table.
res2 <- createTable(res1, show.ratio = TRUE)
res2
##
## --------Summary descriptives table by 'Veh_Body_Styl_ID'---------
##
## _____________________________________________________________
## Farm equipment p.overall
## N=1295
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
## Wthr_Cond_ID: .
## Clear 1084 (83.7%)
## Cloudy 146 (11.3%)
## Fog 10 (0.77%)
## Other 10 (0.77%)
## Rain 45 (3.47%)
## Light_Cond_ID: .
## Dark, lighted 42 (3.24%)
## Dark, not lighted 154 (11.9%)
## Daylight 1040 (80.3%)
## Dusk 39 (3.01%)
## Other 20 (1.54%)
## Road_Type_ID: .
## 2 lane, 2 way 551 (42.5%)
## 4 or more lanes, divided 185 (14.3%)
## 4 or more lanes, undivided 96 (7.41%)
## Other 2 (0.15%)
## Unknown 461 (35.6%)
## Road_Algn_ID: .
## Curve, level 62 (4.79%)
## Other 44 (3.40%)
## Straight, grade 135 (10.4%)
## Straight, hillcrest 47 (3.63%)
## Straight, level 1007 (77.8%)
## SurfDry 0.91 (0.28) .
## Traffic_Cntl_ID: .
## Center stripe/divider 306 (23.6%)
## Marked lanes 389 (30.0%)
## No passing zone 94 (7.26%)
## None 296 (22.9%)
## Other 210 (16.2%)
## Harm_Evnt_ID: .
## Fixed object 144 (11.1%)
## Motor vehicle in transport 1006 (77.7%)
## Other 32 (2.47%)
## Overturned 26 (2.01%)
## Parked car 87 (6.72%)
## Intrsct_Relat_ID: .
## Driveway access 145 (11.2%)
## Intersection 170 (13.1%)
## Intersection related 112 (8.65%)
## Non intersection 868 (67.0%)
## FHE_Collsn_ID: .
## Omv vehicle going straight 259 (20.0%)
## Other 397 (30.7%)
## Sd both going straight-rear end 312 (24.1%)
## Sd both going straight-sideswipe 159 (12.3%)
## Sd one straight-one left turn 168 (13.0%)
## Road_Part_Adj_ID: .
## Exit/off ramp 4 (0.31%)
## Main/proper lane 1225 (94.6%)
## Other 4 (0.31%)
## Other (explain in narrative) 30 (2.32%)
## Unknown 32 (2.47%)
## Road_Cls_ID: .
## City street 221 (17.1%)
## County road 191 (14.7%)
## Farm to market 374 (28.9%)
## Other 89 (6.87%)
## Us & state highways 420 (32.4%)
## Pop_Group_ID: .
## 10,000 - 24,999 pop 63 (4.86%)
## 250,000 pop. And over 117 (9.03%)
## Other 193 (14.9%)
## Rural 866 (66.9%)
## Town under 2,499 pop. 56 (4.32%)
## Crash_Speed_LimitCat: .
## > 70 mph 203 (15.7%)
## 30-40 mph 336 (25.9%)
## 45-60 mph 471 (36.4%)
## 65-70 mph 223 (17.2%)
## Other 62 (4.79%)
## Prsn_Ethnicity_ID: .
## Black 63 (4.86%)
## Hispanic 422 (32.6%)
## Other 39 (3.01%)
## Unknown 83 (6.41%)
## White 688 (53.1%)
## GenMale 0.88 (0.32) .
## TrafVol 14502 (8222) .
## Prsn_Age: .
## 15-24 years 188 (14.5%)
## 25-54 years 572 (44.2%)
## 55-64 years 232 (17.9%)
## 65-74 years 118 (9.11%)
## Other 185 (14.3%)
## Prsn_Injry_Sev_ID: .
## BC 120 (9.27%)
## KA 50 (3.86%)
## O 1125 (86.9%)
## ¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
# Write the descriptive table to an Excel workbook in the working directory.
export2xls(res2, file = "HW1DS.xlsx")
# Column-by-column profile of the raw data.
library(skimr)
skim(data = HW1)
Data summary

| Item | Value |
|:-----------------------|:-----|
| Name | HW1 |
| Number of rows | 1295 |
| Number of columns | 19 |
| _______________________ | |
| Column type frequency: | |
| character | 16 |
| numeric | 3 |
| ________________________ | |
| Group variables | None |
Variable type: character

| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|:---------------------|--:|--:|--:|--:|--:|--:|--:|
| Wthr_Cond_ID | 0 | 1 | 3 | 6 | 0 | 5 | 0 |
| Light_Cond_ID | 0 | 1 | 4 | 17 | 0 | 5 | 0 |
| Road_Type_ID | 0 | 1 | 5 | 26 | 0 | 5 | 0 |
| Road_Algn_ID | 0 | 1 | 5 | 19 | 0 | 5 | 0 |
| Traffic_Cntl_ID | 0 | 1 | 4 | 21 | 0 | 5 | 0 |
| Harm_Evnt_ID | 0 | 1 | 5 | 26 | 0 | 5 | 0 |
| Intrsct_Relat_ID | 0 | 1 | 12 | 20 | 0 | 4 | 0 |
| FHE_Collsn_ID | 0 | 1 | 5 | 32 | 0 | 5 | 0 |
| Road_Part_Adj_ID | 0 | 1 | 5 | 28 | 0 | 5 | 0 |
| Road_Cls_ID | 0 | 1 | 5 | 19 | 0 | 5 | 0 |
| Pop_Group_ID | 0 | 1 | 5 | 21 | 0 | 5 | 0 |
| Crash_Speed_LimitCat | 0 | 1 | 5 | 9 | 0 | 5 | 0 |
| Veh_Body_Styl_ID | 0 | 1 | 14 | 14 | 0 | 1 | 0 |
| Prsn_Ethnicity_ID | 0 | 1 | 5 | 8 | 0 | 5 | 0 |
| Prsn_Age | 0 | 1 | 5 | 11 | 0 | 5 | 0 |
| Prsn_Injry_Sev_ID | 0 | 1 | 1 | 2 | 0 | 3 | 0 |
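Variable type: numeric

| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|:--------|--:|--:|--:|--:|--:|--:|--:|--:|--:|:------|
| SurfDry | 0 | 1 | 0.91 | 0.28 | 0 | 1 | 1 | 1 | 1 | ▁▁▁▁▇ |
| GenMale | 0 | 1 | 0.88 | 0.32 | 0 | 1 | 1 | 1 | 1 | ▁▁▁▁▇ |
| TrafVol | 0 | 1 | 14501.90 | 8222.31 | 243 | 7336 | 14822 | 21460 | 28996 | ▇▇▇▇▇ |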
Data Visualizations
# Announce the start of the visualization section in the rendered output.
print(x = "The data visualizations for the variables are shown below.")
## [1] "The data visualizations for the variables are shown below."
Traffic Volume Histogram
# Histogram of traffic volume (TrafVol) using 5,000-wide bins.
ggplot(data = HW1, mapping = aes(x = TrafVol)) +
  geom_histogram(
    binwidth = 5000,
    alpha = 0.7,
    color = "black",
    fill = "lightgreen"
  ) +
  labs(
    x = "Traffic Volume",
    y = "Count",
    title = "Distribution of Traffic Volume"
  ) +
  theme_minimal() +
  theme(
    # Centered title; axis titles at size 14, tick labels at size 12.
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_text(size = 12)
  )

Weather, lighting condition and Road Type
# Three frequency bar charts (weather, lighting, road type). Each maps the
# category to both x and fill, so the legend is redundant and is hidden.
# Tick labels are rotated 60 degrees to keep long category names readable.

# Weather condition counts
plot1 <- ggplot(data = HW1, mapping = aes(x = Wthr_Cond_ID, fill = Wthr_Cond_ID)) +
  geom_bar(width = 0.6) +
  labs(x = "Weather Condition", y = "Frequency", title = "Weather Condition") +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 60, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Light condition counts
plot2 <- ggplot(data = HW1, mapping = aes(x = Light_Cond_ID, fill = Light_Cond_ID)) +
  geom_bar(width = 0.6) +
  labs(x = "Light Condition", y = "Frequency", title = "Light Condition") +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 60, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Road type counts
plot3 <- ggplot(data = HW1, mapping = aes(x = Road_Type_ID, fill = Road_Type_ID)) +
  geom_bar(width = 0.6) +
  labs(x = "Road Type", y = "Frequency", title = "Road Type") +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 60, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Draw the three charts in a single row.
grid.arrange(plot1, plot2, plot3, ncol = 3)

Road Alignment, Traffic Control and Harmful Event
# Three single-color frequency bar charts with fixed y-axis ranges:
# road alignment, traffic control, and harmful event.

# Road alignment counts (y axis fixed to 0-1200)
plot1 <- ggplot(data = HW1, mapping = aes(x = Road_Algn_ID)) +
  geom_bar(fill = "dodgerblue", width = 0.6) +
  scale_y_continuous(limits = c(0, 1200)) +
  labs(x = "Road Alignment", y = "Frequency", title = "Road Alignment") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Traffic control counts (y axis fixed to 0-500)
plot2 <- ggplot(data = HW1, mapping = aes(x = Traffic_Cntl_ID)) +
  geom_bar(fill = "seagreen", width = 0.6) +
  scale_y_continuous(limits = c(0, 500)) +
  labs(x = "Traffic Control", y = "Frequency", title = "Traffic Control") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Harmful event counts (y axis fixed to 0-1200)
plot3 <- ggplot(data = HW1, mapping = aes(x = Harm_Evnt_ID)) +
  geom_bar(fill = "coral", width = 0.6) +
  scale_y_continuous(limits = c(0, 1200)) +
  labs(x = "Harmful Event", y = "Frequency", title = "Harmful Event") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Draw the three charts in a single row.
grid.arrange(plot1, plot2, plot3, ncol = 3)

Intersection Relation, First Harmful Event and Road Part
Adjacent
# Three frequency bar charts: intersection relation, first-harmful-event
# collision type, and road part adjacent.

# Intersection relation counts (y axis fixed to 0-1000)
plot1 <- ggplot(HW1, aes(x = Intrsct_Relat_ID)) +
  geom_bar(width = 0.6, fill = "steelblue") +
  labs(title = "Intersection Relation", x = "Intersection Relation", y = "Frequency") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  ) +
  ylim(0, 500 * 2)

# First harmful event collision counts (y axis fixed to 0-500)
plot2 <- ggplot(HW1, aes(x = FHE_Collsn_ID)) +
  geom_bar(width = 0.6, fill = "seagreen") +
  labs(title = "First Harmful Event Collision", x = "Collision Type", y = "Frequency") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  ) +
  ylim(0, 500)

# Road part adjacent counts on a log10 y axis, so the very small categories
# remain visible next to the dominant one.
# scale_y_log10() replaces scale_y_continuous(trans = 'log10'): it applies the
# same transformation without relying on the `trans` argument, which newer
# ggplot2 releases deprecate in favor of `transform`.
plot3 <- ggplot(HW1, aes(x = Road_Part_Adj_ID)) +
  geom_bar(width = 0.6, fill = "coral") +
  labs(title = "Road Part Adjacent", x = "Road Part Adjacent", y = "Frequency (Log Scale)") +
  scale_y_log10() +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Draw the three charts in a single row.
grid.arrange(plot1, plot2, plot3, ncol = 3)

Road Class, Population Group and Speed Limit
# Three single-color frequency bar charts with fixed y-axis ranges:
# road class, population group, and crash speed-limit category.

# Road class counts (y axis fixed to 0-500)
plot1 <- ggplot(data = HW1, mapping = aes(x = Road_Cls_ID)) +
  geom_bar(fill = "steelblue", width = 0.6) +
  scale_y_continuous(limits = c(0, 500)) +
  labs(x = "Road Class", y = "Frequency", title = "Road Class") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, hjust = 0.5),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Population group counts (y axis fixed to 0-1000)
plot2 <- ggplot(data = HW1, mapping = aes(x = Pop_Group_ID)) +
  geom_bar(fill = "seagreen", width = 0.6) +
  scale_y_continuous(limits = c(0, 1000)) +
  labs(x = "Population Group", y = "Frequency", title = "Population Group") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, hjust = 0.5),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Crash speed-limit category counts (y axis fixed to 0-500)
plot3 <- ggplot(data = HW1, mapping = aes(x = Crash_Speed_LimitCat)) +
  geom_bar(fill = "coral", width = 0.6) +
  scale_y_continuous(limits = c(0, 500)) +
  labs(x = "Speed Limit Category", y = "Frequency", title = "Crash Speed Limit ") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, hjust = 0.5),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  )

# Draw the three charts in a single row.
grid.arrange(plot1, plot2, plot3, ncol = 3)

Person Ethnicity, Person Age and Person Injury Severity
# Three frequency bar charts: person ethnicity, person age group, and person
# injury severity.

# Person ethnicity counts.
# The upper limit is 800 rather than 500: the descriptive table reports 688
# "White" records, and ylim() discards any bar taller than the limit, so a
# cap of 500 silently removed the largest category from the chart.
plot1 <- ggplot(HW1, aes(x = Prsn_Ethnicity_ID)) +
  geom_bar(width = 0.6, fill = "steelblue") +
  labs(title = "Person Ethnicity", x = "Ethnicity", y = "Frequency") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  ) +
  ylim(0, 800)

# Person age group counts (y axis fixed to 0-600)
plot2 <- ggplot(HW1, aes(x = Prsn_Age)) +
  geom_bar(width = 0.6, fill = "seagreen") +
  labs(title = "Person Age", x = "Age", y = "Frequency") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  ) +
  ylim(0, 600)

# Person injury severity counts (y axis fixed to 0-1250); the title is split
# over two lines so it fits the narrow panel.
plot3 <- ggplot(HW1, aes(x = Prsn_Injry_Sev_ID)) +
  geom_bar(width = 0.6, fill = "coral") +
  labs(title = "Person Injury\nSeverity", x = "Injury Severity", y = "Frequency") +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, size = 14),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10, angle = 45, hjust = 1),
    axis.text.y = element_text(size = 10)
  ) +
  ylim(0, 1250)

# Draw the three charts in a single row.
grid.arrange(plot1, plot2, plot3, ncol = 3)

Gender and Surface Condition
# Bar charts for the two 0/1 indicator columns. Each is converted to a factor
# so it is drawn as two discrete bars, and the 0/1 tick labels are replaced
# with readable names.

# Gender: GenMale 0 -> "Female", 1 -> "Male"
plot1 <- ggplot(data = HW1, mapping = aes(x = as.factor(GenMale))) +
  geom_bar(fill = "steelblue") +
  scale_x_discrete(labels = c("Female", "Male"), breaks = c("0", "1")) +
  labs(x = "Gender (Male = 1, Female = 0)", y = "Frequency", title = "Gender") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, hjust = 0.5),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  )

# Surface condition: SurfDry 0 -> "Wet", 1 -> "Dry"
plot2 <- ggplot(data = HW1, mapping = aes(x = as.factor(SurfDry))) +
  geom_bar(fill = "seagreen") +
  scale_x_discrete(labels = c("Wet", "Dry"), breaks = c("0", "1")) +
  labs(
    x = "Surface Condition (Dry = 1, Wet = 0)",
    y = "Frequency",
    title = "Surface Condition"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, hjust = 0.5),
    axis.title.x = element_text(size = 12),
    axis.title.y = element_text(size = 12),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10)
  )

# Draw the two charts next to each other.
grid.arrange(plot1, plot2, ncol = 2)

Visual Exploration of Variable Relationships
# Plot 1: traffic volume against person age group, one panel per
# weather condition (rows) x light condition (columns).
ggplot(data = HW1, mapping = aes(x = Prsn_Age, y = TrafVol)) +
  geom_point(color = "blue", alpha = 0.7) +
  facet_grid(rows = vars(Wthr_Cond_ID), cols = vars(Light_Cond_ID)) +
  scale_y_continuous(
    breaks = seq(from = 0, to = 30000, by = 5000),
    limits = c(0, 30000)
  ) +
  labs(
    x = "Person Age",
    y = "Traffic Volume",
    title = "Traffic Volume vs. Person Age"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    strip.text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Plot 2: traffic volume against first-harmful-event collision type, one
# panel per intersection relation (rows) x road class (columns).
ggplot(data = HW1, mapping = aes(x = FHE_Collsn_ID, y = TrafVol)) +
  geom_point(color = "red", alpha = 0.7) +
  facet_grid(rows = vars(Intrsct_Relat_ID), cols = vars(Road_Cls_ID)) +
  scale_y_continuous(
    breaks = seq(from = 0, to = 30000, by = 5000),
    limits = c(0, 30000)
  ) +
  labs(
    x = "First Harmful Event Collision",
    y = "Traffic Volume",
    title = "Traffic Volume vs. First Harmful Event Collision"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5),
    strip.text = element_text(size = 10),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Plot 3: Histogram of Traffic Volume Faceted by Injury Severity and Road Type
# One panel per injury severity (rows) x road type (columns).
ggplot(HW1, aes(x = TrafVol)) +
  # boundary = 0 aligns the 5,000-wide bins with the axis breaks
  # (0-5000, 5000-10000, ..., 25000-30000). With the default alignment the
  # bins are centered on multiples of 5,000, so the first and last bins extend
  # past the 0 and 30000 limits set below and are removed from every panel,
  # dropping the lowest and highest traffic volumes from the chart.
  geom_histogram(binwidth = 5000, boundary = 0, fill = "grey", color = "black") +
  facet_grid(Prsn_Injry_Sev_ID ~ Road_Type_ID) +
  scale_x_continuous(limits = c(0, 30000), breaks = seq(0, 30000, 5000)) +
  labs(
    title = "Distribution of Traffic Volume by Injury Severity and Road Type",
    x = "Traffic Volume",
    y = "Count"
  ) +
  theme_minimal() +
  theme(
    strip.text = element_text(size = 10),
    plot.title = element_text(hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )

# Plot 4: traffic volume against person age group, with point color showing
# injury severity. The three manual colors are assigned to the severity
# levels in the scale's level order.
ggplot(
  data = HW1,
  mapping = aes(x = Prsn_Age, y = TrafVol, color = Prsn_Injry_Sev_ID)
) +
  geom_point(alpha = 0.7) +
  scale_color_manual(values = c("blue", "red", "green")) +
  labs(
    x = "Person Age",
    y = "Traffic Volume",
    color = "Injury Severity",
    title = "Traffic Volume vs. Person Age by Injury Severity"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Plot 5: distribution of traffic volume within each weather condition,
# drawn as one box per condition.
ggplot(data = HW1, mapping = aes(x = as.factor(Wthr_Cond_ID), y = TrafVol)) +
  geom_boxplot(fill = "lightblue") +
  labs(
    x = "Weather Condition",
    y = "Traffic Volume",
    title = "Traffic Volume by Weather Condition"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Cross-tabulate speed-limit category against road type and flatten the
# table to long form; the resulting columns are Var1 (speed limit),
# Var2 (road type) and Freq (count).
heatmap_data <- as.data.frame(
  table(HW1[["Crash_Speed_LimitCat"]], HW1[["Road_Type_ID"]])
)

# Plot 6: heatmap of the counts, shaded from light to dark green.
ggplot(data = heatmap_data, mapping = aes(x = Var1, y = Var2, fill = Freq)) +
  geom_tile() +
  scale_fill_gradient(high = "darkgreen", low = "lightgreen") +
  labs(
    x = "Crash Speed Limit",
    y = "Road Type",
    fill = "Count",
    title = "Heatmap: Crash Speed Limit vs Road Type"
  ) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

# Plot 7: between-group comparison of traffic volume across injury severity
# levels (ggstatsplot), using parametric tests with pairwise comparisons.
# The theme and labels added afterwards override the defaults that
# ggbetweenstats() sets.
ggbetweenstats(
  data = HW1,
  x = Prsn_Injry_Sev_ID,
  y = TrafVol,
  type = "parametric",
  pairwise.comparisons = TRUE,
  palette = "Dark2"
) +
  theme_bw(base_size = 16) +
  labs(
    x = "Injury Severity",
    y = "Traffic Volume",
    title = "Injury Severity Vs Traffic Volume"
  ) +
  theme(
    # Centered title with extra space beneath it.
    plot.title = element_text(hjust = 0.5, margin = margin(b = 20))
  )
