Surface Condition
# Bar chart of observation counts per SurfDry value; factor() turns the
# indicator into a discrete x axis.
ggplot(data = data, mapping = aes(x = factor(SurfDry))) +
  geom_bar(width = 0.5, alpha = 0.5, fill = "magenta") +
  ggtitle("Distribution of Road Surface Conditions") +
  xlab("Surface Condition (1 = Dry, 0 = Not Dry)") +
  ylab("Count") +
  theme_minimal()

Frequency Polygon of Traffic Volume
# Frequency polygon of TrafVol using bins that are 1000 units wide.
# `linewidth` sets the line thickness; the `size` argument it replaces was
# deprecated for line geoms in ggplot2 3.4.0.
ggplot(data, aes(x = TrafVol)) +
  geom_freqpoly(binwidth = 1000, color = "black", linewidth = 1.5) +
  labs(
    title = "Frequency Polygon of Traffic Volume",
    x = "Traffic Volume",
    y = "Frequency"
  )

Histogram and Frequency Polygon for Traffic Volume
# Histogram of TrafVol with a kernel density curve drawn over it.
# Both layers map y to the computed density so they share one scale, which is
# why the y axis is labelled "Density" rather than a count or frequency.
ggplot(data, aes(x = TrafVol)) +
  # Histogram, rescaled from counts to density
  geom_histogram(
    aes(y = after_stat(density)),
    bins = 30, fill = "lightblue", alpha = 0.6, color = "black"
  ) +
  # Kernel density curve
  geom_density(aes(y = after_stat(density)), color = "red", linewidth = 1) +
  labs(
    title = "Histogram and Frequency Polygon for Traffic Volume",
    x = "Traffic Volumes",
    y = "Density"
  ) +
  theme_minimal()

Boxplot of Traffic Volume by Age Group
# Box plots of TrafVol for each Prsn_Age value; every box is filled from a
# five-colour manual palette keyed on the same variable.
ggplot(
  data = data,
  mapping = aes(x = Prsn_Age, y = TrafVol, fill = Prsn_Age)
) +
  geom_boxplot(alpha = 0.3) +
  scale_fill_manual(
    values = c("red", "blue", "green", "orange", "purple")
  ) +
  ggtitle("Traffic Volume by Age Group") +
  xlab("Age Group") +
  ylab("Traffic Volume") +
  theme_minimal()

Injury Severity by Population Group
# Stacked bar chart: one bar per Pop_Group_ID, split by Prsn_Injry_Sev_ID
# using a three-colour manual palette.
ggplot(
  data = data,
  mapping = aes(x = Pop_Group_ID, fill = Prsn_Injry_Sev_ID)
) +
  geom_bar() +
  scale_fill_manual(values = c("yellow", "lightblue", "darkgreen")) +
  ggtitle("Injury Severity by Population Group") +
  xlab("Population Group") +
  ylab("Count") +
  theme_minimal()

Mean, Variance, Standard Deviation, Mode, and Percentiles of Traffic Volume
# Pull the TrafVol column out as a plain vector
traffic_volume <- data[["TrafVol"]]
# Summary statistics below all ignore missing values (na.rm = TRUE)
# Mean
mean_traffic_volume <- mean(x = traffic_volume, na.rm = TRUE)
# Variance
variance_traffic_volume <- var(x = traffic_volume, na.rm = TRUE)
# Standard deviation
sd_traffic_volume <- sd(x = traffic_volume, na.rm = TRUE)
# Mode of Traffic Volume
# Function to calculate mode: returns the most frequent value of `x`.
#   x      Vector of values.
#   na.rm  If TRUE (the default), missing values are dropped before counting,
#          so NA is never reported as the mode. This matches the na.rm = TRUE
#          used for the mean, variance, standard deviation and percentiles.
#          Pass FALSE to count NA as a candidate value.
# Ties go to the value that appears first in `x`. When nothing is left to
# count (empty or, with na.rm = TRUE, all-NA input) the result is NA.
mode_function <- function(x, na.rm = TRUE) {
  if (na.rm) {
    x <- x[!is.na(x)]
  }
  uniq_vals <- unique(x)
  # match() maps each element to its index in uniq_vals; tabulate() counts
  # occurrences per index and which.max() picks the first largest count.
  uniq_vals[which.max(tabulate(match(x, uniq_vals)))]
}
# Most frequent traffic volume
mode_traffic_volume <- mode_function(traffic_volume)
# Upper (85th) percentile, missing values ignored
percentile_85 <- quantile(x = traffic_volume, probs = 0.85, na.rm = TRUE)
# Lower (15th) percentile, missing values ignored
percentile_15 <- quantile(x = traffic_volume, probs = 0.15, na.rm = TRUE)
# Report every statistic; each `##` line is the output recorded for the call
# directly above it
cat("Mean Traffic Volume: ", mean_traffic_volume, "\n")
## Mean Traffic Volume: 14273.95
cat("Variance of Traffic Volume: ", variance_traffic_volume, "\n")
## Variance of Traffic Volume: 70004422
cat("Standard Deviation of Traffic Volume: ", sd_traffic_volume, "\n")
## Standard Deviation of Traffic Volume: 8366.865
cat("Mode of Traffic Volume: ", mode_traffic_volume, "\n")
## Mode of Traffic Volume: 3160
cat("85th Percentile of Traffic Volume: ", percentile_85, "\n")
## 85th Percentile of Traffic Volume: 24275.6
cat("15th Percentile of Traffic Volume: ", percentile_15, "\n")
## 15th Percentile of Traffic Volume: 3985.9
Density of Crash Speed Limits
# Kernel density of Crash_Speed_LimitCat, filled per value from a five-colour
# manual palette. geom_density() puts a density estimate on the y axis, so the
# axis is labelled "Density" rather than "Count".
# NOTE(review): the discrete fill scale and the "Speed Groups" label suggest
# this variable is categorical; if so, geom_bar() (counts per group) may be
# the more meaningful display - confirm with the author.
ggplot(data, aes(x = Crash_Speed_LimitCat, fill = Crash_Speed_LimitCat)) +
  geom_density() +
  scale_fill_manual(
    values = c("grey", "lightblue", "darkgreen", "yellow", "pink")
  ) +
  labs(
    title = "Density of Crash Speed Limits",
    x = "Speed Groups",
    y = "Density"
  ) +
  theme(axis.text.x = element_text(angle = 45, hjust = 0.5))

Density of Age Group
# Kernel density of Prsn_Age, filled per value from a five-colour manual
# palette. geom_density() puts a density estimate on the y axis, so the axis
# is labelled "Density" rather than "Count".
# NOTE(review): the discrete fill scale and the "Age Group" label suggest
# this variable is categorical; if so, geom_bar() (counts per group) may be
# the more meaningful display - confirm with the author.
ggplot(data, aes(x = Prsn_Age, fill = Prsn_Age)) +
  geom_density() +
  scale_fill_manual(
    values = c("red", "blue", "green", "orange", "purple")
  ) +
  labs(title = "Density of Person Age", x = "Age Group", y = "Density") +
  theme(axis.text.x = element_text(angle = 90, hjust = 0.5))

Heat Map for Various Numeric Variables
# Load necessary libraries
library(ggcorrplot)
# Calculate the correlation matrix for numeric variables.
# vapply() always yields a logical mask (sapply() can change its return type
# on unusual input), and drop = FALSE keeps a data frame even when only one
# column is numeric, so cor() always receives two-dimensional input.
numeric_columns <- data[, vapply(data, is.numeric, logical(1)), drop = FALSE]
# Rows containing any missing value are excluded (use = "complete.obs")
correlation_matrix <- cor(numeric_columns, use = "complete.obs")
# Create the heatmap: circles per variable pair, with the coefficient printed
ggcorrplot(
  correlation_matrix,
  method = "circle", lab = TRUE, lab_size = 5,
  title = "Correlation Heatmap of Numeric Variables",
  colors = c("red", "green", "blue")
)

Heat Map for Traffic Control and FHE Collision
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Summarize the data by counting combinations of Traffic_Cntl_ID and
# FHE_Collsn_ID (one row per observed pair, with its row count in `count`).
# `.groups = "drop"` returns an ungrouped result directly, so no separate
# ungroup() is needed and summarise() no longer prints its grouped-output
# message.
heatmap_data <- data %>%
  group_by(Traffic_Cntl_ID, FHE_Collsn_ID) %>%
  summarise(count = n(), .groups = "drop")
# Draw the heatmap: one red-bordered tile per Traffic_Cntl_ID / FHE_Collsn_ID
# pair, shaded from light to dark blue by its count.
ggplot(
  data = heatmap_data,
  mapping = aes(x = Traffic_Cntl_ID, y = FHE_Collsn_ID, fill = count)
) +
  geom_tile(color = "red") +
  scale_fill_gradient(low = "lightblue", high = "darkblue") +
  ggtitle("Heat Map of Traffic Control vs. FHE Collision") +
  xlab("Traffic Control") +
  ylab("FHE Collision") +
  labs(fill = "Count") +
  theme_minimal() +
  # Tilt the x tick labels and right-align them under their ticks
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
