# Read the US accidents CSV from the working directory into a data frame.
# The first row of the file supplies the column names.
car_accidents <- read.csv(file = "US_Accidents_March23.csv", header = TRUE)
# Tally accidents per state and order the tally from most to fewest
state_counts <- sort(table(car_accidents[["State"]]), decreasing = TRUE)

# Bar chart of the per-state tallies, tallest bar first
barplot(
  state_counts,
  col = "steelblue",
  las = 3,         # draw the state labels vertically
  cex.names = 0.5, # shrink the labels so they fit under the bars
  main = "Number of Accidents by State",
  xlab = "State",
  ylab = "Number of Accidents"
)
The bar plot shows which states have the most recorded accidents. California, Texas, and Florida have the highest counts, which may reflect higher traffic density or more complete reporting in those states.
# Average of the Severity column, ignoring missing values
mean_severity <- mean(car_accidents[["Severity"]], na.rm = TRUE)
# Echo the result
mean_severity
## [1] 2.212384
The mean severity (~2.21) is close to level 2, suggesting that a typical accident is moderate in impact. The summary shows the distribution: most accidents are level 2, and fewer are level 1 or 4.
# Correlation between temperature (F) and visibility (mi), computed only on
# rows where both measurements are present
correlation <- with(
  car_accidents,
  cor(x = Temperature.F., y = Visibility.mi., use = "complete.obs")
)
# Echo the coefficient
correlation
## [1] 0.2171733
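The correlation (r ≈ 0.22) indicates a weak positive linear relationship between temperature and visibility. The scatter plot is consistent with this: higher temperatures are associated with slightly better visibility during accidents, although correlation alone does not show that temperature affects visibility.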
# Histogram of the temperature recorded for each accident
hist(
  x = car_accidents[["Temperature.F."]],
  breaks = 40,         # requested number of bins
  xlim = c(-50, 150),  # fixed x-axis range in degrees Fahrenheit
  col = "orange",
  border = "black",
  main = "Distribution of Temperature During Accidents",
  xlab = "Temperature (F)",
  ylab = "Number of Accidents"
)
Most accidents occur at moderate temperatures, with fewer accidents at extremely low or high temperatures. The histogram shows a roughly normal distribution with a slight skew.
6: Day vs Night Accidents Severity Comparison (T-Test)
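There is a statistically significant difference in accident
severity between day and night (Welch t-test, p < 2.2e-16). On average,
night accidents are slightly more severe than day accidents (mean 2.219
vs. 2.209). However, the difference of about 0.01 severity levels is very
small; with millions of observations, even negligible differences reach
statistical significance.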
# Split the dataset into daytime and nighttime accidents using the
# Sunrise_Sunset label. %in% is used instead of == because == yields NA for
# any row whose label is missing, and an NA in a logical row index makes `[`
# return an extra all-NA row for each such record. %in% always returns
# TRUE/FALSE, so only rows labelled exactly "Day" / "Night" are kept.
day_accidents <- car_accidents[car_accidents$Sunrise_Sunset %in% "Day", ]
night_accidents <- car_accidents[car_accidents$Sunrise_Sunset %in% "Night", ]
# Two-sample t-test comparing Severity between the two groups. With these
# arguments t.test() runs the Welch (unequal-variance) test with a two-sided
# alternative.
t_test_result <- t.test(day_accidents$Severity, night_accidents$Severity)
# Echo the test summary
t_test_result
##
## Welch Two Sample t-test
##
## data: day_accidents$Severity and night_accidents$Severity
## t = -25.029, df = 4376029, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.010430544 -0.008915583
## sample estimates:
## mean of x mean of y
## 2.209396 2.219069
# Side-by-side base-graphics boxplots of Severity for day and night accidents
boxplot(
  day_accidents[["Severity"]],
  night_accidents[["Severity"]],
  names = c("Day", "Night"),
  col = c("lightblue", "salmon"),
  border = "darkblue",
  main = "Accident Severity: Day vs Night",
  ylab = "Severity"
)