HW1_Sadia

Loading Libraries

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(tidyr)
library(stats)
library(ggplot2)
library(readxl)
# Folder that holds the course data files. Change this single path to run the
# script on another machine.
data_dir <- "C:/Users/Sadia Bhuiyan Shampa/Desktop/CE 7393"

# Read the workbook through its full path instead of calling setwd(), so the
# session's working directory is not modified as a side effect.
df_1 <- read_excel(file.path(data_dir, "df_1.xlsx"))

Read Data and Data Summarization

# Print the structure of the data frame: dimensions, then each column's type
# and first few values.
str(df_1)
## tibble [1,295 × 19] (S3: tbl_df/tbl/data.frame)
##  $ Wthr_Cond_ID        : chr [1:1295] "Clear" "Clear" "Clear" "Clear" ...
##  $ Light_Cond_ID       : chr [1:1295] "Dark, not lighted" "Dark, not lighted" "Daylight" "Daylight" ...
##  $ Road_Type_ID        : chr [1:1295] "2 lane, 2 way" "2 lane, 2 way" "2 lane, 2 way" "2 lane, 2 way" ...
##  $ Road_Algn_ID        : chr [1:1295] "Straight, level" "Straight, level" "Straight, level" "Straight, level" ...
##  $ SurfDry             : num [1:1295] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Traffic_Cntl_ID     : chr [1:1295] "Marked lanes" "Center stripe/divider" "Marked lanes" "Center stripe/divider" ...
##  $ Harm_Evnt_ID        : chr [1:1295] "Motor vehicle in transport" "Motor vehicle in transport" "Motor vehicle in transport" "Fixed object" ...
##  $ Intrsct_Relat_ID    : chr [1:1295] "Non intersection" "Non intersection" "Intersection" "Non intersection" ...
##  $ FHE_Collsn_ID       : chr [1:1295] "Sd both going straight-rear end" "Sd both going straight-rear end" "Other" "Omv vehicle going straight" ...
##  $ Road_Part_Adj_ID    : chr [1:1295] "Main/proper lane" "Main/proper lane" "Main/proper lane" "Main/proper lane" ...
##  $ Road_Cls_ID         : chr [1:1295] "Farm to market" "Us & state highways" "Farm to market" "Us & state highways" ...
##  $ Pop_Group_ID        : chr [1:1295] "10,000 - 24,999 pop" "Rural" "Other" "Rural" ...
##  $ Crash_Speed_LimitCat: chr [1:1295] "30-40 mph" "65-70 mph" "45-60 mph" "65-70 mph" ...
##  $ Veh_Body_Styl_ID    : chr [1:1295] "Farm equipment" "Farm equipment" "Farm equipment" "Farm equipment" ...
##  $ Prsn_Ethnicity_ID   : chr [1:1295] "White" "White" "White" "White" ...
##  $ GenMale             : num [1:1295] 1 1 1 1 1 1 1 1 1 1 ...
##  $ TrafVol             : num [1:1295] 7654 13770 11470 16972 413 ...
##  $ Prsn_Age            : chr [1:1295] "25-54 years" "25-54 years" "Other" "25-54 years" ...
##  $ Prsn_Injry_Sev_ID   : chr [1:1295] "O" "O" "O" "O" ...
# Per-column summary: min/quartiles/mean/max for numeric columns and
# length/class/mode for character columns.
summary(df_1)
##  Wthr_Cond_ID       Light_Cond_ID      Road_Type_ID       Road_Algn_ID      
##  Length:1295        Length:1295        Length:1295        Length:1295       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     SurfDry       Traffic_Cntl_ID    Harm_Evnt_ID       Intrsct_Relat_ID  
##  Min.   :0.0000   Length:1295        Length:1295        Length:1295       
##  1st Qu.:1.0000   Class :character   Class :character   Class :character  
##  Median :1.0000   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :0.9143                                                           
##  3rd Qu.:1.0000                                                           
##  Max.   :1.0000                                                           
##  FHE_Collsn_ID      Road_Part_Adj_ID   Road_Cls_ID        Pop_Group_ID      
##  Length:1295        Length:1295        Length:1295        Length:1295       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  Crash_Speed_LimitCat Veh_Body_Styl_ID   Prsn_Ethnicity_ID     GenMale      
##  Length:1295          Length:1295        Length:1295        Min.   :0.0000  
##  Class :character     Class :character   Class :character   1st Qu.:1.0000  
##  Mode  :character     Mode  :character   Mode  :character   Median :1.0000  
##                                                             Mean   :0.8842  
##                                                             3rd Qu.:1.0000  
##                                                             Max.   :1.0000  
##     TrafVol        Prsn_Age         Prsn_Injry_Sev_ID 
##  Min.   :  215   Length:1295        Length:1295       
##  1st Qu.: 6765   Class :character   Class :character  
##  Median :14170   Mode  :character   Mode  :character  
##  Mean   :14417                                        
##  3rd Qu.:22010                                        
##  Max.   :28970

Data Visualization

Bar plot of Crash_Speed_LimitCat

# Count the rows in each speed-limit category, then draw the counts as a
# base-graphics bar chart.
speed_limit_counts <- table(df_1$Crash_Speed_LimitCat)
barplot(
  speed_limit_counts,
  main = "Bar plot of Crash_Speed_LimitCat",
  xlab = "Category of Data among 1295 Entries",
  ylab = "Frequency",
  col = rgb(0.2, 0.5, 0.8),
  ylim = c(0, 600)
)

Bar plot of Road_Cls_ID

# Count the rows in each road class.
frequency_table <- table(df_1$Road_Cls_ID)

# Single source of truth for the bar colours, applied in the order of the
# table's categories. These are the colours the plot was actually drawn with;
# the previous separate, unused `colors` vector disagreed with them.
road_cls_colors <- c("burlywood", "turquoise", "lightgreen", "salmon", "gold")

barplot(
  frequency_table,
  main = "Bar plot of Road_Cls_ID",
  xlab = "",
  ylab = "Frequency",
  col = road_cls_colors,
  ylim = c(0, 500),
  las = 2 # draw axis labels perpendicular to the axes
)

Bar plot of Wthr_Cond_ID

# Print the number of rows for each distinct weather-condition value.
table(df_1$Wthr_Cond_ID)
## 
##  Clear Cloudy    Fog  Other   Rain 
##   1084    146     10     10     45
# Bar chart of the weather-condition counts.
weather_counts <- table(df_1$Wthr_Cond_ID)
barplot(
  weather_counts,
  main = "Bar plot of Weather Condition ID",
  xlab = "Category of Data among 1295 Entries",
  ylab = "Frequency",
  col = "burlywood",
  ylim = c(0, 1300)
)

Bar plot of Surface Condition

# Count the rows for each value of the SurfDry indicator.
frequency_table <- table(df_1$SurfDry)

# Relabel the codes by exact match (0 -> "Wet", 1 -> "Dry"). A lookup is used
# instead of regex substitution so that only whole codes are replaced; any
# code that is not in the lookup keeps its original label.
surface_labels <- c("0" = "Wet", "1" = "Dry")
surface_codes <- names(frequency_table)
is_known_code <- surface_codes %in% names(surface_labels)
surface_codes[is_known_code] <- surface_labels[surface_codes[is_known_code]]
names(frequency_table) <- surface_codes

# Print the relabelled counts.
frequency_table
##  Wet  Dry 
##  111 1184
# Bar chart of the relabelled surface-condition counts; the two fill colours
# are applied to the bars in table order.
surface_fills <- c("limegreen", "maroon")
barplot(
  frequency_table,
  main = "Road Surface Condition",
  xlab = "Surface Type",
  ylab = "Frequency",
  col = surface_fills,
  ylim = c(0, 1200)
)

Density Plot of Crash Speed Limit Category

# Print the number of rows for each distinct speed-limit category.
table(df_1$Crash_Speed_LimitCat)
## 
##  > 70 mph 30-40 mph 45-60 mph 65-70 mph     Other 
##       203       336       471       223        62
# Density layer over the speed-limit categories, one fill colour per category.
# NOTE(review): the x variable is categorical, so a kernel density over the
# category positions is hard to interpret -- confirm whether a bar chart
# (geom_bar) was intended here.
speed_limit_fills <- c("darkcyan", "ivory", "brown", "chocolate", "seashell4")
ggplot(
  df_1,
  aes(x = factor(Crash_Speed_LimitCat), fill = Crash_Speed_LimitCat)
) +
  geom_density(alpha = 0.5) +
  scale_fill_manual(values = speed_limit_fills) +
  labs(
    title = "Density Plot of Crash Speed Limit Category",
    x = "Crash Speed Limit",
    y = "Density"
  ) +
  theme_minimal()

Density Plot of Traffic Volume

# Kernel density estimate of the TrafVol column, drawn as a semi-transparent
# filled curve.
ggplot(data = df_1, mapping = aes(x = TrafVol)) +
  geom_density(fill = "orange2", alpha = 0.5) +
  ggtitle("Density Plot of Traffic Volume") +
  xlab("Traffic Volume") +
  ylab("Density") +
  theme_minimal()

Boxplot of Traffic Volume by Road Class ID

library(ggplot2)

# One box of TrafVol per road class; each box is filled with its own colour
# from the manual palette and outliers are drawn in red.
road_cls_fills <- c("maroon", "magenta", "lightsalmon", "indianred4", "seagreen")
ggplot(
  data = df_1,
  mapping = aes(
    x = factor(Road_Cls_ID),
    y = TrafVol,
    fill = factor(Road_Cls_ID)
  )
) +
  geom_boxplot(alpha = 0.5, outlier.colour = "red") +
  scale_fill_manual(values = road_cls_fills) +
  labs(
    title = "Box Plot of Traffic Volume by Road Class ID",
    x = "Road Class ID",
    y = "Traffic Volume"
  ) +
  theme_minimal()

Boxplot of Traffic Volume by Road Type

# Horizontal box plots of TrafVol, one per road type, with red outliers.
# The fill colours are mapped through a manual scale rather than passed as a
# fixed five-element `fill` parameter: a fixed vector is only accepted when
# its length happens to equal the number of boxes, whereas a manual scale
# assigns colours per factor level and tolerates fewer levels than colours.
# The legend is hidden because the y axis already names each road type.
road_type_fills <- c("peru", "green", "saddlebrown", "tan1", "turquoise")
ggplot(
  df_1,
  aes(x = TrafVol, y = factor(Road_Type_ID), fill = factor(Road_Type_ID))
) +
  geom_boxplot(outlier.colour = "red", alpha = 0.5, show.legend = FALSE) +
  scale_fill_manual(values = road_type_fills) +
  labs(
    title = "Box Plot of Traffic Volume by Road Type",
    x = "Traffic Volume",
    y = "Road Type"
  ) +
  theme_minimal()

Histogram of Traffic Volume

# Histogram of the TrafVol column using base graphics.
traffic_volume <- df_1$TrafVol
hist(
  traffic_volume,
  main = "Histogram of Traffic Volume",
  xlab = "Traffic Volume",
  col = "turquoise",
  ylim = c(0, 130)
)