SAE Automation Level

Data Exploration

library(Matrix)
library(arules)
library(arulesViz)
library(frequency)
library(grid)
library(ggplot2)
library(ggrepel)
library(DataExplorer)
library(writexl)
library(readr)
library(readxl)
library(DT)
library(dplyr)

#Sets the working directory to the same folder as the R script is located in.
setwd('C:/Users/jfk28/OneDrive - Texas State University/AV_Levels_CRIS/0_DataCode/5_ARM')

#Read the data
dat1 <- read_excel("AV_ARM_v1.xlsx")

dim(dat1)
## [1] 4649   22
names(dat1)
##  [1] "Rpt_Autonomous_Level_Engaged_ID" "Prsn_Injry_Sev_ID0"             
##  [3] "Prsn_Injry_Sev_ID"               "Veh_Body_Styl_ID"               
##  [5] "Contrib_Factr_1_ID"              "Crash_Speed_Limit"              
##  [7] "Wthr_Cond_ID"                    "Light_Cond_ID"                  
##  [9] "Entr_Road_ID"                    "Road_Type_ID"                   
## [11] "Road_Algn_ID"                    "Surf_Cond_ID"                   
## [13] "Traffic_Cntl_ID"                 "Harm_Evnt_ID"                   
## [15] "Intrsct_Relat_ID"                "FHE_Collsn_ID"                  
## [17] "Obj_Struck_ID"                   "Road_Cls_ID"                    
## [19] "Pop_Group_ID"                    "Prsn_Age"                       
## [21] "Prsn_Ethnicity_ID"               "Prsn_Gndr_ID"
#primary dimension of the data = 4649*25
dat2 <- dat1[, c("Rpt_Autonomous_Level_Engaged_ID", "Prsn_Injry_Sev_ID", "Veh_Body_Styl_ID",  "Crash_Speed_Limit", "Wthr_Cond_ID", "Light_Cond_ID", "Road_Type_ID", "Road_Algn_ID", "Surf_Cond_ID", "Traffic_Cntl_ID", "Intrsct_Relat_ID", "FHE_Collsn_ID", "Obj_Struck_ID", "Road_Cls_ID", "Pop_Group_ID", "Prsn_Age", "Prsn_Ethnicity_ID", "Prsn_Gndr_ID")]
dim(dat2)
## [1] 4649   18
names(dat2)
##  [1] "Rpt_Autonomous_Level_Engaged_ID" "Prsn_Injry_Sev_ID"              
##  [3] "Veh_Body_Styl_ID"                "Crash_Speed_Limit"              
##  [5] "Wthr_Cond_ID"                    "Light_Cond_ID"                  
##  [7] "Road_Type_ID"                    "Road_Algn_ID"                   
##  [9] "Surf_Cond_ID"                    "Traffic_Cntl_ID"                
## [11] "Intrsct_Relat_ID"                "FHE_Collsn_ID"                  
## [13] "Obj_Struck_ID"                   "Road_Cls_ID"                    
## [15] "Pop_Group_ID"                    "Prsn_Age"                       
## [17] "Prsn_Ethnicity_ID"               "Prsn_Gndr_ID"
#after removal = 4649*18

#Step: removing variables that are highly skewed, setting threshold as x = 0.85
x = 0.85
dat3= dat2[ sapply(dat2, function(i) max(prop.table(table(i)))) < x ]
dim(dat3)
## [1] 4649   15
names(dat3)
##  [1] "Rpt_Autonomous_Level_Engaged_ID" "Prsn_Injry_Sev_ID"              
##  [3] "Veh_Body_Styl_ID"                "Crash_Speed_Limit"              
##  [5] "Wthr_Cond_ID"                    "Light_Cond_ID"                  
##  [7] "Road_Type_ID"                    "Traffic_Cntl_ID"                
##  [9] "Intrsct_Relat_ID"                "FHE_Collsn_ID"                  
## [11] "Road_Cls_ID"                     "Pop_Group_ID"                   
## [13] "Prsn_Age"                        "Prsn_Ethnicity_ID"              
## [15] "Prsn_Gndr_ID"
#at x=0.85 "Road_Type_ID", "Surf_Cond_ID" & "Obj_Struck_ID" removed

Assisted Driving

# Subset for Over Assisted Driving
table(dat3$Rpt_Autonomous_Level_Engaged_ID)
## 
##           Assisted Driving Autonomous Driving Systems 
##                       3345                        156 
##         Partial Automation 
##                       1148
autolevel <- subset(dat3, Rpt_Autonomous_Level_Engaged_ID == "Assisted Driving")
dim(autolevel)
## [1] 3345   15
table(autolevel$Rpt_Autonomous_Level_Engaged_ID)
## 
## Assisted Driving 
##             3345
#Converting into transactions
trans1 <- as(autolevel, "transactions")
trans1
## transactions in sparse format with
##  3345 transactions (rows) and
##  64 items (columns)
itemLabels(trans1)
##  [1] "Rpt_Autonomous_Level_Engaged_ID=Assisted Driving"
##  [2] "Prsn_Injry_Sev_ID=BC"                            
##  [3] "Prsn_Injry_Sev_ID=KA"                            
##  [4] "Prsn_Injry_Sev_ID=O"                             
##  [5] "Veh_Body_Styl_ID=Others"                         
##  [6] "Veh_Body_Styl_ID=Passenger Car"                  
##  [7] "Veh_Body_Styl_ID=Pickup"                         
##  [8] "Veh_Body_Styl_ID=Sport Utility Vehicle"          
##  [9] "Crash_Speed_Limit=25 MPH or less"                
## [10] "Crash_Speed_Limit=30-45 MPH"                     
## [11] "Crash_Speed_Limit=50-65 MPH"                     
## [12] "Crash_Speed_Limit=70 MPH and Over"               
## [13] "Wthr_Cond_ID=Clear"                              
## [14] "Wthr_Cond_ID=Cloudy"                             
## [15] "Wthr_Cond_ID=Others"                             
## [16] "Wthr_Cond_ID=Rain"                               
## [17] "Light_Cond_ID=Dark, Lighted"                     
## [18] "Light_Cond_ID=Dark, Not Lighted"                 
## [19] "Light_Cond_ID=Daylight"                          
## [20] "Light_Cond_ID=Other"                             
## [21] "Road_Type_ID=2 Lane, 2 Way"                      
## [22] "Road_Type_ID=4 Or More Lanes, Divided"           
## [23] "Road_Type_ID=4 Or More Lanes, Undivided"         
## [24] "Road_Type_ID=Not Applicable"                     
## [25] "Traffic_Cntl_ID=Marked Lanes"                    
## [26] "Traffic_Cntl_ID=None"                            
## [27] "Traffic_Cntl_ID=Others"                          
## [28] "Traffic_Cntl_ID=Signal Light"                    
## [29] "Traffic_Cntl_ID=Stop Sign"                       
## [30] "Intrsct_Relat_ID=Driveway Access"                
## [31] "Intrsct_Relat_ID=Intersection"                   
## [32] "Intrsct_Relat_ID=Intersection Related"           
## [33] "Intrsct_Relat_ID=Non Intersection"               
## [34] "FHE_Collsn_ID=Angle"                             
## [35] "FHE_Collsn_ID=One Motor Vehicle"                 
## [36] "FHE_Collsn_ID=Opposite Direction"                
## [37] "FHE_Collsn_ID=Others"                            
## [38] "FHE_Collsn_ID=Same Direction"                    
## [39] "Road_Cls_ID=City Street"                         
## [40] "Road_Cls_ID=Farm To Market"                      
## [41] "Road_Cls_ID=Interstate"                          
## [42] "Road_Cls_ID=Other Roads"                         
## [43] "Road_Cls_ID=US & State Hwys"                     
## [44] "Pop_Group_ID=100,000 - 249,999 Pop"              
## [45] "Pop_Group_ID=25,000 - 49,999 Pop"                
## [46] "Pop_Group_ID=250,000 Pop And Over"               
## [47] "Pop_Group_ID=50,000 - 99,999 Pop"                
## [48] "Pop_Group_ID=Rural"                              
## [49] "Pop_Group_ID=Town Under 2,4999 Pop"              
## [50] "Prsn_Age=>64 years"                              
## [51] "Prsn_Age=15-24 years"                            
## [52] "Prsn_Age=25-34 years"                            
## [53] "Prsn_Age=35-44 years"                            
## [54] "Prsn_Age=45-54 years"                            
## [55] "Prsn_Age=55-64 years"                            
## [56] "Prsn_Age=Unknown"                                
## [57] "Prsn_Ethnicity_ID=Black"                         
## [58] "Prsn_Ethnicity_ID=Hispanic"                      
## [59] "Prsn_Ethnicity_ID=Other"                         
## [60] "Prsn_Ethnicity_ID=Unknown"                       
## [61] "Prsn_Ethnicity_ID=White"                         
## [62] "Prsn_Gndr_ID=Female"                             
## [63] "Prsn_Gndr_ID=Male"                               
## [64] "Prsn_Gndr_ID=Unknown"
summary(trans1)
## transactions as itemMatrix in sparse format with
##  3345 rows (elements/itemsets/transactions) and
##  64 columns (items) and a density of 0.234375 
## 
## most frequent items:
## Rpt_Autonomous_Level_Engaged_ID=Assisted Driving 
##                                             3345 
##                              Prsn_Injry_Sev_ID=O 
##                                             2787 
##                               Wthr_Cond_ID=Clear 
##                                             2522 
##                           Light_Cond_ID=Daylight 
##                                             2515 
##                      Crash_Speed_Limit=30-45 MPH 
##                                             1988 
##                                          (Other) 
##                                            37018 
## 
## element (itemset/transaction) length distribution:
## sizes
##   15 
## 3345 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      15      15      15      15      15      15 
## 
## includes extended item information - examples:
##                                             labels
## 1 Rpt_Autonomous_Level_Engaged_ID=Assisted Driving
## 2                             Prsn_Injry_Sev_ID=BC
## 3                             Prsn_Injry_Sev_ID=KA
##                         variables           levels
## 1 Rpt_Autonomous_Level_Engaged_ID Assisted Driving
## 2               Prsn_Injry_Sev_ID               BC
## 3               Prsn_Injry_Sev_ID               KA
## 
## includes extended transaction information - examples:
##   transactionID
## 1             1
## 2             2
## 3             3
#setting seed
set.seed(1234)

KA_Rules

rules_F <- apriori(trans1, parameter = list(minlen=3, maxlen = 5, supp=0.0006, conf=0.3),
                    appearance = list(rhs="Prsn_Injry_Sev_ID=KA", default="lhs"),
                    control = list(verbose=F))
rules_F
## set of 43 rules
#Pruning rules to get rid of redundant rules
subset.matrix = is.subset(rules_F,rules_F)
subset.matrix[lower.tri(subset.matrix, diag = T)]<-F
redundant = apply(subset.matrix,2,any)
rules.pruned = rules_F[!redundant]
length(rules.pruned)
## [1] 16
# Sort pruned rules by lift in decreasing order
rules.pruned.lift <- sort(rules.pruned, by="lift", decreasing=TRUE)

#Save rules to dataframe
rules_df <- as(rules.pruned.lift, "data.frame")
rules_df <- rules_df %>% mutate_if(is.numeric, round, 3)


datatable(
  rules_df, extensions = c('Select', 'Buttons'), options = list(
    select = list(style = 'os', items = 'row'),
    dom = 'Blfrtip',
    rowId = 0,
    buttons = c('selectRows', 'csv', 'excel')
  ),
  selection = 'none'
)
library(ggplot2)
##Scatter Plot##
#Lift
plot(rules_F, jitter=0, shading = "lift", engine = "ggplot2", main = NULL) +
  scale_color_gradient(low = "purple", high = "red1") +
  labs(x = "Support", y = "Confidence", color = "Lift") +
  theme_bw(base_size=24)

#Network Graph
#Lift
rules_F.sorted <- sort(rules_F, by= "lift", decreasing =  "True")
Top15_F = subset(rules_F.sorted)[1:15]
plot(Top15_F, shading = "lift", method = "graph",
     control = list(
       edges = ggraph::geom_edge_link(
         end_cap = ggraph::circle(4, "mm"),
         start_cap = ggraph::circle(4, "mm"),
         color = "black",
         arrow = arrow(length = unit(2, "mm"), angle = 20, type = "closed"),
         alpha = .2
       ))) +
  scale_color_gradient(low = "purple", high = "red1")+
  theme_bw(base_size=24)

BC_Rules

rules_F <- apriori(trans1, parameter = list(minlen=3, maxlen = 5, supp=0.001, conf=0.5),
                    appearance = list(rhs="Prsn_Injry_Sev_ID=BC", default="lhs"),
                    control = list(verbose=F))
rules_F
## set of 1076 rules
#Pruning rules to get rid of redundant rules
subset.matrix = is.subset(rules_F,rules_F)
subset.matrix[lower.tri(subset.matrix, diag = T)]<-F
redundant = apply(subset.matrix,2,any)
rules.pruned = rules_F[!redundant]
length(rules.pruned)
## [1] 659
# Sort pruned rules by lift in decreasing order
rules.pruned.lift <- sort(rules.pruned, by="lift", decreasing=TRUE)

#Save rules to dataframe
rules_df <- as(rules.pruned.lift, "data.frame")
rules_df <- rules_df %>% mutate_if(is.numeric, round, 3)


datatable(
  rules_df, extensions = c('Select', 'Buttons'), options = list(
    select = list(style = 'os', items = 'row'),
    dom = 'Blfrtip',
    rowId = 0,
    buttons = c('selectRows', 'csv', 'excel')
  ),
  selection = 'none'
)
##Scatter Plot##
#Lift
plot(rules_F, jitter=0, shading = "lift", engine = "ggplot2", main = NULL) +
  scale_color_gradient(low = "purple", high = "red1") +
  labs(x = "Support", y = "Confidence", color = "Lift") +
  theme_bw(base_size=24)

#Network Graph
#Lift
rules_F.sorted <- sort(rules_F, by= "lift", decreasing =  "True")
Top15_F = subset(rules_F.sorted)[1:15]
plot(Top15_F, shading = "lift", method = "graph",
     control = list(
       edges = ggraph::geom_edge_link(
         end_cap = ggraph::circle(4, "mm"),
         start_cap = ggraph::circle(4, "mm"),
         color = "black",
         arrow = arrow(length = unit(2, "mm"), angle = 20, type = "closed"),
         alpha = .2
       ))) +
  scale_color_gradient(low = "purple", high = "red1")+
  theme_bw(base_size=24)

O_Rules

rules_F <- apriori(trans1, parameter = list(minlen=3, maxlen = 5, supp=0.05, conf=0.5),
                    appearance = list(rhs="Prsn_Injry_Sev_ID=O", default="lhs"),
                    control = list(verbose=F))
rules_F
## set of 2858 rules
#Pruning rules to get rid of redundant rules
subset.matrix = is.subset(rules_F,rules_F)
subset.matrix[lower.tri(subset.matrix, diag = T)]<-F
redundant = apply(subset.matrix,2,any)
rules.pruned = rules_F[!redundant]
length(rules.pruned)
## [1] 463
# Sort pruned rules by lift in decreasing order
rules.pruned.lift <- sort(rules.pruned, by="lift", decreasing=TRUE)

#Save rules to dataframe
rules_df <- as(rules.pruned.lift, "data.frame")
rules_df <- rules_df %>% mutate_if(is.numeric, round, 3)


datatable(
  rules_df, extensions = c('Select', 'Buttons'), options = list(
    select = list(style = 'os', items = 'row'),
    dom = 'Blfrtip',
    rowId = 0,
    buttons = c('selectRows', 'csv', 'excel')
  ),
  selection = 'none'
)
##Scatter Plot##
#Lift
plot(rules_F, jitter=0, shading = "lift", engine = "ggplot2", main = NULL) +
  scale_color_gradient(low = "purple", high = "red1") +
  labs(x = "Support", y = "Confidence", color = "Lift") +
  theme_bw(base_size=24)

#Network Graph
#Lift
rules_F.sorted <- sort(rules_F, by= "lift", decreasing =  "True")
Top15_F = subset(rules_F.sorted)[1:15]
plot(Top15_F, shading = "lift", method = "graph",
     control = list(
       edges = ggraph::geom_edge_link(
         end_cap = ggraph::circle(4, "mm"),
         start_cap = ggraph::circle(4, "mm"),
         color = "black",
         arrow = arrow(length = unit(2, "mm"), angle = 20, type = "closed"),
         alpha = .2
       ))) +
  scale_color_gradient(low = "purple", high = "red1")+
  theme_bw(base_size=24)

Partial Automation

# Subset for Partial Automation
table(dat3$Rpt_Autonomous_Level_Engaged_ID)
## 
##           Assisted Driving Autonomous Driving Systems 
##                       3345                        156 
##         Partial Automation 
##                       1148
autolevel <- subset(dat3, Rpt_Autonomous_Level_Engaged_ID == "Partial Automation")
dim(autolevel)
## [1] 1148   15
table(autolevel$Rpt_Autonomous_Level_Engaged_ID)
## 
## Partial Automation 
##               1148
#Converting into transactions
trans1 <- as(autolevel, "transactions")
trans1
## transactions in sparse format with
##  1148 transactions (rows) and
##  64 items (columns)
itemLabels(trans1)
##  [1] "Rpt_Autonomous_Level_Engaged_ID=Partial Automation"
##  [2] "Prsn_Injry_Sev_ID=BC"                              
##  [3] "Prsn_Injry_Sev_ID=KA"                              
##  [4] "Prsn_Injry_Sev_ID=O"                               
##  [5] "Veh_Body_Styl_ID=Others"                           
##  [6] "Veh_Body_Styl_ID=Passenger Car"                    
##  [7] "Veh_Body_Styl_ID=Pickup"                           
##  [8] "Veh_Body_Styl_ID=Sport Utility Vehicle"            
##  [9] "Crash_Speed_Limit=25 MPH or less"                  
## [10] "Crash_Speed_Limit=30-45 MPH"                       
## [11] "Crash_Speed_Limit=50-65 MPH"                       
## [12] "Crash_Speed_Limit=70 MPH and Over"                 
## [13] "Wthr_Cond_ID=Clear"                                
## [14] "Wthr_Cond_ID=Cloudy"                               
## [15] "Wthr_Cond_ID=Others"                               
## [16] "Wthr_Cond_ID=Rain"                                 
## [17] "Light_Cond_ID=Dark, Lighted"                       
## [18] "Light_Cond_ID=Dark, Not Lighted"                   
## [19] "Light_Cond_ID=Daylight"                            
## [20] "Light_Cond_ID=Other"                               
## [21] "Road_Type_ID=2 Lane, 2 Way"                        
## [22] "Road_Type_ID=4 Or More Lanes, Divided"             
## [23] "Road_Type_ID=4 Or More Lanes, Undivided"           
## [24] "Road_Type_ID=Not Applicable"                       
## [25] "Traffic_Cntl_ID=Marked Lanes"                      
## [26] "Traffic_Cntl_ID=None"                              
## [27] "Traffic_Cntl_ID=Others"                            
## [28] "Traffic_Cntl_ID=Signal Light"                      
## [29] "Traffic_Cntl_ID=Stop Sign"                         
## [30] "Intrsct_Relat_ID=Driveway Access"                  
## [31] "Intrsct_Relat_ID=Intersection"                     
## [32] "Intrsct_Relat_ID=Intersection Related"             
## [33] "Intrsct_Relat_ID=Non Intersection"                 
## [34] "FHE_Collsn_ID=Angle"                               
## [35] "FHE_Collsn_ID=One Motor Vehicle"                   
## [36] "FHE_Collsn_ID=Opposite Direction"                  
## [37] "FHE_Collsn_ID=Others"                              
## [38] "FHE_Collsn_ID=Same Direction"                      
## [39] "Road_Cls_ID=City Street"                           
## [40] "Road_Cls_ID=Farm To Market"                        
## [41] "Road_Cls_ID=Interstate"                            
## [42] "Road_Cls_ID=Other Roads"                           
## [43] "Road_Cls_ID=US & State Hwys"                       
## [44] "Pop_Group_ID=100,000 - 249,999 Pop"                
## [45] "Pop_Group_ID=25,000 - 49,999 Pop"                  
## [46] "Pop_Group_ID=250,000 Pop And Over"                 
## [47] "Pop_Group_ID=50,000 - 99,999 Pop"                  
## [48] "Pop_Group_ID=Rural"                                
## [49] "Pop_Group_ID=Town Under 2,4999 Pop"                
## [50] "Prsn_Age=>64 years"                                
## [51] "Prsn_Age=15-24 years"                              
## [52] "Prsn_Age=25-34 years"                              
## [53] "Prsn_Age=35-44 years"                              
## [54] "Prsn_Age=45-54 years"                              
## [55] "Prsn_Age=55-64 years"                              
## [56] "Prsn_Age=Unknown"                                  
## [57] "Prsn_Ethnicity_ID=Black"                           
## [58] "Prsn_Ethnicity_ID=Hispanic"                        
## [59] "Prsn_Ethnicity_ID=Other"                           
## [60] "Prsn_Ethnicity_ID=Unknown"                         
## [61] "Prsn_Ethnicity_ID=White"                           
## [62] "Prsn_Gndr_ID=Female"                               
## [63] "Prsn_Gndr_ID=Male"                                 
## [64] "Prsn_Gndr_ID=Unknown"
summary(trans1)
## transactions as itemMatrix in sparse format with
##  1148 rows (elements/itemsets/transactions) and
##  64 columns (items) and a density of 0.234375 
## 
## most frequent items:
## Rpt_Autonomous_Level_Engaged_ID=Partial Automation 
##                                               1148 
##                                 Wthr_Cond_ID=Clear 
##                                               1002 
##                                Prsn_Injry_Sev_ID=O 
##                                                989 
##                             Light_Cond_ID=Daylight 
##                                                846 
##                        Crash_Speed_Limit=30-45 MPH 
##                                                752 
##                                            (Other) 
##                                              12483 
## 
## element (itemset/transaction) length distribution:
## sizes
##   15 
## 1148 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      15      15      15      15      15      15 
## 
## includes extended item information - examples:
##                                               labels
## 1 Rpt_Autonomous_Level_Engaged_ID=Partial Automation
## 2                               Prsn_Injry_Sev_ID=BC
## 3                               Prsn_Injry_Sev_ID=KA
##                         variables             levels
## 1 Rpt_Autonomous_Level_Engaged_ID Partial Automation
## 2               Prsn_Injry_Sev_ID                 BC
## 3               Prsn_Injry_Sev_ID                 KA
## 
## includes extended transaction information - examples:
##   transactionID
## 1             1
## 2             2
## 3             3
#setting seed
set.seed(1234)

KA_Rules

rules_F <- apriori(trans1, parameter = list(minlen=3, maxlen = 5, supp=0.0006, conf=0.3),
                    appearance = list(rhs="Prsn_Injry_Sev_ID=KA", default="lhs"),
                    control = list(verbose=F))
rules_F
## set of 963 rules
#Pruning rules to get rid of redundant rules
subset.matrix = is.subset(rules_F,rules_F)
subset.matrix[lower.tri(subset.matrix, diag = T)]<-F
redundant = apply(subset.matrix,2,any)
rules.pruned = rules_F[!redundant]
length(rules.pruned)
## [1] 559
# Sort pruned rules by lift in decreasing order
rules.pruned.lift <- sort(rules.pruned, by="lift", decreasing=TRUE)

#Save rules to dataframe
rules_df <- as(rules.pruned.lift, "data.frame")
rules_df <- rules_df %>% mutate_if(is.numeric, round, 3)


datatable(
  rules_df, extensions = c('Select', 'Buttons'), options = list(
    select = list(style = 'os', items = 'row'),
    dom = 'Blfrtip',
    rowId = 0,
    buttons = c('selectRows', 'csv', 'excel')
  ),
  selection = 'none'
)
##Scatter Plot##
#Lift
plot(rules_F, jitter=0, shading = "lift", engine = "ggplot2", main = NULL) +
  scale_color_gradient(low = "purple", high = "red1") +
  labs(x = "Support", y = "Confidence", color = "Lift") +
  theme_bw(base_size=24)

#Network Graph
#Lift
rules_F.sorted <- sort(rules_F, by= "lift", decreasing =  "True")
Top15_F = subset(rules_F.sorted)[1:15]
plot(Top15_F, shading = "lift", method = "graph",
     control = list(
       edges = ggraph::geom_edge_link(
         end_cap = ggraph::circle(4, "mm"),
         start_cap = ggraph::circle(4, "mm"),
         color = "black",
         arrow = arrow(length = unit(2, "mm"), angle = 20, type = "closed"),
         alpha = .2
       ))) +
  scale_color_gradient(low = "purple", high = "red1")+
  theme_bw(base_size=24)

BC_Rules

rules_F <- apriori(trans1, parameter = list(minlen=3, maxlen = 5, supp=0.001, conf=0.5),
                    appearance = list(rhs="Prsn_Injry_Sev_ID=BC", default="lhs"),
                    control = list(verbose=F))
rules_F
## set of 4315 rules
#Pruning rules to get rid of redundant rules
subset.matrix = is.subset(rules_F,rules_F)
subset.matrix[lower.tri(subset.matrix, diag = T)]<-F
redundant = apply(subset.matrix,2,any)
rules.pruned = rules_F[!redundant]
length(rules.pruned)
## [1] 1683
# Sort pruned rules by lift in decreasing order
rules.pruned.lift <- sort(rules.pruned, by="lift", decreasing=TRUE)

#Save rules to dataframe
rules_df <- as(rules.pruned.lift, "data.frame")
rules_df <- rules_df %>% mutate_if(is.numeric, round, 3)


datatable(
  rules_df, extensions = c('Select', 'Buttons'), options = list(
    select = list(style = 'os', items = 'row'),
    dom = 'Blfrtip',
    rowId = 0,
    buttons = c('selectRows', 'csv', 'excel')
  ),
  selection = 'none'
)
##Scatter Plot##
#Lift
plot(rules_F, jitter=0, shading = "lift", engine = "ggplot2", main = NULL) +
  scale_color_gradient(low = "purple", high = "red1") +
  labs(x = "Support", y = "Confidence", color = "Lift") +
  theme_bw(base_size=24)

#Network Graph
#Lift
rules_F.sorted <- sort(rules_F, by= "lift", decreasing =  "True")
Top15_F = subset(rules_F.sorted)[1:15]
plot(Top15_F, shading = "lift", method = "graph",
     control = list(
       edges = ggraph::geom_edge_link(
         end_cap = ggraph::circle(4, "mm"),
         start_cap = ggraph::circle(4, "mm"),
         color = "black",
         arrow = arrow(length = unit(2, "mm"), angle = 20, type = "closed"),
         alpha = .2
       ))) +
  scale_color_gradient(low = "purple", high = "red1")+
  theme_bw(base_size=24)

O_Rules

rules_F <- apriori(trans1, parameter = list(minlen=3, maxlen = 5, supp=0.05, conf=0.5),
                    appearance = list(rhs="Prsn_Injry_Sev_ID=O", default="lhs"),
                    control = list(verbose=F))
rules_F
## set of 3902 rules
#Pruning rules to get rid of redundant rules
subset.matrix = is.subset(rules_F,rules_F)
subset.matrix[lower.tri(subset.matrix, diag = T)]<-F
redundant = apply(subset.matrix,2,any)
rules.pruned = rules_F[!redundant]
length(rules.pruned)
## [1] 463
# Sort pruned rules by lift in decreasing order
rules.pruned.lift <- sort(rules.pruned, by="lift", decreasing=TRUE)

#Save rules to dataframe
rules_df <- as(rules.pruned.lift, "data.frame")
rules_df <- rules_df %>% mutate_if(is.numeric, round, 3)


datatable(
  rules_df, extensions = c('Select', 'Buttons'), options = list(
    select = list(style = 'os', items = 'row'),
    dom = 'Blfrtip',
    rowId = 0,
    buttons = c('selectRows', 'csv', 'excel')
  ),
  selection = 'none'
)
##Scatter Plot##
#Lift
plot(rules_F, jitter=0, shading = "lift", engine = "ggplot2", main = NULL) +
  scale_color_gradient(low = "purple", high = "red1") +
  labs(x = "Support", y = "Confidence", color = "Lift") +
  theme_bw(base_size=24)

#Network Graph
#Lift
rules_F.sorted <- sort(rules_F, by= "lift", decreasing =  "True")
Top15_F = subset(rules_F.sorted)[1:15]
plot(Top15_F, shading = "lift", method = "graph",
     control = list(
       edges = ggraph::geom_edge_link(
         end_cap = ggraph::circle(4, "mm"),
         start_cap = ggraph::circle(4, "mm"),
         color = "black",
         arrow = arrow(length = unit(2, "mm"), angle = 20, type = "closed"),
         alpha = .2
       ))) +
  scale_color_gradient(low = "purple", high = "red1")+
  theme_bw(base_size=24)

Autonomous Driving Systems

# Autonomous Driving Systems
# Subset for Partial Automation
table(dat3$Rpt_Autonomous_Level_Engaged_ID)
## 
##           Assisted Driving Autonomous Driving Systems 
##                       3345                        156 
##         Partial Automation 
##                       1148
autolevel <- subset(dat3, Rpt_Autonomous_Level_Engaged_ID == "Autonomous Driving Systems")
dim(autolevel)
## [1] 156  15
table(autolevel$Rpt_Autonomous_Level_Engaged_ID)
## 
## Autonomous Driving Systems 
##                        156
#Converting into transactions
trans1 <- as(autolevel, "transactions")
trans1
## transactions in sparse format with
##  156 transactions (rows) and
##  62 items (columns)
itemLabels(trans1)
##  [1] "Rpt_Autonomous_Level_Engaged_ID=Autonomous Driving Systems"
##  [2] "Prsn_Injry_Sev_ID=BC"                                      
##  [3] "Prsn_Injry_Sev_ID=O"                                       
##  [4] "Veh_Body_Styl_ID=Others"                                   
##  [5] "Veh_Body_Styl_ID=Passenger Car"                            
##  [6] "Veh_Body_Styl_ID=Pickup"                                   
##  [7] "Veh_Body_Styl_ID=Sport Utility Vehicle"                    
##  [8] "Crash_Speed_Limit=25 MPH or less"                          
##  [9] "Crash_Speed_Limit=30-45 MPH"                               
## [10] "Crash_Speed_Limit=50-65 MPH"                               
## [11] "Crash_Speed_Limit=70 MPH and Over"                         
## [12] "Wthr_Cond_ID=Clear"                                        
## [13] "Wthr_Cond_ID=Cloudy"                                       
## [14] "Wthr_Cond_ID=Others"                                       
## [15] "Wthr_Cond_ID=Rain"                                         
## [16] "Light_Cond_ID=Dark, Lighted"                               
## [17] "Light_Cond_ID=Dark, Not Lighted"                           
## [18] "Light_Cond_ID=Daylight"                                    
## [19] "Light_Cond_ID=Other"                                       
## [20] "Road_Type_ID=2 Lane, 2 Way"                                
## [21] "Road_Type_ID=4 Or More Lanes, Divided"                     
## [22] "Road_Type_ID=4 Or More Lanes, Undivided"                   
## [23] "Road_Type_ID=Not Applicable"                               
## [24] "Traffic_Cntl_ID=Marked Lanes"                              
## [25] "Traffic_Cntl_ID=None"                                      
## [26] "Traffic_Cntl_ID=Others"                                    
## [27] "Traffic_Cntl_ID=Signal Light"                              
## [28] "Traffic_Cntl_ID=Stop Sign"                                 
## [29] "Intrsct_Relat_ID=Driveway Access"                          
## [30] "Intrsct_Relat_ID=Intersection"                             
## [31] "Intrsct_Relat_ID=Intersection Related"                     
## [32] "Intrsct_Relat_ID=Non Intersection"                         
## [33] "FHE_Collsn_ID=Angle"                                       
## [34] "FHE_Collsn_ID=One Motor Vehicle"                           
## [35] "FHE_Collsn_ID=Opposite Direction"                          
## [36] "FHE_Collsn_ID=Same Direction"                              
## [37] "Road_Cls_ID=City Street"                                   
## [38] "Road_Cls_ID=Farm To Market"                                
## [39] "Road_Cls_ID=Interstate"                                    
## [40] "Road_Cls_ID=Other Roads"                                   
## [41] "Road_Cls_ID=US & State Hwys"                               
## [42] "Pop_Group_ID=100,000 - 249,999 Pop"                        
## [43] "Pop_Group_ID=25,000 - 49,999 Pop"                          
## [44] "Pop_Group_ID=250,000 Pop And Over"                         
## [45] "Pop_Group_ID=50,000 - 99,999 Pop"                          
## [46] "Pop_Group_ID=Rural"                                        
## [47] "Pop_Group_ID=Town Under 2,4999 Pop"                        
## [48] "Prsn_Age=>64 years"                                        
## [49] "Prsn_Age=15-24 years"                                      
## [50] "Prsn_Age=25-34 years"                                      
## [51] "Prsn_Age=35-44 years"                                      
## [52] "Prsn_Age=45-54 years"                                      
## [53] "Prsn_Age=55-64 years"                                      
## [54] "Prsn_Age=Unknown"                                          
## [55] "Prsn_Ethnicity_ID=Black"                                   
## [56] "Prsn_Ethnicity_ID=Hispanic"                                
## [57] "Prsn_Ethnicity_ID=Other"                                   
## [58] "Prsn_Ethnicity_ID=Unknown"                                 
## [59] "Prsn_Ethnicity_ID=White"                                   
## [60] "Prsn_Gndr_ID=Female"                                       
## [61] "Prsn_Gndr_ID=Male"                                         
## [62] "Prsn_Gndr_ID=Unknown"
summary(trans1)
## transactions as itemMatrix in sparse format with
##  156 rows (elements/itemsets/transactions) and
##  62 columns (items) and a density of 0.2419355 
## 
## most frequent items:
## Rpt_Autonomous_Level_Engaged_ID=Autonomous Driving Systems 
##                                                        156 
##                                        Prsn_Injry_Sev_ID=O 
##                                                        137 
##                                         Wthr_Cond_ID=Clear 
##                                                        128 
##                                     Light_Cond_ID=Daylight 
##                                                        109 
##                             Veh_Body_Styl_ID=Passenger Car 
##                                                         99 
##                                                    (Other) 
##                                                       1711 
## 
## element (itemset/transaction) length distribution:
## sizes
##  15 
## 156 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##      15      15      15      15      15      15 
## 
## includes extended item information - examples:
##                                                       labels
## 1 Rpt_Autonomous_Level_Engaged_ID=Autonomous Driving Systems
## 2                                       Prsn_Injry_Sev_ID=BC
## 3                                        Prsn_Injry_Sev_ID=O
##                         variables                     levels
## 1 Rpt_Autonomous_Level_Engaged_ID Autonomous Driving Systems
## 2               Prsn_Injry_Sev_ID                         BC
## 3               Prsn_Injry_Sev_ID                          O
## 
## includes extended transaction information - examples:
##   transactionID
## 1             1
## 2             2
## 3             3
#setting seed
set.seed(1234)

BC_Rules

rules_F <- apriori(trans1, parameter = list(minlen=3, maxlen = 5, supp=0.001, conf=0.5),
                    appearance = list(rhs="Prsn_Injry_Sev_ID=BC", default="lhs"),
                    control = list(verbose=F))
rules_F
## set of 9238 rules
#Pruning rules to get rid of redundant rules
subset.matrix = is.subset(rules_F,rules_F)
subset.matrix[lower.tri(subset.matrix, diag = T)]<-F
redundant = apply(subset.matrix,2,any)
rules.pruned = rules_F[!redundant]
length(rules.pruned)
## [1] 1335
# Sort pruned rules by lift in decreasing order
rules.pruned.lift <- sort(rules.pruned, by="lift", decreasing=TRUE)

#Save rules to dataframe
rules_df <- as(rules.pruned.lift, "data.frame")
rules_df <- rules_df %>% mutate_if(is.numeric, round, 3)


datatable(
  rules_df, extensions = c('Select', 'Buttons'), options = list(
    select = list(style = 'os', items = 'row'),
    dom = 'Blfrtip',
    rowId = 0,
    buttons = c('selectRows', 'csv', 'excel')
  ),
  selection = 'none'
)
##Scatter Plot##
#Lift
plot(rules_F, jitter=0, shading = "lift", engine = "ggplot2", main = NULL) +
  scale_color_gradient(low = "purple", high = "red1") +
  labs(x = "Support", y = "Confidence", color = "Lift") +
  theme_bw(base_size=24)

#Network Graph
#Lift
rules_F.sorted <- sort(rules_F, by= "lift", decreasing =  "True")
Top15_F = subset(rules_F.sorted)[1:15]
plot(Top15_F, shading = "lift", method = "graph",
     control = list(
       edges = ggraph::geom_edge_link(
         end_cap = ggraph::circle(4, "mm"),
         start_cap = ggraph::circle(4, "mm"),
         color = "black",
         arrow = arrow(length = unit(2, "mm"), angle = 20, type = "closed"),
         alpha = .2
       ))) +
  scale_color_gradient(low = "purple", high = "red1")+
  theme_bw(base_size=24)

O_Rules

rules_F <- apriori(trans1, parameter = list(minlen=3, maxlen = 5, supp=0.05, conf=0.5),
                    appearance = list(rhs="Prsn_Injry_Sev_ID=O", default="lhs"),
                    control = list(verbose=F))
rules_F
## set of 3906 rules
#Pruning rules to get rid of redundant rules
subset.matrix = is.subset(rules_F,rules_F)
subset.matrix[lower.tri(subset.matrix, diag = T)]<-F
redundant = apply(subset.matrix,2,any)
rules.pruned = rules_F[!redundant]
length(rules.pruned)
## [1] 529
# Sort pruned rules by lift in decreasing order
rules.pruned.lift <- sort(rules.pruned, by="lift", decreasing=TRUE)

#Save rules to dataframe
rules_df <- as(rules.pruned.lift, "data.frame")
rules_df <- rules_df %>% mutate_if(is.numeric, round, 3)


datatable(
  rules_df, extensions = c('Select', 'Buttons'), options = list(
    select = list(style = 'os', items = 'row'),
    dom = 'Blfrtip',
    rowId = 0,
    buttons = c('selectRows', 'csv', 'excel')
  ),
  selection = 'none'
)
##Scatter Plot##
#Lift
plot(rules_F, jitter=0, shading = "lift", engine = "ggplot2", main = NULL) +
  scale_color_gradient(low = "purple", high = "red1") +
  labs(x = "Support", y = "Confidence", color = "Lift") +
  theme_bw(base_size=24)

#Network Graph
#Lift
rules_F.sorted <- sort(rules_F, by= "lift", decreasing =  "True")
Top15_F = subset(rules_F.sorted)[1:15]
plot(Top15_F, shading = "lift", method = "graph",
     control = list(
       edges = ggraph::geom_edge_link(
         end_cap = ggraph::circle(4, "mm"),
         start_cap = ggraph::circle(4, "mm"),
         color = "black",
         arrow = arrow(length = unit(2, "mm"), angle = 20, type = "closed"),
         alpha = .2
       ))) +
  scale_color_gradient(low = "purple", high = "red1")+
  theme_bw(base_size=24)