Data Exploration
library(Matrix)
library(arules)
library(arulesViz)
library(frequency)
library(grid)
library(ggplot2)
library(ggrepel)
library(DataExplorer)
library(writexl)
library(readr)
library(readxl)
library(DT)
library(dplyr)
#Sets the working directory to the same folder as the R script is located in.
setwd('C:/Users/jfk28/OneDrive - Texas State University/AV_Levels_CRIS/0_DataCode/5_ARM')
#Read the data
dat1 <- read_excel("AV_ARM_v1.xlsx")
dim(dat1)
## [1] 4649 22
## [1] "Rpt_Autonomous_Level_Engaged_ID" "Prsn_Injry_Sev_ID0"
## [3] "Prsn_Injry_Sev_ID" "Veh_Body_Styl_ID"
## [5] "Contrib_Factr_1_ID" "Crash_Speed_Limit"
## [7] "Wthr_Cond_ID" "Light_Cond_ID"
## [9] "Entr_Road_ID" "Road_Type_ID"
## [11] "Road_Algn_ID" "Surf_Cond_ID"
## [13] "Traffic_Cntl_ID" "Harm_Evnt_ID"
## [15] "Intrsct_Relat_ID" "FHE_Collsn_ID"
## [17] "Obj_Struck_ID" "Road_Cls_ID"
## [19] "Pop_Group_ID" "Prsn_Age"
## [21] "Prsn_Ethnicity_ID" "Prsn_Gndr_ID"
#primary dimension of the data = 4649*25
dat2 <- dat1[, c("Rpt_Autonomous_Level_Engaged_ID", "Prsn_Injry_Sev_ID", "Veh_Body_Styl_ID", "Crash_Speed_Limit", "Wthr_Cond_ID", "Light_Cond_ID", "Road_Type_ID", "Road_Algn_ID", "Surf_Cond_ID", "Traffic_Cntl_ID", "Intrsct_Relat_ID", "FHE_Collsn_ID", "Obj_Struck_ID", "Road_Cls_ID", "Pop_Group_ID", "Prsn_Age", "Prsn_Ethnicity_ID", "Prsn_Gndr_ID")]
dim(dat2)
## [1] 4649 18
## [1] "Rpt_Autonomous_Level_Engaged_ID" "Prsn_Injry_Sev_ID"
## [3] "Veh_Body_Styl_ID" "Crash_Speed_Limit"
## [5] "Wthr_Cond_ID" "Light_Cond_ID"
## [7] "Road_Type_ID" "Road_Algn_ID"
## [9] "Surf_Cond_ID" "Traffic_Cntl_ID"
## [11] "Intrsct_Relat_ID" "FHE_Collsn_ID"
## [13] "Obj_Struck_ID" "Road_Cls_ID"
## [15] "Pop_Group_ID" "Prsn_Age"
## [17] "Prsn_Ethnicity_ID" "Prsn_Gndr_ID"
#after removal = 4649*18
#Step: removing variables that are highly skewed, setting threshold as x = 0.85
x = 0.85
dat3= dat2[ sapply(dat2, function(i) max(prop.table(table(i)))) < x ]
dim(dat3)
## [1] 4649 15
## [1] "Rpt_Autonomous_Level_Engaged_ID" "Prsn_Injry_Sev_ID"
## [3] "Veh_Body_Styl_ID" "Crash_Speed_Limit"
## [5] "Wthr_Cond_ID" "Light_Cond_ID"
## [7] "Road_Type_ID" "Traffic_Cntl_ID"
## [9] "Intrsct_Relat_ID" "FHE_Collsn_ID"
## [11] "Road_Cls_ID" "Pop_Group_ID"
## [13] "Prsn_Age" "Prsn_Ethnicity_ID"
## [15] "Prsn_Gndr_ID"
#at x=0.85 "Road_Type_ID", "Surf_Cond_ID" & "Obj_Struck_ID" removed
Assisted Driving
# Subset for Over Assisted Driving
table(dat3$Rpt_Autonomous_Level_Engaged_ID)
##
## Assisted Driving Autonomous Driving Systems
## 3345 156
## Partial Automation
## 1148
autolevel <- subset(dat3, Rpt_Autonomous_Level_Engaged_ID == "Assisted Driving")
dim(autolevel)
## [1] 3345 15
table(autolevel$Rpt_Autonomous_Level_Engaged_ID)
##
## Assisted Driving
## 3345
#Converting into transactions
trans1 <- as(autolevel, "transactions")
trans1
## transactions in sparse format with
## 3345 transactions (rows) and
## 64 items (columns)
## [1] "Rpt_Autonomous_Level_Engaged_ID=Assisted Driving"
## [2] "Prsn_Injry_Sev_ID=BC"
## [3] "Prsn_Injry_Sev_ID=KA"
## [4] "Prsn_Injry_Sev_ID=O"
## [5] "Veh_Body_Styl_ID=Others"
## [6] "Veh_Body_Styl_ID=Passenger Car"
## [7] "Veh_Body_Styl_ID=Pickup"
## [8] "Veh_Body_Styl_ID=Sport Utility Vehicle"
## [9] "Crash_Speed_Limit=25 MPH or less"
## [10] "Crash_Speed_Limit=30-45 MPH"
## [11] "Crash_Speed_Limit=50-65 MPH"
## [12] "Crash_Speed_Limit=70 MPH and Over"
## [13] "Wthr_Cond_ID=Clear"
## [14] "Wthr_Cond_ID=Cloudy"
## [15] "Wthr_Cond_ID=Others"
## [16] "Wthr_Cond_ID=Rain"
## [17] "Light_Cond_ID=Dark, Lighted"
## [18] "Light_Cond_ID=Dark, Not Lighted"
## [19] "Light_Cond_ID=Daylight"
## [20] "Light_Cond_ID=Other"
## [21] "Road_Type_ID=2 Lane, 2 Way"
## [22] "Road_Type_ID=4 Or More Lanes, Divided"
## [23] "Road_Type_ID=4 Or More Lanes, Undivided"
## [24] "Road_Type_ID=Not Applicable"
## [25] "Traffic_Cntl_ID=Marked Lanes"
## [26] "Traffic_Cntl_ID=None"
## [27] "Traffic_Cntl_ID=Others"
## [28] "Traffic_Cntl_ID=Signal Light"
## [29] "Traffic_Cntl_ID=Stop Sign"
## [30] "Intrsct_Relat_ID=Driveway Access"
## [31] "Intrsct_Relat_ID=Intersection"
## [32] "Intrsct_Relat_ID=Intersection Related"
## [33] "Intrsct_Relat_ID=Non Intersection"
## [34] "FHE_Collsn_ID=Angle"
## [35] "FHE_Collsn_ID=One Motor Vehicle"
## [36] "FHE_Collsn_ID=Opposite Direction"
## [37] "FHE_Collsn_ID=Others"
## [38] "FHE_Collsn_ID=Same Direction"
## [39] "Road_Cls_ID=City Street"
## [40] "Road_Cls_ID=Farm To Market"
## [41] "Road_Cls_ID=Interstate"
## [42] "Road_Cls_ID=Other Roads"
## [43] "Road_Cls_ID=US & State Hwys"
## [44] "Pop_Group_ID=100,000 - 249,999 Pop"
## [45] "Pop_Group_ID=25,000 - 49,999 Pop"
## [46] "Pop_Group_ID=250,000 Pop And Over"
## [47] "Pop_Group_ID=50,000 - 99,999 Pop"
## [48] "Pop_Group_ID=Rural"
## [49] "Pop_Group_ID=Town Under 2,4999 Pop"
## [50] "Prsn_Age=>64 years"
## [51] "Prsn_Age=15-24 years"
## [52] "Prsn_Age=25-34 years"
## [53] "Prsn_Age=35-44 years"
## [54] "Prsn_Age=45-54 years"
## [55] "Prsn_Age=55-64 years"
## [56] "Prsn_Age=Unknown"
## [57] "Prsn_Ethnicity_ID=Black"
## [58] "Prsn_Ethnicity_ID=Hispanic"
## [59] "Prsn_Ethnicity_ID=Other"
## [60] "Prsn_Ethnicity_ID=Unknown"
## [61] "Prsn_Ethnicity_ID=White"
## [62] "Prsn_Gndr_ID=Female"
## [63] "Prsn_Gndr_ID=Male"
## [64] "Prsn_Gndr_ID=Unknown"
## transactions as itemMatrix in sparse format with
## 3345 rows (elements/itemsets/transactions) and
## 64 columns (items) and a density of 0.234375
##
## most frequent items:
## Rpt_Autonomous_Level_Engaged_ID=Assisted Driving
## 3345
## Prsn_Injry_Sev_ID=O
## 2787
## Wthr_Cond_ID=Clear
## 2522
## Light_Cond_ID=Daylight
## 2515
## Crash_Speed_Limit=30-45 MPH
## 1988
## (Other)
## 37018
##
## element (itemset/transaction) length distribution:
## sizes
## 15
## 3345
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15 15 15 15 15 15
##
## includes extended item information - examples:
## labels
## 1 Rpt_Autonomous_Level_Engaged_ID=Assisted Driving
## 2 Prsn_Injry_Sev_ID=BC
## 3 Prsn_Injry_Sev_ID=KA
## variables levels
## 1 Rpt_Autonomous_Level_Engaged_ID Assisted Driving
## 2 Prsn_Injry_Sev_ID BC
## 3 Prsn_Injry_Sev_ID KA
##
## includes extended transaction information - examples:
## transactionID
## 1 1
## 2 2
## 3 3
#setting seed
set.seed(1234)
KA_Rules
rules_F <- apriori(trans1, parameter = list(minlen=3, maxlen = 5, supp=0.0006, conf=0.3),
appearance = list(rhs="Prsn_Injry_Sev_ID=KA", default="lhs"),
control = list(verbose=F))
rules_F
## set of 43 rules
#Pruning rules to get rid of redundant rules
subset.matrix = is.subset(rules_F,rules_F)
subset.matrix[lower.tri(subset.matrix, diag = T)]<-F
redundant = apply(subset.matrix,2,any)
rules.pruned = rules_F[!redundant]
length(rules.pruned)
## [1] 16
# Sort pruned rules by lift in decreasing order
rules.pruned.lift <- sort(rules.pruned, by="lift", decreasing=TRUE)
#Save rules to dataframe
rules_df <- as(rules.pruned.lift, "data.frame")
rules_df <- rules_df %>% mutate_if(is.numeric, round, 3)
datatable(
rules_df, extensions = c('Select', 'Buttons'), options = list(
select = list(style = 'os', items = 'row'),
dom = 'Blfrtip',
rowId = 0,
buttons = c('selectRows', 'csv', 'excel')
),
selection = 'none'
)
library(ggplot2)
##Scatter Plot##
#Lift
plot(rules_F, jitter=0, shading = "lift", engine = "ggplot2", main = NULL) +
scale_color_gradient(low = "purple", high = "red1") +
labs(x = "Support", y = "Confidence", color = "Lift") +
theme_bw(base_size=24)

#Network Graph
#Lift
rules_F.sorted <- sort(rules_F, by= "lift", decreasing = "True")
Top15_F = subset(rules_F.sorted)[1:15]
plot(Top15_F, shading = "lift", method = "graph",
control = list(
edges = ggraph::geom_edge_link(
end_cap = ggraph::circle(4, "mm"),
start_cap = ggraph::circle(4, "mm"),
color = "black",
arrow = arrow(length = unit(2, "mm"), angle = 20, type = "closed"),
alpha = .2
))) +
scale_color_gradient(low = "purple", high = "red1")+
theme_bw(base_size=24)

BC_Rules
rules_F <- apriori(trans1, parameter = list(minlen=3, maxlen = 5, supp=0.001, conf=0.5),
appearance = list(rhs="Prsn_Injry_Sev_ID=BC", default="lhs"),
control = list(verbose=F))
rules_F
## set of 1076 rules
#Pruning rules to get rid of redundant rules
subset.matrix = is.subset(rules_F,rules_F)
subset.matrix[lower.tri(subset.matrix, diag = T)]<-F
redundant = apply(subset.matrix,2,any)
rules.pruned = rules_F[!redundant]
length(rules.pruned)
## [1] 659
# Sort pruned rules by lift in decreasing order
rules.pruned.lift <- sort(rules.pruned, by="lift", decreasing=TRUE)
#Save rules to dataframe
rules_df <- as(rules.pruned.lift, "data.frame")
rules_df <- rules_df %>% mutate_if(is.numeric, round, 3)
datatable(
rules_df, extensions = c('Select', 'Buttons'), options = list(
select = list(style = 'os', items = 'row'),
dom = 'Blfrtip',
rowId = 0,
buttons = c('selectRows', 'csv', 'excel')
),
selection = 'none'
)
##Scatter Plot##
#Lift
plot(rules_F, jitter=0, shading = "lift", engine = "ggplot2", main = NULL) +
scale_color_gradient(low = "purple", high = "red1") +
labs(x = "Support", y = "Confidence", color = "Lift") +
theme_bw(base_size=24)

#Network Graph
#Lift
rules_F.sorted <- sort(rules_F, by= "lift", decreasing = "True")
Top15_F = subset(rules_F.sorted)[1:15]
plot(Top15_F, shading = "lift", method = "graph",
control = list(
edges = ggraph::geom_edge_link(
end_cap = ggraph::circle(4, "mm"),
start_cap = ggraph::circle(4, "mm"),
color = "black",
arrow = arrow(length = unit(2, "mm"), angle = 20, type = "closed"),
alpha = .2
))) +
scale_color_gradient(low = "purple", high = "red1")+
theme_bw(base_size=24)

O_Rules
rules_F <- apriori(trans1, parameter = list(minlen=3, maxlen = 5, supp=0.05, conf=0.5),
appearance = list(rhs="Prsn_Injry_Sev_ID=O", default="lhs"),
control = list(verbose=F))
rules_F
## set of 2858 rules
#Pruning rules to get rid of redundant rules
subset.matrix = is.subset(rules_F,rules_F)
subset.matrix[lower.tri(subset.matrix, diag = T)]<-F
redundant = apply(subset.matrix,2,any)
rules.pruned = rules_F[!redundant]
length(rules.pruned)
## [1] 463
# Sort pruned rules by lift in decreasing order
rules.pruned.lift <- sort(rules.pruned, by="lift", decreasing=TRUE)
#Save rules to dataframe
rules_df <- as(rules.pruned.lift, "data.frame")
rules_df <- rules_df %>% mutate_if(is.numeric, round, 3)
datatable(
rules_df, extensions = c('Select', 'Buttons'), options = list(
select = list(style = 'os', items = 'row'),
dom = 'Blfrtip',
rowId = 0,
buttons = c('selectRows', 'csv', 'excel')
),
selection = 'none'
)
##Scatter Plot##
#Lift
plot(rules_F, jitter=0, shading = "lift", engine = "ggplot2", main = NULL) +
scale_color_gradient(low = "purple", high = "red1") +
labs(x = "Support", y = "Confidence", color = "Lift") +
theme_bw(base_size=24)

#Network Graph
#Lift
rules_F.sorted <- sort(rules_F, by= "lift", decreasing = "True")
Top15_F = subset(rules_F.sorted)[1:15]
plot(Top15_F, shading = "lift", method = "graph",
control = list(
edges = ggraph::geom_edge_link(
end_cap = ggraph::circle(4, "mm"),
start_cap = ggraph::circle(4, "mm"),
color = "black",
arrow = arrow(length = unit(2, "mm"), angle = 20, type = "closed"),
alpha = .2
))) +
scale_color_gradient(low = "purple", high = "red1")+
theme_bw(base_size=24)

Partial Automation
# Subset for Partial Automation
table(dat3$Rpt_Autonomous_Level_Engaged_ID)
##
## Assisted Driving Autonomous Driving Systems
## 3345 156
## Partial Automation
## 1148
autolevel <- subset(dat3, Rpt_Autonomous_Level_Engaged_ID == "Partial Automation")
dim(autolevel)
## [1] 1148 15
table(autolevel$Rpt_Autonomous_Level_Engaged_ID)
##
## Partial Automation
## 1148
#Converting into transactions
trans1 <- as(autolevel, "transactions")
trans1
## transactions in sparse format with
## 1148 transactions (rows) and
## 64 items (columns)
## [1] "Rpt_Autonomous_Level_Engaged_ID=Partial Automation"
## [2] "Prsn_Injry_Sev_ID=BC"
## [3] "Prsn_Injry_Sev_ID=KA"
## [4] "Prsn_Injry_Sev_ID=O"
## [5] "Veh_Body_Styl_ID=Others"
## [6] "Veh_Body_Styl_ID=Passenger Car"
## [7] "Veh_Body_Styl_ID=Pickup"
## [8] "Veh_Body_Styl_ID=Sport Utility Vehicle"
## [9] "Crash_Speed_Limit=25 MPH or less"
## [10] "Crash_Speed_Limit=30-45 MPH"
## [11] "Crash_Speed_Limit=50-65 MPH"
## [12] "Crash_Speed_Limit=70 MPH and Over"
## [13] "Wthr_Cond_ID=Clear"
## [14] "Wthr_Cond_ID=Cloudy"
## [15] "Wthr_Cond_ID=Others"
## [16] "Wthr_Cond_ID=Rain"
## [17] "Light_Cond_ID=Dark, Lighted"
## [18] "Light_Cond_ID=Dark, Not Lighted"
## [19] "Light_Cond_ID=Daylight"
## [20] "Light_Cond_ID=Other"
## [21] "Road_Type_ID=2 Lane, 2 Way"
## [22] "Road_Type_ID=4 Or More Lanes, Divided"
## [23] "Road_Type_ID=4 Or More Lanes, Undivided"
## [24] "Road_Type_ID=Not Applicable"
## [25] "Traffic_Cntl_ID=Marked Lanes"
## [26] "Traffic_Cntl_ID=None"
## [27] "Traffic_Cntl_ID=Others"
## [28] "Traffic_Cntl_ID=Signal Light"
## [29] "Traffic_Cntl_ID=Stop Sign"
## [30] "Intrsct_Relat_ID=Driveway Access"
## [31] "Intrsct_Relat_ID=Intersection"
## [32] "Intrsct_Relat_ID=Intersection Related"
## [33] "Intrsct_Relat_ID=Non Intersection"
## [34] "FHE_Collsn_ID=Angle"
## [35] "FHE_Collsn_ID=One Motor Vehicle"
## [36] "FHE_Collsn_ID=Opposite Direction"
## [37] "FHE_Collsn_ID=Others"
## [38] "FHE_Collsn_ID=Same Direction"
## [39] "Road_Cls_ID=City Street"
## [40] "Road_Cls_ID=Farm To Market"
## [41] "Road_Cls_ID=Interstate"
## [42] "Road_Cls_ID=Other Roads"
## [43] "Road_Cls_ID=US & State Hwys"
## [44] "Pop_Group_ID=100,000 - 249,999 Pop"
## [45] "Pop_Group_ID=25,000 - 49,999 Pop"
## [46] "Pop_Group_ID=250,000 Pop And Over"
## [47] "Pop_Group_ID=50,000 - 99,999 Pop"
## [48] "Pop_Group_ID=Rural"
## [49] "Pop_Group_ID=Town Under 2,4999 Pop"
## [50] "Prsn_Age=>64 years"
## [51] "Prsn_Age=15-24 years"
## [52] "Prsn_Age=25-34 years"
## [53] "Prsn_Age=35-44 years"
## [54] "Prsn_Age=45-54 years"
## [55] "Prsn_Age=55-64 years"
## [56] "Prsn_Age=Unknown"
## [57] "Prsn_Ethnicity_ID=Black"
## [58] "Prsn_Ethnicity_ID=Hispanic"
## [59] "Prsn_Ethnicity_ID=Other"
## [60] "Prsn_Ethnicity_ID=Unknown"
## [61] "Prsn_Ethnicity_ID=White"
## [62] "Prsn_Gndr_ID=Female"
## [63] "Prsn_Gndr_ID=Male"
## [64] "Prsn_Gndr_ID=Unknown"
## transactions as itemMatrix in sparse format with
## 1148 rows (elements/itemsets/transactions) and
## 64 columns (items) and a density of 0.234375
##
## most frequent items:
## Rpt_Autonomous_Level_Engaged_ID=Partial Automation
## 1148
## Wthr_Cond_ID=Clear
## 1002
## Prsn_Injry_Sev_ID=O
## 989
## Light_Cond_ID=Daylight
## 846
## Crash_Speed_Limit=30-45 MPH
## 752
## (Other)
## 12483
##
## element (itemset/transaction) length distribution:
## sizes
## 15
## 1148
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15 15 15 15 15 15
##
## includes extended item information - examples:
## labels
## 1 Rpt_Autonomous_Level_Engaged_ID=Partial Automation
## 2 Prsn_Injry_Sev_ID=BC
## 3 Prsn_Injry_Sev_ID=KA
## variables levels
## 1 Rpt_Autonomous_Level_Engaged_ID Partial Automation
## 2 Prsn_Injry_Sev_ID BC
## 3 Prsn_Injry_Sev_ID KA
##
## includes extended transaction information - examples:
## transactionID
## 1 1
## 2 2
## 3 3
#setting seed
set.seed(1234)
KA_Rules
rules_F <- apriori(trans1, parameter = list(minlen=3, maxlen = 5, supp=0.0006, conf=0.3),
appearance = list(rhs="Prsn_Injry_Sev_ID=KA", default="lhs"),
control = list(verbose=F))
rules_F
## set of 963 rules
#Pruning rules to get rid of redundant rules
subset.matrix = is.subset(rules_F,rules_F)
subset.matrix[lower.tri(subset.matrix, diag = T)]<-F
redundant = apply(subset.matrix,2,any)
rules.pruned = rules_F[!redundant]
length(rules.pruned)
## [1] 559
# Sort pruned rules by lift in decreasing order
rules.pruned.lift <- sort(rules.pruned, by="lift", decreasing=TRUE)
#Save rules to dataframe
rules_df <- as(rules.pruned.lift, "data.frame")
rules_df <- rules_df %>% mutate_if(is.numeric, round, 3)
datatable(
rules_df, extensions = c('Select', 'Buttons'), options = list(
select = list(style = 'os', items = 'row'),
dom = 'Blfrtip',
rowId = 0,
buttons = c('selectRows', 'csv', 'excel')
),
selection = 'none'
)
##Scatter Plot##
#Lift
plot(rules_F, jitter=0, shading = "lift", engine = "ggplot2", main = NULL) +
scale_color_gradient(low = "purple", high = "red1") +
labs(x = "Support", y = "Confidence", color = "Lift") +
theme_bw(base_size=24)

#Network Graph
#Lift
rules_F.sorted <- sort(rules_F, by= "lift", decreasing = "True")
Top15_F = subset(rules_F.sorted)[1:15]
plot(Top15_F, shading = "lift", method = "graph",
control = list(
edges = ggraph::geom_edge_link(
end_cap = ggraph::circle(4, "mm"),
start_cap = ggraph::circle(4, "mm"),
color = "black",
arrow = arrow(length = unit(2, "mm"), angle = 20, type = "closed"),
alpha = .2
))) +
scale_color_gradient(low = "purple", high = "red1")+
theme_bw(base_size=24)

BC_Rules
rules_F <- apriori(trans1, parameter = list(minlen=3, maxlen = 5, supp=0.001, conf=0.5),
appearance = list(rhs="Prsn_Injry_Sev_ID=BC", default="lhs"),
control = list(verbose=F))
rules_F
## set of 4315 rules
#Pruning rules to get rid of redundant rules
subset.matrix = is.subset(rules_F,rules_F)
subset.matrix[lower.tri(subset.matrix, diag = T)]<-F
redundant = apply(subset.matrix,2,any)
rules.pruned = rules_F[!redundant]
length(rules.pruned)
## [1] 1683
# Sort pruned rules by lift in decreasing order
rules.pruned.lift <- sort(rules.pruned, by="lift", decreasing=TRUE)
#Save rules to dataframe
rules_df <- as(rules.pruned.lift, "data.frame")
rules_df <- rules_df %>% mutate_if(is.numeric, round, 3)
datatable(
rules_df, extensions = c('Select', 'Buttons'), options = list(
select = list(style = 'os', items = 'row'),
dom = 'Blfrtip',
rowId = 0,
buttons = c('selectRows', 'csv', 'excel')
),
selection = 'none'
)
##Scatter Plot##
#Lift
plot(rules_F, jitter=0, shading = "lift", engine = "ggplot2", main = NULL) +
scale_color_gradient(low = "purple", high = "red1") +
labs(x = "Support", y = "Confidence", color = "Lift") +
theme_bw(base_size=24)

#Network Graph
#Lift
rules_F.sorted <- sort(rules_F, by= "lift", decreasing = "True")
Top15_F = subset(rules_F.sorted)[1:15]
plot(Top15_F, shading = "lift", method = "graph",
control = list(
edges = ggraph::geom_edge_link(
end_cap = ggraph::circle(4, "mm"),
start_cap = ggraph::circle(4, "mm"),
color = "black",
arrow = arrow(length = unit(2, "mm"), angle = 20, type = "closed"),
alpha = .2
))) +
scale_color_gradient(low = "purple", high = "red1")+
theme_bw(base_size=24)

O_Rules
rules_F <- apriori(trans1, parameter = list(minlen=3, maxlen = 5, supp=0.05, conf=0.5),
appearance = list(rhs="Prsn_Injry_Sev_ID=O", default="lhs"),
control = list(verbose=F))
rules_F
## set of 3902 rules
#Pruning rules to get rid of redundant rules
subset.matrix = is.subset(rules_F,rules_F)
subset.matrix[lower.tri(subset.matrix, diag = T)]<-F
redundant = apply(subset.matrix,2,any)
rules.pruned = rules_F[!redundant]
length(rules.pruned)
## [1] 463
# Sort pruned rules by lift in decreasing order
rules.pruned.lift <- sort(rules.pruned, by="lift", decreasing=TRUE)
#Save rules to dataframe
rules_df <- as(rules.pruned.lift, "data.frame")
rules_df <- rules_df %>% mutate_if(is.numeric, round, 3)
datatable(
rules_df, extensions = c('Select', 'Buttons'), options = list(
select = list(style = 'os', items = 'row'),
dom = 'Blfrtip',
rowId = 0,
buttons = c('selectRows', 'csv', 'excel')
),
selection = 'none'
)
##Scatter Plot##
#Lift
plot(rules_F, jitter=0, shading = "lift", engine = "ggplot2", main = NULL) +
scale_color_gradient(low = "purple", high = "red1") +
labs(x = "Support", y = "Confidence", color = "Lift") +
theme_bw(base_size=24)

#Network Graph
#Lift
rules_F.sorted <- sort(rules_F, by= "lift", decreasing = "True")
Top15_F = subset(rules_F.sorted)[1:15]
plot(Top15_F, shading = "lift", method = "graph",
control = list(
edges = ggraph::geom_edge_link(
end_cap = ggraph::circle(4, "mm"),
start_cap = ggraph::circle(4, "mm"),
color = "black",
arrow = arrow(length = unit(2, "mm"), angle = 20, type = "closed"),
alpha = .2
))) +
scale_color_gradient(low = "purple", high = "red1")+
theme_bw(base_size=24)

Autonomous Driving Systems
# Autonomous Driving Systems
# Subset for Partial Automation
table(dat3$Rpt_Autonomous_Level_Engaged_ID)
##
## Assisted Driving Autonomous Driving Systems
## 3345 156
## Partial Automation
## 1148
autolevel <- subset(dat3, Rpt_Autonomous_Level_Engaged_ID == "Autonomous Driving Systems")
dim(autolevel)
## [1] 156 15
table(autolevel$Rpt_Autonomous_Level_Engaged_ID)
##
## Autonomous Driving Systems
## 156
#Converting into transactions
trans1 <- as(autolevel, "transactions")
trans1
## transactions in sparse format with
## 156 transactions (rows) and
## 62 items (columns)
## [1] "Rpt_Autonomous_Level_Engaged_ID=Autonomous Driving Systems"
## [2] "Prsn_Injry_Sev_ID=BC"
## [3] "Prsn_Injry_Sev_ID=O"
## [4] "Veh_Body_Styl_ID=Others"
## [5] "Veh_Body_Styl_ID=Passenger Car"
## [6] "Veh_Body_Styl_ID=Pickup"
## [7] "Veh_Body_Styl_ID=Sport Utility Vehicle"
## [8] "Crash_Speed_Limit=25 MPH or less"
## [9] "Crash_Speed_Limit=30-45 MPH"
## [10] "Crash_Speed_Limit=50-65 MPH"
## [11] "Crash_Speed_Limit=70 MPH and Over"
## [12] "Wthr_Cond_ID=Clear"
## [13] "Wthr_Cond_ID=Cloudy"
## [14] "Wthr_Cond_ID=Others"
## [15] "Wthr_Cond_ID=Rain"
## [16] "Light_Cond_ID=Dark, Lighted"
## [17] "Light_Cond_ID=Dark, Not Lighted"
## [18] "Light_Cond_ID=Daylight"
## [19] "Light_Cond_ID=Other"
## [20] "Road_Type_ID=2 Lane, 2 Way"
## [21] "Road_Type_ID=4 Or More Lanes, Divided"
## [22] "Road_Type_ID=4 Or More Lanes, Undivided"
## [23] "Road_Type_ID=Not Applicable"
## [24] "Traffic_Cntl_ID=Marked Lanes"
## [25] "Traffic_Cntl_ID=None"
## [26] "Traffic_Cntl_ID=Others"
## [27] "Traffic_Cntl_ID=Signal Light"
## [28] "Traffic_Cntl_ID=Stop Sign"
## [29] "Intrsct_Relat_ID=Driveway Access"
## [30] "Intrsct_Relat_ID=Intersection"
## [31] "Intrsct_Relat_ID=Intersection Related"
## [32] "Intrsct_Relat_ID=Non Intersection"
## [33] "FHE_Collsn_ID=Angle"
## [34] "FHE_Collsn_ID=One Motor Vehicle"
## [35] "FHE_Collsn_ID=Opposite Direction"
## [36] "FHE_Collsn_ID=Same Direction"
## [37] "Road_Cls_ID=City Street"
## [38] "Road_Cls_ID=Farm To Market"
## [39] "Road_Cls_ID=Interstate"
## [40] "Road_Cls_ID=Other Roads"
## [41] "Road_Cls_ID=US & State Hwys"
## [42] "Pop_Group_ID=100,000 - 249,999 Pop"
## [43] "Pop_Group_ID=25,000 - 49,999 Pop"
## [44] "Pop_Group_ID=250,000 Pop And Over"
## [45] "Pop_Group_ID=50,000 - 99,999 Pop"
## [46] "Pop_Group_ID=Rural"
## [47] "Pop_Group_ID=Town Under 2,4999 Pop"
## [48] "Prsn_Age=>64 years"
## [49] "Prsn_Age=15-24 years"
## [50] "Prsn_Age=25-34 years"
## [51] "Prsn_Age=35-44 years"
## [52] "Prsn_Age=45-54 years"
## [53] "Prsn_Age=55-64 years"
## [54] "Prsn_Age=Unknown"
## [55] "Prsn_Ethnicity_ID=Black"
## [56] "Prsn_Ethnicity_ID=Hispanic"
## [57] "Prsn_Ethnicity_ID=Other"
## [58] "Prsn_Ethnicity_ID=Unknown"
## [59] "Prsn_Ethnicity_ID=White"
## [60] "Prsn_Gndr_ID=Female"
## [61] "Prsn_Gndr_ID=Male"
## [62] "Prsn_Gndr_ID=Unknown"
## transactions as itemMatrix in sparse format with
## 156 rows (elements/itemsets/transactions) and
## 62 columns (items) and a density of 0.2419355
##
## most frequent items:
## Rpt_Autonomous_Level_Engaged_ID=Autonomous Driving Systems
## 156
## Prsn_Injry_Sev_ID=O
## 137
## Wthr_Cond_ID=Clear
## 128
## Light_Cond_ID=Daylight
## 109
## Veh_Body_Styl_ID=Passenger Car
## 99
## (Other)
## 1711
##
## element (itemset/transaction) length distribution:
## sizes
## 15
## 156
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15 15 15 15 15 15
##
## includes extended item information - examples:
## labels
## 1 Rpt_Autonomous_Level_Engaged_ID=Autonomous Driving Systems
## 2 Prsn_Injry_Sev_ID=BC
## 3 Prsn_Injry_Sev_ID=O
## variables levels
## 1 Rpt_Autonomous_Level_Engaged_ID Autonomous Driving Systems
## 2 Prsn_Injry_Sev_ID BC
## 3 Prsn_Injry_Sev_ID O
##
## includes extended transaction information - examples:
## transactionID
## 1 1
## 2 2
## 3 3
#setting seed
set.seed(1234)
BC_Rules
rules_F <- apriori(trans1, parameter = list(minlen=3, maxlen = 5, supp=0.001, conf=0.5),
appearance = list(rhs="Prsn_Injry_Sev_ID=BC", default="lhs"),
control = list(verbose=F))
rules_F
## set of 9238 rules
#Pruning rules to get rid of redundant rules
subset.matrix = is.subset(rules_F,rules_F)
subset.matrix[lower.tri(subset.matrix, diag = T)]<-F
redundant = apply(subset.matrix,2,any)
rules.pruned = rules_F[!redundant]
length(rules.pruned)
## [1] 1335
# Sort pruned rules by lift in decreasing order
rules.pruned.lift <- sort(rules.pruned, by="lift", decreasing=TRUE)
#Save rules to dataframe
rules_df <- as(rules.pruned.lift, "data.frame")
rules_df <- rules_df %>% mutate_if(is.numeric, round, 3)
datatable(
rules_df, extensions = c('Select', 'Buttons'), options = list(
select = list(style = 'os', items = 'row'),
dom = 'Blfrtip',
rowId = 0,
buttons = c('selectRows', 'csv', 'excel')
),
selection = 'none'
)
##Scatter Plot##
#Lift
plot(rules_F, jitter=0, shading = "lift", engine = "ggplot2", main = NULL) +
scale_color_gradient(low = "purple", high = "red1") +
labs(x = "Support", y = "Confidence", color = "Lift") +
theme_bw(base_size=24)

#Network Graph
#Lift
rules_F.sorted <- sort(rules_F, by= "lift", decreasing = "True")
Top15_F = subset(rules_F.sorted)[1:15]
plot(Top15_F, shading = "lift", method = "graph",
control = list(
edges = ggraph::geom_edge_link(
end_cap = ggraph::circle(4, "mm"),
start_cap = ggraph::circle(4, "mm"),
color = "black",
arrow = arrow(length = unit(2, "mm"), angle = 20, type = "closed"),
alpha = .2
))) +
scale_color_gradient(low = "purple", high = "red1")+
theme_bw(base_size=24)

O_Rules
rules_F <- apriori(trans1, parameter = list(minlen=3, maxlen = 5, supp=0.05, conf=0.5),
appearance = list(rhs="Prsn_Injry_Sev_ID=O", default="lhs"),
control = list(verbose=F))
rules_F
## set of 3906 rules
#Pruning rules to get rid of redundant rules
subset.matrix = is.subset(rules_F,rules_F)
subset.matrix[lower.tri(subset.matrix, diag = T)]<-F
redundant = apply(subset.matrix,2,any)
rules.pruned = rules_F[!redundant]
length(rules.pruned)
## [1] 529
# Sort pruned rules by lift in decreasing order
rules.pruned.lift <- sort(rules.pruned, by="lift", decreasing=TRUE)
#Save rules to dataframe
rules_df <- as(rules.pruned.lift, "data.frame")
rules_df <- rules_df %>% mutate_if(is.numeric, round, 3)
datatable(
rules_df, extensions = c('Select', 'Buttons'), options = list(
select = list(style = 'os', items = 'row'),
dom = 'Blfrtip',
rowId = 0,
buttons = c('selectRows', 'csv', 'excel')
),
selection = 'none'
)
##Scatter Plot##
#Lift
plot(rules_F, jitter=0, shading = "lift", engine = "ggplot2", main = NULL) +
scale_color_gradient(low = "purple", high = "red1") +
labs(x = "Support", y = "Confidence", color = "Lift") +
theme_bw(base_size=24)

#Network Graph
#Lift
rules_F.sorted <- sort(rules_F, by= "lift", decreasing = "True")
Top15_F = subset(rules_F.sorted)[1:15]
plot(Top15_F, shading = "lift", method = "graph",
control = list(
edges = ggraph::geom_edge_link(
end_cap = ggraph::circle(4, "mm"),
start_cap = ggraph::circle(4, "mm"),
color = "black",
arrow = arrow(length = unit(2, "mm"), angle = 20, type = "closed"),
alpha = .2
))) +
scale_color_gradient(low = "purple", high = "red1")+
theme_bw(base_size=24)
