This report presents an Association Rules Mining (ARM) analysis on the ATP W152 survey dataset. The target variable is DrCell, which captures respondents’ perception of distracted driving via cell phone use as a problem.
DrCell categories analysed:
| Category | Description |
|---|---|
| Major problem | Respondent views cell-phone distracted driving as a major problem |
| Minor problem | Respondent views it as a minor problem |
| Not a problem | Respondent does not view it as a problem |
Two complementary ARM approaches are used:
# Run once to install – not re-run during knitting
install.packages(c("rmarkdown", "knitr", "ggplot2", "randomForest",
"tree", "readr", "magrittr", "dplyr",
"arules", "arulesViz", "RColorBrewer"))
library(ggplot2)
library(randomForest)
library(tree)
library(readr)
library(magrittr)
library(dplyr)
library(arules)
library(arulesViz)
library(RColorBrewer)
# Set working directory (adjust path if needed)
setwd("C:/Users/asr171/OneDrive - Texas State University/Das, Subasish's files - PewSurvey/W152/Arka/V3")
data1 <- read.csv(
"ATP W152 Data_cleanedFin_AC.csv",
header = TRUE,
sep = ","
)
cat("Dimensions:", dim(data1), "\n")
## Dimensions: 5410 15
cat("Column names:\n")
## Column names:
colnames(data1)
## [1] "Area" "Regn" "Land" "Age" "Gender" "Edu"
## [7] "Race" "Born" "Marr" "Politics" "Income" "Ideology"
## [13] "IntUse" "Wght" "DrCell"
# Convert all character columns to factors
data2 <- data1 %>% mutate_if(is.character, as.factor)
# Wght (col 14) = numeric survey weight -> excluded from all transactions
# DrCell (col 15) = target variable
cat("Summary of target variable DrCell:\n")
## Summary of target variable DrCell:
summary(data2$DrCell)
## Major problem Minor problem No Response Not a problem
## 4253 996 13 148
Excludes only
Wght(col 14).DrCellstays in the transaction matrix.
data3 <- as(data2[, -14], "transactions")
cat("Transaction matrix dimensions:\n")
## Transaction matrix dimensions:
print(data3)
## transactions in sparse format with
## 5410 transactions (rows) and
## 77 items (columns)
rules_full <- apriori(data3)
summary(rules_full)
## set of 2619 rules
##
## rule length distribution (lhs + rhs):sizes
## 1 2 3 4 5 6 7
## 3 89 540 1051 756 176 4
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 4.00 4.00 4.15 5.00 7.00
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.1000 Min. :0.8000 Min. :0.1022 Min. :0.9164
## 1st Qu.:0.1131 1st Qu.:0.8296 1st Qu.:0.1299 1st Qu.:1.0017
## Median :0.1342 Median :0.8709 Median :0.1542 Median :1.0340
## Mean :0.1598 Mean :0.8736 Mean :0.1838 Mean :1.0348
## 3rd Qu.:0.1750 3rd Qu.:0.9098 3rd Qu.:0.2018 3rd Qu.:1.0700
## Max. :0.8754 Max. :0.9880 Max. :1.0000 Max. :1.4973
## count
## Min. : 541.0
## 1st Qu.: 612.0
## Median : 726.0
## Mean : 864.5
## 3rd Qu.: 947.0
## Max. :4736.0
##
## mining info:
## data ntransactions support confidence call
## data3 5410 0.1 0.8 apriori(data = data3)
cat("Total rules generated:", length(rules_full), "\n")
## Total rules generated: 2619
options(digits = 2)
cat("Top 20 rules by lift:\n")
## Top 20 rules by lift:
inspect(head(rules_full, n = 20, by = "lift"))
## lhs rhs support confidence coverage lift count
## [1] {Income=$100,000 or more,
## IntUse=Several times a day} => {Marr=Married} 0.10 0.81 0.13 1.5 555
## [2] {Land=Rural,
## Race=No Response,
## Politics=No Response} => {Born=U.S. } 0.12 0.96 0.12 1.2 639
## [3] {Land=Rural,
## Race=No Response,
## IntUse=Several times a day} => {Born=U.S. } 0.10 0.96 0.11 1.2 565
## [4] {Land=Rural,
## Race=No Response,
## DrCell=Major problem} => {Born=U.S. } 0.15 0.96 0.16 1.2 823
## [5] {Area=Non-metropolitan,
## Race=No Response} => {Born=U.S. } 0.11 0.95 0.12 1.1 599
## [6] {Land=Rural,
## Race=No Response} => {Born=U.S. } 0.20 0.95 0.21 1.1 1067
## [7] {Land=Rural,
## Gender=A woman,
## Race=No Response} => {Born=U.S. } 0.11 0.95 0.12 1.1 593
## [8] {Land=Rural,
## Race=No Response,
## Marr=Married} => {Born=U.S. } 0.12 0.95 0.13 1.1 660
## [9] {Edu=H.S. graduate or less,
## Race=No Response,
## Politics=No Response} => {Born=U.S. } 0.12 0.95 0.13 1.1 660
## [10] {Land=Rural,
## Politics=No Response} => {Born=U.S. } 0.12 0.94 0.13 1.1 670
## [11] {Age=65+,
## Born=U.S. ,
## IntUse=Several times a day} => {Race=No Response} 0.13 0.96 0.13 1.1 697
## [12] {Area=Metropolitan,
## Land=Rural,
## Race=No Response} => {Born=U.S. } 0.11 0.94 0.12 1.1 612
## [13] {Gender=A man,
## Edu=H.S. graduate or less,
## Race=No Response} => {Born=U.S. } 0.10 0.94 0.11 1.1 568
## [14] {Land=Rural,
## IntUse=Several times a day} => {Born=U.S. } 0.11 0.94 0.12 1.1 593
## [15] {Edu=H.S. graduate or less,
## Race=No Response,
## DrCell=Major problem} => {Born=U.S. } 0.16 0.94 0.17 1.1 849
## [16] {Age=65+,
## Born=U.S. ,
## Politics=No Response,
## DrCell=Major problem} => {Race=No Response} 0.11 0.96 0.12 1.1 615
## [17] {Age=65+,
## Born=U.S. ,
## Marr=Married,
## DrCell=Major problem} => {Race=No Response} 0.10 0.96 0.11 1.1 548
## [18] {Age=65+,
## Gender=A woman,
## Born=U.S. } => {Race=No Response} 0.11 0.96 0.12 1.1 606
## [19] {Area=Metropolitan,
## Age=65+,
## Born=U.S. ,
## IntUse=Several times a day} => {Race=No Response} 0.11 0.96 0.12 1.1 598
## [20] {Area=Non-metropolitan} => {Born=U.S. } 0.12 0.94 0.12 1.1 630
par(mar = c(3, 10, 2, 2))
itemFrequencyPlot(data3,
topN = 20,
type = "absolute",
cex.names = 0.85,
las = 1,
xlab = "Absolute Frequency",
main = "Top 20 Items – Absolute Frequency",
col = brewer.pal(8, "Pastel2"),
horiz = TRUE)
iF20 <- rev(tail(sort(itemFrequency(data3)), 20))
par(mar = c(12, 4, 2, 1))
barplot(iF20,
las = 2,
cex.names = 0.85,
ylab = "Relative Frequency",
main = "Top 20 Items – Relative Frequency",
col = brewer.pal(8, "Pastel2"),
ylim = c(0, 1))
plot(rules_full, jitter = 0,
main = "All Rules – Support vs Confidence (colour = Lift)")
plot(rules_full, method = "grouped", control = list(k = 10),
main = "Grouped Matrix Plot")
## Available control parameters (with default values):
## k = 20
## aggr.fun = function (x, ...) UseMethod("mean")
## rhs_max = 10
## lhs_label_items = 2
## col = c("#EE0000FF", "#EEEEEEFF")
## groups = NULL
## engine = ggplot2
## verbose = FALSE
Rows are filtered by DrCell category.
WghtandDrCellare both removed from the transaction matrix. ARM mines factor co-occurrence patterns within each response group.
set.seed(1234)
data4_major <- subset(data2, DrCell == "Major problem")
cat("Rows in Major problem subset:", nrow(data4_major), "\n")
## Rows in Major problem subset: 4253
data5_major <- as(data4_major[, c(-14, -15)], "transactions")
summary(data5_major)
## transactions as itemMatrix in sparse format with
## 4253 rows (elements/itemsets/transactions) and
## 73 columns (items) and a density of 0.18
##
## most frequent items:
## Area=Metropolitan Race=No Response Born=U.S.
## 3746 3570 3531
## Politics=No Response Marr=Married (Other)
## 2419 2365 39658
##
## element (itemset/transaction) length distribution:
## sizes
## 13
## 4253
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13 13 13 13 13 13
##
## includes extended item information - examples:
## labels variables levels
## 1 Area=Metropolitan Area Metropolitan
## 2 Area=No Response Area No Response
## 3 Area=Non-metropolitan Area Non-metropolitan
##
## includes extended transaction information - examples:
## transactionID
## 1 1
## 2 3
## 3 4
rules_major_2 <- apriori(data5_major,
parameter = list(minlen = 2, maxlen = 2,
supp = 0.05, conf = 0.3,
target = "rules"))
rules_major_2.sorted <- sort(subset(rules_major_2, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_major_2.sorted), "\n")
## Rules found: 290
inspect(head(rules_major_2.sorted, 20))
## lhs rhs support confidence coverage lift count
## [1] {Area=Non-metropolitan} => {Land=Rural} 0.090 0.75 0.12 3.4 381
## [2] {Land=Rural} => {Area=Non-metropolitan} 0.090 0.40 0.22 3.4 381
## [3] {Age=18-29} => {Marr=Never been married} 0.081 0.60 0.14 3.1 346
## [4] {Marr=Never been married} => {Age=18-29} 0.081 0.41 0.20 3.1 346
## [5] {Income=Less than $30,000} => {Edu=H.S. graduate or less} 0.077 0.55 0.14 2.1 326
## [6] {Income=Less than $30,000} => {Marr=Never been married} 0.054 0.38 0.14 1.9 229
## [7] {Income=$100,000 or more} => {Edu=College graduate+} 0.222 0.67 0.33 1.6 943
## [8] {Edu=College graduate+} => {Income=$100,000 or more} 0.222 0.52 0.42 1.6 943
## [9] {Land=Urban} => {Marr=Never been married} 0.074 0.30 0.24 1.5 314
## [10] {Marr=Never been married} => {Land=Urban} 0.074 0.38 0.20 1.5 314
## [11] {Age=18-29} => {IntUse=Almost constantly} 0.102 0.75 0.14 1.5 433
## [12] {Land=Rural} => {Edu=H.S. graduate or less} 0.088 0.39 0.22 1.5 374
## [13] {Edu=H.S. graduate or less} => {Land=Rural} 0.088 0.34 0.26 1.5 374
## [14] {Born=Foreign} => {Land=Urban} 0.060 0.36 0.16 1.5 254
## [15] {Politics=The Democratic Party} => {Ideology=Moderate} 0.119 0.57 0.21 1.4 506
## [16] {Ideology=Conservative} => {Age=65+} 0.085 0.35 0.24 1.4 360
## [17] {Age=65+} => {Ideology=Conservative} 0.085 0.34 0.25 1.4 360
## [18] {Age=65+} => {IntUse=Several times a day} 0.143 0.57 0.25 1.4 609
## [19] {IntUse=Several times a day} => {Age=65+} 0.143 0.35 0.41 1.4 609
## [20] {Born=Foreign} => {IntUse=Almost constantly} 0.112 0.68 0.16 1.4 478
write(rules_major_2.sorted, file = "result_Major_2itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
if (length(rules_major_2.sorted) > 0) {
plot(rules_major_2.sorted, jitter = 0, main = "Major Problem – 2-Itemset Rules")
plot(rules_major_2.sorted, method = "graph",
main = "Major Problem – 2-Itemset Network")
}
## Available control parameters (with default values):
## layout = stress
## circular = FALSE
## ggraphdots = NULL
## edges = <environment>
## nodes = <environment>
## nodetext = <environment>
## colors = c("#EE0000FF", "#EEEEEEFF")
## engine = ggplot2
## max = 100
## verbose = FALSE
rules_major_3 <- apriori(data5_major,
parameter = list(minlen = 3, maxlen = 3,
supp = 0.05, conf = 0.3,
target = "rules"))
rules_major_3.sorted <- sort(subset(rules_major_3, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_major_3.sorted), "\n")
## Rules found: 2062
inspect(head(rules_major_3.sorted, 20))
## lhs rhs support confidence coverage lift count
## [1] {Marr=Never been married,
## IntUse=Almost constantly} => {Age=18-29} 0.065 0.52 0.125 3.9 278
## [2] {Land=Rural,
## Race=No Response} => {Area=Non-metropolitan} 0.085 0.42 0.202 3.5 362
## [3] {Area=Non-metropolitan,
## Marr=Married} => {Land=Rural} 0.058 0.78 0.074 3.5 246
## [4] {Land=Rural,
## Marr=Married} => {Area=Non-metropolitan} 0.058 0.42 0.139 3.5 246
## [5] {Area=Non-metropolitan,
## Politics=No Response} => {Land=Rural} 0.053 0.77 0.069 3.5 226
## [6] {Land=Rural,
## Born=U.S. } => {Area=Non-metropolitan} 0.085 0.41 0.207 3.4 362
## [7] {Land=Rural,
## Gender=A woman} => {Area=Non-metropolitan} 0.052 0.41 0.128 3.4 222
## [8] {Land=Rural,
## Politics=No Response} => {Area=Non-metropolitan} 0.053 0.41 0.130 3.4 226
## [9] {Area=Non-metropolitan,
## Born=U.S. } => {Land=Rural} 0.085 0.76 0.112 3.4 362
## [10] {Area=Non-metropolitan,
## Gender=A woman} => {Land=Rural} 0.052 0.76 0.069 3.4 222
## [11] {Area=Non-metropolitan,
## Race=No Response} => {Land=Rural} 0.085 0.76 0.112 3.4 362
## [12] {Age=18-29,
## IntUse=Almost constantly} => {Marr=Never been married} 0.065 0.64 0.102 3.3 278
## [13] {Area=Metropolitan,
## Age=18-29} => {Marr=Never been married} 0.077 0.63 0.122 3.2 326
## [14] {Area=Metropolitan,
## Marr=Never been married} => {Age=18-29} 0.077 0.42 0.182 3.1 326
## [15] {Born=U.S. ,
## Marr=Never been married} => {Age=18-29} 0.068 0.41 0.168 3.0 291
## [16] {Age=18-29,
## Born=U.S. } => {Marr=Never been married} 0.068 0.59 0.117 3.0 291
## [17] {Age=18-29,
## Race=No Response} => {Marr=Never been married} 0.059 0.58 0.102 3.0 252
## [18] {Race=No Response,
## Marr=Never been married} => {Age=18-29} 0.059 0.38 0.157 2.8 252
## [19] {Born=U.S. ,
## Income=Less than $30,000} => {Edu=H.S. graduate or less} 0.068 0.56 0.122 2.1 289
## [20] {Area=Metropolitan,
## Income=Less than $30,000} => {Edu=H.S. graduate or less} 0.062 0.54 0.116 2.0 264
write(rules_major_3.sorted, file = "result_Major_3itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
rules_major_4 <- apriori(data5_major,
parameter = list(minlen = 4, maxlen = 4,
supp = 0.05, conf = 0.3,
target = "rules"))
rules_major_4.sorted <- sort(subset(rules_major_4, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_major_4.sorted), "\n")
## Rules found: 4852
inspect(head(rules_major_4.sorted, 20))
## lhs rhs support confidence coverage lift count
## [1] {Area=Metropolitan,
## Marr=Never been married,
## IntUse=Almost constantly} => {Age=18-29} 0.061 0.52 0.117 3.9 259
## [2] {Born=U.S. ,
## Marr=Never been married,
## IntUse=Almost constantly} => {Age=18-29} 0.054 0.52 0.103 3.8 229
## [3] {Land=Rural,
## Race=No Response,
## Marr=Married} => {Area=Non-metropolitan} 0.056 0.43 0.128 3.6 237
## [4] {Land=Rural,
## Race=No Response,
## Born=U.S. } => {Area=Non-metropolitan} 0.082 0.42 0.194 3.6 349
## [5] {Area=Non-metropolitan,
## Born=U.S. ,
## Marr=Married} => {Land=Rural} 0.055 0.79 0.069 3.6 232
## [6] {Land=Rural,
## Born=U.S. ,
## Marr=Married} => {Area=Non-metropolitan} 0.055 0.42 0.129 3.6 232
## [7] {Area=Non-metropolitan,
## Race=No Response,
## Marr=Married} => {Land=Rural} 0.056 0.79 0.071 3.5 237
## [8] {Land=Rural,
## Race=No Response,
## Politics=No Response} => {Area=Non-metropolitan} 0.051 0.42 0.122 3.5 218
## [9] {Land=Rural,
## Born=U.S. ,
## Politics=No Response} => {Area=Non-metropolitan} 0.051 0.41 0.124 3.5 218
## [10] {Area=Non-metropolitan,
## Race=No Response,
## Politics=No Response} => {Land=Rural} 0.051 0.77 0.066 3.5 218
## [11] {Area=Non-metropolitan,
## Born=U.S. ,
## Politics=No Response} => {Land=Rural} 0.051 0.77 0.067 3.5 218
## [12] {Area=Non-metropolitan,
## Race=No Response,
## Born=U.S. } => {Land=Rural} 0.082 0.77 0.107 3.4 349
## [13] {Area=Metropolitan,
## Age=18-29,
## IntUse=Almost constantly} => {Marr=Never been married} 0.061 0.65 0.093 3.3 259
## [14] {Age=18-29,
## Born=U.S. ,
## IntUse=Almost constantly} => {Marr=Never been married} 0.054 0.63 0.086 3.2 229
## [15] {Area=Metropolitan,
## Age=18-29,
## Born=U.S. } => {Marr=Never been married} 0.064 0.62 0.104 3.1 273
## [16] {Area=Metropolitan,
## Age=18-29,
## Race=No Response} => {Marr=Never been married} 0.055 0.61 0.091 3.1 236
## [17] {Area=Metropolitan,
## Born=U.S. ,
## Marr=Never been married} => {Age=18-29} 0.064 0.41 0.155 3.1 273
## [18] {Area=Metropolitan,
## Race=No Response,
## Marr=Never been married} => {Age=18-29} 0.055 0.39 0.144 2.9 236
## [19] {Age=50-64,
## Edu=College graduate+,
## Marr=Married} => {Income=$100,000 or more} 0.053 0.73 0.074 2.2 227
## [20] {Edu=College graduate+,
## Marr=Married,
## IntUse=Almost constantly} => {Income=$100,000 or more} 0.102 0.71 0.143 2.2 433
write(rules_major_4.sorted, file = "result_Major_4itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
data4_minor <- subset(data2, DrCell == "Minor problem")
cat("Rows in Minor problem subset:", nrow(data4_minor), "\n")
## Rows in Minor problem subset: 996
data5_minor <- as(data4_minor[, c(-14, -15)], "transactions")
summary(data5_minor)
## transactions as itemMatrix in sparse format with
## 996 rows (elements/itemsets/transactions) and
## 73 columns (items) and a density of 0.18
##
## most frequent items:
## Race=No Response Area=Metropolitan Born=U.S.
## 877 855 832
## Politics=No Response Land=Suburban (Other)
## 550 527 9307
##
## element (itemset/transaction) length distribution:
## sizes
## 13
## 996
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13 13 13 13 13 13
##
## includes extended item information - examples:
## labels variables levels
## 1 Area=Metropolitan Area Metropolitan
## 2 Area=No Response Area No Response
## 3 Area=Non-metropolitan Area Non-metropolitan
##
## includes extended transaction information - examples:
## transactionID
## 1 2
## 2 7
## 3 11
rules_minor_2 <- apriori(data5_minor,
parameter = list(minlen = 2, maxlen = 2,
supp = 0.05, conf = 0.3,
target = "rules"))
rules_minor_2.sorted <- sort(subset(rules_minor_2, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_minor_2.sorted), "\n")
## Rules found: 310
inspect(head(rules_minor_2.sorted, 20))
## lhs rhs support confidence coverage lift count
## [1] {Area=Non-metropolitan} => {Land=Rural} 0.097 0.70 0.140 2.9 97
## [2] {Land=Rural} => {Area=Non-metropolitan} 0.097 0.41 0.238 2.9 97
## [3] {Age=18-29} => {Marr=Never been married} 0.089 0.59 0.153 2.7 89
## [4] {Marr=Never been married} => {Age=18-29} 0.089 0.41 0.220 2.7 89
## [5] {Income=Less than $30,000} => {Edu=H.S. graduate or less} 0.085 0.52 0.166 1.9 85
## [6] {Edu=H.S. graduate or less} => {Income=Less than $30,000} 0.085 0.31 0.272 1.9 85
## [7] {Area=Non-metropolitan} => {Regn=Midwest} 0.052 0.37 0.140 1.9 52
## [8] {Income=Less than $30,000} => {Marr=Never been married} 0.062 0.38 0.166 1.7 62
## [9] {Age=18-29} => {Politics=The Democratic Party} 0.053 0.35 0.153 1.6 53
## [10] {Income=Less than $30,000} => {Land=Urban} 0.059 0.36 0.166 1.6 59
## [11] {Area=Non-metropolitan} => {Edu=H.S. graduate or less} 0.060 0.43 0.140 1.6 60
## [12] {Born=Foreign} => {Land=Urban} 0.055 0.35 0.157 1.6 55
## [13] {Land=Rural} => {Edu=H.S. graduate or less} 0.102 0.43 0.238 1.6 102
## [14] {Edu=H.S. graduate or less} => {Land=Rural} 0.102 0.38 0.272 1.6 102
## [15] {Politics=The Democratic Party} => {Ideology=Moderate} 0.122 0.56 0.217 1.6 122
## [16] {Ideology=Moderate} => {Politics=The Democratic Party} 0.122 0.34 0.359 1.6 122
## [17] {Income=$100,000 or more} => {Edu=College graduate+} 0.228 0.71 0.321 1.5 227
## [18] {Edu=College graduate+} => {Income=$100,000 or more} 0.228 0.50 0.460 1.5 227
## [19] {Born=Foreign} => {Edu=College graduate+} 0.109 0.70 0.157 1.5 109
## [20] {Ideology=Very liberal} => {IntUse=Almost constantly} 0.059 0.69 0.085 1.5 59
write(rules_minor_2.sorted, file = "result_Minor_2itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
if (length(rules_minor_2.sorted) > 0) {
plot(rules_minor_2.sorted, jitter = 0, main = "Minor Problem – 2-Itemset Rules")
plot(rules_minor_2.sorted, method = "graph",
main = "Minor Problem – 2-Itemset Network")
}
## Available control parameters (with default values):
## layout = stress
## circular = FALSE
## ggraphdots = NULL
## edges = <environment>
## nodes = <environment>
## nodetext = <environment>
## colors = c("#EE0000FF", "#EEEEEEFF")
## engine = ggplot2
## max = 100
## verbose = FALSE
rules_minor_3 <- apriori(data5_minor,
parameter = list(minlen = 3, maxlen = 3,
supp = 0.05, conf = 0.3,
target = "rules"))
rules_minor_3.sorted <- sort(subset(rules_minor_3, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_minor_3.sorted), "\n")
## Rules found: 2170
inspect(head(rules_minor_3.sorted, 20))
## lhs rhs support confidence coverage lift count
## [1] {Land=Suburban,
## Marr=Never been married} => {Age=18-29} 0.056 0.51 0.110 3.3 56
## [2] {Marr=Never been married,
## IntUse=Almost constantly} => {Age=18-29} 0.067 0.50 0.135 3.3 67
## [3] {Area=Non-metropolitan,
## Marr=Married} => {Land=Rural} 0.054 0.75 0.072 3.2 54
## [4] {Age=18-29,
## Gender=A man} => {Marr=Never been married} 0.051 0.67 0.076 3.1 51
## [5] {Land=Rural,
## IntUse=Several times a day} => {Area=Non-metropolitan} 0.051 0.42 0.121 3.0 51
## [6] {Land=Rural,
## Race=No Response} => {Area=Non-metropolitan} 0.091 0.42 0.217 3.0 91
## [7] {Area=Non-metropolitan,
## Race=No Response} => {Land=Rural} 0.091 0.72 0.128 3.0 91
## [8] {Land=Rural,
## Born=U.S. } => {Area=Non-metropolitan} 0.091 0.41 0.221 3.0 91
## [9] {Area=Non-metropolitan,
## Born=U.S. } => {Land=Rural} 0.091 0.70 0.131 2.9 91
## [10] {Land=Suburban,
## Age=18-29} => {Marr=Never been married} 0.056 0.64 0.087 2.9 56
## [11] {Area=Non-metropolitan,
## Politics=No Response} => {Land=Rural} 0.052 0.69 0.075 2.9 52
## [12] {Age=18-29,
## IntUse=Almost constantly} => {Marr=Never been married} 0.067 0.64 0.105 2.9 67
## [13] {Area=Metropolitan,
## Marr=Never been married} => {Age=18-29} 0.082 0.43 0.190 2.8 82
## [14] {Land=Rural,
## Politics=No Response} => {Area=Non-metropolitan} 0.052 0.39 0.133 2.8 52
## [15] {Land=Rural,
## Marr=Married} => {Area=Non-metropolitan} 0.054 0.39 0.139 2.8 54
## [16] {Area=Non-metropolitan,
## IntUse=Several times a day} => {Land=Rural} 0.051 0.66 0.077 2.8 51
## [17] {Area=Metropolitan,
## Age=18-29} => {Marr=Never been married} 0.082 0.60 0.137 2.7 82
## [18] {Age=18-29,
## Race=No Response} => {Marr=Never been married} 0.072 0.60 0.121 2.7 72
## [19] {Born=U.S. ,
## Marr=Never been married} => {Age=18-29} 0.077 0.40 0.193 2.6 77
## [20] {Race=No Response,
## Marr=Never been married} => {Age=18-29} 0.072 0.40 0.182 2.6 72
write(rules_minor_3.sorted, file = "result_Minor_3itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
rules_minor_4 <- apriori(data5_minor,
parameter = list(minlen = 4, maxlen = 4,
supp = 0.05, conf = 0.3,
target = "rules"))
rules_minor_4.sorted <- sort(subset(rules_minor_4, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_minor_4.sorted), "\n")
## Rules found: 4869
inspect(head(rules_minor_4.sorted, 20))
## lhs rhs support confidence coverage lift count
## [1] {Area=Metropolitan,
## Land=Suburban,
## Marr=Never been married} => {Age=18-29} 0.053 0.52 0.101 3.4 53
## [2] {Area=Metropolitan,
## Marr=Never been married,
## IntUse=Almost constantly} => {Age=18-29} 0.064 0.52 0.123 3.4 64
## [3] {Race=No Response,
## Marr=Never been married,
## IntUse=Almost constantly} => {Age=18-29} 0.057 0.51 0.112 3.3 57
## [4] {Area=Non-metropolitan,
## Race=No Response,
## Marr=Married} => {Land=Rural} 0.051 0.78 0.065 3.3 51
## [5] {Born=U.S. ,
## Marr=Never been married,
## IntUse=Almost constantly} => {Age=18-29} 0.056 0.48 0.116 3.2 56
## [6] {Land=Rural,
## Race=No Response,
## Born=U.S. } => {Area=Non-metropolitan} 0.086 0.42 0.204 3.0 86
## [7] {Age=18-29,
## Race=No Response,
## IntUse=Almost constantly} => {Marr=Never been married} 0.057 0.66 0.086 3.0 57
## [8] {Area=Non-metropolitan,
## Race=No Response,
## Born=U.S. } => {Land=Rural} 0.086 0.71 0.121 3.0 86
## [9] {Area=Metropolitan,
## Age=18-29,
## IntUse=Almost constantly} => {Marr=Never been married} 0.064 0.65 0.098 3.0 64
## [10] {Area=Non-metropolitan,
## Race=No Response,
## Politics=No Response} => {Land=Rural} 0.050 0.70 0.071 3.0 50
## [11] {Area=Metropolitan,
## Land=Suburban,
## Age=18-29} => {Marr=Never been married} 0.053 0.64 0.083 2.9 53
## [12] {Land=Rural,
## Race=No Response,
## Politics=No Response} => {Area=Non-metropolitan} 0.050 0.40 0.124 2.9 50
## [13] {Land=Rural,
## Race=No Response,
## Marr=Married} => {Area=Non-metropolitan} 0.051 0.40 0.128 2.9 51
## [14] {Area=Metropolitan,
## Born=U.S. ,
## Marr=Never been married} => {Age=18-29} 0.070 0.43 0.164 2.8 70
## [15] {Area=Metropolitan,
## Race=No Response,
## Marr=Never been married} => {Age=18-29} 0.066 0.43 0.155 2.8 66
## [16] {Area=Metropolitan,
## Age=18-29,
## Race=No Response} => {Marr=Never been married} 0.066 0.62 0.107 2.8 66
## [17] {Age=18-29,
## Born=U.S. ,
## IntUse=Almost constantly} => {Marr=Never been married} 0.056 0.62 0.091 2.8 56
## [18] {Area=Metropolitan,
## Age=18-29,
## Born=U.S. } => {Marr=Never been married} 0.070 0.59 0.119 2.7 70
## [19] {Age=18-29,
## Race=No Response,
## Born=U.S. } => {Marr=Never been married} 0.061 0.58 0.106 2.6 61
## [20] {Race=No Response,
## Born=U.S. ,
## Marr=Never been married} => {Age=18-29} 0.061 0.39 0.159 2.5 61
write(rules_minor_4.sorted, file = "result_Minor_4itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
data4_not <- subset(data2, DrCell == "Not a problem")
cat("Rows in Not a problem subset:", nrow(data4_not), "\n")
## Rows in Not a problem subset: 148
data5_not <- as(data4_not[, c(-14, -15)], "transactions")
summary(data5_not)
## transactions as itemMatrix in sparse format with
## 148 rows (elements/itemsets/transactions) and
## 73 columns (items) and a density of 0.18
##
## most frequent items:
## Area=Metropolitan Born=U.S. Race=No Response Gender=A woman
## 123 122 115 77
## Ideology=Moderate (Other)
## 70 1417
##
## element (itemset/transaction) length distribution:
## sizes
## 13
## 148
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 13 13 13 13 13 13
##
## includes extended item information - examples:
## labels variables levels
## 1 Area=Metropolitan Area Metropolitan
## 2 Area=No Response Area No Response
## 3 Area=Non-metropolitan Area Non-metropolitan
##
## includes extended transaction information - examples:
## transactionID
## 1 63
## 2 67
## 3 90
rules_not_2 <- apriori(data5_not,
parameter = list(minlen = 2, maxlen = 2,
supp = 0.05, conf = 0.3,
target = "rules"))
rules_not_2.sorted <- sort(subset(rules_not_2, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_not_2.sorted), "\n")
## Rules found: 385
inspect(head(rules_not_2.sorted, 20))
## lhs rhs support confidence coverage lift count
## [1] {Ideology=Very liberal} => {Age=18-29} 0.054 0.62 0.088 2.8 8
## [2] {Born=Foreign} => {Income=$100,000 or more} 0.054 0.35 0.155 2.7 8
## [3] {Income=$100,000 or more} => {Born=Foreign} 0.054 0.42 0.128 2.7 8
## [4] {Edu=College graduate+} => {Income=$100,000 or more} 0.095 0.34 0.277 2.7 14
## [5] {Income=$100,000 or more} => {Edu=College graduate+} 0.095 0.74 0.128 2.7 14
## [6] {Area=Non-metropolitan} => {Land=Rural} 0.128 0.76 0.169 2.4 19
## [7] {Land=Rural} => {Area=Non-metropolitan} 0.128 0.40 0.318 2.4 19
## [8] {IntUse=About once a day} => {Ideology=Conservative} 0.054 0.38 0.142 2.1 8
## [9] {Born=Foreign} => {Age=50-64} 0.061 0.39 0.155 2.1 9
## [10] {Age=50-64} => {Born=Foreign} 0.061 0.32 0.189 2.1 9
## [11] {Income=$100,000 or more} => {Regn=Northeast} 0.061 0.47 0.128 2.1 9
## [12] {Marr=Living with a partner} => {Age=18-29} 0.061 0.43 0.142 2.0 9
## [13] {Income=$80,000 to less than $90,000} => {Politics=No Response} 0.054 0.89 0.061 2.0 8
## [14] {Income=$100,000 or more} => {Marr=Married} 0.088 0.68 0.128 1.9 13
## [15] {Born=Foreign} => {Marr=Married} 0.101 0.65 0.155 1.9 15
## [16] {Income=$40,000 to less than $50,000} => {IntUse=Several times a day} 0.074 0.69 0.108 1.8 11
## [17] {Age=18-29} => {Marr=Never been married} 0.128 0.59 0.216 1.8 19
## [18] {Marr=Never been married} => {Age=18-29} 0.128 0.38 0.338 1.8 19
## [19] {Area=Non-metropolitan} => {Ideology=Conservative} 0.054 0.32 0.169 1.8 8
## [20] {Born=Foreign} => {Edu=College graduate+} 0.074 0.48 0.155 1.7 11
write(rules_not_2.sorted, file = "result_NotProblem_2itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
if (length(rules_not_2.sorted) > 0) {
plot(rules_not_2.sorted, jitter = 0, main = "Not a Problem – 2-Itemset Rules")
plot(rules_not_2.sorted, method = "graph",
main = "Not a Problem – 2-Itemset Network")
}
## Available control parameters (with default values):
## layout = stress
## circular = FALSE
## ggraphdots = NULL
## edges = <environment>
## nodes = <environment>
## nodetext = <environment>
## colors = c("#EE0000FF", "#EEEEEEFF")
## engine = ggplot2
## max = 100
## verbose = FALSE
rules_not_3 <- apriori(data5_not,
parameter = list(minlen = 3, maxlen = 3,
supp = 0.05, conf = 0.3,
target = "rules"))
rules_not_3.sorted <- sort(subset(rules_not_3, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_not_3.sorted), "\n")
## Rules found: 2440
inspect(head(rules_not_3.sorted, 20))
## lhs rhs support confidence coverage lift count
## [1] {Regn=Northeast,
## Edu=College graduate+} => {Income=$100,000 or more} 0.054 0.62 0.088 4.8 8
## [2] {Edu=College graduate+,
## Marr=Married} => {Income=$100,000 or more} 0.068 0.53 0.128 4.1 10
## [3] {Marr=Married,
## IntUse=Almost constantly} => {Income=$100,000 or more} 0.068 0.53 0.128 4.1 10
## [4] {Age=30-49,
## Edu=College graduate+} => {Income=$100,000 or more} 0.054 0.50 0.108 3.9 8
## [5] {Land=Suburban,
## Marr=Married} => {Born=Foreign} 0.074 0.58 0.128 3.7 11
## [6] {Land=Rural,
## IntUse=Several times a day} => {Area=Non-metropolitan} 0.074 0.61 0.122 3.6 11
## [7] {Land=Suburban,
## Edu=College graduate+} => {Income=$100,000 or more} 0.054 0.44 0.122 3.5 8
## [8] {Edu=College graduate+,
## Ideology=Moderate} => {Income=$100,000 or more} 0.054 0.44 0.122 3.5 8
## [9] {Race=No Response,
## Born=Foreign} => {Income=$100,000 or more} 0.054 0.42 0.128 3.3 8
## [10] {Land=Suburban,
## Marr=Married} => {Income=$100,000 or more} 0.054 0.42 0.128 3.3 8
## [11] {Edu=College graduate+,
## IntUse=Almost constantly} => {Income=$100,000 or more} 0.068 0.42 0.162 3.2 10
## [12] {Regn=Northeast,
## Income=$100,000 or more} => {Edu=College graduate+} 0.054 0.89 0.061 3.2 8
## [13] {Area=Non-metropolitan,
## Edu=H.S. graduate or less} => {Land=Rural} 0.068 1.00 0.068 3.1 10
## [14] {Regn=Northeast,
## IntUse=Almost constantly} => {Income=$100,000 or more} 0.054 0.40 0.135 3.1 8
## [15] {Age=30-49,
## Marr=Married} => {Income=$100,000 or more} 0.054 0.40 0.135 3.1 8
## [16] {Land=Rural,
## Marr=Married} => {Area=Non-metropolitan} 0.081 0.52 0.155 3.1 12
## [17] {Area=Metropolitan,
## Ideology=Very liberal} => {Age=18-29} 0.054 0.67 0.081 3.1 8
## [18] {Edu=H.S. graduate or less,
## IntUse=Several times a day} => {Income=$40,000 to less than $50,000} 0.061 0.33 0.182 3.1 9
## [19] {Race=No Response,
## Income=$100,000 or more} => {Born=Foreign} 0.054 0.47 0.115 3.0 8
## [20] {Marr=Married,
## Politics=The Republican Party} => {Age=50-64} 0.054 0.57 0.095 3.0 8
write(rules_not_3.sorted, file = "result_NotProblem_3itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
rules_not_4 <- apriori(data5_not,
parameter = list(minlen = 4, maxlen = 4,
supp = 0.05, conf = 0.3,
target = "rules"))
rules_not_4.sorted <- sort(subset(rules_not_4, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_not_4.sorted), "\n")
## Rules found: 4332
inspect(head(rules_not_4.sorted, 20))
## lhs rhs support confidence coverage lift count
## [1] {Edu=College graduate+,
## Marr=Married,
## IntUse=Almost constantly} => {Income=$100,000 or more} 0.054 0.73 0.074 5.7 8
## [2] {Area=Metropolitan,
## Edu=College graduate+,
## Marr=Married} => {Income=$100,000 or more} 0.068 0.62 0.108 4.9 10
## [3] {Area=Metropolitan,
## Marr=Married,
## IntUse=Almost constantly} => {Income=$100,000 or more} 0.068 0.62 0.108 4.9 10
## [4] {Area=Metropolitan,
## Regn=Northeast,
## Edu=College graduate+} => {Income=$100,000 or more} 0.054 0.62 0.088 4.8 8
## [5] {Age=30-49,
## Edu=College graduate+,
## IntUse=Almost constantly} => {Income=$100,000 or more} 0.054 0.62 0.088 4.8 8
## [6] {Land=Suburban,
## Race=No Response,
## Marr=Married} => {Born=Foreign} 0.068 0.67 0.101 4.3 10
## [7] {Edu=College graduate+,
## Race=No Response,
## Marr=Married} => {Income=$100,000 or more} 0.054 0.53 0.101 4.2 8
## [8] {Area=Metropolitan,
## Age=30-49,
## Edu=College graduate+} => {Income=$100,000 or more} 0.054 0.53 0.101 4.2 8
## [9] {Area=Metropolitan,
## Edu=College graduate+,
## Ideology=Moderate} => {Income=$100,000 or more} 0.054 0.50 0.108 3.9 8
## [10] {Area=Metropolitan,
## Age=30-49,
## Marr=Married} => {Income=$100,000 or more} 0.054 0.50 0.108 3.9 8
## [11] {Race=No Response,
## Marr=Married,
## IntUse=Almost constantly} => {Income=$100,000 or more} 0.054 0.50 0.108 3.9 8
## [12] {Land=Rural,
## Race=No Response,
## IntUse=Several times a day} => {Area=Non-metropolitan} 0.068 0.62 0.108 3.7 10
## [13] {Land=Rural,
## Born=U.S. ,
## IntUse=Several times a day} => {Area=Non-metropolitan} 0.068 0.62 0.108 3.7 10
## [14] {Area=Metropolitan,
## Race=No Response,
## Born=Foreign} => {Income=$100,000 or more} 0.054 0.47 0.115 3.7 8
## [15] {Edu=College graduate+,
## Race=No Response,
## IntUse=Almost constantly} => {Income=$100,000 or more} 0.054 0.47 0.115 3.7 8
## [16] {Area=Metropolitan,
## Land=Suburban,
## Edu=College graduate+} => {Income=$100,000 or more} 0.054 0.47 0.115 3.7 8
## [17] {Area=Metropolitan,
## Land=Suburban,
## Marr=Married} => {Born=Foreign} 0.068 0.56 0.122 3.6 10
## [18] {Area=Metropolitan,
## Gender=A man,
## Edu=College graduate+} => {Income=$100,000 or more} 0.054 0.44 0.122 3.5 8
## [19] {Area=Metropolitan,
## Land=Suburban,
## Marr=Married} => {Income=$100,000 or more} 0.054 0.44 0.122 3.5 8
## [20] {Edu=Some College,
## Race=No Response,
## Politics=No Response} => {Age=65+} 0.061 0.64 0.095 3.4 9
write(rules_not_4.sorted, file = "result_NotProblem_4itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
All rows are kept.
DrCellremains in the transaction matrix as the RHS target. Rules take the form: {demographic/attitude factors} ⟹ {DrCell = X}
data3_full <- as(data2[, -14], "transactions")
cat("Full transaction matrix:\n")
## Full transaction matrix:
print(data3_full)
## transactions in sparse format with
## 5410 transactions (rows) and
## 77 items (columns)
set.seed(1234)
rules_pred_maj_2 <- apriori(
data3_full,
parameter = list(minlen = 2, maxlen = 2, supp = 0.001, conf = 0.05,
target = "rules"),
appearance = list(default = "lhs", rhs = "DrCell=Major problem")
)
rules_pred_maj_2.sorted <- sort(subset(rules_pred_maj_2, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_pred_maj_2.sorted), "\n")
## Rules found: 32
inspect(head(rules_pred_maj_2.sorted, 20))
## lhs rhs support confidence coverage lift count
## [1] {Race=Cuban} => {DrCell=Major problem} 0.0083 0.87 0.0096 1.1 45
## [2] {Race=Other Central American} => {DrCell=Major problem} 0.0067 0.86 0.0078 1.1 36
## [3] {Race=Mexican} => {DrCell=Major problem} 0.0671 0.83 0.0804 1.1 363
## [4] {Income=$80,000 to less than $90,000} => {DrCell=Major problem} 0.0464 0.83 0.0562 1.1 251
## [5] {Race=Other South American} => {DrCell=Major problem} 0.0137 0.82 0.0166 1.0 74
## [6] {Age=50-64} => {DrCell=Major problem} 0.2039 0.82 0.2481 1.0 1103
## [7] {Race=Other country} => {DrCell=Major problem} 0.0033 0.82 0.0041 1.0 18
## [8] {Age=Refused} => {DrCell=Major problem} 0.0041 0.81 0.0050 1.0 22
## [9] {Income=$50,000 to less than $60,000} => {DrCell=Major problem} 0.0569 0.81 0.0699 1.0 308
## [10] {Edu=Some College} => {DrCell=Major problem} 0.2445 0.81 0.3013 1.0 1323
## [11] {Marr=Married} => {DrCell=Major problem} 0.4372 0.81 0.5427 1.0 2365
## [12] {Income=$100,000 or more} => {DrCell=Major problem} 0.2586 0.80 0.3218 1.0 1399
## [13] {IntUse=Almost constantly} => {DrCell=Major problem} 0.3893 0.80 0.4860 1.0 2106
## [14] {Regn=South} => {DrCell=Major problem} 0.3074 0.80 0.3837 1.0 1663
## [15] {Race=Salvadoran} => {DrCell=Major problem} 0.0037 0.80 0.0046 1.0 20
## [16] {Gender=A woman} => {DrCell=Major problem} 0.3932 0.80 0.4930 1.0 2127
## [17] {Ideology=Moderate} => {DrCell=Major problem} 0.3135 0.80 0.3932 1.0 1696
## [18] {Land=Urban} => {DrCell=Major problem} 0.1917 0.80 0.2407 1.0 1037
## [19] {Income=Refused} => {DrCell=Major problem} 0.0383 0.80 0.0481 1.0 207
## [20] {Politics=No Response} => {DrCell=Major problem} 0.4471 0.79 0.5628 1.0 2419
write(rules_pred_maj_2.sorted, file = "result_Pred_Major_2itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
if (length(rules_pred_maj_2.sorted) > 0) {
plot(rules_pred_maj_2.sorted, jitter = 0,
main = "Predict Major Problem – 2-Itemset Rules")
plot(rules_pred_maj_2.sorted, method = "grouped", control = list(k = 10),
main = "Predict Major Problem – Grouped Matrix")
plot(rules_pred_maj_2.sorted, method = "graph",
main = "Predict Major Problem – Network Graph")
}
## Available control parameters (with default values):
## k = 20
## aggr.fun = function (x, ...) UseMethod("mean")
## rhs_max = 10
## lhs_label_items = 2
## col = c("#EE0000FF", "#EEEEEEFF")
## groups = NULL
## engine = ggplot2
## verbose = FALSE
## Available control parameters (with default values):
## layout = stress
## circular = FALSE
## ggraphdots = NULL
## edges = <environment>
## nodes = <environment>
## nodetext = <environment>
## colors = c("#EE0000FF", "#EEEEEEFF")
## engine = ggplot2
## max = 100
## verbose = FALSE
rules_pred_maj_3 <- apriori(
data3_full,
parameter = list(minlen = 3, maxlen = 3, supp = 0.001, conf = 0.1,
target = "rules"),
appearance = list(default = "lhs", rhs = "DrCell=Major problem")
)
rules_pred_maj_3.sorted <- sort(subset(rules_pred_maj_3, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_pred_maj_3.sorted), "\n")
## Rules found: 728
inspect(head(rules_pred_maj_3.sorted, 20))
## lhs rhs support confidence coverage lift count
## [1] {Race=Other country,
## Ideology=Conservative} => {DrCell=Major problem} 0.0013 1 0.0013 1.3 7
## [2] {Age=50-64,
## Race=Other country} => {DrCell=Major problem} 0.0018 1 0.0018 1.3 10
## [3] {Edu=College graduate+,
## Race=Other country} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
## [4] {Race=Salvadoran,
## Politics=The Republican Party} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
## [5] {Edu=Some College,
## Race=Salvadoran} => {DrCell=Major problem} 0.0020 1 0.0020 1.3 11
## [6] {Race=Salvadoran,
## Ideology=Moderate} => {DrCell=Major problem} 0.0018 1 0.0018 1.3 10
## [7] {Land=Urban,
## Age=Refused} => {DrCell=Major problem} 0.0013 1 0.0013 1.3 7
## [8] {Regn=South,
## Age=Refused} => {DrCell=Major problem} 0.0013 1 0.0013 1.3 7
## [9] {Age=Refused,
## IntUse=Several times a day} => {DrCell=Major problem} 0.0015 1 0.0015 1.3 8
## [10] {Race=Mexican,
## Born=Refused} => {DrCell=Major problem} 0.0015 1 0.0015 1.3 8
## [11] {Edu=Some College,
## Born=Refused} => {DrCell=Major problem} 0.0013 1 0.0013 1.3 7
## [12] {Race=Other Central American,
## Marr=Living with a partner} => {DrCell=Major problem} 0.0015 1 0.0015 1.3 8
## [13] {Race=Other Central American,
## Ideology=Liberal} => {DrCell=Major problem} 0.0015 1 0.0015 1.3 8
## [14] {Race=Other Central American,
## Income=$100,000 or more} => {DrCell=Major problem} 0.0017 1 0.0017 1.3 9
## [15] {Race=Other Central American,
## Politics=No Response} => {DrCell=Major problem} 0.0026 1 0.0026 1.3 14
## [16] {Race=Spanish,
## Ideology=Liberal} => {DrCell=Major problem} 0.0013 1 0.0013 1.3 7
## [17] {Race=Cuban,
## Ideology=Very conservative} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
## [18] {Race=Cuban,
## Born=Foreign} => {DrCell=Major problem} 0.0044 1 0.0044 1.3 24
## [19] {Age=50-64,
## Race=Cuban} => {DrCell=Major problem} 0.0020 1 0.0020 1.3 11
## [20] {Race=Cuban,
## Income=$100,000 or more} => {DrCell=Major problem} 0.0024 1 0.0024 1.3 13
write(rules_pred_maj_3.sorted, file = "result_Pred_Major_3itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
rules_pred_maj_4 <- apriori(
data3_full,
parameter = list(minlen = 4, maxlen = 4, supp = 0.001, conf = 0.1,
target = "rules"),
appearance = list(default = "lhs", rhs = "DrCell=Major problem")
)
rules_pred_maj_4.sorted <- sort(subset(rules_pred_maj_4, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_pred_maj_4.sorted), "\n")
## Rules found: 7454
inspect(head(rules_pred_maj_4.sorted, 20))
## lhs rhs support confidence coverage lift count
## [1] {Age=50-64,
## Race=Other country,
## Income=Less than $30,000} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
## [2] {Area=Metropolitan,
## Race=Other country,
## Politics=The Democratic Party} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
## [3] {Regn=West,
## Age=50-64,
## Race=Other country} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
## [4] {Area=Metropolitan,
## Regn=West,
## Race=Other country} => {DrCell=Major problem} 0.0013 1 0.0013 1.3 7
## [5] {Age=50-64,
## Race=Other country,
## Ideology=Conservative} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
## [6] {Land=Suburban,
## Race=Other country,
## Ideology=Conservative} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
## [7] {Race=Other country,
## Born=U.S. ,
## Ideology=Conservative} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
## [8] {Area=Metropolitan,
## Race=Other country,
## Ideology=Conservative} => {DrCell=Major problem} 0.0013 1 0.0013 1.3 7
## [9] {Age=50-64,
## Race=Other country,
## IntUse=Almost constantly} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
## [10] {Age=50-64,
## Gender=A man,
## Race=Other country} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
## [11] {Land=Suburban,
## Age=50-64,
## Race=Other country} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
## [12] {Age=50-64,
## Race=Other country,
## Politics=No Response} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
## [13] {Age=50-64,
## Race=Other country,
## Born=U.S. } => {DrCell=Major problem} 0.0017 1 0.0017 1.3 9
## [14] {Area=Metropolitan,
## Age=50-64,
## Race=Other country} => {DrCell=Major problem} 0.0018 1 0.0018 1.3 10
## [15] {Edu=College graduate+,
## Race=Other country,
## IntUse=Almost constantly} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
## [16] {Area=Metropolitan,
## Edu=College graduate+,
## Race=Other country} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
## [17] {Area=Metropolitan,
## Gender=A woman,
## Race=Other country} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
## [18] {Land=Suburban,
## Race=Other country,
## Marr=Married} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
## [19] {Area=Metropolitan,
## Race=Other country,
## Marr=Married} => {DrCell=Major problem} 0.0017 1 0.0017 1.3 9
## [20] {Gender=Refused,
## Race=No Response,
## IntUse=Almost constantly} => {DrCell=Major problem} 0.0011 1 0.0011 1.3 6
write(rules_pred_maj_4.sorted, file = "result_Pred_Major_4itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
rules_pred_min_2 <- apriori(
data3_full,
parameter = list(minlen = 2, maxlen = 2, supp = 0.001, conf = 0.05,
target = "rules"),
appearance = list(default = "lhs", rhs = "DrCell=Minor problem")
)
rules_pred_min_2.sorted <- sort(subset(rules_pred_min_2, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_pred_min_2.sorted), "\n")
## Rules found: 34
inspect(head(rules_pred_min_2.sorted, 20))
## lhs rhs support confidence coverage lift count
## [1] {IntUse=Do not use the internet} => {DrCell=Minor problem} 0.0043 0.27 0.0155 1.5 23
## [2] {Marr=Refused} => {DrCell=Minor problem} 0.0013 0.24 0.0054 1.3 7
## [3] {Race=Dominican} => {DrCell=Minor problem} 0.0013 0.24 0.0054 1.3 7
## [4] {Income=$90,000 to less than $100,000} => {DrCell=Minor problem} 0.0142 0.24 0.0604 1.3 77
## [5] {Born=Refused} => {DrCell=Minor problem} 0.0015 0.24 0.0063 1.3 8
## [6] {Race=Spanish} => {DrCell=Minor problem} 0.0020 0.23 0.0087 1.3 11
## [7] {IntUse=About once a day} => {DrCell=Minor problem} 0.0098 0.23 0.0420 1.3 53
## [8] {Gender=In some other way} => {DrCell=Minor problem} 0.0013 0.23 0.0057 1.2 7
## [9] {Marr=Separated} => {DrCell=Minor problem} 0.0037 0.22 0.0170 1.2 20
## [10] {Age=65+} => {DrCell=Minor problem} 0.0560 0.22 0.2579 1.2 303
## [11] {Marr=Widowed} => {DrCell=Minor problem} 0.0096 0.21 0.0464 1.1 52
## [12] {Area=Non-metropolitan} => {DrCell=Minor problem} 0.0257 0.21 0.1240 1.1 139
## [13] {Marr=Divorced} => {DrCell=Minor problem} 0.0189 0.21 0.0913 1.1 102
## [14] {Regn=Northeast} => {DrCell=Minor problem} 0.0357 0.21 0.1728 1.1 193
## [15] {Ideology=Very liberal} => {DrCell=Minor problem} 0.0157 0.20 0.0767 1.1 85
## [16] {Ideology=Very conservative} => {DrCell=Minor problem} 0.0176 0.20 0.0858 1.1 95
## [17] {Income=Less than $30,000} => {DrCell=Minor problem} 0.0305 0.20 0.1495 1.1 165
## [18] {Income=$70,000 to less than $80,000} => {DrCell=Minor problem} 0.0139 0.20 0.0684 1.1 75
## [19] {IntUse=Several times a week} => {DrCell=Minor problem} 0.0044 0.20 0.0220 1.1 24
## [20] {Ideology=Liberal} => {DrCell=Minor problem} 0.0362 0.20 0.1815 1.1 196
write(rules_pred_min_2.sorted, file = "result_Pred_Minor_2itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
if (length(rules_pred_min_2.sorted) > 0) {
plot(rules_pred_min_2.sorted, jitter = 0,
main = "Predict Minor Problem – 2-Itemset Rules")
plot(rules_pred_min_2.sorted, method = "grouped", control = list(k = 10),
main = "Predict Minor Problem – Grouped Matrix")
plot(rules_pred_min_2.sorted, method = "graph",
main = "Predict Minor Problem – Network Graph")
}
## Available control parameters (with default values):
## k = 20
## aggr.fun = function (x, ...) UseMethod("mean")
## rhs_max = 10
## lhs_label_items = 2
## col = c("#EE0000FF", "#EEEEEEFF")
## groups = NULL
## engine = ggplot2
## verbose = FALSE
## Available control parameters (with default values):
## layout = stress
## circular = FALSE
## ggraphdots = NULL
## edges = <environment>
## nodes = <environment>
## nodetext = <environment>
## colors = c("#EE0000FF", "#EEEEEEFF")
## engine = ggplot2
## max = 100
## verbose = FALSE
rules_pred_min_3 <- apriori(
data3_full,
parameter = list(minlen = 3, maxlen = 3, supp = 0.001, conf = 0.1,
target = "rules"),
appearance = list(default = "lhs", rhs = "DrCell=Minor problem")
)
rules_pred_min_3.sorted <- sort(subset(rules_pred_min_3, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_pred_min_3.sorted), "\n")
## Rules found: 609
inspect(head(rules_pred_min_3.sorted, 20))
## lhs rhs support confidence coverage lift count
## [1] {Age=18-29,
## IntUse=About once a day} => {DrCell=Minor problem} 0.0017 0.47 0.0035 2.6 9
## [2] {Edu=College graduate+,
## Born=Refused} => {DrCell=Minor problem} 0.0013 0.44 0.0030 2.4 7
## [3] {Marr=Divorced,
## Income=$90,000 to less than $100,000} => {DrCell=Minor problem} 0.0017 0.43 0.0039 2.3 9
## [4] {Edu=Some College,
## IntUse=Do not use the internet} => {DrCell=Minor problem} 0.0013 0.41 0.0031 2.2 7
## [5] {Regn=Northeast,
## IntUse=Several times a week} => {DrCell=Minor problem} 0.0011 0.40 0.0028 2.2 6
## [6] {Land=Suburban,
## IntUse=Do not use the internet} => {DrCell=Minor problem} 0.0020 0.39 0.0052 2.1 11
## [7] {Ideology=Liberal,
## IntUse=About once a day} => {DrCell=Minor problem} 0.0022 0.39 0.0057 2.1 12
## [8] {Marr=Widowed,
## Politics=The Republican Party} => {DrCell=Minor problem} 0.0018 0.37 0.0050 2.0 10
## [9] {Marr=Widowed,
## IntUse=Do not use the internet} => {DrCell=Minor problem} 0.0013 0.37 0.0035 2.0 7
## [10] {Area=Non-metropolitan,
## Income=$90,000 to less than $100,000} => {DrCell=Minor problem} 0.0030 0.36 0.0083 1.9 16
## [11] {Age=18-29,
## Gender=In some other way} => {DrCell=Minor problem} 0.0011 0.35 0.0031 1.9 6
## [12] {Regn=South,
## Race=Spanish} => {DrCell=Minor problem} 0.0011 0.35 0.0031 1.9 6
## [13] {Edu=H.S. graduate or less,
## Marr=Separated} => {DrCell=Minor problem} 0.0024 0.35 0.0068 1.9 13
## [14] {Income=$40,000 to less than $50,000,
## IntUse=Less often} => {DrCell=Minor problem} 0.0013 0.35 0.0037 1.9 7
## [15] {Income=$100,000 or more,
## IntUse=About once a day} => {DrCell=Minor problem} 0.0017 0.35 0.0048 1.9 9
## [16] {Ideology=Liberal,
## IntUse=Do not use the internet} => {DrCell=Minor problem} 0.0011 0.33 0.0033 1.8 6
## [17] {Land=Urban,
## IntUse=Do not use the internet} => {DrCell=Minor problem} 0.0015 0.33 0.0044 1.8 8
## [18] {Marr=Married,
## IntUse=Do not use the internet} => {DrCell=Minor problem} 0.0015 0.33 0.0044 1.8 8
## [19] {Politics=The Democratic Party,
## IntUse=About once a day} => {DrCell=Minor problem} 0.0022 0.33 0.0067 1.8 12
## [20] {Income=$90,000 to less than $100,000,
## Ideology=Very conservative} => {DrCell=Minor problem} 0.0020 0.32 0.0063 1.8 11
write(rules_pred_min_3.sorted, file = "result_Pred_Minor_3itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
rules_pred_min_4 <- apriori(
data3_full,
parameter = list(minlen = 4, maxlen = 4, supp = 0.001, conf = 0.1,
target = "rules"),
appearance = list(default = "lhs", rhs = "DrCell=Minor problem")
)
rules_pred_min_4.sorted <- sort(subset(rules_pred_min_4, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_pred_min_4.sorted), "\n")
## Rules found: 4844
inspect(head(rules_pred_min_4.sorted, 20))
## lhs rhs support confidence coverage lift count
## [1] {Marr=Divorced,
## Income=$90,000 to less than $100,000,
## IntUse=Almost constantly} => {DrCell=Minor problem} 0.0013 0.70 0.0018 3.8 7
## [2] {Edu=H.S. graduate or less,
## Marr=Widowed,
## IntUse=Several times a week} => {DrCell=Minor problem} 0.0011 0.55 0.0020 3.0 6
## [3] {Area=Non-metropolitan,
## Regn=South,
## Income=Refused} => {DrCell=Minor problem} 0.0011 0.55 0.0020 3.0 6
## [4] {Marr=Living with a partner,
## Income=Less than $30,000,
## Ideology=Very liberal} => {DrCell=Minor problem} 0.0011 0.55 0.0020 3.0 6
## [5] {Area=Non-metropolitan,
## Regn=West,
## Ideology=Very conservative} => {DrCell=Minor problem} 0.0011 0.55 0.0020 3.0 6
## [6] {Land=Suburban,
## Edu=H.S. graduate or less,
## Marr=Separated} => {DrCell=Minor problem} 0.0015 0.53 0.0028 2.9 8
## [7] {Area=Non-metropolitan,
## Edu=H.S. graduate or less,
## Income=$90,000 to less than $100,000} => {DrCell=Minor problem} 0.0015 0.53 0.0028 2.9 8
## [8] {Area=Metropolitan,
## Marr=Widowed,
## IntUse=Do not use the internet} => {DrCell=Minor problem} 0.0013 0.50 0.0026 2.7 7
## [9] {Land=Suburban,
## Gender=A man,
## IntUse=Do not use the internet} => {DrCell=Minor problem} 0.0011 0.50 0.0022 2.7 6
## [10] {Land=Suburban,
## Age=18-29,
## IntUse=About once a day} => {DrCell=Minor problem} 0.0011 0.50 0.0022 2.7 6
## [11] {Area=Metropolitan,
## Age=18-29,
## IntUse=About once a day} => {DrCell=Minor problem} 0.0017 0.50 0.0033 2.7 9
## [12] {Edu=H.S. graduate or less,
## Ideology=Liberal,
## IntUse=About once a day} => {DrCell=Minor problem} 0.0013 0.50 0.0026 2.7 7
## [13] {Gender=A woman,
## Ideology=Liberal,
## IntUse=About once a day} => {DrCell=Minor problem} 0.0017 0.50 0.0033 2.7 9
## [14] {Marr=Married,
## Ideology=Liberal,
## IntUse=About once a day} => {DrCell=Minor problem} 0.0011 0.50 0.0022 2.7 6
## [15] {Area=Non-metropolitan,
## Regn=Midwest,
## Income=$90,000 to less than $100,000} => {DrCell=Minor problem} 0.0017 0.50 0.0033 2.7 9
## [16] {Age=65+,
## Income=$50,000 to less than $60,000,
## Ideology=Very conservative} => {DrCell=Minor problem} 0.0011 0.50 0.0022 2.7 6
## [17] {Area=Non-metropolitan,
## Marr=Never been married,
## Ideology=Liberal} => {DrCell=Minor problem} 0.0015 0.50 0.0030 2.7 8
## [18] {Land=Rural,
## Marr=Never been married,
## Ideology=Liberal} => {DrCell=Minor problem} 0.0017 0.50 0.0033 2.7 9
## [19] {Regn=West,
## Age=65+,
## Income=$40,000 to less than $50,000} => {DrCell=Minor problem} 0.0022 0.48 0.0046 2.6 12
## [20] {Land=Suburban,
## Politics=The Democratic Party,
## IntUse=About once a day} => {DrCell=Minor problem} 0.0017 0.47 0.0035 2.6 9
write(rules_pred_min_4.sorted, file = "result_Pred_Minor_4itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
rules_pred_not_2 <- apriori(
data3_full,
parameter = list(minlen = 2, maxlen = 2, supp = 0.001, conf = 0.05,
target = "rules"),
appearance = list(default = "lhs", rhs = "DrCell=Not a problem")
)
rules_pred_not_2.sorted <- sort(subset(rules_pred_not_2, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_pred_not_2.sorted), "\n")
## Rules found: 5
inspect(head(rules_pred_not_2.sorted, 20))
## lhs rhs support confidence
## [1] {IntUse=About once a day} => {DrCell=Not a problem} 0.0039 0.093
## [2] {Race=Puerto Rican} => {DrCell=Not a problem} 0.0013 0.089
## [3] {IntUse=Less often} => {DrCell=Not a problem} 0.0011 0.065
## [4] {Income=Less than $30,000} => {DrCell=Not a problem} 0.0085 0.057
## [5] {IntUse=Several times a week} => {DrCell=Not a problem} 0.0011 0.050
## coverage lift count
## [1] 0.042 3.4 21
## [2] 0.015 3.2 7
## [3] 0.017 2.4 6
## [4] 0.150 2.1 46
## [5] 0.022 1.8 6
write(rules_pred_not_2.sorted, file = "result_Pred_NotProblem_2itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
if (length(rules_pred_not_2.sorted) > 0) {
plot(rules_pred_not_2.sorted, jitter = 0,
main = "Predict Not a Problem – 2-Itemset Rules")
plot(rules_pred_not_2.sorted, method = "grouped", control = list(k = 10),
main = "Predict Not a Problem – Grouped Matrix")
plot(rules_pred_not_2.sorted, method = "graph",
main = "Predict Not a Problem – Network Graph")
}
## Available control parameters (with default values):
## k = 20
## aggr.fun = function (x, ...) UseMethod("mean")
## rhs_max = 10
## lhs_label_items = 2
## col = c("#EE0000FF", "#EEEEEEFF")
## groups = NULL
## engine = ggplot2
## verbose = FALSE
## Available control parameters (with default values):
## layout = stress
## circular = FALSE
## ggraphdots = NULL
## edges = <environment>
## nodes = <environment>
## nodetext = <environment>
## colors = c("#EE0000FF", "#EEEEEEFF")
## engine = ggplot2
## max = 100
## verbose = FALSE
rules_pred_not_3 <- apriori(
data3_full,
parameter = list(minlen = 3, maxlen = 3, supp = 0.001, conf = 0.1,
target = "rules"),
appearance = list(default = "lhs", rhs = "DrCell=Not a problem")
)
rules_pred_not_3.sorted <- sort(subset(rules_pred_not_3, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_pred_not_3.sorted), "\n")
## Rules found: 14
inspect(head(rules_pred_not_3.sorted, 20))
## lhs rhs support confidence coverage lift count
## [1] {Age=30-49,
## IntUse=About once a day} => {DrCell=Not a problem} 0.0015 0.28 0.0054 10.1 8
## [2] {Politics=The Democratic Party,
## IntUse=About once a day} => {DrCell=Not a problem} 0.0013 0.19 0.0067 7.1 7
## [3] {Race=Puerto Rican,
## IntUse=Almost constantly} => {DrCell=Not a problem} 0.0011 0.14 0.0081 5.0 6
## [4] {Land=Rural,
## IntUse=About once a day} => {DrCell=Not a problem} 0.0017 0.13 0.0131 4.6 9
## [5] {Edu=H.S. graduate or less,
## IntUse=About once a day} => {DrCell=Not a problem} 0.0024 0.13 0.0190 4.6 13
## [6] {Edu=H.S. graduate or less,
## Ideology=Very liberal} => {DrCell=Not a problem} 0.0017 0.12 0.0135 4.5 9
## [7] {Politics=The Democratic Party,
## Income=Less than $30,000} => {DrCell=Not a problem} 0.0041 0.12 0.0333 4.5 22
## [8] {Regn=West,
## IntUse=About once a day} => {DrCell=Not a problem} 0.0011 0.12 0.0092 4.4 6
## [9] {Politics=The Democratic Party,
## Ideology=Conservative} => {DrCell=Not a problem} 0.0013 0.12 0.0109 4.3 7
## [10] {Income=Less than $30,000,
## IntUse=About once a day} => {DrCell=Not a problem} 0.0011 0.12 0.0094 4.3 6
## [11] {Marr=Never been married,
## Ideology=Very conservative} => {DrCell=Not a problem} 0.0013 0.11 0.0118 4.0 7
## [12] {Marr=Living with a partner,
## Income=Less than $30,000} => {DrCell=Not a problem} 0.0017 0.11 0.0153 4.0 9
## [13] {Edu=H.S. graduate or less,
## Politics=The Democratic Party} => {DrCell=Not a problem} 0.0052 0.11 0.0488 3.9 28
## [14] {Regn=South,
## IntUse=About once a day} => {DrCell=Not a problem} 0.0015 0.10 0.0146 3.7 8
write(rules_pred_not_3.sorted, file = "result_Pred_NotProblem_3itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
rules_pred_not_4 <- apriori(
data3_full,
parameter = list(minlen = 4, maxlen = 4, supp = 0.001, conf = 0.1,
target = "rules"),
appearance = list(default = "lhs", rhs = "DrCell=Not a problem")
)
rules_pred_not_4.sorted <- sort(subset(rules_pred_not_4, subset = lift > 1),
by = "lift", decreasing = TRUE)
cat("Rules found:", length(rules_pred_not_4.sorted), "\n")
## Rules found: 96
inspect(head(rules_pred_not_4.sorted, 20))
## lhs rhs support confidence coverage lift count
## [1] {Age=30-49,
## Edu=H.S. graduate or less,
## IntUse=About once a day} => {DrCell=Not a problem} 0.0013 0.41 0.0031 15.1 7
## [2] {Area=Metropolitan,
## Age=30-49,
## IntUse=About once a day} => {DrCell=Not a problem} 0.0015 0.30 0.0050 10.8 8
## [3] {Age=30-49,
## Born=U.S. ,
## IntUse=About once a day} => {DrCell=Not a problem} 0.0011 0.24 0.0046 8.8 6
## [4] {Regn=Northeast,
## Politics=The Democratic Party,
## Income=Less than $30,000} => {DrCell=Not a problem} 0.0011 0.19 0.0057 7.1 6
## [5] {Regn=West,
## Politics=The Democratic Party,
## Income=Less than $30,000} => {DrCell=Not a problem} 0.0011 0.19 0.0057 7.1 6
## [6] {Gender=A woman,
## Politics=The Democratic Party,
## Ideology=Conservative} => {DrCell=Not a problem} 0.0011 0.19 0.0057 7.1 6
## [7] {Edu=H.S. graduate or less,
## Politics=The Democratic Party,
## Income=Less than $30,000} => {DrCell=Not a problem} 0.0031 0.19 0.0163 7.1 17
## [8] {Edu=H.S. graduate or less,
## Marr=Never been married,
## Politics=The Democratic Party} => {DrCell=Not a problem} 0.0026 0.19 0.0135 7.0 14
## [9] {Area=Metropolitan,
## Politics=The Democratic Party,
## IntUse=About once a day} => {DrCell=Not a problem} 0.0011 0.19 0.0059 6.9 6
## [10] {Edu=H.S. graduate or less,
## Marr=Divorced,
## Politics=The Democratic Party} => {DrCell=Not a problem} 0.0011 0.19 0.0059 6.9 6
## [11] {Age=18-29,
## Marr=Living with a partner,
## Income=Less than $30,000} => {DrCell=Not a problem} 0.0011 0.19 0.0059 6.9 6
## [12] {Land=Rural,
## Politics=The Democratic Party,
## Income=Less than $30,000} => {DrCell=Not a problem} 0.0015 0.19 0.0079 6.8 8
## [13] {Area=Metropolitan,
## Land=Rural,
## IntUse=About once a day} => {DrCell=Not a problem} 0.0015 0.17 0.0085 6.4 8
## [14] {Edu=H.S. graduate or less,
## Marr=Never been married,
## Ideology=Very liberal} => {DrCell=Not a problem} 0.0011 0.17 0.0065 6.3 6
## [15] {Age=18-29,
## Politics=The Democratic Party,
## Income=Less than $30,000} => {DrCell=Not a problem} 0.0013 0.17 0.0076 6.2 7
## [16] {Marr=Never been married,
## Politics=The Democratic Party,
## Income=Less than $30,000} => {DrCell=Not a problem} 0.0024 0.16 0.0152 5.8 13
## [17] {Age=18-29,
## Edu=H.S. graduate or less,
## Politics=The Democratic Party} => {DrCell=Not a problem} 0.0017 0.16 0.0107 5.7 9
## [18] {Regn=South,
## Gender=A woman,
## IntUse=About once a day} => {DrCell=Not a problem} 0.0011 0.15 0.0072 5.6 6
## [19] {Regn=Northeast,
## Edu=H.S. graduate or less,
## Politics=The Democratic Party} => {DrCell=Not a problem} 0.0013 0.15 0.0085 5.6 7
## [20] {Land=Rural,
## Edu=H.S. graduate or less,
## IntUse=About once a day} => {DrCell=Not a problem} 0.0011 0.15 0.0076 5.3 6
write(rules_pred_not_4.sorted, file = "result_Pred_NotProblem_4itemset.csv",
sep = ",", quote = TRUE, row.names = FALSE)
sessionInfo()
## R version 4.5.2 (2025-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
##
## Matrix products: default
## LAPACK version 3.12.1
##
## locale:
## [1] LC_COLLATE=English_United States.utf8
## [2] LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8
## [4] LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
##
## time zone: America/Chicago
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] RColorBrewer_1.1-3 arulesViz_1.5.4 arules_1.7-11
## [4] Matrix_1.7-4 dplyr_1.1.4 magrittr_2.0.4
## [7] readr_2.1.6 tree_1.0-45 randomForest_4.7-1.2
## [10] ggplot2_4.0.1
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 xfun_0.55 bslib_0.9.0 ggrepel_0.9.6
## [5] lattice_0.22-7 tzdb_0.5.0 vctrs_0.6.5 tools_4.5.2
## [9] generics_0.1.4 ca_0.71.1 tibble_3.3.0 pkgconfig_2.0.3
## [13] S7_0.2.1 lifecycle_1.0.4 compiler_4.5.2 farver_2.1.2
## [17] ggforce_0.5.0 graphlayouts_1.2.2 codetools_0.2-20 seriation_1.5.8
## [21] htmltools_0.5.9 sass_0.4.10 yaml_2.3.12 pillar_1.11.1
## [25] jquerylib_0.1.4 tidyr_1.3.1 MASS_7.3-65 cachem_1.1.0
## [29] iterators_1.0.14 viridis_0.6.5 TSP_1.2.6 foreach_1.5.2
## [33] tidyselect_1.2.1 digest_0.6.39 purrr_1.2.0 labeling_0.4.3
## [37] polyclip_1.10-7 fastmap_1.2.0 grid_4.5.2 cli_3.6.5
## [41] ggraph_2.2.2 tidygraph_1.3.1 withr_3.0.2 scales_1.4.0
## [45] registry_0.5-1 rmarkdown_2.30 igraph_2.2.1 otel_0.2.0
## [49] gridExtra_2.3 hms_1.1.4 memoise_2.0.1 evaluate_1.0.5
## [53] knitr_1.51 viridisLite_0.4.2 rlang_1.1.6 Rcpp_1.1.0
## [57] glue_1.8.0 tweenr_2.0.3 rstudioapi_0.17.1 jsonlite_2.0.0
## [61] R6_2.6.1