Project_250617

library(mlbench)

## Warning: package 'mlbench' was built under R version 4.4.3

library(readr)

## Warning: package 'readr' was built under R version 4.4.3

library(data.table)

## Warning: package 'data.table' was built under R version 4.4.3

library(stats)
library(arules)

## Loading required package: Matrix

## Warning: package 'Matrix' was built under R version 4.4.3

## 
## Attaching package: 'arules'

## The following objects are masked from 'package:base':
## 
##     abbreviate, write

library(arulesViz)

## Warning: package 'arulesViz' was built under R version 4.4.3

library(kernlab)

## 
## Attaching package: 'kernlab'

## The following object is masked from 'package:arules':
## 
##     size

library(Matrix)
## Load the raw bank marketing data.
## NOTE(review): this absolute path only resolves on the original author's
## machine -- point it at your local copy of bank-full.csv.
bank_full_1 <- read.csv("C:\\Users\\shann\\Downloads\\bank-full.csv")

## Keep the 9 columns used in this analysis. They are selected by name
## (the same columns as positions 1, 2, 3, 4, 6, 7, 8, 11, 12) so that a
## reordered CSV cannot silently pull in the wrong variables.
analysis_columns <- c(
  "age", "job", "marital", "education", "balance",
  "housing", "loan", "month", "duration"
)
bank_full <- bank_full_1[, analysis_columns]

## Structure of the reduced data set (column types and first values)
str(bank_full)

## 'data.frame':    45211 obs. of  9 variables:
##  $ age      : int  58 44 33 47 33 35 28 42 58 43 ...
##  $ job      : chr  "management" "technician" "entrepreneur" "blue-collar" ...
##  $ marital  : chr  "married" "single" "married" "married" ...
##  $ education: chr  "tertiary" "secondary" "secondary" "unknown" ...
##  $ balance  : int  2143 29 2 1506 1 231 447 2 121 593 ...
##  $ housing  : chr  "yes" "yes" "yes" "yes" ...
##  $ loan     : chr  "no" "no" "yes" "no" ...
##  $ month    : chr  "may" "may" "may" "may" ...
##  $ duration : int  261 151 76 92 198 139 217 380 50 55 ...

##More in depth information
## library() stops with an error when a package is missing; require() would
## only return FALSE and let the script fail later at the first ggplot() call.
library(lattice)

## Warning: package 'lattice' was built under R version 4.4.3

library(ggplot2)

## Warning: package 'ggplot2' was built under R version 4.4.3

## 
## Attaching package: 'ggplot2'

## The following object is masked from 'package:kernlab':
## 
##     alpha

## Shared theme tweak: rotate the x-axis labels by 45 degrees so that long
## category names (job, education) do not overlap.
rotated_x_labels <- theme(
  axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1)
)

## ---- Univariate distributions: one bar chart per variable ----
ggplot(bank_full, aes(x = age)) +
  geom_bar()

ggplot(bank_full, aes(x = job)) +
  geom_bar() +
  rotated_x_labels

ggplot(bank_full, aes(x = marital)) +
  geom_bar()

ggplot(bank_full, aes(x = education)) +
  geom_bar() +
  rotated_x_labels

ggplot(bank_full, aes(x = housing)) +
  geom_bar()

ggplot(bank_full, aes(x = loan)) +
  geom_bar()

## Turn month into a factor with calendar-ordered levels so the bars are
## drawn jan..dec instead of alphabetically.
bank_full$month <- factor(
  bank_full$month,
  levels = c(
    "jan", "feb", "mar", "apr", "may", "jun",
    "jul", "aug", "sep", "oct", "nov", "dec"
  )
)
ggplot(bank_full, aes(x = month)) +
  geom_bar()

## ---- Distributions split by job (one panel per job) ----
ggplot(bank_full, aes(age)) +
  geom_bar(fill = "skyblue") +
  facet_wrap(~job)

ggplot(bank_full, aes(education)) +
  geom_bar(fill = "skyblue") +
  facet_wrap(~job) +
  rotated_x_labels

ggplot(bank_full, aes(marital)) +
  geom_bar(fill = "skyblue") +
  facet_wrap(~job)

## ---- Distributions split by personal-loan status (one panel per value) ----
ggplot(bank_full, aes(housing)) +
  geom_bar(fill = "skyblue") +
  facet_wrap(~loan)

ggplot(bank_full, aes(marital)) +
  geom_bar(fill = "skyblue") +
  facet_wrap(~loan)

ggplot(bank_full, aes(job)) +
  geom_bar(fill = "skyblue") +
  facet_wrap(~loan) +
  rotated_x_labels

ggplot(bank_full, aes(education)) +
  geom_bar(fill = "skyblue") +
  facet_wrap(~loan) +
  rotated_x_labels

## Box plots of account balance per job category.
balance <- ggplot(bank_full, aes(factor(job), balance))

## Balance with outliers drawn
balance +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))

## Balance without outliers, zoomed to the 10th-90th percentile of balance.
## coord_cartesian() only zooms the view. The previous
## scale_y_continuous(limits = ...) discarded every row outside the limits
## before the box statistics were computed (8287 rows, per the warning it
## raised), so the medians and quartiles drawn were those of truncated data.
balance +
  geom_boxplot(outlier.shape = NA) +
  coord_cartesian(ylim = quantile(bank_full$balance, c(0.1, 0.9))) +
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))
## Mine association rules with support >= 2.5%, confidence >= 75% and at
## least 2 items per rule (lhs + rhs).
bankrules <- apriori(
  bank_full,
  parameter = list(support = 0.025, confidence = 0.75, minlen = 2)
)

## Warning: Column(s) 1, 2, 3, 4, 5, 6, 7, 9 not logical or factor. Applying
## default discretization (see '? discretizeDF').

## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##        0.75    0.1    1 none FALSE            TRUE       5   0.025      2
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 1130 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[44 item(s), 45211 transaction(s)] done [0.01s].
## sorting and recoding items ... [38 item(s)] done [0.00s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 5 6 done [0.02s].
## writing ... [1502 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].

## Number of rules found
bankrules

## set of 1502 rules

## Mining thresholds used for this rule set:
##   - items/itemsets must appear in at least 2.5% of transactions
##   - rules must have at least 75% confidence (strong rules)
##   - rules must have at least 2 items

## First 10 rules, in the order apriori() returned them
inspect(bankrules[seq_len(10)])

##      lhs                    rhs               support    confidence coverage  
## [1]  {job=unemployed}    => {loan=no}         0.02640950 0.9163469  0.02882042
## [2]  {month=jan}         => {loan=no}         0.02634315 0.8488952  0.03103227
## [3]  {job=entrepreneur}  => {loan=no}         0.02501604 0.7605918  0.03289023
## [4]  {job=self-employed} => {loan=no}         0.02985999 0.8549715  0.03492513
## [5]  {education=unknown} => {loan=no}         0.03813231 0.9283791  0.04107407
## [6]  {job=retired}       => {age=[45,95]}     0.04859437 0.9704064  0.05007631
## [7]  {job=retired}       => {housing=no}      0.03921612 0.7831272  0.05007631
## [8]  {job=retired}       => {marital=married} 0.03828714 0.7645760  0.05007631
## [9]  {job=retired}       => {loan=no}         0.04324169 0.8635159  0.05007631
## [10] {month=feb}         => {loan=no}         0.04996572 0.8527746  0.05859194
##      lift      count
## [1]  1.0911834 1194 
## [2]  1.0108621 1191 
## [3]  0.9057106 1131 
## [4]  1.0180977 1350 
## [5]  1.1055113 1724 
## [6]  2.8008837 2197 
## [7]  1.7631574 1773 
## [8]  1.2702008 1731 
## [9]  1.0282724 1955 
## [10] 1.0154817 2259

##Display the first 10 association rules from bankrules 

## Mine again with the same support/confidence thresholds, this time
## requiring at least 3 items per rule. This overwrites `bankrules`.
bankrules <- apriori(
  bank_full,
  parameter = list(support = 0.025, confidence = 0.75, minlen = 3)
)

## Warning: Column(s) 1, 2, 3, 4, 5, 6, 7, 9 not logical or factor. Applying
## default discretization (see '? discretizeDF').

## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##        0.75    0.1    1 none FALSE            TRUE       5   0.025      3
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 1130 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[44 item(s), 45211 transaction(s)] done [0.01s].
## sorting and recoding items ... [38 item(s)] done [0.00s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 5 6 done [0.02s].
## writing ... [1459 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].

## Number of rules found with minlen = 3
bankrules

## set of 1459 rules

## First 10 rules, in the order apriori() returned them
inspect(bankrules[seq_len(10)])

##      lhs                               rhs               support    confidence
## [1]  {age=[45,95], job=retired}     => {housing=no}      0.03826502 0.7874374 
## [2]  {job=retired, housing=no}      => {age=[45,95]}     0.03826502 0.9757473 
## [3]  {age=[45,95], job=retired}     => {marital=married} 0.03742452 0.7701411 
## [4]  {job=retired, marital=married} => {age=[45,95]}     0.03742452 0.9774697 
## [5]  {age=[45,95], job=retired}     => {loan=no}         0.04200305 0.8643605 
## [6]  {job=retired, loan=no}         => {age=[45,95]}     0.04200305 0.9713555 
## [7]  {job=retired, housing=no}      => {marital=married} 0.03043507 0.7760857 
## [8]  {job=retired, marital=married} => {housing=no}      0.03043507 0.7949162 
## [9]  {job=retired, housing=no}      => {loan=no}         0.03461547 0.8826847 
## [10] {job=retired, loan=no}         => {housing=no}      0.03461547 0.8005115 
##      coverage   lift     count
## [1]  0.04859437 1.772862 1730 
## [2]  0.03921612 2.816299 1730 
## [3]  0.04859437 1.279446 1692 
## [4]  0.03828714 2.821271 1692 
## [5]  0.04859437 1.029278 1899 
## [6]  0.04324169 2.803623 1899 
## [7]  0.03921612 1.289322 1376 
## [8]  0.03828714 1.789700 1376 
## [9]  0.03921612 1.051099 1565 
## [10] 0.04324169 1.802297 1565

## Order the rules by confidence, strongest first.
bankrules <- sort(bankrules, by = "confidence", decreasing = TRUE)

## Re-order by lift. arules' sort() defaults to decreasing = TRUE, so the
## highest-lift rule comes first; it is spelled out here because the pruning
## below depends on that order.
bankrules.sorted <- sort(bankrules, by = "lift", decreasing = TRUE)

## subset.matrix[i, j] is TRUE when the items of rule i are a subset of the
## items of rule j. sparse = FALSE returns a base logical matrix: on the
## default sparse matrix the old `<- NA` assignment was coerced to TRUE
## (see the "NA |--> TRUE" warning it raised), which marked the whole lower
## triangle as subset hits instead of blanking it.
subset.matrix <- is.subset(bankrules.sorted, bankrules.sorted, sparse = FALSE)

## Keep only the upper triangle (i < j): each rule is compared solely with
## rules that rank above it by lift, and never with itself.
subset.matrix[lower.tri(subset.matrix, diag = TRUE)] <- FALSE

## A rule is redundant when at least one higher-lift rule is a subset of it.
## The previous `>= 100` cut-off only had an effect because of the coerced
## lower triangle: column j carried n - j + 1 spurious hits, so the test
## could keep nothing but (a subset of) the 99 lowest-lift rules.
redundant <- colSums(subset.matrix) >= 1

## Positions of the redundant rules (uncomment to list them)
## which(redundant)

## Drop the redundant rules to get a smaller, more focused rule set
bankrules.pruned <- bankrules.sorted[!redundant]

## Number of rules left after pruning
bankrules.pruned

## set of 95 rules

## Only the rules not flagged in `redundant` are kept (note the `!`),
## which gives a cleaner, more focused rule set.

## First 10 pruned rules
inspect(bankrules.pruned[seq_len(10)])

##      lhs                           rhs          support confidence   coverage      lift count
## [1]  {age=[45,95],                                                                           
##       job=admin.}               => {loan=no} 0.02722789  0.7952196 0.03423945 0.9469454  1231
## [2]  {job=blue-collar,                                                                       
##       housing=no}               => {loan=no} 0.04717878  0.7947094 0.05936608 0.9463378  2133
## [3]  {marital=married,                                                                       
##       education=secondary,                                                                   
##       duration=[126,258)}       => {loan=no} 0.08409458  0.7945664 0.10583708 0.9461674  3802
## [4]  {age=[18,35),                                                                           
##       marital=married,                                                                       
##       duration=[126,258)}       => {loan=no} 0.03729181  0.7941592 0.04695760 0.9456826  1686
## [5]  {balance=[-8.02e+03,175),                                                               
##       housing=no}               => {loan=no} 0.11293712  0.7937199 0.14228838 0.9451595  5106
## [6]  {marital=married,                                                                       
##       balance=[-8.02e+03,175),                                                               
##       housing=no}               => {loan=no} 0.06403309  0.7935855 0.08068833 0.9449995  2895
## [7]  {age=[35,45),                                                                           
##       balance=[-8.02e+03,175),                                                               
##       duration=[0,126)}         => {loan=no} 0.03065626  0.7933600 0.03864104 0.9447310  1386
## [8]  {age=[18,35),                                                                           
##       job=blue-collar,                                                                       
##       education=secondary}      => {loan=no} 0.03724757  0.7924706 0.04700184 0.9436718  1684
## [9]  {age=[45,95],                                                                           
##       education=secondary,                                                                   
##       duration=[0,126)}         => {loan=no} 0.04352923  0.7919517 0.05496450 0.9430539  1968
## [10] {age=[45,95],                                                                           
##       marital=married,                                                                       
##       balance=[-8.02e+03,175),                                                               
##       housing=no}               => {loan=no} 0.03275751  0.7915553 0.04138373 0.9425819  1481

##Displays 1st 10 remaining rules after pruning

## Order the pruned rules by lift, highest first, then show the top 6
bankrules.pruned <- sort(
  bankrules.pruned,
  by = "lift",
  decreasing = TRUE
)
inspect(bankrules.pruned[seq_len(6)])

##     lhs                           rhs          support confidence   coverage      lift count
## [1] {age=[45,95],                                                                           
##      job=admin.}               => {loan=no} 0.02722789  0.7952196 0.03423945 0.9469454  1231
## [2] {job=blue-collar,                                                                       
##      housing=no}               => {loan=no} 0.04717878  0.7947094 0.05936608 0.9463378  2133
## [3] {marital=married,                                                                       
##      education=secondary,                                                                   
##      duration=[126,258)}       => {loan=no} 0.08409458  0.7945664 0.10583708 0.9461674  3802
## [4] {age=[18,35),                                                                           
##      marital=married,                                                                       
##      duration=[126,258)}       => {loan=no} 0.03729181  0.7941592 0.04695760 0.9456826  1686
## [5] {balance=[-8.02e+03,175),                                                               
##      housing=no}               => {loan=no} 0.11293712  0.7937199 0.14228838 0.9451595  5106
## [6] {marital=married,                                                                       
##      balance=[-8.02e+03,175),                                                               
##      housing=no}               => {loan=no} 0.06403309  0.7935855 0.08068833 0.9449995  2895

##sorts the association rules by lift in descending order 
## and displays the top 6 rules with the highest lift values

## Overview of the pruned rule set: rule count, rule-length distribution,
## and min/quartiles/mean/max of support, confidence, coverage, lift, count.
summary(bankrules.pruned)

## set of 95 rules
## 
## rule length distribution (lhs + rhs):sizes
##  3  4  5  6 
## 20 50 24  1 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.000   4.000   4.000   4.063   5.000   6.000 
## 
## summary of quality measures:
##     support          confidence        coverage            lift       
##  Min.   :0.02508   Min.   :0.7507   Min.   :0.03218   Min.   :0.8939  
##  1st Qu.:0.02982   1st Qu.:0.7737   1st Qu.:0.03863   1st Qu.:0.9213  
##  Median :0.03729   Median :0.7790   Median :0.04725   Median :0.9276  
##  Mean   :0.04783   Mean   :0.7786   Mean   :0.06137   Mean   :0.9271  
##  3rd Qu.:0.05391   3rd Qu.:0.7871   3rd Qu.:0.06953   3rd Qu.:0.9373  
##  Max.   :0.15120   Max.   :0.7952   Max.   :0.19497   Max.   :0.9469  
##      count     
##  Min.   :1134  
##  1st Qu.:1348  
##  Median :1686  
##  Mean   :2162  
##  3rd Qu.:2438  
##  Max.   :6836  
## 
## mining info:
##       data ntransactions support confidence
##  bank_full         45211   0.025       0.75
##                                                                                         call
##  apriori(data = bank_full, parameter = list(support = 0.025, confidence = 0.75, minlen = 3))

## provides overview statistics:

##Total number of rules found
##Distribution of rule lengths (how many items in antecedent ##+ consequent)
##Summary statistics for support, confidence, coverage, lift, ##and count
##Quartiles and mean values for each metric


## Five pruned rules that come first when sorting by confidence with
## sort()'s default ordering
pruned_by_confidence <- sort(bankrules.pruned, by = "confidence")
inspect(pruned_by_confidence[seq_len(5)])

##     lhs                           rhs          support confidence   coverage      lift count
## [1] {age=[45,95],                                                                           
##      job=admin.}               => {loan=no} 0.02722789  0.7952196 0.03423945 0.9469454  1231
## [2] {job=blue-collar,                                                                       
##      housing=no}               => {loan=no} 0.04717878  0.7947094 0.05936608 0.9463378  2133
## [3] {marital=married,                                                                       
##      education=secondary,                                                                   
##      duration=[126,258)}       => {loan=no} 0.08409458  0.7945664 0.10583708 0.9461674  3802
## [4] {age=[18,35),                                                                           
##      marital=married,                                                                       
##      duration=[126,258)}       => {loan=no} 0.03729181  0.7941592 0.04695760 0.9456826  1686
## [5] {balance=[-8.02e+03,175),                                                               
##      housing=no}               => {loan=no} 0.11293712  0.7937199 0.14228838 0.9451595  5106

##shows the 5 rules with the highest confidence (arules' sort() orders in decreasing order by default)

## Five pruned rules with the highest lift
pruned_by_lift <- sort(bankrules.pruned, by = "lift", decreasing = TRUE)
inspect(pruned_by_lift[seq_len(5)])

##     lhs                           rhs          support confidence   coverage      lift count
## [1] {age=[45,95],                                                                           
##      job=admin.}               => {loan=no} 0.02722789  0.7952196 0.03423945 0.9469454  1231
## [2] {job=blue-collar,                                                                       
##      housing=no}               => {loan=no} 0.04717878  0.7947094 0.05936608 0.9463378  2133
## [3] {marital=married,                                                                       
##      education=secondary,                                                                   
##      duration=[126,258)}       => {loan=no} 0.08409458  0.7945664 0.10583708 0.9461674  3802
## [4] {age=[18,35),                                                                           
##      marital=married,                                                                       
##      duration=[126,258)}       => {loan=no} 0.03729181  0.7941592 0.04695760 0.9456826  1686
## [5] {balance=[-8.02e+03,175),                                                               
##      housing=no}               => {loan=no} 0.11293712  0.7937199 0.14228838 0.9451595  5106

##This command shows the 5 association rules with the ##highest lift values - these are your strongest, most ##surprising patterns in the banking data.

## Scatter plot of all mined rules: support against lift, shaded by
## confidence
plot(
  bankrules,
  measure = c("support", "lift"),
  shading = "confidence"
)

## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

## Same plot restricted to the pruned rule set
plot(
  bankrules.pruned,
  measure = c("support", "lift"),
  shading = "confidence"
)

##This creates a scatter plot visualization of your ##association rules using the arulesViz package. Here's what ##this plot shows:
##Plot Elements:

##X-axis: Support (how frequently the rule appears in your ##data)
##Y-axis: Lift (how much stronger the association is compared ##to random)
##Color/Shading: Confidence (how reliable each rule is)
##Each point: Represents one association rule

## Mine association rules. Thresholds:
##   - items/itemsets must appear in at least 2.5% of transactions
##   - rules must have at least 75% confidence (strong rules)
##   - rules must have at least 2 items
## (The assignment and the print were fused on one line and the threshold
## notes were bare prose; both are syntax errors in R.)
bankrules <- apriori(
  bank_full,
  parameter = list(support = 0.025, confidence = 0.75, minlen = 2)
)
bankrules

## Display the first 10 association rules from bankrules
inspect(bankrules[1:10])

## Mine again, requiring at least 3 items per rule
bankrules <- apriori(
  bank_full,
  parameter = list(support = 0.025, confidence = 0.75, minlen = 3)
)
bankrules

inspect(bankrules[1:10])

## Order the rules by confidence, strongest first. (The typographic quotes
## that were around the string arguments in this section are not valid R
## string delimiters and have been replaced with plain double quotes.)
bankrules <- sort(bankrules, by = "confidence", decreasing = TRUE)

## Re-order by lift. arules' sort() defaults to decreasing = TRUE, so the
## highest-lift rule comes first; it is spelled out here because the pruning
## below depends on that order.
bankrules.sorted <- sort(bankrules, by = "lift", decreasing = TRUE)

## subset.matrix[i, j] is TRUE when the items of rule i are a subset of the
## items of rule j. sparse = FALSE returns a base logical matrix; assigning
## NA into the default sparse matrix is coerced to TRUE, which would mark
## the whole lower triangle as subset hits instead of blanking it.
subset.matrix <- is.subset(bankrules.sorted, bankrules.sorted, sparse = FALSE)

## Keep only the upper triangle (i < j): each rule is compared solely with
## rules that rank above it by lift, and never with itself.
subset.matrix[lower.tri(subset.matrix, diag = TRUE)] <- FALSE

## A rule is redundant when at least one higher-lift rule is a subset of it.
## The earlier `>= 100` cut-off only had an effect because of the coerced
## lower triangle.
redundant <- colSums(subset.matrix) >= 1

## Positions of the redundant rules (empty when nothing is redundant)
which(redundant)

## Keep only the non-redundant rules
bankrules.pruned <- bankrules.sorted[!redundant]

## Show every rule that survived pruning
inspect(bankrules.pruned)

##Displays all the remaining rules after pruning

## Sort the rules by lift, highest first, and show the top 6.
## (The sort and the inspect were fused on one line, and the string
## arguments used typographic quotes; both are syntax errors in R.)
bankrules <- sort(bankrules, by = "lift", decreasing = TRUE)
inspect(bankrules[1:6])

## summary() provides overview statistics:
##   - total number of rules found
##   - distribution of rule lengths (items in antecedent + consequent)
##   - summary statistics for support, confidence, coverage, lift and count
##   - quartiles and mean values for each metric
summary(bankrules)

## The 5 rules with the highest confidence (arules' sort() orders in
## decreasing order by default)
inspect(sort(bankrules, by = "confidence")[1:5])

## The 5 rules with the highest lift
inspect(sort(bankrules, by = "lift", decreasing = TRUE)[1:5])

## Scatter plots of support against lift, shaded by confidence, for all
## rules and for the pruned rule set
plot(bankrules, measure = c("support", "lift"), shading = "confidence")

plot(bankrules.pruned, measure = c("support", "lift"), shading = "confidence")

##This creates a scatter plot visualization of your ##association rules using the arulesViz package. Here’s what ##this plot shows: ##Plot Elements:

##X-axis: Support (how frequently the rule appears in your ##data) ##Y-axis: Lift (how much stronger the association is compared ##to random) ##Color/Shading: Confidence (how reliable each rule is) ##Each point: Represents one association rule


## R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see <http://rmarkdown.rstudio.com>.

When you click the **Knit** button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

## Packages (one statement per line; several calls fused on one line is a
## syntax error in R)
library(mlbench)
library(readr)
library(data.table)
library(stats)
library(arules)
library(arulesViz)
library(kernlab)
library(Matrix)

## Load the raw data.
## NOTE(review): this absolute path only resolves on the original author's
## machine -- point it at your local copy of the CSV.
bank_full_1 <- read.csv("/Users/maxineharlemon/AIOpt/bank_full_2.csv")

## Take a quick look at the data: summary statistics and the top 5 rows.
## These now read bank_full_1, because bank_full does not exist yet at this
## point in the script.
summary(bank_full_1)
head(bank_full_1, n = 5)

## Extract the 8 columns used in this analysis. The selection is applied
## once: repeating it on the already-reduced 8-column frame would request
## column 11 and fail with "undefined columns selected".
bank_full <- bank_full_1[, c(1, 2, 3, 4, 6, 7, 8, 11)]

## More in-depth information: column types and first values
str(bank_full)

## Plotting packages. library() fails loudly if one is missing, whereas
## require() only returns FALSE.
library(lattice)
library(ggplot2)

## ---- Univariate distributions: one bar chart per variable ----
ggplot(bank_full, aes(x = age)) + geom_bar()

ggplot(bank_full, aes(x = job)) + geom_bar()

ggplot(bank_full, aes(x = marital)) + geom_bar()

ggplot(bank_full, aes(x = education)) + geom_bar()

ggplot(bank_full, aes(x = housing)) + geom_bar()

ggplot(bank_full, aes(x = loan)) + geom_bar()

ggplot(bank_full, aes(x = month)) + geom_bar()

## ---- Distributions split by job (one panel per job) ----
## (fill = "skyblue" used typographic quotes, which R cannot parse.)
ggplot(bank_full, aes(age)) + geom_bar(fill = "skyblue") + facet_wrap(~job)

ggplot(bank_full, aes(education)) + geom_bar(fill = "skyblue") + facet_wrap(~job)

ggplot(bank_full, aes(marital)) + geom_bar(fill = "skyblue") + facet_wrap(~job)

## ---- Distributions split by personal-loan status ----
ggplot(bank_full, aes(housing)) + geom_bar(fill = "skyblue") + facet_wrap(~loan)

ggplot(bank_full, aes(marital)) + geom_bar(fill = "skyblue") + facet_wrap(~loan)

ggplot(bank_full, aes(job)) + geom_bar(fill = "skyblue") + facet_wrap(~loan)

ggplot(bank_full, aes(education)) + geom_bar(fill = "skyblue") + facet_wrap(~loan)

## ---- Account balance per job ----
## The assignment and the first plot were fused on one line (a syntax
## error); they are now separate statements.
balance <- ggplot(bank_full, aes(factor(job), balance))

## Balance with outliers drawn
balance + geom_boxplot()

## Balance without outliers, zoomed to the 10th-90th percentile of balance.
## coord_cartesian() zooms the view only; scale_y_continuous(limits = ...)
## would drop the rows outside the limits before the box statistics are
## computed and so distort the medians and quartiles.
balance +
  geom_boxplot(outlier.shape = NA) +
  coord_cartesian(ylim = quantile(bank_full$balance, c(0.1, 0.9)))

##Visualizing of balance without outliers


## Mine association rules. Thresholds:
##   - items/itemsets must appear in at least 2.5% of transactions
##   - rules must have at least 75% confidence (strong rules)
##   - rules must have at least 2 items
bankrules <- apriori(
  bank_full,
  parameter = list(support = 0.025, confidence = 0.75, minlen = 2)
)
bankrules

## First 10 association rules, in the order apriori() returned them
inspect(bankrules[seq_len(10)])

## Mine again, this time requiring at least 3 items per rule
bankrules <- apriori(
  bank_full,
  parameter = list(support = 0.025, confidence = 0.75, minlen = 3)
)
bankrules

inspect(bankrules[seq_len(10)])


## Order the rules by confidence, strongest first.
bankrules <- sort(bankrules, by = "confidence", decreasing = TRUE)

## Re-order by lift. arules' sort() defaults to decreasing = TRUE, so the
## highest-lift rule comes first; it is spelled out here because the pruning
## below depends on that order.
bankrules.sorted <- sort(bankrules, by = "lift", decreasing = TRUE)

## subset.matrix[i, j] is TRUE when the items of rule i are a subset of the
## items of rule j. sparse = FALSE returns a base logical matrix; assigning
## NA into the default sparse matrix is coerced to TRUE, which would mark
## the whole lower triangle as subset hits instead of blanking it.
subset.matrix <- is.subset(bankrules.sorted, bankrules.sorted, sparse = FALSE)

## Keep only the upper triangle (i < j): each rule is compared solely with
## rules that rank above it by lift, and never with itself.
subset.matrix[lower.tri(subset.matrix, diag = TRUE)] <- FALSE

## A rule is redundant when at least one higher-lift rule is a subset of it.
## The earlier `>= 100` cut-off only had an effect because of the coerced
## lower triangle.
redundant <- colSums(subset.matrix) >= 1

## Positions of the redundant rules (empty when nothing is redundant)
which(redundant)

## Keep only the non-redundant rules
bankrules.pruned <- bankrules.sorted[!redundant]

## Show every rule that survived pruning
inspect(bankrules.pruned)

##Displays all the remaining rules after pruning

## Sort the rules by lift, highest first, and show the top 6
bankrules <- sort(
  bankrules,
  by = "lift",
  decreasing = TRUE
)
inspect(bankrules[seq_len(6)])

## summary() provides overview statistics:
##   - total number of rules found
##   - distribution of rule lengths (items in antecedent + consequent)
##   - summary statistics for support, confidence, coverage, lift and count
##   - quartiles and mean values for each metric
summary(bankrules)

## Five rules that come first when sorting by confidence with sort()'s
## default ordering
rules_by_confidence <- sort(bankrules, by = "confidence")
inspect(rules_by_confidence[seq_len(5)])

## Five rules with the highest lift
rules_by_lift <- sort(bankrules, by = "lift", decreasing = TRUE)
inspect(rules_by_lift[seq_len(5)])

## Scatter plots of support against lift, shaded by confidence, for all
## rules and for the pruned rule set
plot(
  bankrules,
  measure = c("support", "lift"),
  shading = "confidence"
)

plot(
  bankrules.pruned,
  measure = c("support", "lift"),
  shading = "confidence"
)


##This creates a scatter plot visualization of your ##association rules using the arulesViz package. Here's what ##this plot shows:
##Plot Elements:

##X-axis: Support (how frequently the rule appears in your ##data)
##Y-axis: Lift (how much stronger the association is compared ##to random)
##Color/Shading: Confidence (how reliable each rule is)
##Each point: Represents one association rule

Project_250617

2025-06-18

Items/itemsets must appear in at least 2.5% of transactions

Rules must have at least 75% confidence (strong rules)

Rules must have at least 2 items

provides overview statistics: