This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the **Knit** button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
You can also embed plots, for example:
Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.
###### Code developed by Ahmed Hossain (adapted for ATP W152 dataset)
#######
# ---- Setup ----
# Install the report-rendering packages only when they are missing, so that
# re-running (or knitting) the script does not reinstall them every time.
required_pkgs <- c("rmarkdown", "knitr")
is_installed <- vapply(
  required_pkgs, requireNamespace, logical(1),
  quietly = TRUE
)
missing_pkgs <- required_pkgs[!is_installed]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

# NOTE: the former rm(list = ls()) call was removed on purpose; it deleted
# every object in the workspace of whoever sourced this script.

getwd()
# All relative output files written further down (the *.csv exports) land in
# this directory.
# NOTE(review): machine-specific absolute path, and the apostrophe inside it is
# the typographic character (’) -- confirm it matches the real folder name.
setwd("C:/Users/asr171/OneDrive - Texas State University/Das, Subasish’s files - PewSurvey/W152/Arka/V3")

# Packages used by the analysis. base and stats are normally attached already;
# the calls are kept so the dependency list stays exactly as it was.
library(ggplot2)
library(base)
library(randomForest)
library(tree)
library(stats)
library(readr)
library(magrittr)
library(dplyr)
library(arules)
library(arulesViz)
library(RColorBrewer)
# ---- Load the survey data ----
# Read the comma-separated file with a header row into a data frame.
# NOTE(review): machine-specific absolute path, and the apostrophe inside it is
# the typographic character (’) -- confirm it matches the real folder name.
data1 <- read.csv(
  "C:/Users/asr171/OneDrive - Texas State University/Das, Subasish’s files - PewSurvey/W152/Arka/V3/ATP W152 Data_cleanedFin_AC.csv",
  header = TRUE,
  sep = ","
)

# Quick structural checks on the raw import: names, dimensions, column types.
colnames(data1)
dim(data1)
str(data1)
# ---- Convert text columns to factors ----
# Every character column becomes a factor; other columns are left untouched.
# across(where(...)) is the current dplyr spelling of the superseded
# mutate_if().
data2 <- data1 %>%
  mutate(across(where(is.character), as.factor))

str(data2)
colnames(data2)
# NOTE: attach(data2) was removed. The later steps reach the columns through
# data2 itself (e.g. subset(data2, DrCell == ...)), and attaching leaves stale
# copies of the columns on the search path.
summary(data2)
# ---- Baseline rule mining on the whole data set ----
# Coerce the data frame to arules transactions, leaving out column 14.
# NOTE(review): which variable sits in column 14 is not visible here -- confirm
# the positional index still matches the CSV layout.
data3 <- as(data2[, -14], "transactions")
class(data3)

# Mine association rules with apriori()'s default parameters.
rules <- apriori(data3)
summary(rules)
print(length(rules))

# options(digits = 2) changes numeric printing for the rest of the session,
# not only for the inspect() call below.
options(digits = 2)
# Show the 100 rules with the highest lift.
inspect(head(rules, n = 100, by = "lift"))
# ---- Visualise the baseline rules ----
# Default plot of all rules, with jitter switched off.
plot(rules, jitter = 0)
# Grouped view with k = 10 groups.
plot(rules, method = "grouped", control = list(k = 10))
# Graph view of the first 50 rules. head() is used instead of rules[1:50] so
# the call still works when fewer than 50 rules were mined.
plot(head(rules, n = 50), method = "graph")
# ---- Item frequency tables ----
itemLabels(data3)

# Top 20 items by absolute count, printed and exported.
items_abs <- itemFrequency(data3, type = "absolute")
a <- head(sort(items_abs, decreasing = TRUE), n = 20)
a
write.csv(a, "absolute_frequency.csv")

# Top 20 items by relative frequency, printed and exported.
items_rel <- itemFrequency(data3, type = "relative")
r <- head(sort(items_rel, decreasing = TRUE), n = 20)
r
write.csv(r, "relative_frequency.csv")
# ---- Item frequency plots ----
# Horizontal bar chart of the 20 most frequent items (absolute counts).
# par() is not restored afterwards, so the margins stay in effect for later
# base-graphics plots.
par(mar = c(3, 5, 2, 2) + 0.1)
itemFrequencyPlot(
  data3,
  topN = 20,
  cex.names = 0.8,
  las = 2,
  cex.axis = 0.9,
  cex.lab = 0.9,
  ylab = "",
  xlab = "Absolute frequency",
  main = "Absolute Item Frequency Plot (Top 20 items)",
  type = "absolute",
  col = brewer.pal(8, "Pastel2"),
  horiz = TRUE
)

# Vertical bar chart of the 20 largest item frequencies, in descending order.
# sort() is ascending, so tail() keeps the 20 largest and rev() flips them.
iF20 <- rev(tail(sort(itemFrequency(data3)), 20))
# Wide bottom margin for the perpendicular (las = 2) item labels.
par(mar = c(12, 4, 1, 1))
barplot(
  iF20,
  las = 2,
  cex.names = 0.9,
  cex.axis = 0.9,
  cex.lab = 0.9,
  xlab = "",
  ylab = "Relative frequency",
  main = "Relative Item Frequency Plot (Top 20 items)",
  col = brewer.pal(8, "Pastel2"),
  ylim = c(0, 1)
)
# ---- Subset: respondents answering "Major problem" ----
# Fixed seed so any randomised step further down is repeatable.
# NOTE(review): confirm which step actually draws random numbers.
set.seed(1234)

# Keep only the rows whose DrCell answer is "Major problem".
data4_major <- subset(data2, DrCell == "Major problem")
dim(data4_major)

# Drop columns 14 and 15 and coerce the rest to transactions.
# NOTE(review): presumably one of these is DrCell, which is constant within
# this subset -- confirm the positional indices against the CSV layout.
data5_major <- as(data4_major[, c(-14, -15)], "transactions")
class(data5_major)
itemLabels(data5_major)
summary(data5_major)
# ---- "Major problem" subset: 2-item rules ----
# minlen = maxlen = 2 limits mining to rules with exactly two items in total.
rules_major_2 <- apriori(
  data5_major,
  parameter = list(
    minlen = 2, maxlen = 2,
    supp = 0.05, conf = 0.3,
    target = "rules"
  )
)

# Keep rules with lift above 1 and order them from highest to lowest lift.
rules_major_2.sorted <- sort(
  subset(rules_major_2, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
inspect(rules_major_2.sorted)
write(
  rules_major_2.sorted,
  file = "result_Major_2itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)

# Default, grouped (k = 10) and graph views of the retained rules.
plot(rules_major_2.sorted, jitter = 0)
plot(rules_major_2.sorted, method = "grouped", control = list(k = 10))
plot(rules_major_2.sorted, method = "graph")
# ---- "Major problem" subset: 3-item and 4-item rules ----
# Same thresholds as the 2-item run; only the rule length changes.

# Rules with exactly three items.
rules_major_3 <- apriori(
  data5_major,
  parameter = list(
    minlen = 3, maxlen = 3,
    supp = 0.05, conf = 0.3,
    target = "rules"
  )
)
rules_major_3.sorted <- sort(
  subset(rules_major_3, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
# Print at most the top 100 rules. head() replaces [1:100], which fails when
# fewer than 100 rules survive the lift filter.
inspect(head(rules_major_3.sorted, n = 100))
write(
  rules_major_3.sorted,
  file = "result_Major_3itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)

# Rules with exactly four items.
rules_major_4 <- apriori(
  data5_major,
  parameter = list(
    minlen = 4, maxlen = 4,
    supp = 0.05, conf = 0.3,
    target = "rules"
  )
)
rules_major_4.sorted <- sort(
  subset(rules_major_4, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
inspect(head(rules_major_4.sorted, n = 100))
write(
  rules_major_4.sorted,
  file = "result_Major_4itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)
# ---- Subset: respondents answering "Minor problem" ----
# Keep only the rows whose DrCell answer is "Minor problem".
data4_minor <- subset(data2, DrCell == "Minor problem")
dim(data4_minor)

# Drop columns 14 and 15 and coerce the rest to transactions.
# NOTE(review): presumably one of these is DrCell, which is constant within
# this subset -- confirm the positional indices against the CSV layout.
data5_minor <- as(data4_minor[, c(-14, -15)], "transactions")
class(data5_minor)
itemLabels(data5_minor)
summary(data5_minor)
# ---- "Minor problem" subset: 2-item rules ----
# minlen = maxlen = 2 limits mining to rules with exactly two items in total.
rules_minor_2 <- apriori(
  data5_minor,
  parameter = list(
    minlen = 2, maxlen = 2,
    supp = 0.05, conf = 0.3,
    target = "rules"
  )
)

# Keep rules with lift above 1 and order them from highest to lowest lift.
rules_minor_2.sorted <- sort(
  subset(rules_minor_2, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
inspect(rules_minor_2.sorted)
write(
  rules_minor_2.sorted,
  file = "result_Minor_2itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)

# Default, grouped (k = 10) and graph views of the retained rules.
plot(rules_minor_2.sorted, jitter = 0)
plot(rules_minor_2.sorted, method = "grouped", control = list(k = 10))
plot(rules_minor_2.sorted, method = "graph")
# ---- "Minor problem" subset: 3-item and 4-item rules ----
# Same thresholds as the 2-item run; only the rule length changes.

# Rules with exactly three items.
rules_minor_3 <- apriori(
  data5_minor,
  parameter = list(
    minlen = 3, maxlen = 3,
    supp = 0.05, conf = 0.3,
    target = "rules"
  )
)
rules_minor_3.sorted <- sort(
  subset(rules_minor_3, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
# Print at most the top 100 rules. head() replaces [1:100], which fails when
# fewer than 100 rules survive the lift filter.
inspect(head(rules_minor_3.sorted, n = 100))
write(
  rules_minor_3.sorted,
  file = "result_Minor_3itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)

# Rules with exactly four items.
rules_minor_4 <- apriori(
  data5_minor,
  parameter = list(
    minlen = 4, maxlen = 4,
    supp = 0.05, conf = 0.3,
    target = "rules"
  )
)
rules_minor_4.sorted <- sort(
  subset(rules_minor_4, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
inspect(head(rules_minor_4.sorted, n = 100))
write(
  rules_minor_4.sorted,
  file = "result_Minor_4itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)
# ---- Subset: respondents answering "Not a problem" ----
# Keep only the rows whose DrCell answer is "Not a problem".
data4_not <- subset(data2, DrCell == "Not a problem")
dim(data4_not)

# Drop columns 14 and 15 and coerce the rest to transactions.
# NOTE(review): presumably one of these is DrCell, which is constant within
# this subset -- confirm the positional indices against the CSV layout.
data5_not <- as(data4_not[, c(-14, -15)], "transactions")
class(data5_not)
itemLabels(data5_not)
summary(data5_not)
# ---- "Not a problem" subset: 2-item rules ----
# minlen = maxlen = 2 limits mining to rules with exactly two items in total.
rules_not_2 <- apriori(
  data5_not,
  parameter = list(
    minlen = 2, maxlen = 2,
    supp = 0.05, conf = 0.3,
    target = "rules"
  )
)

# Keep rules with lift above 1 and order them from highest to lowest lift.
rules_not_2.sorted <- sort(
  subset(rules_not_2, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
inspect(rules_not_2.sorted)
write(
  rules_not_2.sorted,
  file = "result_NotProblem_2itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)

# Default, grouped (k = 10) and graph views of the retained rules.
plot(rules_not_2.sorted, jitter = 0)
plot(rules_not_2.sorted, method = "grouped", control = list(k = 10))
plot(rules_not_2.sorted, method = "graph")
# ---- "Not a problem" subset: 3-item and 4-item rules ----
# Same thresholds as the 2-item run; only the rule length changes.

# Rules with exactly three items.
rules_not_3 <- apriori(
  data5_not,
  parameter = list(
    minlen = 3, maxlen = 3,
    supp = 0.05, conf = 0.3,
    target = "rules"
  )
)
rules_not_3.sorted <- sort(
  subset(rules_not_3, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
# Print at most the top 100 rules. head() replaces [1:100], which fails when
# fewer than 100 rules survive the lift filter.
inspect(head(rules_not_3.sorted, n = 100))
write(
  rules_not_3.sorted,
  file = "result_NotProblem_3itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)

# Rules with exactly four items.
rules_not_4 <- apriori(
  data5_not,
  parameter = list(
    minlen = 4, maxlen = 4,
    supp = 0.05, conf = 0.3,
    target = "rules"
  )
)
rules_not_4.sorted <- sort(
  subset(rules_not_4, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
inspect(head(rules_not_4.sorted, n = 100))
write(
  rules_not_4.sorted,
  file = "result_NotProblem_4itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)
# ---- Full data set for outcome-targeted rules ----
# All rows, column 14 dropped, coerced to transactions. This is the same
# expression used earlier to build data3; DrCell stays in the data so its
# levels can be targeted as rule consequents.
data3_full <- as(data2[, -14], "transactions")
summary(data3_full)

# Fixed seed so any randomised step further down is repeatable.
# NOTE(review): confirm which step actually draws random numbers.
set.seed(1234)
# ---- Rules predicting DrCell = "Major problem": 2 items ----
# The appearance list fixes the right-hand side to the single item
# "DrCell=Major problem"; every other item may appear on the left only.
# NOTE(review): this run uses conf = 0.05 while the 3- and 4-item runs for the
# same outcome use conf = 0.1 -- confirm the difference is intended.
rules_pred_maj_2 <- apriori(
  data3_full,
  parameter = list(
    minlen = 2, maxlen = 2,
    supp = 0.001, conf = 0.05,
    target = "rules"
  ),
  appearance = list(default = "lhs", rhs = "DrCell=Major problem")
)

# Keep rules with lift above 1 and order them from highest to lowest lift.
rules_pred_maj_2.sorted <- sort(
  subset(rules_pred_maj_2, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
inspect(rules_pred_maj_2.sorted)
write(
  rules_pred_maj_2.sorted,
  file = "result_Pred_Major_2itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)

# Default, grouped (k = 10) and graph views of the retained rules.
plot(rules_pred_maj_2.sorted, jitter = 0)
plot(rules_pred_maj_2.sorted, method = "grouped", control = list(k = 10))
plot(rules_pred_maj_2.sorted, method = "graph")
# ---- Rules predicting DrCell = "Major problem": 3 and 4 items ----
# The right-hand side is fixed to "DrCell=Major problem"; all other items are
# restricted to the left-hand side.

# Rules with exactly three items.
rules_pred_maj_3 <- apriori(
  data3_full,
  parameter = list(
    minlen = 3, maxlen = 3,
    supp = 0.001, conf = 0.1,
    target = "rules"
  ),
  appearance = list(default = "lhs", rhs = "DrCell=Major problem")
)
rules_pred_maj_3.sorted <- sort(
  subset(rules_pred_maj_3, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
# Print at most the top 100 rules. head() replaces [1:100], which fails when
# fewer than 100 rules survive the lift filter.
inspect(head(rules_pred_maj_3.sorted, n = 100))
write(
  rules_pred_maj_3.sorted,
  file = "result_Pred_Major_3itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)

# Rules with exactly four items.
rules_pred_maj_4 <- apriori(
  data3_full,
  parameter = list(
    minlen = 4, maxlen = 4,
    supp = 0.001, conf = 0.1,
    target = "rules"
  ),
  appearance = list(default = "lhs", rhs = "DrCell=Major problem")
)
rules_pred_maj_4.sorted <- sort(
  subset(rules_pred_maj_4, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
inspect(head(rules_pred_maj_4.sorted, n = 100))
write(
  rules_pred_maj_4.sorted,
  file = "result_Pred_Major_4itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)
# ---- Rules predicting DrCell = "Minor problem": 2 items ----
# The appearance list fixes the right-hand side to the single item
# "DrCell=Minor problem"; every other item may appear on the left only.
# NOTE(review): this run uses conf = 0.05 while the 3- and 4-item runs for the
# same outcome use conf = 0.1 -- confirm the difference is intended.
rules_pred_min_2 <- apriori(
  data3_full,
  parameter = list(
    minlen = 2, maxlen = 2,
    supp = 0.001, conf = 0.05,
    target = "rules"
  ),
  appearance = list(default = "lhs", rhs = "DrCell=Minor problem")
)

# Keep rules with lift above 1 and order them from highest to lowest lift.
rules_pred_min_2.sorted <- sort(
  subset(rules_pred_min_2, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
inspect(rules_pred_min_2.sorted)
write(
  rules_pred_min_2.sorted,
  file = "result_Pred_Minor_2itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)

# Default, grouped (k = 10) and graph views of the retained rules.
plot(rules_pred_min_2.sorted, jitter = 0)
plot(rules_pred_min_2.sorted, method = "grouped", control = list(k = 10))
plot(rules_pred_min_2.sorted, method = "graph")
# ---- Rules predicting DrCell = "Minor problem": 3 and 4 items ----
# The right-hand side is fixed to "DrCell=Minor problem"; all other items are
# restricted to the left-hand side.

# Rules with exactly three items.
rules_pred_min_3 <- apriori(
  data3_full,
  parameter = list(
    minlen = 3, maxlen = 3,
    supp = 0.001, conf = 0.1,
    target = "rules"
  ),
  appearance = list(default = "lhs", rhs = "DrCell=Minor problem")
)
rules_pred_min_3.sorted <- sort(
  subset(rules_pred_min_3, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
# Print at most the top 100 rules. head() replaces [1:100], which fails when
# fewer than 100 rules survive the lift filter.
inspect(head(rules_pred_min_3.sorted, n = 100))
write(
  rules_pred_min_3.sorted,
  file = "result_Pred_Minor_3itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)

# Rules with exactly four items.
rules_pred_min_4 <- apriori(
  data3_full,
  parameter = list(
    minlen = 4, maxlen = 4,
    supp = 0.001, conf = 0.1,
    target = "rules"
  ),
  appearance = list(default = "lhs", rhs = "DrCell=Minor problem")
)
rules_pred_min_4.sorted <- sort(
  subset(rules_pred_min_4, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
inspect(head(rules_pred_min_4.sorted, n = 100))
write(
  rules_pred_min_4.sorted,
  file = "result_Pred_Minor_4itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)
# ---- Rules predicting DrCell = "Not a problem": 2 items ----
# The appearance list fixes the right-hand side to the single item
# "DrCell=Not a problem"; every other item may appear on the left only.
# NOTE(review): this run uses conf = 0.05 while the 3- and 4-item runs for the
# same outcome use conf = 0.1 -- confirm the difference is intended.
rules_pred_not_2 <- apriori(
  data3_full,
  parameter = list(
    minlen = 2, maxlen = 2,
    supp = 0.001, conf = 0.05,
    target = "rules"
  ),
  appearance = list(default = "lhs", rhs = "DrCell=Not a problem")
)

# Keep rules with lift above 1 and order them from highest to lowest lift.
rules_pred_not_2.sorted <- sort(
  subset(rules_pred_not_2, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
inspect(rules_pred_not_2.sorted)
write(
  rules_pred_not_2.sorted,
  file = "result_Pred_NotProblem_2itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)

# Default, grouped (k = 10) and graph views of the retained rules.
plot(rules_pred_not_2.sorted, jitter = 0)
plot(rules_pred_not_2.sorted, method = "grouped", control = list(k = 10))
plot(rules_pred_not_2.sorted, method = "graph")
# ---- Rules predicting DrCell = "Not a problem": 3 and 4 items ----
# The right-hand side is fixed to "DrCell=Not a problem"; all other items are
# restricted to the left-hand side.

# Rules with exactly three items.
rules_pred_not_3 <- apriori(
  data3_full,
  parameter = list(
    minlen = 3, maxlen = 3,
    supp = 0.001, conf = 0.1,
    target = "rules"
  ),
  appearance = list(default = "lhs", rhs = "DrCell=Not a problem")
)
rules_pred_not_3.sorted <- sort(
  subset(rules_pred_not_3, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
# Print at most the top 100 rules. head() replaces [1:100], which fails when
# fewer than 100 rules survive the lift filter.
inspect(head(rules_pred_not_3.sorted, n = 100))
write(
  rules_pred_not_3.sorted,
  file = "result_Pred_NotProblem_3itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)

# Rules with exactly four items.
rules_pred_not_4 <- apriori(
  data3_full,
  parameter = list(
    minlen = 4, maxlen = 4,
    supp = 0.001, conf = 0.1,
    target = "rules"
  ),
  appearance = list(default = "lhs", rhs = "DrCell=Not a problem")
)
rules_pred_not_4.sorted <- sort(
  subset(rules_pred_not_4, subset = lift > 1),
  by = "lift",
  decreasing = TRUE
)
inspect(head(rules_pred_not_4.sorted, n = 100))
write(
  rules_pred_not_4.sorted,
  file = "result_Pred_NotProblem_4itemset.csv",
  sep = ",",
  quote = TRUE,
  row.names = FALSE
)