I have installed two new packages and loaded them with library(). 'arules' is used for mining association rules and frequent itemsets. 'arulesViz' is used for visualizing association rules and frequent itemsets.
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
##
## recode
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(arulesViz)
# Remove three columns (short_description_en, justification_en, date_end)
# before mining, then list the remaining column names.
smalldf <- select(
  smalldf,
  -c(short_description_en, justification_en, date_end)
)
colnames(smalldf)
## [1] "category" "states_name_en" "region_en" "unique_number"
## [5] "id_no" "rev_bis" "name_en" "date_inscribed"
## [9] "secondary_dates" "danger" "danger_list" "longitude"
## [13] "latitude" "area_hectares" "criteria_txt" "category_short"
## [17] "iso_code" "udnp_code" "transboundary"
# First attempt: coerce the data frame to transactions as-is and print the
# result (nothing is assigned here).
transactions(smalldf)
## Warning: Column(s) 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
## 17, 18, 19 not logical or factor. Applying default discretization (see '?
## discretizeDF').
## Warning in discretize(x = c(1L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, : The calculated breaks are: 0, 0, 0, 1
## Only unique breaks are used reducing the number of intervals. Look at ? discretize for details.
## Warning in discretize(x = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, : The calculated breaks are: 0, 0, 0, 1
## Only unique breaks are used reducing the number of intervals. Look at ? discretize for details.
## transactions in sparse format with
## 45 transactions (rows) and
## 228 items (columns)
# Look up the names of a hand-picked subset of columns by position.
colnames(smalldf)[c(1:4, 8, 11, 12)]
## [1] "category" "states_name_en" "region_en" "unique_number"
## [5] "date_inscribed" "danger_list" "longitude"
# Recode two columns as logical flags, then build the transactions object.
# NOTE(review): the summary of `trans` lists the item `date_inscribed` in all
# 45 transactions, so `date_inscribed > 0` looks constant for this data --
# confirm that this flag is what was intended.
smalldf <- mutate(
  smalldf,
  danger = danger > 0,
  date_inscribed = date_inscribed > 0
)
trans <- transactions(smalldf)
## Warning: Column(s) 1, 2, 3, 4, 5, 6, 7, 9, 11, 12, 13, 14, 15, 16, 17, 18, 19
## not logical or factor. Applying default discretization (see '? discretizeDF').
## Warning in discretize(x = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, : The calculated breaks are: 0, 0, 0, 1
## Only unique breaks are used reducing the number of intervals. Look at ? discretize for details.
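These messages are warnings rather than errors: columns that are neither logical nor factor are discretized by default. We can convert such columns into factors (or logical values) for analysis.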
# Coerce `df` to transactions with as() and print the result.
# NOTE(review): this call uses `df`, not `smalldf` -- confirm that is intended.
as(df,"transactions")
## Warning: Column(s) 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17,
## 18, 19, 20, 21, 22 not logical or factor. Applying default discretization (see
## '? discretizeDF').
## Warning in discretize(x = c(1L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, : The calculated breaks are: 0, 0, 0, 1
## Only unique breaks are used reducing the number of intervals. Look at ? discretize for details.
## Warning in discretize(x = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, : The calculated breaks are: 0, 0, 0, 1
## Only unique breaks are used reducing the number of intervals. Look at ? discretize for details.
## transactions in sparse format with
## 1121 transactions (rows) and
## 3429 items (columns)
# Overview of the transactions object: dimensions, density, most frequent
# items and the distribution of transaction lengths.
summary(trans)
## transactions as itemMatrix in sparse format with
## 45 rows (elements/itemsets/transactions) and
## 226 columns (items) and a density of 0.07984267
##
## most frequent items:
## date_inscribed transboundary=[0,1] secondary_dates= danger_list=
## 45 45 42 39
## rev_bis= (Other)
## 36 605
##
## element (itemset/transaction) length distribution:
## sizes
## 17 18 19
## 1 41 3
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 17.00 18.00 18.00 18.04 18.00 19.00
##
## includes extended item information - examples:
## labels variables levels
## 1 category=Cultural category Cultural
## 2 category=Mixed category Mixed
## 3 category=Natural category Natural
##
## includes extended transaction information - examples:
## transactionID
## 1 1
## 2 2
## 3 3
# List the item labels, one per column of the transactions object.
colnames(trans)
## [1] "category=Cultural"
## [2] "category=Mixed"
## [3] "category=Natural"
## [4] "states_name_en=Argentina"
## [5] "states_name_en=Australia"
## [6] "states_name_en=Brazil"
## [7] "states_name_en=Cambodia"
## [8] "states_name_en=Croatia"
## [9] "states_name_en=Egypt"
## [10] "states_name_en=Ethiopia"
## [11] "states_name_en=Finland,Sweden"
## [12] "states_name_en=France"
## [13] "states_name_en=Germany"
## [14] "states_name_en=Holy See,Italy"
## [15] "states_name_en=India"
## [16] "states_name_en=Iran (Islamic Republic of)"
## [17] "states_name_en=Italy"
## [18] "states_name_en=Italy,Switzerland"
## [19] "states_name_en=Japan"
## [20] "states_name_en=Lao People's Democratic Republic"
## [21] "states_name_en=Lebanon"
## [22] "states_name_en=Mali"
## [23] "states_name_en=Malta"
## [24] "states_name_en=Mexico"
## [25] "states_name_en=Myanmar"
## [26] "states_name_en=Namibia"
## [27] "states_name_en=Oman"
## [28] "states_name_en=Poland"
## [29] "states_name_en=Poland,Ukraine"
## [30] "states_name_en=Republic of Korea"
## [31] "states_name_en=Russian Federation"
## [32] "states_name_en=Senegal"
## [33] "states_name_en=Spain"
## [34] "states_name_en=Switzerland"
## [35] "states_name_en=Syrian Arab Republic"
## [36] "states_name_en=United Kingdom of Great Britain and Northern Ireland"
## [37] "states_name_en=United States of America"
## [38] "states_name_en=Uzbekistan"
## [39] "states_name_en=Viet Nam"
## [40] "region_en=Africa"
## [41] "region_en=Arab States"
## [42] "region_en=Asia and the Pacific"
## [43] "region_en=Europe and North America"
## [44] "region_en=Latin America and the Caribbean"
## [45] "unique_number=[12,1.05e+03)"
## [46] "unique_number=[1.05e+03,1.51e+03)"
## [47] "unique_number=[1.51e+03,2.32e+03]"
## [48] "id_no=[9,374)"
## [49] "id_no=[374,1.11e+03)"
## [50] "id_no=[1.11e+03,1.53e+03]"
## [51] "rev_bis="
## [52] "rev_bis=bis"
## [53] "rev_bis=Bis"
## [54] "rev_bis=rev"
## [55] "rev_bis=Rev"
## [56] "name_en=<i>Aflaj</i> Irrigation Systems of Oman"
## [57] "name_en=Agra Fort"
## [58] "name_en=Ancient City of Aleppo"
## [59] "name_en=Ancient City of Bosra"
## [60] "name_en=Brazilian Atlantic Islands: Fernando de Noronha and Atol das Rocas Reserves"
## [61] "name_en=Byblos"
## [62] "name_en=Cerrado Protected Areas: Chapada dos Veadeiros and Emas National Parks"
## [63] "name_en=Chaco Culture"
## [64] "name_en=City of Valletta"
## [65] "name_en=Cultural Landscape of the Serra de Tramuntana"
## [66] "name_en=El Pinacate and Gran Desierto de Altar Biosphere Reserve"
## [67] "name_en=Ensemble of the Novodevichy Convent"
## [68] "name_en=Genoa: <i>Le Strade Nuove</i> and the system of the<i> Palazzi dei Rolli</i>"
## [69] "name_en=High Coast / Kvarken Archipelago"
## [70] "name_en=Historic Centre of Rome, the Properties of the Holy See in that City Enjoying Extraterritorial Rights and San Paolo Fuori le Mura"
## [71] "name_en=Historic Villages of Shirakawa-go and Gokayama"
## [72] "name_en=Hoi An Ancient Town"
## [73] "name_en=Hwaseong Fortress"
## [74] "name_en=Iguaçu National Park"
## [75] "name_en=Los Glaciares National Park"
## [76] "name_en=Mantua and Sabbioneta"
## [77] "name_en=Monastic Island of Reichenau"
## [78] "name_en=Old City of Dubrovnik"
## [79] "name_en=Old Town of Cáceres"
## [80] "name_en=Palace of Westminster and Westminster Abbey including Saint Margaret’s Church"
## [81] "name_en=Pont du Gard (Roman Aqueduct)"
## [82] "name_en=Pyu Ancient Cities"
## [83] "name_en=Rhaetian Railway in the Albula / Bernina Landscapes"
## [84] "name_en=Royal Monastery of Santa María de Guadalupe"
## [85] "name_en=Saloum Delta"
## [86] "name_en=Samarkand – Crossroad of Cultures"
## [87] "name_en=Sheikh Safi al-din Khānegāh and Shrine Ensemble in Ardabil"
## [88] "name_en=Simien National Park"
## [89] "name_en=Swiss Tectonic Arena Sardona"
## [90] "name_en=Sydney Opera House"
## [91] "name_en=Tchogha Zanbil"
## [92] "name_en=Temple Zone of Sambor Prei Kuk, Archaeological Site of Ancient Ishanapura"
## [93] "name_en=Three Castles, Defensive Wall and Ramparts of the Market-Town of Bellinzona"
## [94] "name_en=Tomb of Askia"
## [95] "name_en=Town of Luang Prabang"
## [96] "name_en=Twyfelfontein or /Ui-//aes"
## [97] "name_en=Wadi Al-Hitan (Whale Valley)"
## [98] "name_en=Willandra Lakes Region"
## [99] "name_en=Wooden <em>Tserkvas</em> of the Carpathian Region in Poland and Ukraine"
## [100] "name_en=Wooden Churches of Southern Małopolska"
## [101] "date_inscribed"
## [102] "secondary_dates="
## [103] "secondary_dates=1990"
## [104] "secondary_dates=1994"
## [105] "secondary_dates=2006"
## [106] "danger"
## [107] "danger_list="
## [108] "danger_list=P 1991-1998"
## [109] "danger_list=P 1996-2017"
## [110] "danger_list=P 1999-2001"
## [111] "danger_list=Y 2012"
## [112] "danger_list=Y 2013"
## [113] "longitude=[-114,9.05)"
## [114] "longitude=[9.05,37.3)"
## [115] "longitude=[37.3,151]"
## [116] "latitude=[-50,25.8)"
## [117] "latitude=[25.8,39.7)"
## [118] "latitude=[39.7,63.3]"
## [119] "area_hectares=[0,12.1)"
## [120] "area_hectares=[12.1,1.45e+03)"
## [121] "area_hectares=[1.45e+03,7.27e+05]"
## [122] "criteria_txt=(i)"
## [123] "criteria_txt=(i)(ii)(iii)(iv)(vi)"
## [124] "criteria_txt=(i)(ii)(iv)"
## [125] "criteria_txt=(i)(iii)(iv)"
## [126] "criteria_txt=(i)(iii)(vi)"
## [127] "criteria_txt=(i)(iv)(vi)"
## [128] "criteria_txt=(i)(vi)"
## [129] "criteria_txt=(ii)(iii)"
## [130] "criteria_txt=(ii)(iii)(iv)"
## [131] "criteria_txt=(ii)(iii)(vi)"
## [132] "criteria_txt=(ii)(iv)"
## [133] "criteria_txt=(ii)(iv)(v)"
## [134] "criteria_txt=(ii)(v)"
## [135] "criteria_txt=(iii)"
## [136] "criteria_txt=(iii)(iv)"
## [137] "criteria_txt=(iii)(iv)(v)"
## [138] "criteria_txt=(iii)(iv)(vi)"
## [139] "criteria_txt=(iii)(v)"
## [140] "criteria_txt=(iii)(viii)"
## [141] "criteria_txt=(iv)"
## [142] "criteria_txt=(iv)(v)"
## [143] "criteria_txt=(iv)(vi)"
## [144] "criteria_txt=(ix)(x)"
## [145] "criteria_txt=(v)"
## [146] "criteria_txt=(vii)(ix)(x)"
## [147] "criteria_txt=(vii)(viii)"
## [148] "criteria_txt=(vii)(viii)(x)"
## [149] "criteria_txt=(vii)(x)"
## [150] "criteria_txt=(viii)"
## [151] "category_short=C"
## [152] "category_short=M"
## [153] "category_short=N"
## [154] "iso_code=ar"
## [155] "iso_code=au"
## [156] "iso_code=br"
## [157] "iso_code=ch"
## [158] "iso_code=de"
## [159] "iso_code=eg"
## [160] "iso_code=es"
## [161] "iso_code=et"
## [162] "iso_code=fi,se"
## [163] "iso_code=fr"
## [164] "iso_code=gb"
## [165] "iso_code=hr"
## [166] "iso_code=in"
## [167] "iso_code=ir"
## [168] "iso_code=it"
## [169] "iso_code=it,ch"
## [170] "iso_code=jp"
## [171] "iso_code=kh"
## [172] "iso_code=kr"
## [173] "iso_code=la"
## [174] "iso_code=lb"
## [175] "iso_code=ml"
## [176] "iso_code=mm"
## [177] "iso_code=mt"
## [178] "iso_code=mx"
## [179] "iso_code=na"
## [180] "iso_code=om"
## [181] "iso_code=pl"
## [182] "iso_code=pl,ua"
## [183] "iso_code=ru"
## [184] "iso_code=sn"
## [185] "iso_code=sy"
## [186] "iso_code=us"
## [187] "iso_code=uz"
## [188] "iso_code=va,it"
## [189] "iso_code=vn"
## [190] "udnp_code=arg"
## [191] "udnp_code=aus"
## [192] "udnp_code=bra"
## [193] "udnp_code=che"
## [194] "udnp_code=deu"
## [195] "udnp_code=egy"
## [196] "udnp_code=esp"
## [197] "udnp_code=eth"
## [198] "udnp_code=fin,swe"
## [199] "udnp_code=fra"
## [200] "udnp_code=gbr"
## [201] "udnp_code=hrv"
## [202] "udnp_code=ind"
## [203] "udnp_code=irn"
## [204] "udnp_code=ita"
## [205] "udnp_code=ita,che"
## [206] "udnp_code=jpn"
## [207] "udnp_code=khm"
## [208] "udnp_code=kor"
## [209] "udnp_code=lao"
## [210] "udnp_code=lbn"
## [211] "udnp_code=mex"
## [212] "udnp_code=mli"
## [213] "udnp_code=mlt"
## [214] "udnp_code=mmr"
## [215] "udnp_code=nam"
## [216] "udnp_code=omn"
## [217] "udnp_code=pol"
## [218] "udnp_code=pol,ukr"
## [219] "udnp_code=rus"
## [220] "udnp_code=sen"
## [221] "udnp_code=syr"
## [222] "udnp_code=usa"
## [223] "udnp_code=uzb"
## [224] "udnp_code=vat,ita"
## [225] "udnp_code=vnm"
## [226] "transboundary=[0,1]"
# Show the items contained in the first three transactions.
inspect(trans[1:3])
## items transactionID
## [1] {category=Cultural,
## states_name_en=Syrian Arab Republic,
## region_en=Arab States,
## unique_number=[12,1.05e+03),
## id_no=[9,374),
## rev_bis=,
## name_en=Ancient City of Aleppo,
## date_inscribed,
## secondary_dates=,
## danger,
## danger_list=Y 2013,
## longitude=[9.05,37.3),
## latitude=[25.8,39.7),
## area_hectares=[12.1,1.45e+03),
## criteria_txt=(iii)(iv),
## category_short=C,
## iso_code=sy,
## udnp_code=syr,
## transboundary=[0,1]} 1
## [2] {category=Cultural,
## states_name_en=Cambodia,
## region_en=Asia and the Pacific,
## unique_number=[1.51e+03,2.32e+03],
## id_no=[1.11e+03,1.53e+03],
## rev_bis=,
## name_en=Temple Zone of Sambor Prei Kuk, Archaeological Site of Ancient Ishanapura,
## date_inscribed,
## secondary_dates=,
## danger_list=,
## longitude=[37.3,151],
## latitude=[-50,25.8),
## area_hectares=[12.1,1.45e+03),
## criteria_txt=(ii)(iii)(vi),
## category_short=C,
## iso_code=kh,
## udnp_code=khm,
## transboundary=[0,1]} 2
## [3] {category=Cultural,
## states_name_en=Senegal,
## region_en=Africa,
## unique_number=[1.51e+03,2.32e+03],
## id_no=[1.11e+03,1.53e+03],
## rev_bis=,
## name_en=Saloum Delta,
## date_inscribed,
## secondary_dates=,
## danger_list=,
## longitude=[-114,9.05),
## latitude=[-50,25.8),
## area_hectares=[1.45e+03,7.27e+05],
## criteria_txt=(iii)(iv)(v),
## category_short=C,
## iso_code=sn,
## udnp_code=sen,
## transboundary=[0,1]} 3
# Plot the transactions object as an image.
image(trans)

# Bar chart restricted to the 20 most frequent items.
itemFrequencyPlot(trans, topN = 20)

# Vertical layout: coerce to tidLists, then peek at the top-left corner of
# its logical matrix form (first 10 items by first 5 transactions).
vertical <- as(trans, "tidLists")
as(vertical, "matrix")[seq_len(10), seq_len(5)]
## 1 2 3 4 5
## category=Cultural TRUE TRUE TRUE TRUE TRUE
## category=Mixed FALSE FALSE FALSE FALSE FALSE
## category=Natural FALSE FALSE FALSE FALSE FALSE
## states_name_en=Argentina FALSE FALSE FALSE FALSE FALSE
## states_name_en=Australia FALSE FALSE FALSE FALSE FALSE
## states_name_en=Brazil FALSE FALSE FALSE FALSE FALSE
## states_name_en=Cambodia FALSE TRUE FALSE FALSE FALSE
## states_name_en=Croatia FALSE FALSE FALSE FALSE FALSE
## states_name_en=Egypt FALSE FALSE FALSE FALSE FALSE
## states_name_en=Ethiopia FALSE FALSE FALSE FALSE FALSE
# Print the transactions object to confirm its dimensions before mining.
trans
## transactions in sparse format with
## 45 transactions (rows) and
## 226 items (columns)
# Mine frequent itemsets with Apriori. Only the target is set here; the
# parameter log printed by the call reports support 0.1 and maxlen 10.
its <- apriori(trans, parameter=list(target = "frequent"))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## NA 0.1 1 none FALSE TRUE 5 0.1 1
## maxlen target ext
## 10 frequent itemsets TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 4
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[226 item(s), 45 transaction(s)] done [0.00s].
## sorting and recoding items ... [29 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 10
## Warning in apriori(trans, parameter = list(target = "frequent")): Mining stopped
## (maxlen reached). Only patterns up to a length of 10 returned!
## done [0.00s].
## sorting transactions ... done [0.00s].
## writing ... [9451 set(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Print the itemset collection (shows how many itemsets were found).
its
## set of 9451 itemsets
# Show the first ten itemsets together with their quality measures.
inspect(head(its, n = 10))
## items support
## [1] {region_en=Latin America and the Caribbean} 0.1111111
## [2] {criteria_txt=(iii)(iv)} 0.1111111
## [3] {region_en=Arab States} 0.1111111
## [4] {category=Natural} 0.2000000
## [5] {category_short=N} 0.2000000
## [6] {region_en=Asia and the Pacific} 0.2666667
## [7] {area_hectares=[12.1,1.45e+03)} 0.3111111
## [8] {latitude=[39.7,63.3]} 0.3333333
## [9] {id_no=[374,1.11e+03)} 0.3333333
## [10] {area_hectares=[1.45e+03,7.27e+05]} 0.3333333
## transIdenticalToItemsets count
## [1] 0 5
## [2] 0 5
## [3] 0 5
## [4] 0 9
## [5] 0 9
## [6] 0 12
## [7] 0 14
## [8] 0 15
## [9] 0 15
## [10] 0 15
# Bar chart of how many frequent itemsets there are of each size.
tibble(`Itemset Size` = factor(size(its))) %>%
  ggplot(aes(x = `Itemset Size`)) +
  geom_bar()
# Mine association rules with minimum support 0.07 and minimum confidence 0.5.
rules <- apriori(trans, parameter = list(support = 0.07, confidence = 0.5))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 5 0.07 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 3
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[226 item(s), 45 transaction(s)] done [0.00s].
## sorting and recoding items ... [30 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 10
## Warning in apriori(trans, parameter = list(support = 0.07, confidence = 0.5)):
## Mining stopped (maxlen reached). Only patterns up to a length of 10 returned!
## done [0.00s].
## writing ... [70234 rule(s)] done [0.01s].
## creating S4 object ... done [0.01s].
# Show the first few rules with their quality measures.
inspect(head(rules))
## lhs rhs support confidence coverage lift count
## [1] {} => {category=Cultural} 0.7777778 0.7777778 1 1 35
## [2] {} => {category_short=C} 0.7777778 0.7777778 1 1 35
## [3] {} => {rev_bis=} 0.8000000 0.8000000 1 1 36
## [4] {} => {danger_list=} 0.8666667 0.8666667 1 1 39
## [5] {} => {secondary_dates=} 0.9333333 0.9333333 1 1 42
## [6] {} => {transboundary=[0,1]} 1.0000000 1.0000000 1 1 45
# Plot of all rules with an explicit jitter setting.
plot(rules,jitter = 1)
# Same rule set, shaded by rule order; no jitter is given here, and the
# call prints the jitter message recorded below.
plot(rules, shading = "order")
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.
# Graph visualisation of the first 12 rules only.
plot(head(rules, n = 12), method = "graph")