## Rows: 541,909
## Columns: 8
## $ InvoiceNo <chr> "536365", "536365", "536365", "536365", "536365", "5363...
## $ StockCode <chr> "85123A", "71053", "84406B", "84029G", "84029E", "22752...
## $ Description <chr> "WHITE HANGING HEART T-LIGHT HOLDER", "WHITE METAL LANT...
## $ Quantity <dbl> 6, 6, 8, 6, 6, 2, 6, 6, 6, 32, 6, 6, 8, 6, 6, 3, 2, 3, ...
## $ InvoiceDate <dttm> 2010-12-01 08:26:00, 2010-12-01 08:26:00, 2010-12-01 0...
## $ UnitPrice <dbl> 2.55, 3.39, 2.75, 3.39, 3.39, 7.65, 4.25, 1.85, 1.85, 1...
## $ CustomerID <dbl> 17850, 17850, 17850, 17850, 17850, 17850, 17850, 17850,...
## $ Country <chr> "United Kingdom", "United Kingdom", "United Kingdom", "...
| Name | Piped data |
| Number of rows | 541909 |
| Number of columns | 8 |
| _______________________ | |
| Column type frequency: | |
| character | 4 |
| numeric | 3 |
| POSIXct | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| InvoiceNo | 0 | 1 | 6 | 7 | 0 | 25900 | 0 |
| StockCode | 0 | 1 | 1 | 12 | 0 | 4070 | 0 |
| Description | 1454 | 1 | 1 | 35 | 0 | 4211 | 0 |
| Country | 0 | 1 | 3 | 20 | 0 | 38 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| Quantity | 0 | 1.00 | 9.55 | 218.08 | -80995.00 | 1.00 | 3.00 | 10.00 | 80995 | ▁▁▇▁▁ |
| UnitPrice | 0 | 1.00 | 4.61 | 96.76 | -11062.06 | 1.25 | 2.08 | 4.13 | 38970 | ▁▇▁▁▁ |
| CustomerID | 135080 | 0.75 | 15287.69 | 1713.60 | 12346.00 | 13953.00 | 15152.00 | 16791.00 | 18287 | ▇▇▇▇▇ |
Variable type: POSIXct
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| InvoiceDate | 0 | 1 | 2010-12-01 08:26:00 | 2011-12-09 12:50:00 | 2011-07-19 17:17:00 | 23260 |
# Clean the raw transactions: keep complete rows with a positive quantity and
# derive the typed columns used by the later steps.
retail <- retail %>%
  # Remove rows with any missing value (the summary above reports NAs in
  # 'Description' and 'CustomerID')
  drop_na() %>%
  # Keep only rows with a positive 'Quantity'
  filter(Quantity > 0) %>%
  mutate(
    # Setting 'Description' and 'Country' as factors
    Description = as.factor(Description),
    Country = as.factor(Country),
    # Changing 'InvoiceNo' type to numeric
    InvoiceNo = as.numeric(InvoiceNo),
    # Extracting 'Date' and 'Time' from 'InvoiceDate'
    Date = as.Date(InvoiceDate),
    Time = as.factor(format(InvoiceDate, "%H:%M:%S"))
  )

# Number of retained rows per day of the week
retail %>%
  # The week is pinned to start on Monday so that codes 1..7 always line up
  # with the hard-coded Mon..Sun axis labels, whatever the session option
  # 'lubridate.week.start' is set to.
  ggplot(aes(wday(Date, week_start = 1))) +
  # geom_bar() counts rows per x value; geom_histogram(stat = "count") drew
  # the same bars but warned "Ignoring unknown parameters: binwidth, bins, pad".
  geom_bar(fill = "blue", colour = "blue") +
  labs(x = "Day of Week", y = "") +
  scale_x_continuous(
    breaks = 1:7,
    labels = c("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
  ) +
  theme_fivethirtyeight(base_size = 14)
# Number of retained rows per hour of the day
retail %>%
  # 'Time' is only a formatted copy of 'InvoiceDate', so the hour is read from
  # the timestamp directly instead of re-parsing the factor with hms().
  ggplot(aes(hour(InvoiceDate))) +
  # geom_bar() counts rows per x value; geom_histogram(stat = "count") drew
  # the same bars but warned "Ignoring unknown parameters: binwidth, bins, pad".
  geom_bar(fill = "blue", colour = "blue") +
  labs(x = "Hour of Day", y = "") +
  theme_fivethirtyeight(base_size = 12)
# Filter out duplicates: keep the first row of every (InvoiceNo, Description)
# pair. distinct() compares the two columns directly, so no pasted helper key
# has to be built for the comparison.
retail <- retail %>%
  distinct(InvoiceNo, Description, .keep_all = TRUE) %>%
  # Drop the 'InNo_Desc' helper column if the data still carries one
  select(-any_of("InNo_Desc"))
# Build the invoice-by-product matrix and coerce it to a binaryRatingMatrix
ratings_matrix <- retail %>%
  # Keep the two identifying columns and flag every purchased pair with a 1
  transmute(InvoiceNo, Description, value = 1) %>%
  # One column per 'Description'; pairs that never occur are filled with 0
  spread(key = Description, value = value, fill = 0) %>%
  # The invoice number is dropped so only the product columns remain
  select(-InvoiceNo) %>%
  as.matrix() %>%
  as("binaryRatingMatrix")

We create a training set comprising 80% of the data. The remaining data will be the test data set. Additionally, the test scheme will use all but one randomly selected item (given = -1).
## Warning in .local(data, ...): The following users do not have enough items
## leaving no given items: 5, 7, 10, 15, 26, 33, 38, 52, 62, 82, 95, 101, 108,
## 110, 111, 112, 139, 147, 157, 161, 165, 180, 207, 208, 215, 219, 229, 234, 235,
## 236, 241, 246, 247, 280, 286, 287, 305, 314, 341, 343, 373, 405, 406, 421, 442,
## 446, 462, 481, 482, 499, 504, 505, 512, 514, 528, 540, 553, 566, 568, 603, 608,
## 635, 646, 651, 652, 655, 662, 663, 672, 698, 727, 732, 751, 764, 779, 782, 793,
## 794, 795, 825, 827, 836, 844, 866, 876, 888, 902, 908, 950, 952, 956, 968, 969,
## 970, 978, 995, 1006, 1035, 1067, 1070, 1081, 1086, 1098, 1101, 1102, 1104, 1125,
## 1134, 1138, 1155, 1189, 1200, 1206, 1225, 1226, 1238, 1244, 1247, 1261, 1274,
## 1306, 1308, 1318, 1319, 1323, 1338, 1339, 1348, 1352, 1356, 1359, 1365, 1371,
## 1375, 1384, 1399, 1405, 1417, 1421, 1460, 1461, 1462, 1481, 1488, 1490, 1501,
## 1520, 1530, 1537, 1547, 1561, 1629, 1632, 1633, 1641, 1651, 1684, 1697, 1710,
## 1729, 1760, 1776, 1787, 1793, 1800, 1832, 1838, 1840, 1841, 1848, 1867, 1891,
## 1895, 1899, 1903, 1904, 1905, 1910, 1931, 1935, 1943, 2007, 2013, 2030, 2042,
## 2064, 2078, 2103, 2141, 2164, 2170, 2178, 2179, 2193, 2203, 2219, 2297, 2306,
## 2326, 2338, 2353, 2356, 2380, 2386, 2387, 2425, 2436, 2441, 2444, 2451, 2463,
## 2480, 2481, 2541, 2544, 2573, 2619, 2647, 2661, 2673, 2675, 2688, 2722, 2731,
## 2753, 2755, 2758, 2760, 2769, 2813, 2841, 2843, 2860, 2884, 2885, 2903, 2919,
## 2921, 2922, 2937, 2946, 2951, 2997, 3002, 3004, 3033, 3039, 3040, 3041, 3045,
## 3048, 3073, 3076, 3093, 3106, 3107, 3111, 3114, 3160, 3161, 3179, 3183, 3191,
## 3193, 3195, 3226, 3250, 3261, 3275, 3281, 3319, 3325, 3340, 3350, 3377, 3389,
## 3394, 3405, 3436, 3437, 3439, 3444, 3450, 3462, 3466, 3471, 3476, 3481, 3488,
## 3578, 3593, 3614, 3621, 3628, 3644, 3651, 3657, 3700, 3706, 3710, 3731, 3739,
## 3756, 3763, 3801, 3810, 3811, 3816, 3817, 3823, 3826, 3839, 3850, 3883, 3888,
## 3918, 3924, 3934, 3947, 3972, 3978, 3982, 3986, 4007, 4021, 4022, 4036, 4052,
## 4058, 4059, 4075, 4099, 4195, 4196, 4211, 4258, 4262, 4270, 4291, 4306, 4319,
## 4328, 4382, 4387, 4395, 4396, 4414, 4423, 4428, 4450, 4509, 4515, 4529, 4536,
## 4587, 4588, 4591, 4594, 4612, 4649, 4729, 4734, 4737, 4739, 4740, 4743, 4747,
## 4748, 4750, 4754, 4762, 4765, 4795, 4803, 4812, 4824, 4825, 4831, 4853, 4859,
## 4875, 4891, 4909, 4910, 4934, 4935, 5026, 5039, 5042, 5043, 5045, 5050, 5058,
## 5096, 5101, 5103, 5116, 5130, 5136, 5151, 5187, 5193, 5217, 5227, 5230, 5244,
## 5251, 5258, 5320, 5321, 5355, 5397, 5398, 5426, 5434, 5441, 5448, 5459, 5482,
## 5494, 5507, 5515, 5516, 5545, 5552, 5559, 5561, 5569, 5595, 5614, 5643, 5649,
## 5651, 5654, 5694, 5704, 5712, 5715, 5716, 5719, 5721, 5735, 5736, 5745, 5749,
## 5764, 5770, 5777, 5785, 5787, 5792, 5832, 5835, 5838, 5862, 5868, 5878, 5883,
## 5890, 5894, 5896, 5900, 5906, 5923, 5937, 5942, 5967, 5998, 6009, 6021, 6053,
## 6070, 6090, 6094, 6105, 6114, 6115, 6126, 6176, 6233, 6234, 6252, 6274, 6276,
## 6286, 6293, 6302, 6305, 6325, 6327, 6328, 6355, 6356, 6375, 6382, 6383, 6400,
## 6424, 6440, 6448, 6475, 6483, 6500, 6502, 6504, 6513, 6517, 6519, 6521, 6526,
## 6529, 6540, 6558, 6560, 6615, 6616, 6633, 6648, 6657, 6671, 6683, 6690, 6691,
## 6692, 6699, 6714, 6715, 6734, 6735, 6739, 6742, 6752, 6790, 6817, 6831, 6844,
## 6852, 6865, 6868, 6871, 6877, 6891, 6912, 6915, 6929, 6936, 6950, 6959, 6963,
## 6964, 6970, 6971, 6973, 6988, 7005, 7037, 7043, 7050, 7062, 7064, 7065, 7074,
## 7076, 7081, 7084, 7085, 7106, 7116, 7141, 7157, 7158, 7180, 7182, 7187, 7190,
## 7195, 7197, 7231, 7240, 7244, 7290, 7305, 7306, 7324, 7326, 7369, 7379, 7388,
## 7389, 7394, 7446, 7453, 7476, 7479, 7500, 7502, 7509, 7514, 7515, 7527, 7549,
## 7552, 7579, 7597, 7612, 7616, 7649, 7650, 7651, 7670, 7671, 7677, 7678, 7684,
## 7688, 7710, 7776, 7790, 7797, 7798, 7799, 7816, 7873, 7884, 7914, 7925, 7927,
## 7928, 7930, 7932, 7968, 7977, 7987, 8027, 8030, 8037, 8059, 8062, 8071, 8078,
## 8086, 8087, 8095, 8114, 8121, 8142, 8156, 8170, 8172, 8184, 8188, 8209, 8226,
## 8240, 8241, 8251, 8252, 8268, 8314, 8323, 8327, 8335, 8338, 8339, 8340, 8343,
## 8353, 8365, 8367, 8369, 8384, 8386, 8420, 8443, 8445, 8457, 8465, 8482, 8507,
## 8511, 8530, 8560, 8585, 8591, 8599, 8614, 8625, 8641, 8653, 8659, 8668, 8673,
## 8683, 8684, 8686, 8690, 8691, 8698, 8704, 8736, 8741, 8748, 8749, 8750, 8762,
## 8767, 8778, 8790, 8812, 8838, 8841, 8878, 8903, 8904, 8912, 8914, 8918, 8922,
## 8958, 8992, 8995, 9002, 9012, 9039, 9042, 9043, 9048, 9077, 9097, 9102, 9131,
## 9151, 9157, 9169, 9184, 9202, 9214, 9223, 9231, 9253, 9271, 9276, 9279, 9291,
## 9324, 9338, 9345, 9366, 9384, 9416, 9420, 9421, 9426, 9479, 9495, 9509, 9520,
## 9542, 9563, 9582, 9584, 9585, 9592, 9609, 9632, 9643, 9667, 9681, 9693, 9707,
## 9711, 9713, 9744, 9745, 9768, 9787, 9792, 9810, 9825, 9863, 9871, 9906, 9913,
## 9944, 9973, 9976, 9991, 9999, 10000, 10014, 10023, 10029, 10030, 10033, 10051,
## 10081, 10082, 10083, 10126, 10147, 10153, 10155, 10178, 10190, 10200, 10205,
## 10251, 10265, 10267, 10271, 10272, 10297, 10332, 10335, 10341, 10343, 10346,
## 10349, 10352, 10364, 10367, 10388, 10389, 10403, 10428, 10445, 10448, 10458,
## 10498, 10508, 10509, 10524, 10528, 10543, 10545, 10560, 10563, 10565, 10615,
## 10620, 10635, 10650, 10654, 10692, 10694, 10695, 10710, 10742, 10756, 10759,
## 10770, 10809, 10819, 10831, 10839, 10842, 10848, 10860, 10896, 10909, 10911,
## 10953, 10955, 10961, 10970, 10998, 11000, 11022, 11040, 11041, 11042, 11061,
## 11065, 11067, 11076, 11095, 11098, 11099, 11120, 11142, 11179, 11187, 11194,
## 11211, 11212, 11215, 11219, 11248, 11275, 11279, 11280, 11300, 11303, 11312,
## 11314, 11318, 11324, 11332, 11358, 11359, 11377, 11378, 11380, 11381, 11385,
## 11390, 11407, 11420, 11436, 11440, 11447, 11450, 11460, 11461, 11464, 11483,
## 11535, 11542, 11543, 11544, 11550, 11584, 11617, 11622, 11627, 11631, 11640,
## 11651, 11682, 11684, 11694, 11700, 11701, 11717, 11730, 11732, 11737, 11748,
## 11765, 11772, 11789, 11795, 11804, 11821, 11831, 11832, 11856, 11867, 11869,
## 11881, 11895, 11903, 11906, 11927, 11936, 11983, 12000, 12029, 12030, 12049,
## 12073, 12112, 12138, 12157, 12199, 12204, 12213, 12230, 12237, 12251, 12254,
## 12331, 12374, 12393, 12397, 12399, 12408, 12426, 12449, 12450, 12511, 12512,
## 12548, 12560, 12577, 12595, 12613, 12617, 12645, 12759, 12778, 12841, 12845,
## 12849, 12857, 12866, 12885, 12904, 12935, 12955, 12994, 13008, 13009, 13042,
## 13052, 13053, 13059, 13063, 13077, 13090, 13122, 13134, 13135, 13136, 13140,
## 13142, 13149, 13166, 13203, 13244, 13245, 13250, 13257, 13263, 13266, 13268,
## 13269, 13284, 13291, 13307, 13315, 13342, 13344, 13347, 13371, 13378, 13382,
## 13386, 13394, 13395, 13410, 13422, 13429, 13433, 13448, 13457, 13464, 13518,
## 13519, 13534, 13539, 13569, 13609, 13612, 13618, 13619, 13626, 13639, 13658,
## 13679, 13689, 13712, 13722, 13745, 13758, 13762, 13764, 13766, 13769, 13784,
## 13793, 13823, 13842, 13854, 13856, 13857, 13862, 13869, 13871, 13873, 13909,
## 13912, 13951, 13965, 13971, 13976, 13978, 13992, 13999, 14000, 14013, 14015,
## 14016, 14024, 14028, 14031, 14033, 14044, 14053, 14056, 14057, 14069, 14072,
## 14074, 14106, 14128, 14129, 14153, 14167, 14198, 14211, 14214, 14282, 14293,
## 14300, 14320, 14328, 14333, 14340, 14346, 14381, 14385, 14392, 14399, 14405,
## 14407, 14410, 14427, 14473, 14510, 14541, 14548, 14557, 14560, 14567, 14598,
## 14608, 14630, 14651, 14674, 14690, 14694, 14696, 14701, 14709, 14718, 14729,
## 14735, 14739, 14783, 14794, 14799, 14801, 14825, 14827, 14843, 14849, 14856,
## 14884, 14902, 14904, 14905, 14908, 14917, 14937, 14941, 14944, 14947, 14949,
## 15011, 15031, 15044, 15070, 15090, 15111, 15171, 15186, 15191, 15195, 15202,
## 15209, 15223, 15239, 15298, 15301, 15314, 15317, 15320, 15326, 15357, 15360,
## 15395, 15397, 15436, 15438, 15446, 15447, 15472, 15474, 15494, 15514, 15529,
## 15534, 15555, 15563, 15572, 15580, 15600, 15665, 15669, 15739, 15755, 15759,
## 15762, 15764, 15767, 15787, 15812, 15818, 15840, 15842, 15860, 15878, 15884,
## 15913, 15916, 15928, 15936, 16008, 16029, 16038, 16043, 16060, 16068, 16086,
## 16103, 16127, 16160, 16176, 16209, 16212, 16216, 16227, 16255, 16257, 16274,
## 16278, 16297, 16317, 16330, 16354, 16376, 16391, 16397, 16411, 16432, 16444,
## 16479, 16497, 16512, 16525, 16533, 16556, 16559, 16581, 16607, 16623, 16629,
## 16630, 16647, 16658, 16698, 16720, 16723, 16761, 16776, 16794, 16810, 16816,
## 16825, 16836, 16915, 16919, 16926, 16950, 16952, 16955, 16968, 16
# Evaluate every entry of `algorithms` on `scheme`, scoring top-N lists of
# 2, 5, 10, 15 and 20 recommendations.
results <- recommenderlab::evaluate(
  x = scheme,
  method = algorithms,
  type = "topNList",
  n = c(2, 5, 10, 15, 20)
)
## IBCF run fold/sample [model time/prediction time]
## 1 [505.99sec/3.95sec]
## 2 [513.32sec/2.91sec]
## 3 [458.96sec/3.17sec]
## 4 [460.02sec/3.02sec]
## 5 [460.21sec/2.88sec]
## UBCF run fold/sample [model time/prediction time]
## 1 [0sec/294.48sec]
## 2 [0sec/299.1sec]
## 3 [0sec/312.82sec]
## 4 [0sec/299sec]
## 5 [0sec/298.77sec]
# Confusion matrices of the user-based model, one list element per round
tmp <- results$`user-based CF` %>%
  getConfusionMatrix() %>%
  as.list()
# Calculate average value of 5 cross-validation rounds (element-wise mean)
(Reduce(`+`, tmp) / length(tmp)) %>%
  as.data.frame() %>%
  # Add a column to mark the number of recommendations calculated
  mutate(n = c(2, 5, 10, 15, 20)) %>%
  # Select only columns needed and sorting out order
  select(n, precision, recall, TPR, FPR)
## n precision recall TPR FPR
## 1 2 0.05672100 0.1134304 0.1134304 0.0004879343
## 2 5 0.03654112 0.1826861 0.1826861 0.0012459324
## 3 10 0.02453336 0.2453074 0.2453074 0.0025229213
## 4 15 0.01926488 0.2889428 0.2889428 0.0038048213
## 5 20 0.01607506 0.3214671 0.3214671 0.0050895953
#' Average the per-round confusion matrices of one evaluated model
#'
#' @param results Evaluation results of a single algorithm, as accepted by
#'   `getConfusionMatrix()`.
#' @param top_n Numeric vector of recommendation-list sizes, one per row of
#'   the confusion matrix. Defaults to the sizes passed to `evaluate()` in
#'   this analysis (the previous hard-coded 3 mislabelled the n = 2 row).
#' @return A data frame with the columns `n`, `precision`, `recall`, `TPR`
#'   and `FPR`, one row per list size.
avg_conf_matr <- function(results, top_n = c(2, 5, 10, 15, 20)) {
  tmp <- results %>%
    getConfusionMatrix() %>%
    as.list()
  # Element-wise mean of the matrices across rounds
  avg <- as.data.frame(Reduce("+", tmp) / length(tmp))
  # Fail loudly rather than attach labels that do not match the rows
  stopifnot(length(top_n) == nrow(avg))
  avg %>%
    mutate(n = top_n) %>%
    select("n", "precision", "recall", "TPR", "FPR")
}
# Averaged confusion-matrix metrics of every evaluated model in one tibble
results_tbl <- results %>%
  map(avg_conf_matr) %>%
  # Turning into a tibble with one row per model ('name', 'value')
  enframe() %>%
  # Unnesting to have all variables on same level; the column is named
  # explicitly because a bare unnest() warns "`cols` is now required".
  unnest(cols = c(value))
## # A tibble: 10 x 6
## name n precision recall TPR FPR
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 item-based CF 3 0.0680 0.125 0.125 0.000444
## 2 item-based CF 5 0.0463 0.206 0.206 0.00113
## 3 item-based CF 10 0.0345 0.287 0.287 0.00223
## 4 item-based CF 15 0.0290 0.336 0.336 0.00329
## 5 item-based CF 20 0.0257 0.369 0.369 0.00430
## 6 user-based CF 3 0.0567 0.113 0.113 0.000488
## 7 user-based CF 5 0.0365 0.183 0.183 0.00125
## 8 user-based CF 10 0.0245 0.245 0.245 0.00252
## 9 user-based CF 15 0.0193 0.289 0.289 0.00380
## 10 user-based CF 20 0.0161 0.321 0.321 0.00509
# ROC curves: TPR against FPR, one line per model, points labelled with n
results_tbl %>%
  ggplot(
    mapping = aes(
      x = FPR,
      y = TPR,
      colour = fct_reorder2(as.factor(name), FPR, TPR)
    )
  ) +
  geom_line() +
  geom_label(mapping = aes(label = n)) +
  labs(title = "ROC curves", colour = "Model") +
  theme_fivethirtyeight(base_size = 14)

# Precision-recall curves: same layout with recall on x and precision on y
results_tbl %>%
  ggplot(
    mapping = aes(
      x = recall,
      y = precision,
      colour = fct_reorder2(as.factor(name), precision, recall)
    )
  ) +
  geom_line() +
  geom_label(mapping = aes(label = n)) +
  labs(title = "Precision-Recall curves", colour = "Model") +
  theme_fivethirtyeight(base_size = 14)
## $`1`
## [1] "PINK REGENCY TEACUP AND SAUCER" "ROSES REGENCY TEACUP AND SAUCER"
## [3] "SET OF 3 HEART COOKIE CUTTERS" "REGENCY CAKESTAND 3 TIER"
## [5] "3 PIECE SPACEBOY COOKIE CUTTER SET" "GINGERBREAD MAN COOKIE CUTTER"
## [7] "JAM MAKING SET PRINTED" "SET OF 3 CAKE TINS PANTRY DESIGN"
## [9] "SET OF 3 REGENCY CAKE TINS" "RECIPE BOX PANTRY YELLOW DESIGN"