# Attach the package that ships the data set, then load `customers` into the
# workspace (a data.frame with 440 observations of 8 variables).
library(datasetsICR)
data(customers)

# Lab ----
# Loading the Dataset ----
# First 10 rows ----
# Print the first ten observations; the recorded console output is kept below
# as `#>` comments so the script stays runnable.
head(customers, 10)
#>    Channel Region Fresh  Milk Grocery Frozen Detergents_Paper Delicassen
#> 1        2      3 12669  9656    7561    214             2674       1338
#> 2        2      3  7057  9810    9568   1762             3293       1776
#> 3        2      3  6353  8808    7684   2405             3516       7844
#> 4        1      3 13265  1196    4221   6404              507       1788
#> 5        2      3 22615  5410    7198   3915             1777       5185
#> 6        2      3  9413  8259    5126    666             1795       1451
#> 7        2      3 12126  3199    6975    480             3140        545
#> 8        2      3  7579  4956    9426   1669             3321       2566
#> 9        1      3  5963  3648    6192    425             1716        750
#> 10       2      3  6006 11093   18881   1159             7425       2098
# Last 10 rows ----
# Print the final ten observations; the recorded console output is kept below
# as `#>` comments so the script stays runnable.
tail(customers, 10)
#>     Channel Region Fresh  Milk Grocery Frozen Detergents_Paper Delicassen
#> 431       1      3  3097  4230   16483    575              241       2080
#> 432       1      3  8533  5506    5160  13486             1377       1498
#> 433       1      3 21117  1162    4754    269             1328        395
#> 434       1      3  1982  3218    1493   1541              356       1449
#> 435       1      3 16731  3922    7994    688             2371        838
#> 436       1      3 29703 12051   16027  13135              182       2204
#> 437       1      3 39228  1431     764   4510               93       2346
#> 438       2      3 14531 15488   30243    437            14841       1867
#> 439       1      3 10290  1981    2232   1038              168       2125
#> 440       1      3  2787  1698    2510     65              477         52
# Structure of Dataset ----
# Show the column names and types; every column is stored as an integer,
# including the categorical codes in Channel and Region.
str(customers)
#> 'data.frame': 440 obs. of 8 variables:
#>  $ Channel         : int 2 2 2 1 2 2 2 2 1 2 ...
#>  $ Region          : int 3 3 3 3 3 3 3 3 3 3 ...
#>  $ Fresh           : int 12669 7057 6353 13265 22615 9413 12126 7579 5963 6006 ...
#>  $ Milk            : int 9656 9810 8808 1196 5410 8259 3199 4956 3648 11093 ...
#>  $ Grocery         : int 7561 9568 7684 4221 7198 5126 6975 9426 6192 18881 ...
#>  $ Frozen          : int 214 1762 2405 6404 3915 666 480 1669 425 1159 ...
#>  $ Detergents_Paper: int 2674 3293 3516 507 1777 1795 3140 3321 1716 7425 ...
#>  $ Delicassen      : int 1338 1776 7844 1788 5185 1451 545 2566 750 2098 ...
# Summary Statistics ----
# Five-number summary plus mean for the two variables plotted later.
summary(customers$Frozen) # x variable
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#>    25.0   742.2  1526.0  3071.9  3554.2 60869.0

summary(customers$Delicassen) # y variable
#>    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.
#>     3.0   408.2   965.5  1524.9  1820.2 47943.0
# Table ----
# Count the observations in each Region code.
table(customers$Region)
#>
#>   1   2   3
#>  77  47 316
# Scatter plots of Frozen (x) against Delicassen (y), coloured by region.

# Region is stored as integer codes; keep a factor copy in a separate
# lower-case `region` column so the colour aesthetic is treated as discrete.
customers$region <- as.factor(customers$Region)
library(ggplot2)

# Plot 1: x axis on a log10 scale, y axis linear.
ggplot(customers, aes(x = Frozen, y = Delicassen, color = region)) +
  geom_point() +
  scale_x_log10()

# From here on the original Region column itself is converted to a factor
# in place and used for colouring.
customers$Region <- as.factor(customers$Region)

# Plot 2: linear axes with a smoothed trend and confidence band. With
# method = "auto" ggplot2 picks the smoother itself; the recorded run chose
# loess, as the console message below shows.
ggplot(customers, aes(x = Frozen, y = Delicassen, color = Region)) +
  geom_point() +
  geom_smooth(method = "auto", se = TRUE)
#> `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Plot 3: both axes on a log10 scale.
# as.factor() leaves Region unchanged if it is already a factor, so repeating
# the conversion here is safe and lets this chunk run on its own.
customers$Region <- as.factor(customers$Region)
ggplot(customers, aes(x = Frozen, y = Delicassen, color = Region)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()

# Attach ggplot2 for the chunk that follows; calling library() on a package
# that is already attached is harmless.
library(ggplot2)
# Final plot: Frozen against Delicassen on log10-log10 axes, coloured by
# Region, with axis labels, title, subtitle and caption.
customers[["Region"]] <- as.factor(customers[["Region"]])
ggplot(
  data = customers,
  mapping = aes(x = Frozen, y = Delicassen, colour = Region)
) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "Frozen vs Deli",
    subtitle = "Data points are Regions",
    caption = "source: customers.",
    x = "Frozen",
    y = "Deli"
  )