library(dplyr)
library(ggplot2)
# Load the wholesale data and turn the Channel / Region codes into labelled
# factors ("Ch1", "Ch2", "Reg1", ...).
W <- read.csv("wholesales.csv")
W$Channel <- factor(paste0("Ch", W$Channel))
W$Region <- factor(paste0("Reg", W$Region))

# Log10-transform every spending column (columns 3 to 8). The right-hand side
# must cover the same six columns as the left-hand side: when only W[3:6] is
# transformed, the four results are reused to fill six slots and columns 7 and
# 8 end up holding copies of columns 3 and 4 (Fresh and Milk).
W[3:8] <- lapply(W[3:8], log, base = 10)
names(W)[7] <- "Detergents"  # rename column 7

# NOTE(review): the summary output recorded right after this call was captured
# before the column-range fix above; its Detergents and Delicassen columns
# repeat the Fresh and Milk figures and should be regenerated.
summary(W)
## Channel Region Fresh Milk Grocery
## Ch1:298 Reg1: 77 Min. :0.4771 Min. :1.740 Min. :0.4771
## Ch2:142 Reg2: 47 1st Qu.:3.4952 1st Qu.:3.186 1st Qu.:3.3330
## Reg3:316 Median :3.9296 Median :3.560 Median :3.6772
## Mean :3.7916 Mean :3.527 Mean :3.6660
## 3rd Qu.:4.2288 3rd Qu.:3.857 3rd Qu.:4.0276
## Max. :5.0498 Max. :4.866 Max. :4.9675
## Frozen Detergents Delicassen
## Min. :1.398 Min. :0.4771 Min. :1.740
## 1st Qu.:2.871 1st Qu.:3.4952 1st Qu.:3.186
## Median :3.184 Median :3.9296 Median :3.560
## Mean :3.171 Mean :3.7916 Mean :3.527
## 3rd Qu.:3.551 3rd Qu.:4.2288 3rd Qu.:3.857
## Max. :4.784 Max. :5.0498 Max. :4.866
# Variation of a single variable (單一變數的變化) ----
# Categorical variable: print how many rows fall in each channel, then draw
# the same counts as a bar chart.
table(W$Channel)
##
## Ch1 Ch2
## 298 142
barplot(table(W$Channel))

# Numeric variable: print min / quartiles / median / mean / max of Fresh,
# then draw its histogram.
summary(W[["Fresh"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.4771 3.4952 3.9296 3.7916 4.2288 5.0498
# The argument stays spelled as W$Fresh because hist() derives its default
# title and x-axis label from the expression it receives.
hist(x = W$Fresh)

# Relationship between two variables (兩個變數之間的關係) ----
# Categorical x categorical: cross-tabulate Channel (rows) against Region
# (columns) and hand the resulting count table to barplot().
barplot(table(W$Channel, W$Region))

# Categorical x numeric
# Milk already holds log10 values at this point, so this adds up log10(Milk)
# within each region.
tapply(W$Milk, W$Region, sum)
## Reg1 Reg2 Reg3
## 270.0807 163.2922 1118.4746
# Mean of log10(Milk) for every Channel (rows) x Region (columns) cell.
tapply(W$Milk, list(W$Channel, W$Region), mean)
## Reg1 Reg2 Reg3
## Ch1 3.368636 3.227366 3.346987
## Ch2 3.962843 3.838205 3.926289
# Histogram of Milk in a Channel (rows) x Region (columns) grid of panels.
# Milk was log10-transformed when the data was prepared, so it is plotted as
# is; wrapping it in log() again would show the log of a log.
ggplot(W, aes(x = Milk)) +
  geom_histogram(aes(fill = Region), alpha = 0.5, bins = 20) +
  facet_grid(Channel ~ Region) +
  labs(title = "Dist. of Sales of Milk", x = "log10(Milk)")

# Numeric x numeric
# Scatter plot of Fresh against Milk with a fitted linear trend line and no
# confidence band. Both columns were log10-transformed when the data was
# prepared, so they are plotted directly instead of being logged a second
# time.
ggplot(W, aes(x = Milk, y = Fresh)) +
  geom_point(size = 2) +
  stat_smooth(method = "lm", se = FALSE) +
  labs(x = "log10(Milk)", y = "log10(Fresh)")

# Is the relationship between two numeric variables the same across groups? (兩個數量之間的關係在不同族群之中是否相同) ----
# (Numeric x numeric) by categorical
# Fresh against Milk with a linear trend line, one panel per Channel. The
# columns already hold log10 values, so no further log() is applied.
ggplot(W, aes(x = Milk, y = Fresh)) +
  geom_point(size = 2) +
  stat_smooth(method = "lm", se = FALSE) +
  facet_grid(~Channel) +
  labs(x = "log10(Milk)", y = "log10(Fresh)")

# Fresh against Milk with a linear trend line, one panel per Region. The
# columns already hold log10 values, so no further log() is applied.
ggplot(W, aes(x = Milk, y = Fresh)) +
  geom_point(size = 2) +
  stat_smooth(method = "lm", se = FALSE) +
  facet_grid(~Region) +
  labs(x = "log10(Milk)", y = "log10(Fresh)")

# (Numeric x numeric) by (categorical x categorical)
# Fresh against Milk with a red linear trend line in a Channel (rows) x
# Region (columns) grid of panels. The columns already hold log10 values, so
# no further log() is applied.
ggplot(W, aes(x = Milk, y = Fresh)) +
  geom_point(size = 2) +
  stat_smooth(method = "lm", se = FALSE, col = "red") +
  facet_grid(Channel ~ Region) +
  labs(x = "log10(Milk)", y = "log10(Fresh)")
