# 讀取 Purchase 資料
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.5
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Fetch the purchase CSV from GitHub into the current working directory.
download.file(
  url = "https://github.com/ywchiu/rtibame/raw/master/Data/purchase.csv",
  destfile = "purchase.csv"
)

# Load the file, fixing column types positionally through colClasses:
# column 1 integer, column 2 POSIXct, columns 3-5 left to read.csv's automatic
# type detection (NA), column 6 integer, column 7 numeric.
purchase <- read.csv(
  file = "purchase.csv",
  header = TRUE,
  colClasses = c("integer", "POSIXct", NA, NA, NA, "integer", "numeric")
)
# 求出最踴躍購買的時段
# Aggregate by hour of day: total Quantity for each two-digit hour string
# extracted from Time with format(..., "%H").
# The summary column keeps the name `sum(Quantity)`, which also becomes the
# y-axis label when the two-column result is plotted.
hour.buy.stat <- purchase %>%
  group_by(Hour = format(Time, "%H")) %>%
  summarise(`sum(Quantity)` = sum(Quantity))
plot(hour.buy.stat, type = "l")

# Aggregate by day of month: total Quantity for each two-digit day string
# extracted from Time with format(..., "%d").
# The summary column keeps the name `sum(Quantity)`, which also becomes the
# y-axis label when the two-column result is plotted.
day.buy.stat <- purchase %>%
  group_by(Day = format(Time, "%d")) %>%
  summarise(`sum(Quantity)` = sum(Quantity))
plot(day.buy.stat, type = "l")

# Frequency of purchase quantities: how many rows have each Quantity value.
# A one-column data frame is piped into table(), so the printed dimension
# header is the pipe placeholder `.`.
purchase["Quantity"] %>%
  table()
## .
## 1 2 3 4 5 6 7 8 9 10 11 12
## 49296 3403 1344 238 171 113 23 36 12 111 3 3
## 13 15 16 18 20 21 24 25 30
## 1 2 1 1 8 1 2 1 2
# 求出購買金額最高的使用者
# Three users with the largest total spend, where each row's spend is
# Quantity * Price. The total column is named `sum(Buy)` so the printed
# header is unchanged.
purchase %>%
  group_by(User) %>%
  summarise(`sum(Buy)` = sum(Quantity * Price)) %>%
  arrange(desc(`sum(Buy)`)) %>%
  head(3)
## # A tibble: 3 x 2
## User sum(Buy)
## <fctr> <dbl>
## 1 U166708333 2942744
## 2 U142809250 747550
## 3 U1006283751 515688
# Top 10 VIP users: total spend (Quantity * Price) per user, largest first.
# The total is named `Buy` directly in summarise(), so the result has columns
# `User` and `Buy` without relying on the auto-generated `sum(Buy)` name and a
# later colnames<- rename.
top10.user <- purchase %>%
  group_by(User) %>%
  summarise(Buy = sum(Quantity * Price)) %>%
  arrange(desc(Buy)) %>%
  head(10)

# One bar per user, labelled with the user id. `names.arg` is spelled out in
# full; the previous `name =` only worked through partial argument matching.
barplot(height = top10.user$Buy, names.arg = top10.user$User)

# 尾數定價驗證
# Tabulate the units digit of every price (Price modulo 10) to see which
# price endings occur most often.
table(purchase[["Price"]] %% 10)
##
## 0 1 2 3 4 5 6 7 8 9
## 17645 702 1515 676 1290 2302 1927 548 3959 24157