讀取使用者購買紀錄

download.file('https://github.com/ywchiu/rtibame/raw/master/Data/purchase.csv', 'purchase.csv') 
purchase <- read.csv('purchase.csv', header=TRUE)

學員需要找出用戶在每天哪個時段(0~24)會購買最多商品。

library(dplyr)
## Warning: package 'dplyr' was built under R version 3.2.5
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Convert String to POSIXct Time Format
purchase$Hour <- purchase$Time %>% strptime("%Y-%m-%d %H:%M:%S") %>% format('%H')

# SELECT hour, SUM(quantity) FROM purchase
# GROUP BY hour

purchase_by_hour <- purchase %>% 
  select(Quantity, Hour) %>% 
  group_by(Hour) %>% 
  summarise(num_of_products = sum(Quantity)) 

plot(x=as.numeric(purchase_by_hour$Hour), y = purchase_by_hour$num_of_products, type='l')

學員需要找出購買金額排行前三名的使用者ID。

## SELECT User, SUM(Price * Quantity)
## FROM purchase GROUP BY User 
## ORDER by SUM(Price * Quantity) DESC LIMIT 3

purchase %>% 
  select(User, Price, Quantity) %>%
  mutate(Price_Sum = Price * Quantity) %>%
  group_by(User) %>% 
  summarise(sum_of_purchase = sum(Price_Sum)) %>%
  arrange(desc(sum_of_purchase)) %>% 
  head(3)
## # A tibble: 3 x 2
##          User sum_of_purchase
##        <fctr>           <dbl>
## 1  U166708333         2942744
## 2  U142809250          747550
## 3 U1006283751          515688

學員需要根據使用者購買金額,將前十名使用者的購買金額繪製成長條圖。(X軸為使用者ID, Y 軸為購買金額),並將該長條圖由大到小做排列。

## SELECT User, SUM(Price * Quantity)
## FROM purchase GROUP BY User 
## ORDER by SUM(Price * Quantity) DESC LIMIT 10

top10_purchase <- purchase %>% 
  select(User, Price, Quantity) %>%
  mutate(Price_Sum = Price * Quantity) %>%
  group_by(User) %>% 
  summarise(sum_of_purchase = sum(Price_Sum)) %>%
  arrange(desc(sum_of_purchase)) %>% 
  head(10)

barplot(top10_purchase$sum_of_purchase, col=top10_purchase$User, names.arg = top10_purchase$User)