pacman::p_load(tidyverse, data.table, forecast, zoo)
# Read the user balance CSV into a data.table (path is relative to the
# current working directory).
balance <- data.table::fread(
  "tianchi-zijin/datasets/user_balance_table.csv"
)
##
Read 0.0% of 2840421 rows
Read 23.9% of 2840421 rows
Read 51.4% of 2840421 rows
Read 63.0% of 2840421 rows
Read 74.3% of 2840421 rows
Read 95.1% of 2840421 rows
Read 2840421 rows and 18 (of 18) columns from 0.147 GB file in 00:00:09
# Preview the first rows of the loaded table to check the parsed columns.
head(balance)
## user_id report_date tBalance yBalance total_purchase_amt
## 1: 1 20140805 20385 20383 2
## 2: 1 20140808 20391 20389 2
## 3: 1 20140811 20397 20395 2
## 4: 1 20140814 20403 20401 2
## 5: 1 20140817 20409 20407 2
## 6: 1 20140820 20415 20413 2
## direct_purchase_amt purchase_bal_amt purchase_bank_amt total_redeem_amt
## 1: 0 0 0 0
## 2: 0 0 0 0
## 3: 0 0 0 0
## 4: 0 0 0 0
## 5: 0 0 0 0
## 6: 0 0 0 0
## consume_amt transfer_amt tftobal_amt tftocard_amt share_amt category1
## 1: 0 0 0 0 2 NA
## 2: 0 0 0 0 2 NA
## 3: 0 0 0 0 2 NA
## 4: 0 0 0 0 2 NA
## 5: 0 0 0 0 2 NA
## 6: 0 0 0 0 2 NA
## category2 category3 category4
## 1: NA NA NA
## 2: NA NA NA
## 3: NA NA NA
## 4: NA NA NA
## 5: NA NA NA
## 6: NA NA NA
首先，直接汇总每日申购和赎回总量（2013-07-01 至 2014-08-31）。
# Aggregate to daily totals: one row per report_date holding the summed
# purchase and redemption amounts over all rows for that date.
dt <- balance[, .(
  purchase = sum(total_purchase_amt),
  redeem = sum(total_redeem_amt)
), by = report_date]
# report_date is stored as a yyyymmdd number (e.g. 20140805); convert it to a
# Date by reference with `:=`, which avoids the full-table copy that
# `dt$report_date = ...` makes on a data.table.
dt[, report_date := as.Date(as.character(report_date), format = "%Y%m%d")]
# Sort chronologically in place.
setorder(dt, report_date)
head(dt)
## report_date purchase redeem
## 1: 2013-07-01 32488348 5525022
## 2: 2013-07-02 29037390 2554548
## 3: 2013-07-03 27270770 5953867
## 4: 2013-07-04 18321185 6410729
## 5: 2013-07-05 11648749 2763587
## 6: 2013-07-06 36751272 1616635
# Preview the last rows of the daily table to check the end of the date range.
tail(dt)
## report_date purchase redeem
## 1: 2014-08-26 306945089 285478563
## 2: 2014-08-27 302194801 468164147
## 3: 2014-08-28 245082751 297893861
## 4: 2014-08-29 267554713 273756380
## 5: 2014-08-30 199708772 196374134
## 6: 2014-08-31 275090213 292943033
画出每日趋势图
# Reshape to long format (one row per date and series; default column names
# `variable` and `value`) so both series can be drawn by one geom_line layer.
dt_reshape <- melt(dt, id.vars = "report_date")
ggplot(dt_reshape, aes(x = report_date, y = value, group = variable)) +
  # `linewidth` replaces the deprecated `size` argument for lines
  # (ggplot2 >= 3.4.0).
  geom_line(aes(color = variable), linewidth = 1) +
  # Name the colours so they bind to the series, not to factor-level order.
  scale_color_manual(values = c(purchase = "red", redeem = "black")) +
  theme_bw() +
  theme(legend.position = "top", legend.title = element_blank())
初步的观察结果：
待续。