newdata=NULL
newdata$customerid=sample(1:300000,300000,T)
newdata$pdate=rep(Sys.Date()-sample(300,300,T),1000)
newdata$pval=rep(sample(10000,1000,T),300)
newdata$dayssincep=Sys.Date()-newdata$pdate
newdata2=as.data.frame(newdata)
a=unique(newdata$customerid)
length(a)
## [1] 189576
b=unique(newdata$pdate)
length(b)
## [1] 190
head(newdata2)
## customerid pdate pval dayssincep
## 1 204764 2015-08-16 2794 20 days
## 2 233292 2015-08-12 4845 24 days
## 3 171899 2015-07-15 6934 52 days
## 4 130053 2015-06-02 6907 95 days
## 5 272028 2015-05-30 8545 98 days
## 6 68639 2015-03-16 9737 173 days
c=unique(newdata2$pval)
length(c)
## [1] 954
sample(1:30,30,F)
## [1] 16 2 18 1 7 26 29 30 27 15 19 11 5 10 8 13 21 23 9 17 4 3 14
## [24] 25 12 6 22 20 28 24
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
newdata3=NULL
newdata3=sqldf("select customerid,count(customerid)as freq,max(dayssincep),sum(pval) from newdata2 group by customerid")
## Loading required package: tcltk
head(newdata3)
## customerid freq max(dayssincep) sum(pval)
## 1 1 3 287 11330
## 2 2 1 237 2492
## 3 3 2 184 8562
## 4 4 1 237 6135
## 5 5 1 195 1192
## 6 7 1 102 2834