library(readxl)
m <- read_excel("C:/Users/USER/Desktop/m.xlsx")
summary(m)
## customer_id name Product_id Order_id
## Min. :1.00 Length:24 Length:24 Length:24
## 1st Qu.:1.75 Class :character Class :character Class :character
## Median :2.50 Mode :character Mode :character Mode :character
## Mean :2.50
## 3rd Qu.:3.25
## Max. :4.00
## Unit Price Total DATE
## Min. :2.00 Min. :123.0 Min. : 400.0 Min. :2020-01-03 00:00:00
## 1st Qu.:2.75 1st Qu.:180.8 1st Qu.: 703.5 1st Qu.:2020-01-23 00:00:00
## Median :4.50 Median :329.5 Median : 907.5 Median :2020-05-18 00:00:00
## Mean :5.00 Mean :284.7 Mean :1543.3 Mean :2020-04-27 07:00:00
## 3rd Qu.:6.75 3rd Qu.:377.2 3rd Qu.:1779.8 3rd Qu.:2020-07-12 00:00:00
## Max. :9.00 Max. :450.0 Max. :4050.0 Max. :2020-11-23 00:00:00
str(m)
## tibble [24 × 8] (S3: tbl_df/tbl/data.frame)
## $ customer_id: num [1:24] 1 2 3 4 1 2 3 4 1 2 ...
## $ name : chr [1:24] "john" "peter" "james" "james" ...
## $ Product_id : chr [1:24] "p1" "p2" "p3" "p1" ...
## $ Order_id : chr [1:24] "o1" "o2" "o3" "o4" ...
## $ Unit : num [1:24] 2 3 6 9 2 3 6 9 2 3 ...
## $ Price : num [1:24] 200 359 123 432 200 359 123 432 200 359 ...
## $ Total : num [1:24] 400 1077 738 3888 400 ...
## $ DATE : POSIXct[1:24], format: "2020-01-23" "2020-07-12" ...
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
c<-sqldf("select * from m")
c
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
f<-sqldf("select DATE,customer_id,Product_id,Order_id,Total,AVG(Total) over() as avg_total,AVG(Total) over(partition by DATE) as ave_by_day,AVG(Total) over() as avg_by_month from m")
f
d<-month(m$DATE,label = T)
d
## [1] Jan Jul Jan Jul Jan Jul Nov Jul Jan Jul Mar Jul Jan Jul Jan Jul Jan Apr Jan
## [20] Jul Jan Jul Jan Jul
## 12 Levels: Jan < Feb < Mar < Apr < May < Jun < Jul < Aug < Sep < ... < Dec
#J<-sqldf("select * from m WHERE Product_id=p1)
#J