library(ggplot2)
library(dplyr)
library(knitr)
# Load the orders export: semicolon-separated, first row holds column names.
orders <- read.table("~/orders.csv", sep = ";", header = TRUE)

# Bin edges for price_total_init: 1000-wide steps from 0 up to 120000.
interval <- seq(from = 0, to = 120000, by = 1000)

# Label every order with its price bin. cut() builds right-closed intervals
# (a, b] by default, so a price_total_init of exactly 0 or anything above
# 120000 falls outside all bins and gets NA. dig.lab = 5 asks for up to five
# digits in the bin labels (e.g. "(10000,11000]").
orders <- orders %>%
  mutate(group_price = cut(price_total_init, interval, dig.lab = 5))
# Number of orders in each price bin, with no filtering on state.
# Result columns: group_price, all_count.
all_count <- orders %>%
  group_by(group_price) %>%
  summarise(all_count = n())
# Number of orders in each price bin whose state is "returned".
# Result columns: group_price, returned_count. Bins without any returned
# order do not appear in this table at all.
returned_count <- orders %>%
  filter(state == "returned") %>%
  group_by(group_price) %>%
  summarise(returned_count = n())
# Share of returned orders per price bin, rounded to two decimals.
# merge() joins the two count tables on group_price; sort = FALSE skips
# sorting the result on the join key.
# NOTE(review): merge() defaults to an inner join, so bins that have orders
# but no returned orders are dropped here instead of showing up with p = 0.
# If those bins should be reported, use all.x = TRUE and replace the NA
# returned_count with 0 -- confirm the intended behaviour.
result <- all_count %>%
  merge(returned_count, by = "group_price", sort = FALSE) %>%
  mutate(p = round(returned_count / all_count, 2))

# Print the table.
result
## group_price all_count returned_count p
## 1 (0,1000] 2100 186 0.09
## 2 (1000,2000] 5206 469 0.09
## 3 (2000,3000] 3449 373 0.11
## 4 (3000,4000] 3443 254 0.07
## 5 (4000,5000] 1765 160 0.09
## 6 (5000,6000] 1028 76 0.07
## 7 (6000,7000] 938 63 0.07
## 8 (7000,8000] 394 26 0.07
## 9 (8000,9000] 260 22 0.08
## 10 (9000,10000] 179 18 0.10
## 11 (10000,11000] 116 11 0.09
## 12 (11000,12000] 93 15 0.16
## 13 (12000,13000] 68 6 0.09
## 14 (13000,14000] 51 8 0.16
## 15 (14000,15000] 26 4 0.15
## 16 (15000,16000] 29 1 0.03
## 17 (17000,18000] 14 2 0.14
## 18 (18000,19000] 6 1 0.17
## 19 (19000,20000] 10 2 0.20
## 20 (23000,24000] 2 2 1.00
## 21 (27000,28000] 2 1 0.50
## 22 (31000,32000] 1 1 1.00
## 23 (33000,34000] 4 1 0.25
## 24 (45000,46000] 1 1 1.00