customer_id <- c(568468,568468,568468,568468,568468,568468)
customer <- c('paramount','paramount','paramount','paramount','paramount','paramount')
start_date <- as.Date(c('2016-03-15','2016-03-15','2016-03-15','2016-03-15','2016-03-15','2016-03-15'))
occured_on <- as.POSIXct(c('2017-08-08 20:05:00','2017-08-08 20:30:00','2017-08-11 21:13:00','2017-08-11 21:30:00','2017-08-31 05:16:00','2017-08-31 05:30:00'))
old_plan <- c('a',NA,'b',NA,'b',NA)
old_price <- c(NA,29,NA,99,NA,82.5)
old_recurrence <- c('monthly',NA,'monthly',NA,'annually',NA)
new_plan <- c('b',NA,'b',NA,'c',NA)
new_price <- c(NA,99,NA,82.5,NA,349)
new_recurrence <- c('monthly',NA,'annually',NA,'monthly',NA)
df <- data.frame(customer_id,customer,start_date,occured_on,old_plan,old_price,old_recurrence,new_plan,new_price,new_recurrence)
df
Odd rows are missing plan and even rows are missing price so let’s make it right
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
names(df)
## [1] "customer_id" "customer" "start_date" "occured_on"
## [5] "old_plan" "old_price" "old_recurrence" "new_plan"
## [9] "new_price" "new_recurrence"
odds <- seq(2, nrow(df), 2)
evens <- seq(1, nrow(df)-1, 2)
work.df <- df[odds, c("customer_id","customer","start_date","old_price", "new_price")] %>%
cbind(df[evens, c("old_plan", "old_recurrence", "new_plan", "new_recurrence")]) %>%
data.frame()
work.df
min.old.price.df <- top_n(work.df,-1, wt = old_price) %>%
select(customer_id, customer, start_date, old_price, old_plan, old_recurrence) %>%
data.frame()
min.old.price.df
max.new.price.df <- top_n(work.df,-1, wt = old_price) %>%
select(customer_id, customer, start_date, new_price, new_plan, new_recurrence) %>%
data.frame()
max.new.price.df
result.df <- min.old.price.df %>%
inner_join(max.new.price.df)
## Joining, by = c("customer_id", "customer", "start_date")
result.df