R Notebook

Load Data

customer_id <- c(568468,568468,568468,568468,568468,568468)
customer <- c('paramount','paramount','paramount','paramount','paramount','paramount')
start_date <- as.Date(c('2016-03-15','2016-03-15','2016-03-15','2016-03-15','2016-03-15','2016-03-15'))
occured_on <- as.POSIXct(c('2017-08-08 20:05:00','2017-08-08 20:30:00','2017-08-11 21:13:00','2017-08-11 21:30:00','2017-08-31 05:16:00','2017-08-31 05:30:00'))
old_plan <- c('a',NA,'b',NA,'b',NA)
old_price <- c(NA,29,NA,99,NA,82.5)
old_recurrence <- c('monthly',NA,'monthly',NA,'annually',NA)
new_plan <- c('b',NA,'b',NA,'c',NA)
new_price <- c(NA,99,NA,82.5,NA,349)
new_recurrence <- c('monthly',NA,'annually',NA,'monthly',NA)

df <- data.frame(customer_id,customer,start_date,occured_on,old_plan,old_price,old_recurrence,new_plan,new_price,new_recurrence)

df

Odd rows are missing plan and even rows are missing price so let’s make it right

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

names(df)

##  [1] "customer_id"    "customer"       "start_date"     "occured_on"    
##  [5] "old_plan"       "old_price"      "old_recurrence" "new_plan"      
##  [9] "new_price"      "new_recurrence"

odds  <- seq(2, nrow(df), 2)
evens <- seq(1, nrow(df)-1, 2)

work.df <- df[odds, c("customer_id","customer","start_date","old_price", "new_price")] %>% 
           cbind(df[evens, c("old_plan", "old_recurrence", "new_plan", "new_recurrence")]) %>%
           data.frame()

work.df

Min old price and max new price

min.old.price.df <- top_n(work.df,-1, wt = old_price) %>% 
                      select(customer_id, customer, start_date, old_price, old_plan, old_recurrence) %>%
                      data.frame()
min.old.price.df

max.new.price.df <- top_n(work.df,-1, wt = old_price) %>% 
                      select(customer_id, customer, start_date, new_price, new_plan, new_recurrence) %>%
                        data.frame()
max.new.price.df

result.df <- min.old.price.df %>%
             inner_join(max.new.price.df)

## Joining, by = c("customer_id", "customer", "start_date")

result.df