This project aims to create a working workforce management plan for a chosen customer care unit. A workforce manager is responsible for forecasting contact volume and staffing an appropriate number of heads for the contact center. It is therefore important to forecast the business volume, the inquiry rate, the number of contacts per channel, and finally the number of heads per interval needed for a healthy customer experience.
The aim is to plan January 2022. Special attention is given to making informative visualizations of the data across the calculation steps, because the insights from these visualizations drive the decisions the workforce manager makes.
This document should inspire workforce managers of all levels to use R and its packages for their daily tasks in this heavily Excel-dependent field.
Loading the dataset
data_contact <- read.csv("contacts.csv") # contact-level records
data_transactions <- read.csv("orders.csv", dec = ".") # daily transactions per market
Let’s install and load all the necessary packages.
requiredPackages <- c("tidyverse", "ggplot2", "ggthemes", "forecast", "TTR", "dplyr", "fpp2", "xts", "lubridate",
                      "ggpubr", "forestmangr", "DT", "ggstatsplot", "rnaturalearth", "rgeos", "plotly",
                      "hrbrthemes", "reshape", "reshape2", "sf", "treemap")
# install any missing packages, then load everything
for (pkg in requiredPackages) {
  if (!require(pkg, character.only = TRUE)) {
    install.packages(pkg)
    library(pkg, character.only = TRUE)
  }
}
Let’s start with the contacts data.
ID: contact ID
TimeCreated: the hour of day at which the contact was created
Channel: the medium of contact - Email/Phone/Chat
Market: Netherlands - BENE, Belgium - BENE, Germany - DE.CH, Switzerland - DE.CH, Denmark - DK, France - FR, Italy - ITA.ES, Spain - ITA.ES, Norway - NO.UK, Poland - PL, Sweden - SE.FI, Finland - SE.FI, United Kingdom - NO.UK
handling_duration: handling time in seconds
# head(data_contact)
colnames(data_contact)[1] <- "ID" # rename the first column to a proper name
str(data_contact)
## 'data.frame': 182211 obs. of 22 variables:
## $ ID : int 558844 558843 558841 558839 558838 558836 558834 558833 558832 558831 ...
## $ direction : chr "inbound" "inbound" "inbound" "outbound" ...
## $ total_duration : int 1492 1064 722 NA 1458 5910 NA 396 874 NA ...
## $ handling_duration : int 1092 1060 698 NA 1444 5896 NA 383 855 NA ...
## $ DateCreated : chr "04/12/2021" "04/12/2021" "04/12/2021" "04/12/2021" ...
## $ Channel : chr "Chat" "Chat" "Chat" "Email" ...
## $ TimeQueued : int 24 24 24 21 24 24 24 23 23 23 ...
## $ TimeAssigned : int 1 24 24 NA 24 24 NA 23 23 NA ...
## $ TimeClosed : int 1 24 24 NA 24 1 NA 24 24 NA ...
## $ Week : int 48 48 48 48 48 48 48 48 48 48 ...
## $ TimeCreated : int 24 24 24 24 24 24 24 23 23 23 ...
## $ Queue : chr "Customer Chat NL" "Customer Chat DK" "Customer Chat SE" "CC Customers - NO" ...
## $ Market : chr "BENE" "DK" "SE/FI" "NO/UK" ...
## $ HandleTime : int 1092 1060 698 NA 1444 5896 NA 383 855 NA ...
## $ Queued : chr "YES" "YES" "YES" "YES" ...
## $ Answered. : int 1 1 1 0 1 1 0 1 1 0 ...
## $ Abandoned : int 0 0 0 1 0 0 1 0 0 1 ...
## $ Closed : int 1 1 1 0 1 1 0 1 1 0 ...
## $ SLAtime : num 6.4333 0.0667 0.4167 NA 0.2333 ...
## $ SLAtimehrs : num 0.10722 0.00111 0.00694 NA 0.00389 ...
## $ WithinSLAtime : int 0 1 1 NA 1 1 NA 1 1 NA ...
## $ WithinEmailSLAtime: int NA NA NA NA NA NA NA NA NA NA ...
Plot1: Contacts per Market per channel
ggplot(data = data_contact %>% group_by(Market, Channel) %>% summarise(N = n()),
       aes(x = Market, y = N, fill = Channel)) +
  geom_bar(stat = "identity", position = position_dodge(0.9)) +
  geom_text(
    aes(label = N, group = Channel),
    position = position_dodge(0.9),
    vjust = -0.3, size = 2.5,
    color = "black"
  ) +
  labs(title = 'Contacts per Market per channel',
       caption = 'Source: Marketing and Sales Team') +
  xlab("Market Countries") +
  ylab("Number of Contacts") +
  theme_minimal() +
  theme(legend.position = "right")
Plot2: Average Handling time per market per channel
p <- ggdotchart(data_contact %>%
group_by(Market, Channel) %>%
summarise(Mean = round(mean(handling_duration, na.rm = T), 2)),
x = "Market", y = "Mean", color = "Channel",
palette = c("#00AFBB", "#E7B800", "#FC4E07"),
sorting = "descending",
rotate = TRUE,
dot.size = 2,
y.text.col = TRUE) +
labs(title = 'Average Handling Time Per Market Per Channel',
# subtitle = '',
caption = 'Source: Marketing and Sales Team') +
ylab("Average Handling Time") +
theme_cleveland()
ggplotly(p)
# daily contact counts per market and per channel
time_data.Market <- data_contact %>% group_by(DateCreated, Market) %>% summarise(N = n())
time_data.Market$DateCreated <- as.Date(time_data.Market$DateCreated, tryFormats = c("%d/%m/%Y"))
time_data.Channel <- data_contact %>% group_by(DateCreated, Channel) %>% summarise(N = n())
time_data.Channel$DateCreated <- as.Date(time_data.Channel$DateCreated, tryFormats = c("%d/%m/%Y"))
Plot3: Number of Contacts in Time Per Market
# Usual area chart
p <- time_data.Market %>%
ggplot( aes(x=DateCreated, y=N, color=Market)) +
geom_area(fill="#69b3a2", alpha=0.5) +
labs(title = 'Number of Contacts in Time Per Market',
x = "Date",
y = "Contacts",
caption = 'Source: Marketing and Sales Team') +
theme_ipsum()
# Turn it interactive with ggplotly
p <- ggplotly(p)
p
Plot4: Number of Contacts in Time Per Channel
# Usual area chart
p <- time_data.Channel %>%
  ggplot( aes(x=DateCreated, y=N, color=Channel)) +
  geom_area(fill="#69b3a2", alpha=0.5) +
  ylab("Number of Contacts Created") +
  labs(title = 'Number of Contacts in Time Per Channel',
       caption = 'Source: Marketing and Sales Team') +
  theme_ipsum()
# Turn it interactive with ggplotly
p <- ggplotly(p)
p
Plot5: Number of Contacts in Time Per Channel
# Scatter plot with a smoothed trend, faceted by channel
p <- time_data.Channel %>%
  ggplot(aes(x = DateCreated, y = N)) +
  geom_point(size = 0.5) +
  geom_smooth(
    fill = "green",
    col = "darkgreen",
    size = 0.5,
    alpha = 0.1) +
  labs(title = 'Number of Contacts in Time Per Channel',
       y = 'Number of Contacts Created',
       caption = 'Source: Marketing and Sales Team') +
  facet_wrap(~Channel)
# Turn it interactive with ggplotly
p <- ggplotly(p)
p
Let’s also analyze the transactions data.
data_transactions["Date"] <- as.Date(data_transactions$Date)
str(data_transactions)
## 'data.frame': 365 obs. of 9 variables:
## $ Date : Date, format: "2022-01-01" "2022-01-02" ...
## $ BENE : int 436 982 998 666 687 610 503 540 482 565 ...
## $ DK : int 408 1315 1293 638 669 667 613 522 528 654 ...
## $ SE.FI : int 181 497 465 227 247 337 260 210 214 226 ...
## $ FR : int 18 41 36 27 34 23 27 33 33 21 ...
## $ DE.CH : int 20 38 49 35 29 32 27 42 38 54 ...
## $ ITA.ES: int 15 39 40 27 24 47 71 41 37 39 ...
## $ NO.UK : int 396 526 536 1181 904 1059 827 1053 827 978 ...
## $ PL : int 95 166 136 173 176 324 356 163 131 163 ...
Plot6: Distribution of transaction volume
total_transactions <- colSums(data_transactions[,-1])
total_transactions2 <- as.data.frame(total_transactions)
total_transactions2$markets <- row.names(total_transactions2)
library(treemap)
treemap(total_transactions2,
index="markets",
vSize="total_transactions",
type="index",
title = "Relative Market Size"
)
This is the first big part of workforce management planning. The historical volume of contacts together with a forecast of future transactions is used to calculate the future contacts. Therefore, it is necessary to first forecast the transactions for Jan 2022.
However, since the purpose of this task is mainly visualization, we will go ahead and use already-calculated forecast data.
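For reference, here is a minimal sketch of how such a forecast could be produced with the already-loaded forecast package (illustrative only; not necessarily how orders_Jan2022.csv was generated):
# Sketch: forecast 31 days of BENE transactions with auto.arima (illustrative)
bene_ts <- ts(data_transactions$BENE, frequency = 7) # assume weekly seasonality
fit <- forecast::auto.arima(bene_ts)
fc <- forecast::forecast(fit, h = 31) # 31 days ahead
autoplot(fc)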
transactions_Jan2022 <- read.csv("orders_Jan2022.csv")
colnames(transactions_Jan2022)[1] <- "Date"
transactions_Jan2022["Date"] <- as.Date(transactions_Jan2022$Date, tryFormats = c("%d/%m/%Y"))
# library(forestmangr)
transactions_Jan2022 <- forestmangr::round_df(transactions_Jan2022) # round all numeric values to the nearest integer
datatable(transactions_Jan2022, caption='Table 1. Transaction Table',
          rowname = F,
          filter = 'top')
Plot7: Forecasted Transactions Jan 2022
p <- melt(transactions_Jan2022, id.vars="Date") %>%
  ggplot(aes(x = Date, y = value)) +
  geom_point() +
  geom_smooth(
    fill = "blue",
    col = "darkgreen",
    alpha = 0.1) +
  labs(title = 'Forecasted Transactions Jan 2022',
       y = 'Number of Transactions Forecast',
       caption = 'Source: Marketing and Sales Team') +
  facet_wrap(~variable)
# Turn it interactive with ggplotly
p <- ggplotly(p)
p
Contact volumes for Jan 2022
Let’s also calculate the forecast of contact volume for the same period.
The contact volume can be assumed to be directly proportional to the transaction volume. Therefore, by finding the average contact-to-transaction ratio from the year 2021, we can calculate the contact volume for 2022. This of course ignores the dynamism due to special events, the holiday season, and the like. Common practice, however, is to readjust the volumes after the calculation to fit the seasonality of the period.
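As a minimal numeric illustration of the method (made-up figures, not from the dataset):
# Sketch: the ratio method with made-up figures
contacts_2021 <- 8000 # hypothetical chat contacts in 2021
transactions_2021 <- 200000 # hypothetical transactions in 2021
ratio <- contacts_2021 / transactions_2021 # 0.04 contacts per transaction
ratio * 1000 # 1000 forecast transactions imply 40 forecast contacts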
Calculating the contact-to-transaction ratio of the previous year, per market.
Total Transactions per market
total_transactions <-colSums(data_transactions[,-1])
Total Contacts per market per channel
total_contacts <- data_contact %>% group_by(Market, Channel) %>% summarise(
Total_2021 = n())
Contact to transaction ratio
Markets <- colnames(data_transactions[,-1])
Channels <- c("Chat", "Phone", "Email")
tran_cont_ratios <- list() # store the ratios here
# note: the contact data spells markets "SE/FI" while the transaction columns use "SE.FI", hence the sub()
for (market in Markets)
{
  for (channel in Channels)
  {
    tran_cont_ratios[paste(market, channel)] <-
      total_contacts$Total_2021[total_contacts$Market == sub('\\.', '/', market) &
                                  total_contacts$Channel == channel] /
      total_transactions[market] # contacts per transaction
  }
}
(tran_cont_ratios_df <- data.frame(tran_cont_ratios))
## BENE.Chat BENE.Phone BENE.Email DK.Chat DK.Phone DK.Email SE.FI.Chat
## 1 0.03947791 0.03162474 0.08491639 0.02989905 0.0308044 0.06522566 0.04888611
## SE.FI.Phone SE.FI.Email FR.Chat FR.Phone FR.Email DE.CH.Chat
## 1 0.03838934 0.09375463 0.05265674 3.983112e-05 0.07472317 0.05365678
## DE.CH.Phone DE.CH.Email ITA.ES.Chat ITA.ES.Phone ITA.ES.Email NO.UK.Chat
## 1 0.01376802 0.1273883 0.09816622 0.02220187 0.1286324 0.03148949
## NO.UK.Phone NO.UK.Email PL.Chat PL.Phone PL.Email
## 1 0.0324435 0.06811382 0.009172844 0.08714202 0.1191536
Let’s calculate the contacts for Jan 2022 using the calculated ratios. The contacts will be the product of the ratios calculated above and the forecast transactions.
forecasts_thischannel <- data.frame(matrix(ncol = 10, nrow = 31)) # empty dataframe: 31 days x (Date, Channel, 8 markets)
colnames(forecasts_thischannel) <- c("Date","Channel", colnames(transactions_Jan2022)[2:9])
for (channel in Channels)
{
  for (market in Markets)
  {
    for (day in seq(1,31,1))
    {
      # forecast contacts = forecast transactions * contact-to-transaction ratio
      forecasts_thischannel[[market]][day] <- transactions_Jan2022[[market]][day] *
        tran_cont_ratios_df[[paste(market, channel, sep = ".")]]
      forecasts_thischannel$Channel[day] <- channel
      forecasts_thischannel$Date[day] <- day
    }
  }
  if(channel=="Email") forecast_Email <- forecasts_thischannel
  if(channel=="Phone") forecast_Phone <- forecasts_thischannel
  if(channel=="Chat")  forecast_Chat  <- forecasts_thischannel
}
forecast_Jan_2022 <- rbind(forecast_Email,forecast_Phone,forecast_Chat)
Plot8: Forecasted Contacts Jan 2022
Large Markets
# Column chart per market, colored by channel
p <- melt(forecast_Jan_2022, id.vars=c("Date", "Channel")) %>%
  filter(!variable %in% c("FR","DE.CH","ITA.ES")) %>%
  ggplot(aes(x = Date, y = value)) +
  geom_col(aes(color = Channel)) +
  labs(title = 'Forecasted Contacts Jan 2022',
       y = 'Number of Contacts Forecast',
       caption = 'Source: Marketing and Sales Team') +
  facet_wrap(~variable)
p <- ggplotly(p)
p
Smaller Markets
# Column chart per market, colored by channel
p <- melt(forecast_Jan_2022, id.vars=c("Date", "Channel")) %>%
  filter(variable %in% c("FR","DE.CH","ITA.ES")) %>%
  ggplot(aes(x = Date, y = value)) +
  geom_col(aes(color = Channel)) +
  labs(title = 'Forecasted Contacts Jan 2022',
       y = 'Number of Contacts Forecast',
       caption = 'Source: Marketing and Sales Team') +
  facet_wrap(~variable)
p <- ggplotly(p)
p
This is the most important section of workforce management, because at this stage we will figure out how much staffing we need and allocate the workforce to the appropriate markets and channels.
We will need to break the daily volumes down into per-hour interval numbers. It is therefore important to discover the arrival pattern from the historical data.
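Concretely, each hour’s historical share of the daily contacts becomes a multiplier on the forecast daily volume. A minimal illustration (made-up numbers):
# Sketch: applying an hourly arrival share to a daily forecast (made-up numbers)
daily_forecast <- 240 # hypothetical contacts forecast for one day
share_14h <- 0.08 # assume 8% of contacts historically arrive between 14:00 and 15:00
daily_forecast * share_14h # expected contacts in that interval: 19.2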
Plot9: Arrival Pattern
p <- ggboxplot(data_contact, x = "Channel", y = "TimeCreated",
color = "Channel", palette = "jco",
add = "jitter", facet.by = "Market", short.panel.labs = FALSE)
p
Arrival_pattern <- data_contact %>% group_by(Market, Channel, TimeCreated) %>% summarise(Count = n()) %>%
  mutate(freq = 100*round(Count / sum(Count), 3)) # each hour's share of the market/channel volume, in percent
# head(Arrival_pattern)
We can show the same using a dot plot.
# Dot plot of the hourly arrival shares
p <- melt(Arrival_pattern[,-4], id.vars=c("TimeCreated", "Market", "Channel")) %>%
ggplot(aes(x = TimeCreated, y = value, color = Channel)) +
geom_point() +
ylim(0,25)+
labs(title = 'Arrival pattern - per market',
# subtitle = '',
caption = 'Source: Marketing and Sales Team') +
facet_wrap(~Market)
# Turn it interactive with ggplotly
p <- ggplotly(p)
p
Days = seq(1,31,1) # days in January
ContactPerInterval <- data.frame(matrix(ncol = 5, nrow = 0)) # empty dataframe with 5 cols, 0 rows
colnames(ContactPerInterval) <- c("Market","Channel","Date","Time", "Volume")
for (market in Markets)
{
  for (channel in Channels)
  {
    # forecast_Jan_2022 stacks all three channels, so take this channel's block only
    forecast_channel <- forecast_Jan_2022[forecast_Jan_2022$Channel == channel,]
    for (day in Days)
    {
      for (time_int in 1:24)
      {
        # match on the contact data's market spelling ("SE/FI"), not the transaction data's ("SE.FI")
        idx <- Arrival_pattern$Market == sub('\\.', '/', market) &
          Arrival_pattern$Channel == channel &
          Arrival_pattern$TimeCreated == time_int
        ContactPerInterval[nrow(ContactPerInterval)+1,] <- c(market, channel, day, time_int,
          if(sum(idx)==0) 0
          else forecast_channel[[market]][day] * Arrival_pattern$freq[idx] * 0.01
        )
      }
    }
  }
}
First, however, we will need to establish some assumptions about the capacity of an average agent. We will assume the average number of conversations that can be handled within one hour of shift time. This assumption accounts for the unoccupied time between conversations, for well-needed breaks and other reasons.
Average Handling Time
AHT_chat <- 10 # minutes
AHT_email <- 6 # minutes
AHT_Phone <- 8 # minutes
Service Level/time
SL_chat <- 0.9 # 90 percent
SL_email <- 1 # 100 percent
SL_Phone <- 0.9 # 90 percent
SLA_time_chat <- 60 # sec
SLA_time_Phone <- 60 # sec
SLA_time_email <- 12*60*60 # sec = 12 hrs
You may want to include other assumptions such as occupancy, shrinkage, etc. at this stage or later. We have decided to leave that up to you.
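If you do include shrinkage, the usual adjustment is to inflate the raw requirement by the share of paid time agents are unavailable. A sketch with an assumed 30% shrinkage:
# Sketch: adjusting a raw requirement for shrinkage (assumed 30%)
shrinkage <- 0.30 # assumed share lost to holidays, sickness, meetings, training
raw_heads <- 10 # hypothetical requirement from the Erlang C calculation below
ceiling(raw_heads / (1 - shrinkage)) # heads to actually schedule: 15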
The next and last calculation is to find the minimum number of heads required to handle the volume. First, we will calculate the numbers per interval, and then combine the heads to find out the actual heads that need to be hired, assuming each of them is a full-timer. (NB: a full-timer works 8 hrs per day and 40 hrs per week. In customer support centers, some of those days can also be weekend days.)
We will, however, need to calculate per-interval heads first. It is important to remember that we have 3 channels, each with a different handling time, service level agreement, and arrival pattern. Sequential contacts such as emails can be handled one after another, whereas dynamic contacts such as chats and phone calls need immediate answering, because customers usually do not wait on the line longer than a couple of minutes. Longer waiting usually translates to a bad customer experience.
We will define two separate functions, each calculating the number of Erlangs (an Erlang is one hour of offered workload, which equals the number of heads in the case of a 1-hr interval) per interval.
a. Dynamic contacts
The assumption is that contacts arrive according to a Poisson distribution.
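A quick, informal way to check this on the historical data: for a Poisson process, the variance of the hourly counts should be close to their mean. A sketch for one example queue (a dispersion index far above 1 hints at overdispersion):
# Sketch: dispersion check of hourly phone arrivals for one market
hourly_counts <- data_contact %>%
  filter(Market == "DK", Channel == "Phone") %>%
  count(DateCreated, TimeCreated) # contacts per date and hour
var(hourly_counts$n) / mean(hourly_counts$n) # ~1 would be consistent with Poisson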
## Erlang-C Functions from https://lucidmanager.org/data-science/call-centre-workforce-planning-erlang-c-in-r/
intensity <- function(rate, duration, interval = 60) {
(rate / interval) * duration
}
erlang_c <- function(agents, rate, duration, interval = 60) {
int <- intensity(rate, duration, interval)
erlang_b_inv <- 1
for (i in 1:agents) {
erlang_b_inv <- 1 + erlang_b_inv * i / int
}
erlang_b <- 1 / erlang_b_inv
agents * erlang_b / (agents - int * (1 - erlang_b))
}
service_level <- function(agents, rate, duration, target, interval = 60) {
pw <- erlang_c(agents, rate, duration, interval)
int <- intensity(rate, duration, interval)
1 - (pw * exp(-(agents - int) * (target / duration)))
}
resource <- function(rate, duration, target, gos_target, interval = 60) {
  agents <- round(intensity(rate, duration, interval) + 1)
  if (gos_target > 1) gos_target <- gos_target / 100 # accept a percent or a fraction
  gos <- service_level(agents, rate, duration, target, interval)
  while (gos < gos_target) {
    agents <- agents + 1
    gos <- service_level(agents, rate, duration, target, interval)
  }
  return(agents)
}
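For example, with illustrative numbers (not from the dataset): 100 calls arriving per 60-minute interval, an 8-minute average handling time, and a target of answering 90% of calls within 1 minute:
# Illustrative call: 100 calls/hour, 8-minute AHT, 90% answered within 1 minute
resource(rate = 100, duration = 8, target = 1, gos_target = 0.9)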
Let’s take the part of our ‘ContactPerInterval’ data that holds the dynamic contacts.
ContactPerInterval_dynamic <- ContactPerInterval %>% filter(Channel %in% c("Phone","Chat"))
Let’s apply the resource function defined above to all dynamic contact volumes.
# Phone parameters (AHT, SLA) are applied to both phone and chat as a simplification;
# all time arguments are in minutes, and rate is contacts per 60-minute interval.
ContactPerInterval_dynamic$resources <- sapply(as.numeric(ContactPerInterval_dynamic$Volume),
                                               resource, duration = AHT_Phone, target = SLA_time_Phone/60,
                                               gos_target = SL_Phone, interval = 60)
b. Sequential Contacts
ContactPerInterval_sequential <- ContactPerInterval%>% filter(Channel =="Email")
ContactPerInterval_sequential$resources <- pmax(as.numeric(ContactPerInterval_sequential$Volume)/(60/AHT_email), 1) # 60/AHT = emails one head handles per hour; at least 1 head per interval
Let’s combine the two dataframes and find average heads per interval for the whole month.
heads_day_interval <- rbind(ContactPerInterval_dynamic,ContactPerInterval_sequential)
Avg_heads_interval <- heads_day_interval %>% group_by(Market, Channel, Time) %>% summarize(Resources = ceiling(mean(as.numeric(resources)))) # round up since heads
Avg_heads_interval <- Avg_heads_interval[with(Avg_heads_interval, order(Market,Channel,Time)),] # sorted by time
Plot11: Required resources per interval
Avg_heads_interval %>% group_by(Channel, Time) %>% summarize(Resources = sum(Resources)) %>%
ggplot() +
geom_col(aes(x = as.numeric(Time), y = Resources), fill = "tomato4") +
facet_grid(Channel ~ .)+
labs(title= "Required resources per interval")+
scale_x_continuous(name = "Time",
breaks = seq(1,24,1),
labels = seq(1,24,1))+
ylim(0,40)+
geom_text(
aes(x = as.numeric(Time), y = Resources,label = Resources),
# position = position_dodge(0.9),
vjust = -0.3, size = 2.5,
color = "black"
)
We have calculated the required number of heads per interval. This is not the last step, as we also have to translate the per-interval numbers into full-time equivalents. FTE is an abbreviation for full-time equivalent. For example, over a week an FTE is equal to an agent who works 40 hrs (at least in Poland). A daily FTE is an agent who works 8 hrs.
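As a minimal illustration of the conversion (hypothetical profile):
# Sketch: a day that needs 1 head in each of 12 hourly intervals
# sums to 12 interval-hours, i.e. 12 / 8 = 1.5 daily FTEs
interval_heads <- c(rep(1, 12), rep(0, 12)) # hypothetical 24-hour requirement
sum(interval_heads) / 8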
Daily FTEs requirement per market per channel
Avg_heads_daily <- heads_day_interval %>% group_by(Market, Channel, Date) %>% summarize(Resources = sum(as.numeric(resources))/8) # an FTE works 8 hrs per day
datatable(Avg_heads_daily,caption='Table 2. Daily FTEs requirement per market per channel',
rowname = F,
filter = 'top')
Weekly FTEs requirement per market per channel
# map day-of-month (1-31) onto a repeating 7-day weekly pattern
Avg_heads_daily$Day_of_week <- (as.numeric(Avg_heads_daily$Date) %% 7) + 1
Avg_heads_daily_1_week <- Avg_heads_daily %>% group_by(Market, Channel, Day_of_week) %>% summarize(Resources = mean(Resources))
Now we are in a position to know how many heads we need to hire for each channel per market.
total_heads_required <- Avg_heads_daily_1_week %>% group_by(Market, Channel) %>% summarize(Resources = ceiling(sum(Resources)/5)) # since an FTE works 5 days a week
datatable(total_heads_required,caption='Table 3. # of heads we need to hire',
rowname = F,
filter = 'top')
Plot12: Required heads per market (Heat Map)
ggplot(total_heads_required, aes(x = Channel, y = Market)) +
geom_tile(aes(fill = Resources), color = 'black', show.legend = T) +
theme_minimal() +
geom_text(aes(label = Resources), size = 5, fontface = 'bold', color = 'white') +
labs(title = 'Required Heads') +
scale_fill_gradient(low = 'green3', high = 'orange')
Plot: Heads by market (on a map)
Let’s prepare the data.
world <- ne_countries(scale = "medium", returnclass = "sf")
Heads_per_Market <- world %>%
filter(name %in% c("Netherlands", "Belgium", "Germany", "Switzerland",
"Denmark", "France", "Italy", "Spain", "Norway",
"Poland", "Sweden", "Finland", "United Kingdom"))
Heads_per_Market$Heads <- c(37, 15, 15, 27, 15, 15, 11, 15, 15, 34, 15, 21, 10)
Heads_per_Market$Markets <- c("BENE","DE.CH","DE.CH","DK","ITA.ES","SE.FI","FR","NO.UK","ITA.ES","BENE","NO.UK","PL","SE.FI")
# keep only the needed columns
data_Heads_per_Market <- Heads_per_Market[,c("name", "Markets", "geometry", "Heads")]
# assign X and Y label positions at each country's centroid
data_Heads_per_Market <- cbind(data_Heads_per_Market, st_coordinates(st_centroid(data_Heads_per_Market)))
# nudge the France label onto the mainland
data_Heads_per_Market$X[data_Heads_per_Market$name == "France"] <- 2
data_Heads_per_Market$Y[data_Heads_per_Market$name == "France"] <- 46.8
# nudge the Norway label onto the mainland
data_Heads_per_Market$X[data_Heads_per_Market$name == "Norway"] <- 9
data_Heads_per_Market$Y[data_Heads_per_Market$name == "Norway"] <- 60
Plot13: Distribution of Heads across markets
ggplotly(ggplot(data = data_Heads_per_Market) +
geom_sf(aes(fill = Heads, label = Markets)) +
geom_text(aes(X, Y, label = Markets), size = 2,color = "black") +
scale_fill_viridis_c(option = "plasma", trans = "sqrt", guide = guide_colorbar(barwidth = 30), name = NULL) +
coord_sf(xlim = c(-10, 33), ylim = c(30, 70), expand = TRUE) +
labs(title = 'Map of Heads Required') +
theme(legend.position = 'bottom'))