Analysis of Call Center Data

# Load Jan
jan_records <- read.csv("/cloud/project/call-center/Jan.csv")
head(jan_records)

##         Date AgentID TotalCalls AnsweredCalls AbandonedCalls SpeedOfAnswer
## 1 2024-01-16   A1504        119           107             12         51.14
## 2 2024-01-10   A1030        170            69            101         46.05
## 3 2024-01-29   A1200         60            52              8         39.03
## 4 2024-01-30   A1289        150           128             22         54.46
## 5 2024-01-06   A1313        110            95             15         32.10
## 6 2024-01-09   A1345        148            59             89         46.16
##   CallLength CustomerSatisfaction
## 1     419.67                    5
## 2     418.78                    4
## 3     497.31                    2
## 4     879.29                    4
## 5     605.58                    1
## 6     647.53                    3

#Load Feb
feb_records <- read.csv("/cloud/project/call-center/Feb.csv")
head(feb_records)

##         Date AgentID TotalCalls AnsweredCalls AbandonedCalls SpeedOfAnswer
## 1 2024-02-02   A1921        147            87             60         27.02
## 2 2024-02-07   A1186        175           106             69         25.85
## 3 2024-02-11   A1379         58            56              2         17.55
## 4 2024-02-10   A1030        102            84             18         53.72
## 5 2024-02-07   A1920        161            41            120         16.60
## 6 2024-02-25   A1674         93            68             25         53.74
##   CallLength CustomerSatisfaction
## 1     221.14                    2
## 2     749.26                    1
## 3     606.89                    5
## 4     522.77                    2
## 5     826.24                    1
## 6     687.73                    4

# Load March
mar_records <- read.csv("/cloud/project/call-center/Mar.csv")
head(mar_records)

##         Date AgentID TotalCalls AnsweredCalls AbandonedCalls SpeedOfAnswer
## 1 2024-03-29   A1024         80            57             23         13.46
## 2 2024-03-18   A1058        181           121             60         53.78
## 3 2024-03-11   A1435        191           148             43         39.03
## 4 2024-03-13   A1395        163            60            103         39.83
## 5 2024-03-07   A1678        183            74            109         18.07
## 6 2024-03-07   A1438         84            48             36         51.88
##   CallLength CustomerSatisfaction
## 1     759.96                    3
## 2     218.84                    5
## 3     566.16                    2
## 4     312.83                    1
## 5     231.97                    4
## 6     626.81                    5

# Load April
apr_records <- read.csv("/cloud/project/call-center/April.csv")
head(apr_records)

##         Date AgentID TotalCalls AnsweredCalls AbandonedCalls SpeedOfAnswer
## 1 2024-04-04   A1583         63            61              2         56.13
## 2 2024-04-16   A1881        195           171             24         29.19
## 3 2024-04-05   A1545         92            44             48         33.93
## 4 2024-04-01   A1684        157           105             52         11.04
## 5 2024-04-11   A1759         97            78             19         42.15
## 6 2024-04-08   A1641         58            50              8         14.60
##   CallLength CustomerSatisfaction
## 1     617.43                    1
## 2     647.87                    4
## 3     327.31                    5
## 4     868.88                    2
## 5     409.07                    4
## 6     428.56                    4

# Load May
may_records <- read.csv("/cloud/project/call-center/May.csv")
head(may_records)

##         Date AgentID TotalCalls AnsweredCalls AbandonedCalls SpeedOfAnswer
## 1 2024-05-01   A1812        148            48            100         50.09
## 2 2024-05-12   A1577        196            72            124         12.30
## 3 2024-05-26   A1536        146            99             47         14.59
## 4 2024-05-28   A1622         89            88              1         57.01
## 5 2024-05-24   A1292         80            75              5         26.16
## 6 2024-05-15   A1039        103            45             58         51.53
##   CallLength CustomerSatisfaction
## 1     295.17                    3
## 2     298.04                    1
## 3     265.21                    4
## 4     290.17                    1
## 5     625.37                    4
## 6     839.16                    4

# Load June
june_records <- read.csv("/cloud/project/call-center/June.csv")
head(june_records)

##         Date AgentID TotalCalls AnsweredCalls AbandonedCalls SpeedOfAnswer
## 1 2024-06-08   A1851        178           119             59         38.79
## 2 2024-06-15   A1846        152            53             99         29.96
## 3 2024-06-17   A1178         70            52             18         49.43
## 4 2024-06-29   A1029        180           109             71         10.21
## 5 2024-06-12   A1077         57            54              3         17.46
## 6 2024-06-26   A1804        186            86            100         36.90
##   CallLength CustomerSatisfaction
## 1     222.54                    1
## 2     707.99                    5
## 3     606.64                    1
## 4     436.68                    2
## 5     349.67                    1
## 6     603.82                    5

# Agents with highest % of answered calls versus total calls
library(tidyverse)

## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(ggplot2)

# firstly, create months column
jan_records <- jan_records %>% mutate(Month = 'Jan')
feb_records <- feb_records %>% mutate(Month = 'Feb')
mar_records <- mar_records %>% mutate(Month = 'Mar')
apr_records <- apr_records %>% mutate(Month = 'Apr')
may_records <- may_records %>% mutate(Month = 'May')
june_records <- june_records %>% mutate(Month ='Jun')

# secondly combine datasets jan to june
combined_df <- rbind(jan_records,feb_records,mar_records,apr_records,may_records,june_records)

# sum calls convert to % and group by agent
top_agent <- combined_df %>%
  group_by (AgentID,Month)%>%
  summarise (total_answered = sum(AnsweredCalls,na.rm=TRUE),
             total_calls = sum(TotalCalls,na.rm=TRUE),
             percentage_answered = (total_answered/total_calls * 100)
             ,.groups='drop')%>%
  arrange(desc(total_answered))%>%
  slice(1:6)

# print results
print(top_agent)

## # A tibble: 6 × 5
##   AgentID Month total_answered total_calls percentage_answered
##   <chr>   <chr>          <int>       <int>               <dbl>
## 1 A1513   May              404         569                71.0
## 2 A1781   Jun              400         533                75.0
## 3 A1578   Mar              397         527                75.3
## 4 A1865   Jan              393         487                80.7
## 5 A1466   Jun              387         497                77.9
## 6 A1145   Apr              381         571                66.7

# visualize output
ggplot(data=top_agent, mapping =aes(x=AgentID, y=percentage_answered, color=Month))+geom_col()+
  labs(title='Agents with the highest answered call percentage',
              x='Agent ID',
              y= '% Calls Answered') + theme_minimal()

# Agents with higher abandoned calls
high_abandonment <- combined_df %>%
  group_by (AgentID,Month)%>%
  summarise(total_abandoned = sum(AbandonedCalls,na.rm=TRUE),.groups='drop')%>%
    group_by (Month)%>%
  slice_max(total_abandoned,n=1)

# print results
print(high_abandonment)

## # A tibble: 6 × 3
## # Groups:   Month [6]
##   AgentID Month total_abandoned
##   <chr>   <chr>           <int>
## 1 A1345   Apr               235
## 2 A1713   Feb               233
## 3 A1745   Jan               240
## 4 A1420   Jun               216
## 5 A1420   Mar               250
## 6 A1252   May               345

# visualize output
ggplot(data=high_abandonment, mapping=aes(x=AgentID, y=total_abandoned, fill=Month))+
  geom_col()+ labs(title='Agents with highest abandon')

# Monthly Avg customer satisfaction, and how call length & speed of answer contributed to it

monthly_satisfaction <- combined_df %>%
  group_by (Month)%>%
  summarise(avg_satisfaction = mean(CustomerSatisfaction,na.rm=TRUE),
            avg_call_length = mean(CallLength,na.rm=TRUE),
            avg_speed_answer = mean(SpeedOfAnswer,na.rm=TRUE)
            ,.groups='drop')

print(monthly_satisfaction)

## # A tibble: 6 × 4
##   Month avg_satisfaction avg_call_length avg_speed_answer
##   <chr>            <dbl>           <dbl>            <dbl>
## 1 Apr               2.97            548.             34.9
## 2 Feb               2.98            542.             33.4
## 3 Jan               3.07            547.             35.0
## 4 Jun               3.06            532.             34.2
## 5 Mar               2.98            526.             35.0
## 6 May               3.07            547.             34.2

#What trends can be identified in total calls and abandoned calls month-over-month? 
trend_analysis <- combined_df %>%
  group_by(Month)%>%
  summarise(avg_total_calls = mean(TotalCalls),
            avg_abandoned_calls = mean(AbandonedCalls),.groups='drop')

print(trend_analysis)

## # A tibble: 6 × 3
##   Month avg_total_calls avg_abandoned_calls
##   <chr>           <dbl>               <dbl>
## 1 Apr              125.                41.7
## 2 Feb              123.                41.4
## 3 Jan              127.                41.1
## 4 Jun              126.                42.4
## 5 Mar              125.                42.4
## 6 May              125.                43.9

# Visualize output
ggplot(data=trend_analysis,mapping=aes(x=avg_total_calls,y=avg_abandoned_calls, fill=Month))+
  geom_col()+labs(title='Total calls and Abandoned Calls',
                               x='avg_total_calls')+
  theme_minimal()

# Which agent has the highest speed of answering calls in each month?
  speed <- combined_df %>%
    group_by (AgentID,Month)%>%
    summarise(avg_speed = mean(SpeedOfAnswer))%>%
    group_by (Month)%>%
    slice_max(avg_speed,n=1)

## `summarise()` has grouped output by 'AgentID'. You can override using the
## `.groups` argument.

# print results
  print(speed)

## # A tibble: 6 × 3
## # Groups:   Month [6]
##   AgentID Month avg_speed
##   <chr>   <chr>     <dbl>
## 1 A1028   Apr        59.9
## 2 A1277   Feb        59.8
## 3 A1816   Jan        59.9
## 4 A1847   Jun        60.0
## 5 A1906   Mar        59.9
## 6 A1944   May        59.6

# How does that correlate to customer satisfaction?
  cor.data <- cor(combined_df$CustomerSatisfaction,combined_df$SpeedOfAnswer)
  print(cor.data)

## [1] -0.0427999

Analysis of Call Center Data

Andrew

2024-09-20