# Load Jan
jan_records <- read.csv("/cloud/project/call-center/Jan.csv")
head(jan_records)
## Date AgentID TotalCalls AnsweredCalls AbandonedCalls SpeedOfAnswer
## 1 2024-01-16 A1504 119 107 12 51.14
## 2 2024-01-10 A1030 170 69 101 46.05
## 3 2024-01-29 A1200 60 52 8 39.03
## 4 2024-01-30 A1289 150 128 22 54.46
## 5 2024-01-06 A1313 110 95 15 32.10
## 6 2024-01-09 A1345 148 59 89 46.16
## CallLength CustomerSatisfaction
## 1 419.67 5
## 2 418.78 4
## 3 497.31 2
## 4 879.29 4
## 5 605.58 1
## 6 647.53 3
#Load Feb
feb_records <- read.csv("/cloud/project/call-center/Feb.csv")
head(feb_records)
## Date AgentID TotalCalls AnsweredCalls AbandonedCalls SpeedOfAnswer
## 1 2024-02-02 A1921 147 87 60 27.02
## 2 2024-02-07 A1186 175 106 69 25.85
## 3 2024-02-11 A1379 58 56 2 17.55
## 4 2024-02-10 A1030 102 84 18 53.72
## 5 2024-02-07 A1920 161 41 120 16.60
## 6 2024-02-25 A1674 93 68 25 53.74
## CallLength CustomerSatisfaction
## 1 221.14 2
## 2 749.26 1
## 3 606.89 5
## 4 522.77 2
## 5 826.24 1
## 6 687.73 4
# Load March
mar_records <- read.csv("/cloud/project/call-center/Mar.csv")
head(mar_records)
## Date AgentID TotalCalls AnsweredCalls AbandonedCalls SpeedOfAnswer
## 1 2024-03-29 A1024 80 57 23 13.46
## 2 2024-03-18 A1058 181 121 60 53.78
## 3 2024-03-11 A1435 191 148 43 39.03
## 4 2024-03-13 A1395 163 60 103 39.83
## 5 2024-03-07 A1678 183 74 109 18.07
## 6 2024-03-07 A1438 84 48 36 51.88
## CallLength CustomerSatisfaction
## 1 759.96 3
## 2 218.84 5
## 3 566.16 2
## 4 312.83 1
## 5 231.97 4
## 6 626.81 5
# Load April
apr_records <- read.csv("/cloud/project/call-center/April.csv")
head(apr_records)
## Date AgentID TotalCalls AnsweredCalls AbandonedCalls SpeedOfAnswer
## 1 2024-04-04 A1583 63 61 2 56.13
## 2 2024-04-16 A1881 195 171 24 29.19
## 3 2024-04-05 A1545 92 44 48 33.93
## 4 2024-04-01 A1684 157 105 52 11.04
## 5 2024-04-11 A1759 97 78 19 42.15
## 6 2024-04-08 A1641 58 50 8 14.60
## CallLength CustomerSatisfaction
## 1 617.43 1
## 2 647.87 4
## 3 327.31 5
## 4 868.88 2
## 5 409.07 4
## 6 428.56 4
# Load May
may_records <- read.csv("/cloud/project/call-center/May.csv")
head(may_records)
## Date AgentID TotalCalls AnsweredCalls AbandonedCalls SpeedOfAnswer
## 1 2024-05-01 A1812 148 48 100 50.09
## 2 2024-05-12 A1577 196 72 124 12.30
## 3 2024-05-26 A1536 146 99 47 14.59
## 4 2024-05-28 A1622 89 88 1 57.01
## 5 2024-05-24 A1292 80 75 5 26.16
## 6 2024-05-15 A1039 103 45 58 51.53
## CallLength CustomerSatisfaction
## 1 295.17 3
## 2 298.04 1
## 3 265.21 4
## 4 290.17 1
## 5 625.37 4
## 6 839.16 4
# Load June
june_records <- read.csv("/cloud/project/call-center/June.csv")
head(june_records)
## Date AgentID TotalCalls AnsweredCalls AbandonedCalls SpeedOfAnswer
## 1 2024-06-08 A1851 178 119 59 38.79
## 2 2024-06-15 A1846 152 53 99 29.96
## 3 2024-06-17 A1178 70 52 18 49.43
## 4 2024-06-29 A1029 180 109 71 10.21
## 5 2024-06-12 A1077 57 54 3 17.46
## 6 2024-06-26 A1804 186 86 100 36.90
## CallLength CustomerSatisfaction
## 1 222.54 1
## 2 707.99 5
## 3 606.64 1
## 4 436.68 2
## 5 349.67 1
## 6 603.82 5
# Agents with highest % of answered calls versus total calls
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
# firstly, create months column
jan_records <- jan_records %>% mutate(Month = 'Jan')
feb_records <- feb_records %>% mutate(Month = 'Feb')
mar_records <- mar_records %>% mutate(Month = 'Mar')
apr_records <- apr_records %>% mutate(Month = 'Apr')
may_records <- may_records %>% mutate(Month = 'May')
june_records <- june_records %>% mutate(Month ='Jun')
# secondly combine datasets jan to june
combined_df <- rbind(jan_records,feb_records,mar_records,apr_records,may_records,june_records)
# sum calls convert to % and group by agent
top_agent <- combined_df %>%
group_by (AgentID,Month)%>%
summarise (total_answered = sum(AnsweredCalls,na.rm=TRUE),
total_calls = sum(TotalCalls,na.rm=TRUE),
percentage_answered = (total_answered/total_calls * 100)
,.groups='drop')%>%
arrange(desc(total_answered))%>%
slice(1:6)
# print results
print(top_agent)
## # A tibble: 6 × 5
## AgentID Month total_answered total_calls percentage_answered
## <chr> <chr> <int> <int> <dbl>
## 1 A1513 May 404 569 71.0
## 2 A1781 Jun 400 533 75.0
## 3 A1578 Mar 397 527 75.3
## 4 A1865 Jan 393 487 80.7
## 5 A1466 Jun 387 497 77.9
## 6 A1145 Apr 381 571 66.7
# visualize output
ggplot(data=top_agent, mapping =aes(x=AgentID, y=percentage_answered, color=Month))+geom_col()+
labs(title='Agents with the highest answered call percentage',
x='Agent ID',
y= '% Calls Answered') + theme_minimal()

# Agents with higher abandoned calls
high_abandonment <- combined_df %>%
group_by (AgentID,Month)%>%
summarise(total_abandoned = sum(AbandonedCalls,na.rm=TRUE),.groups='drop')%>%
group_by (Month)%>%
slice_max(total_abandoned,n=1)
# print results
print(high_abandonment)
## # A tibble: 6 × 3
## # Groups: Month [6]
## AgentID Month total_abandoned
## <chr> <chr> <int>
## 1 A1345 Apr 235
## 2 A1713 Feb 233
## 3 A1745 Jan 240
## 4 A1420 Jun 216
## 5 A1420 Mar 250
## 6 A1252 May 345
# visualize output
ggplot(data=high_abandonment, mapping=aes(x=AgentID, y=total_abandoned, fill=Month))+
geom_col()+ labs(title='Agents with highest abandon')

# Monthly Avg customer satisfaction, and how call length & speed of answer contributed to it
monthly_satisfaction <- combined_df %>%
group_by (Month)%>%
summarise(avg_satisfaction = mean(CustomerSatisfaction,na.rm=TRUE),
avg_call_length = mean(CallLength,na.rm=TRUE),
avg_speed_answer = mean(SpeedOfAnswer,na.rm=TRUE)
,.groups='drop')
print(monthly_satisfaction)
## # A tibble: 6 × 4
## Month avg_satisfaction avg_call_length avg_speed_answer
## <chr> <dbl> <dbl> <dbl>
## 1 Apr 2.97 548. 34.9
## 2 Feb 2.98 542. 33.4
## 3 Jan 3.07 547. 35.0
## 4 Jun 3.06 532. 34.2
## 5 Mar 2.98 526. 35.0
## 6 May 3.07 547. 34.2
#What trends can be identified in total calls and abandoned calls month-over-month?
trend_analysis <- combined_df %>%
group_by(Month)%>%
summarise(avg_total_calls = mean(TotalCalls),
avg_abandoned_calls = mean(AbandonedCalls),.groups='drop')
print(trend_analysis)
## # A tibble: 6 × 3
## Month avg_total_calls avg_abandoned_calls
## <chr> <dbl> <dbl>
## 1 Apr 125. 41.7
## 2 Feb 123. 41.4
## 3 Jan 127. 41.1
## 4 Jun 126. 42.4
## 5 Mar 125. 42.4
## 6 May 125. 43.9
# Visualize output
ggplot(data=trend_analysis,mapping=aes(x=avg_total_calls,y=avg_abandoned_calls, fill=Month))+
geom_col()+labs(title='Total calls and Abandoned Calls',
x='avg_total_calls')+
theme_minimal()

# Which agent has the highest speed of answering calls in each month?
speed <- combined_df %>%
group_by (AgentID,Month)%>%
summarise(avg_speed = mean(SpeedOfAnswer))%>%
group_by (Month)%>%
slice_max(avg_speed,n=1)
## `summarise()` has grouped output by 'AgentID'. You can override using the
## `.groups` argument.
# print results
print(speed)
## # A tibble: 6 × 3
## # Groups: Month [6]
## AgentID Month avg_speed
## <chr> <chr> <dbl>
## 1 A1028 Apr 59.9
## 2 A1277 Feb 59.8
## 3 A1816 Jan 59.9
## 4 A1847 Jun 60.0
## 5 A1906 Mar 59.9
## 6 A1944 May 59.6
# How does that correlate to customer satisfaction?
cor.data <- cor(combined_df$CustomerSatisfaction,combined_df$SpeedOfAnswer)
print(cor.data)
## [1] -0.0427999