Training-Data.R

# Load the cleaned dataset
library(readxl)

## Warning: package 'readxl' was built under R version 4.4.2

Cleaned_TNA_Questionnaire <- read_excel("C:/Users/Mr Cowboy/Downloads/Cleaned_TNA_Questionnaire.xlsx")
data <- Cleaned_TNA_Questionnaire
attach(data)
#str(data)
head(data)

## # A tibble: 6 × 37
##      ID Name  Gender Age              Directorate Department `Work Unit/section`
##   <dbl> <lgl> <chr>  <chr>            <chr>       <chr>      <chr>              
## 1     1 NA    Male   36 – 45 years    Finance an… Finance a… Finance and Account
## 2     2 NA    Male   36 – 45 years    Finance an… Finance a… Finance and Accoun…
## 3     3 NA    Female 36 – 45 years    Human reso… Records m… Registry           
## 4     4 NA    Male   46 – 55 years    Registrati… Benefits   Data               
## 5     5 NA    Female 25 years & below Registrati… Benefits   Customer care      
## 6     6 NA    Female 26 – 35 years    Human reso… Administr… Kitchen            
## # ℹ 30 more variables: `Date of first appointment` <dttm>,
## #   `Designation at first appointment` <chr>, `Current Designation` <chr>,
## #   `Date of current appointment` <dttm>,
## #   `Length of Service at the department/work section/Institution/organization` <chr>,
## #   `Length of service in the public/private Sector` <chr>,
## #   `Academic qualifications (Tick the highest level attained)` <chr>,
## #   `If selected others kindly specify` <chr>, …

# Load required libraries
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)

# Summary of Gender
gender_summary <- data %>% group_by(Gender) %>% summarise(Count = n())
print(gender_summary)

## # A tibble: 2 × 2
##   Gender Count
##   <chr>  <int>
## 1 Female    17
## 2 Male      23

# Summary of Age
age_summary <- data %>% group_by(Age) %>% summarise(Count = n())
print(age_summary)

## # A tibble: 5 × 2
##   Age                Count
##   <chr>              <int>
## 1 25 years & below       1
## 2 26 – 35 years         10
## 3 36 – 45 years         16
## 4 46 – 55 years         11
## 5 55 years and above     2

# Save summaries to CSV
write.csv(gender_summary, "Gender_Summary.csv", row.names = FALSE)
write.csv(age_summary, "Age_Summary.csv", row.names = FALSE)

# Summary of Directorate
directorate_summary <- data %>% group_by(Directorate) %>% summarise(Count = n())
print(directorate_summary)

## # A tibble: 34 × 2
##    Directorate                              Count
##    <chr>                                    <int>
##  1 Adm                                          1
##  2 BENEFITS                                     1
##  3 BENEFITS ADMIISTRATION                       1
##  4 BENEFITS ADMINISTRATION                      1
##  5 BENEFITS AND REGISTRATION                    1
##  6 Benefits                                     1
##  7 Benefits Administration                      3
##  8 Benefits Administration and Registration     1
##  9 CEO                                          1
## 10 CORPORATION SECRETARY & LEGAL SERVICES       1
## # ℹ 24 more rows

write.csv(directorate_summary, "Directorate_Summary.csv", row.names = FALSE)

# Summary of Department
department_summary <- data %>% group_by(Department) %>% summarise(Count = n())
print(department_summary)

## # A tibble: 31 × 2
##    Department                             Count
##    <chr>                                  <int>
##  1 ADMINISTRATION                             2
##  2 Administration                             1
##  3 BENEFITS                                   3
##  4 Benefits                                   4
##  5 Benefits & Customer Experience             1
##  6 Benefits Administration                    1
##  7 Benefits department                        1
##  8 CEO                                        1
##  9 CORPORATION SECRETARY & LEGAL SERVICES     1
## 10 Corporate Communication                    1
## # ℹ 21 more rows

# Summary of sponsor of all courses 
sponsor_summary <- data %>% group_by(`Indicate sponsor of all the courses/seminars/workshops you have attended in the last two years. (e.g. Employer, Self etc)`) %>% summarise(Count = n())
print(sponsor_summary)

## # A tibble: 18 × 2
##    Indicate sponsor of all the courses/seminars/workshops you have atten…¹ Count
##    <chr>                                                                   <int>
##  1 "1. The Actuarial Society of Kenya (TASK) 9th Annual seminar - PSSF\r\…     1
##  2 "1.PSSF"                                                                    1
##  3 "EMPLOYER"                                                                  1
##  4 "Employer"                                                                 10
##  5 "Employer \r\nSelf"                                                         1
##  6 "NONE"                                                                      1
##  7 "None"                                                                      2
##  8 "None sponsored me"                                                         1
##  9 "PSSF"                                                                      7
## 10 "PSSS"                                                                      1
## 11 "Pss fund."                                                                 1
## 12 "Pssf"                                                                      2
## 13 "Pssf \r\nThe National Treasury"                                            1
## 14 "Public service superannuation fund"                                        1
## 15 "Self"                                                                      3
## 16 "Self sponsorship"                                                          1
## 17 "Unknown"                                                                   4
## 18 "employer"                                                                  1
## # ℹ abbreviated name:
## #   ¹`Indicate sponsor of all the courses/seminars/workshops you have attended in the last two years. (e.g. Employer, Self etc)`

# Summary of Work unit
workunit_summary <- data %>% group_by(`Work Unit/section`) %>% summarise(Count = n())
print(workunit_summary)

## # A tibble: 33 × 2
##    `Work Unit/section`                    Count
##    <chr>                                  <int>
##  1 Administration                             1
##  2 BENEFITS                                   2
##  3 Benefits                                   2
##  4 Benefits Administration                    1
##  5 Benefits Claims Unit                       1
##  6 CEO                                        1
##  7 CEO'S OFFICE                               2
##  8 CORPORATION SECRETARY & LEGAL SERVICES     1
##  9 CUSTOMER EXPERIENCE                        1
## 10 Clerical                                   1
## # ℹ 23 more rows

# Combine summaries into one list
summary_report <- list(
  Gender = gender_summary,
  Age = age_summary,
  Directorate = directorate_summary,
  Department = department_summary,
  WorkUnit = workunit_summary                                                                                                                                        
  )

# Save the summaries to a single file
print(summary_report)

## $Gender
## # A tibble: 2 × 2
##   Gender Count
##   <chr>  <int>
## 1 Female    17
## 2 Male      23
## 
## $Age
## # A tibble: 5 × 2
##   Age                Count
##   <chr>              <int>
## 1 25 years & below       1
## 2 26 – 35 years         10
## 3 36 – 45 years         16
## 4 46 – 55 years         11
## 5 55 years and above     2
## 
## $Directorate
## # A tibble: 34 × 2
##    Directorate                              Count
##    <chr>                                    <int>
##  1 Adm                                          1
##  2 BENEFITS                                     1
##  3 BENEFITS ADMIISTRATION                       1
##  4 BENEFITS ADMINISTRATION                      1
##  5 BENEFITS AND REGISTRATION                    1
##  6 Benefits                                     1
##  7 Benefits Administration                      3
##  8 Benefits Administration and Registration     1
##  9 CEO                                          1
## 10 CORPORATION SECRETARY & LEGAL SERVICES       1
## # ℹ 24 more rows
## 
## $Department
## # A tibble: 31 × 2
##    Department                             Count
##    <chr>                                  <int>
##  1 ADMINISTRATION                             2
##  2 Administration                             1
##  3 BENEFITS                                   3
##  4 Benefits                                   4
##  5 Benefits & Customer Experience             1
##  6 Benefits Administration                    1
##  7 Benefits department                        1
##  8 CEO                                        1
##  9 CORPORATION SECRETARY & LEGAL SERVICES     1
## 10 Corporate Communication                    1
## # ℹ 21 more rows
## 
## $WorkUnit
## # A tibble: 33 × 2
##    `Work Unit/section`                    Count
##    <chr>                                  <int>
##  1 Administration                             1
##  2 BENEFITS                                   2
##  3 Benefits                                   2
##  4 Benefits Administration                    1
##  5 Benefits Claims Unit                       1
##  6 CEO                                        1
##  7 CEO'S OFFICE                               2
##  8 CORPORATION SECRETARY & LEGAL SERVICES     1
##  9 CUSTOMER EXPERIENCE                        1
## 10 Clerical                                   1
## # ℹ 23 more rows

# Gender Distribution
gender_plot <- ggplot(data, aes(x = Gender)) +
  geom_bar(fill = "blue", alpha = 0.7) +
  theme_minimal() +
  labs(title = "Gender Distribution", x = "Gender", y = "Count")
print(gender_plot)

# Save the plot
ggsave("Gender_Distribution.png", plot = gender_plot, width = 6, height = 4)

# Age Distribution
age_plot <- ggplot(data, aes(x = Age)) +
  geom_bar(fill = "green", alpha = 0.7) +
  theme_minimal() +
  labs(title = "Age Distribution", x = "Age Group", y = "Count")
print(age_plot)

# Save the plot
ggsave("Age_Distribution.png", plot = age_plot, width = 6, height = 4)

# Bar plot for Directorate distribution
directorate_plot <- ggplot(data, aes(x = Directorate)) +
  geom_bar(fill = "purple", alpha = 0.7) +
  theme_minimal() +
  labs(title = "Directorate Distribution", x = "Directorate", y = "Count") +
  coord_flip()
print(directorate_plot)

ggsave("Directorate_Distribution.png", plot = directorate_plot, width = 6, height = 4)

# Bar plot for Sponsor of all courses distribution
sponsor_plot <- ggplot(data, aes(x = `Indicate sponsor of all the courses/seminars/workshops you have attended in the last two years. (e.g. Employer, Self etc)`)) +
  geom_bar(fill = "red", alpha = 0.7) +
  theme_minimal() +
  labs(title = "Sponsor of all courses Distribution", x = "Sponsor", y = "Count") +
  coord_flip()
print(sponsor_plot)

# save the plot
ggsave("Directorate_Distribution.png", plot = directorate_plot, width = 6, height = 4)

Training-Data.R

Mr Cowboy

2024-12-09