#including libs
library(dplyr)
library(ggplot2)
# Load the raw occupation data from the CSV file in the working directory.
occupation <- read.csv("occupation_data.csv")
# Keep only the rows that have no NA in any column.
occupation_complete <- occupation[complete.cases(occupation), ]
# `cleandata` is the same NA-free table, kept under its second name.
cleandata <- occupation_complete
# Subset of the cleaned data whose occupation_category is "OFFICE".
just_office <- filter(cleandata, occupation_category == "OFFICE")
# Frequency table: number of rows in each occupation category.
table(occupation_complete$occupation_category)
# Console output of the table() call above (pasted for reference):
# AGRICULTURAL             ARTS                BUSINESS            COMPUTATIONAL
# 1                        3                   12                  7
# CULINARY                 EDUCATION           ENGINEERING         GROUNDSKEEPING
# 7                        4                   2                   3
# HEALTHCARE PROFESSIONAL  HEALTHCARE SUPPORT  LEGAL               MANAGEMENT
# 7                        1                   1                   16
# OFFICE                   PRODUCTION          PROTECTIVE SERVICE  SALES
# 17                       11                  3                   11
# SCIENCE                  SERVICE             SOCIAL SERVICE      TRANSPORTATION
# 3                        3                   3                   4
#using a frequency table to assess the number of categories
# This graph shows the distribution of weekly earnings across all categories.
# Histogram of the All_weekly column over every row of the cleaned data,
# requesting 40 bins.
hist(
  occupation_complete$All_weekly,
  main = "Distribution of Earnings - All Categories",
  xlab = "Weekly Earnings",
  ylab = "Frequency",
  col = "lightpink",
  breaks = 40
)
# HISTOGRAM -- distribution of earnings within the management category
# (All_weekly, i.e. both genders together).
# Keep only the rows whose occupation_category is "MANAGEMENT".
management <- filter(occupation_complete, occupation_category == "MANAGEMENT")
# Weekly earnings of the management rows, stored as a plain vector.
management_earnings <- management$All_weekly
hist(
  management_earnings,
  main = "Distribution of Earnings - Management",
  xlab = "Weekly Earnings",
  ylab = "Frequency",
  col = "lightyellow",
  breaks = 10
)
# Observation from the original analysis: the data points all appear to fall
# into the same range of frequency.
# Bar plot -- gender balance within the management category.
# Total the female and male worker counts over all management rows.
female_count <- sum(management$F_workers)
male_count <- sum(management$M_workers)
# Two-row data frame (one row per gender) holding the totals to plot.
gender_data <- data.frame(
  Gender = c("Female", "Male"),
  Count = c(female_count, male_count)
)
ggplot(data = gender_data, aes(x = Gender, y = Count, fill = Gender)) +
  # geom_col() takes bar heights directly from y; it is the documented
  # shortcut for geom_bar(stat = "identity").
  geom_col(width = 0.6) +
  labs(
    title = "Gender Balance in Management",
    x = "Gender", y = "Count"
  ) +
  # Colours are named so each gender keeps its colour regardless of the
  # order of the levels in `Gender`.
  scale_fill_manual(values = c(Female = "pink", Male = "skyblue")) +
  theme_minimal()