#importing data

library(dplyr)
library(ggplot2)
# Load the raw occupation data; the path is relative to the working directory.
occupation <- read.csv("occupation_data.csv")

# Keep only rows with no NA in any column. The filter is evaluated once and
# `cleandata` is an alias of the same result, so the two names cannot drift
# apart (previously the identical filter was computed twice).
occupation_complete <- occupation[complete.cases(occupation), ]
cleandata <- occupation_complete

# Rows whose occupation category is exactly "OFFICE".
just_office <- cleandata %>% filter(occupation_category == "OFFICE")

# Frequency table of occupation categories (auto-printed at top level).
table(occupation_complete$occupation_category)

#>            AGRICULTURAL                    ARTS                BUSINESS 
#>                       1                       3                      12 
#>           COMPUTATIONAL                CULINARY               EDUCATION 
#>                       7                       7                       4 
#>             ENGINEERING          GROUNDSKEEPING HEALTHCARE PROFESSIONAL 
#>                       2                       3                       7 
#>      HEALTHCARE SUPPORT                   LEGAL              MANAGEMENT 
#>                       1                       1                      16 
#>                  OFFICE              PRODUCTION      PROTECTIVE SERVICE 
#>                      17                      11                       3 
#>                   SALES                 SCIENCE                 SERVICE 
#>                      11                       3                       3 
#>          SOCIAL SERVICE          TRANSPORTATION 
#>                       3                       4 
#using a frequency table to assess the number of categories 
# Histogram of the All_weekly column over every complete row, with all
# occupation categories pooled together.
hist(
  occupation_complete$All_weekly,
  main = "Distribution of Earnings - All Categories",
  xlab = "Weekly Earnings",
  ylab = "Frequency",
  col = "lightpink",
  breaks = 40
)

# Histogram: distribution of weekly earnings (All_weekly) within a single
# category, here MANAGEMENT.

# Keep only the rows whose category is MANAGEMENT.
management <- filter(
  occupation_complete,
  occupation_category %in% "MANAGEMENT"
)

# Weekly earnings of those rows, saved as a plain vector for reference.
management_earnings <- management$All_weekly

hist(
  management_earnings,
  main = "Distribution of Earnings - Management",
  xlab = "Weekly Earnings",
  ylab = "Frequency",
  col = "lightyellow",
  breaks = 10
)


# Observation: the histogram bars all appear to fall within the same range of frequencies.
# Bar plot: compares the total number of female and male workers in the
# MANAGEMENT category.

# Total workers of each gender, summed over all rows of `management`.
female_count <- sum(management$F_workers)
male_count <- sum(management$M_workers)

# Two-row data frame (one row per gender) to feed ggplot.
gender_data <- data.frame(
  Gender = c("Female", "Male"),
  Count = c(female_count, male_count)
)

ggplot(data = gender_data, aes(x = Gender, y = Count, fill = Gender)) +
  # geom_col() draws bar heights straight from `Count`; it is the direct
  # equivalent of geom_bar(stat = "identity").
  geom_col(width = 0.6) +
  labs(
    title = "Gender Balance in Management",
    x = "Gender",
    y = "Count"
  ) +
  # Colours are keyed by name so the mapping does not rely on the implicit
  # alphabetical ordering of the Gender values.
  scale_fill_manual(values = c(Female = "pink", Male = "skyblue")) +
  theme_minimal()

LS0tCnRpdGxlOiAiTWlkdGVybSBBc3NpZ25tZW50IgpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sKZWRpdG9yX29wdGlvbnM6IAogIG1hcmtkb3duOiAKICAgIHdyYXA6IDcyCi0tLQoKYGBge3J9CiNpbXBvcnRpbmcgZGF0YQoKbGlicmFyeShkcGx5cikKbGlicmFyeShnZ3Bsb3QyKQpvY2N1cGF0aW9uIDwtIHJlYWQuY3N2KCJvY2N1cGF0aW9uX2RhdGEuY3N2IikKCm9jY3VwYXRpb25fY29tcGxldGUgPC0gb2NjdXBhdGlvbltjb21wbGV0ZS5jYXNlcyhvY2N1cGF0aW9uKSxdCiNyZW1vdmluZyBhbGwgbnVsbCB2YWx1ZXMKCmNsZWFuZGF0YSA8LSBvY2N1cGF0aW9uW2NvbXBsZXRlLmNhc2VzKG9jY3VwYXRpb24pLF0KCmp1c3Rfb2ZmaWNlIDwtIGNsZWFuZGF0YSAlPiUgZmlsdGVyKG9jY3VwYXRpb25fY2F0ZWdvcnkgPT0gIk9GRklDRSIpCgoKdGFibGUob2NjdXBhdGlvbl9jb21wbGV0ZSRvY2N1cGF0aW9uX2NhdGVnb3J5KQojdXNpbmcgYSBmcmVxdWVuY3kgdGFibGUgdG8gYXNzZXNzIHRoZSBudW1iZXIgb2YgY2F0ZWdvcmllcyAKYGBgCgpgYGB7cn0KaGlzdChvY2N1cGF0aW9uX2NvbXBsZXRlJEFsbF93ZWVrbHksIGJyZWFrcyA9IDQwLCBjb2wgPSAibGlnaHRwaW5rIiwgbWFpbiA9ICJEaXN0cmlidXRpb24gb2YgRWFybmluZ3MgLSBBbGwgQ2F0ZWdvcmllcyIsCiAgICAgeGxhYiA9ICJXZWVrbHkgRWFybmluZ3MiLCB5bGFiID0gIkZyZXF1ZW5jeSIpCmBgYApgYGB7cn0KI0hJU1RPR1JBTS0tdmlzdWFsaXplcyB0aGUgZGlzdHJpYnV0aW9uIG9mIGVhcm5pbmdzIHdpdGhpbiB0aGUgY2F0ZWdvcnkgZm9yIGJvdGggZ2VuZGVycwoKbWFuYWdlbWVudCA8LSBvY2N1cGF0aW9uX2NvbXBsZXRlICU+JSBmaWx0ZXIob2NjdXBhdGlvbl9jYXRlZ29yeSAlaW4lICgiTUFOQUdFTUVOVCIpKSAjZmlsdGVyaW5nIG1hbmFnZW1lbnQgb2NjdXBhdGlvbnMgb25seQptYW5hZ2VtZW50X2Vhcm5pbmdzIDwtIG1hbmFnZW1lbnQkQWxsX3dlZWtseSAjc2F2aW5nIHRoZSB2YWx1ZXMgaW4gYSB2ZWN0b3IgYXMgcmVmZXJlbmNlCgpoaXN0KG1hbmFnZW1lbnQkQWxsX3dlZWtseSwgYnJlYWtzID0gMTAsIGNvbCA9ICJsaWdodHllbGxvdyIsIG1haW4gPSAiRGlzdHJpYnV0aW9uIG9mIEVhcm5pbmdzIC0gTWFuYWdlbWVudCIsCiAgICAgeGxhYiA9ICJXZWVrbHkgRWFybmluZ3MiLCB5bGFiID0gIkZyZXF1ZW5jeSIpCgojdGhlIGRhdGEgcG9pbnRzIGFsbCBhcHBlYXIgdG8gZmFsbCBpbnRvIHRoZSBzYW1lIHJhbmdlIG9mIGZyZXF1ZW5jeSEKCmBgYAoKYGBge3J9CiNCYXIgUGxvdC0taW52ZXN0aWdhdGVzIHRoZSBnZW5kZXIgYmFsYW5jZSBhY3Jvc3MgZGlmZmVyZW50IG9jY3VwYXRpb25zIHdpdGhpbiB0aGUgY2F0ZWdvcnkKCiNzdW0gdGhlIGNvdW50cyBvZiBmZW1hbGVzICYgbWFsZXMKZmVtYWxlX2NvdW50IDwtIHN1bShtYW5hZ2VtZW50JEZfd29ya2VycykgCm1hbGVfY291bnQgPC0gc3VtKG1hbmFnZW1lbnQkTV93b3JrZXJzKQoKI2NyZWF0ZSBhIGRhdGEg
ZnJhbWUgd2l0aCBnZW5kZXIgY291bnRzCmdlbmRlcl9kYXRhIDwtIGRhdGEuZnJhbWUoR2VuZGVyID0gYygiRmVtYWxlIiwgIk1hbGUiKSwKICAgICAgICAgICAgICAgICAgICAgICAgICBDb3VudCA9IGMoZmVtYWxlX2NvdW50LCBtYWxlX2NvdW50KSkKCmdncGxvdChkYXRhID0gZ2VuZGVyX2RhdGEsIGFlcyh4ID0gR2VuZGVyLCB5ID0gQ291bnQsIGZpbGwgPSBHZW5kZXIpKSArCiAgZ2VvbV9iYXIoc3RhdCA9ICJpZGVudGl0eSIsIHdpZHRoID0gMC42KSArCiAgbGFicyh0aXRsZSA9ICJHZW5kZXIgQmFsYW5jZSBpbiBNYW5hZ2VtZW50IiwKICAgICAgIHggPSAiR2VuZGVyIiwgeSA9ICJDb3VudCIpICsKICBzY2FsZV9maWxsX21hbnVhbCh2YWx1ZXMgPSBjKCJwaW5rIiwgInNreWJsdWUiKSkgKwogIHRoZW1lX21pbmltYWwoKQpgYGAK