# Folder that holds occupation_gender_race.csv on the original author's machine.
data_dir <- "C:/Users/Abigail/OneDrive - Binghamton University/Dida"
# Only switch directories when that folder actually exists. On any other
# machine the script keeps the current working directory instead of aborting,
# so the CSV is then looked up relative to wherever the script is run from.
if (dir.exists(data_dir)) {
  setwd(data_dir)
}
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
# Load the occupation table. The relative path is resolved against the current
# working directory.
jobdata <- read.csv("occupation_gender_race.csv")
# Some `description` values carry trailing blanks in the raw file (visible as
# left-shifted entries when the table is printed), so the same occupation
# would not compare equal across years. Strip the padding once, right after
# loading; no other column is touched.
jobdata$description <- trimws(jobdata$description)
# Print the full table for inspection.
jobdata
##        job_type                                            description year
## 1         total                               Total, 16 years and over 2020
## 2  professional                   Professional and related occupations 2020
## 3  computer_all                  Computer and mathematical occupations 2020
## 4      computer           Computer and information research scientists 2020
## 5      computer                              Computer systems analysts 2020
## 6      computer                          Information security analysts 2020
## 7      computer                                   Computer programmers 2020
## 8      computer                                    Software developers 2020
## 9      computer        Software quality assurance analysts and testers 2020
## 10     computer                                         Web developers 2020
## 11     computer                    Web and digital interface designers 2020
## 12     computer                           Computer support specialists 2020
## 13     computer                 Database administrators and architects 2020
## 14     computer            Network and computer systems administrators 2020
## 15     computer                            Computer network architects 2020
## 16     computer                        Computer occupations, all other 2020
## 17     computer                                              Actuaries 2020
## 18     computer                                         Mathematicians 2020
## 19     computer                           Operations research analysts 2020
## 20     computer                                          Statisticians 2020
## 21     computer                 Other mathematical science occupations 2020
## 22        total                               Total, 16 years and over 2015
## 23 professional                   Professional and related occupations 2015
## 24 computer_all                  Computer and mathematical occupations 2015
## 25     computer           Computer and information research scientists 2015
## 26     computer                              Computer systems analysts 2015
## 27     computer                          Information security analysts 2015
## 28     computer                                   Computer programmers 2015
## 29     computer Software developers, applications and systems software 2015
## 30     computer                                         Web developers 2015
## 31     computer                           Computer support specialists 2015
## 32     computer                                Database administrators 2015
## 33     computer            Network and computer systems administrators 2015
## 34     computer                            Computer network architects 2015
## 35     computer                        Computer occupations, all other 2015
## 36     computer                                              Actuaries 2015
## 37     computer                                         Mathematicians 2015
## 38     computer                           Operations research analysts 2015
## 39     computer                                          Statisticians 2015
## 40     computer         Miscellaneous mathematical science occupations 2015
## 41     computer                            Computer hardware engineers 2015
## 42     computer                 Other mathematical science occupations 2015
## 43        total                               Total, 16 years and over 2010
## 44 professional                   Professional and related occupations 2010
## 45 computer_all                  Computer and mathematical occupations 2010
## 46     computer               Computer scientists and systems analysts 2010
## 47     computer                                   Computer programmers 2010
## 48     computer                            Computer software engineers 2010
## 49     computer                           Computer support specialists 2010
## 50     computer                                Database administrators 2010
## 51     computer            Network and computer systems administrators 2010
## 52     computer       Network systems and data communications analysts 2010
## 53     computer                                              Actuaries 2010
## 54     computer                                         Mathematicians 2010
## 55     computer                           Operations research analysts 2010
## 56     computer                                          Statisticians 2010
## 57     computer         Miscellaneous mathematical science occupations 2010
## 58     computer                            Computer hardware engineers 2010
## 59        total                               Total, 16 years and over 2005
## 60 professional                  Professional and related occupations  2005
## 61 computer_all                  Computer and mathematical occupations 2005
## 62     computer               Computer scientists and systems analysts 2005
## 63     computer                                   Computer programmers 2005
## 64     computer                            Computer software engineers 2005
## 65     computer                          Computer support specialists  2005
## 66     computer                                Database administrators 2005
## 67     computer            Network and computer systems administrators 2005
## 68     computer       Network systems and data communications analysts 2005
## 69     computer                          Operations research analysts  2005
## 70     computer                          Computer hardware engineers   2005
##       All Women Black Asian Hispanic.Latino
## 1  147795  46.8  12.1   6.4            17.6
## 2   36502  57.0  10.5  10.1            10.1
## 3    5603  25.2   9.1  23.0             8.4
## 4      42    NA    NA    NA              NA
## 5     594  35.6   9.7  18.7             8.1
## 6     137  11.4  11.9   6.9            15.8
## 7     417  21.1   6.3  28.3             6.6
## 8    1883  19.4   6.2  34.1             5.9
## 9      82  25.1  12.0  29.6             9.2
## 10    104  27.8   3.7  16.2             5.9
## 11     70  44.8   5.9   9.9            15.8
## 12    660  25.9  13.0  10.7            11.6
## 13    121  28.8   5.9  30.1             5.6
## 14    238  19.8   9.0  11.8             7.8
## 15    107   9.3  14.8  15.5            16.1
## 16    792  28.0  13.3  15.8            11.2
## 17     32    NA    NA    NA              NA
## 18      3    NA    NA    NA              NA
## 19    156  42.9  13.6   8.6             9.5
## 20     61  50.3   7.9  28.0             2.0
## 21    103  37.8   9.3  25.7             6.2
## 22 148834  46.8  11.7   5.8            16.4
## 23  33852  57.2   9.8   8.7             8.8
## 24   4369  24.7   8.6  19.9             6.8
## 25     24    NA    NA    NA              NA
## 26    552  34.2   9.6  19.6             6.9
## 27     70  19.7   3.0   3.4             5.2
## 28    480  21.0   7.0  18.9             6.9
## 29   1353  17.9   5.0  33.8             5.4
## 30    204  34.3   9.1   9.6             6.2
## 31    475  26.4  12.5   9.6             8.4
## 32     93  38.0   6.2   9.6             2.1
## 33    218  15.9  11.5  10.4             9.4
## 34    114  12.1   8.9  16.5             6.5
## 35    547  24.3  11.8  12.0             9.5
## 36     21    NA    NA    NA              NA
## 37      6    NA    NA    NA              NA
## 38    123  50.7  15.3  10.2             9.3
## 39     86  52.9  12.4  11.4             3.7
## 40      4    NA    NA    NA              NA
## 41     72  12.8  12.1  23.4            11.0
## 42    245  49.2  12.3  21.6             7.9
## 43 139064  47.2  10.8   4.8            14.3
## 44  30805  57.4   9.2   7.0             7.1
## 45   3531  25.8   6.7  16.1             5.5
## 46    784  30.5   7.3  14.9             5.1
## 47    470  22.0   5.1  12.4             6.5
## 48   1026  20.9   5.1  28.0             3.9
## 49    388  27.6  11.3   7.9             6.9
## 50    101  36.4   9.0  11.8             8.6
## 51    229  16.5   5.6   9.4             6.0
## 52    366  26.2   6.6   7.4             6.7
## 53     25    NA    NA    NA              NA
## 54      5    NA    NA    NA              NA
## 55    107  46.2  10.7   5.8             8.4
## 56     28    NA    NA    NA              NA
## 57      3    NA    NA    NA              NA
## 58     70  10.3   3.1  26.7             7.3
## 59 141730  46.4  10.8   4.4            13.1
## 60  28795  56.3   8.8   6.6             6.4
## 61   3246  27.0   6.9  14.7             5.3
## 62    745  30.3   8.3  11.4             4.8
## 63    581  26.0   4.6  18.0             5.7
## 64    832  21.9   5.0  24.6             3.9
## 65    334  33.2   9.1   5.5             9.7
## 66     89  32.3  13.1   6.3             5.1
## 67    200  18.4   4.9  10.4             4.8
## 68    322  24.6   9.3   9.4             6.6
## 69     86  50.5  10.1   6.2             2.3
## 70     81  10.8   8.4  23.1             5.5
# One summary row per job category.
#   total_workers            sum of `All` over every row of the category; rows
#                            from all years are pooled because the grouping is
#                            by job_type only.
#   Average.*                mean of the corresponding percentage column,
#                            ignoring rows where it is NA.
#   total_percent_minorities sum of the four averages above.
# NOTE(review): the four groups presumably overlap (a worker can be counted in
# more than one), so the summed figure is an index rather than a true share of
# the workforce -- confirm before quoting it as a percentage.
summaryjobs <- jobdata %>%
  filter(
    job_type %in% c("total", "computer_all", "professional", "computer")
  ) %>%
  group_by(job_type) %>%
  summarise(
    total_workers = sum(All),
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    Average.Women = mean(Women, na.rm = TRUE),
    Average.Black = mean(Black, na.rm = TRUE),
    Average.Asian = mean(Asian, na.rm = TRUE),
    Average.Hispaniclatino = mean(Hispanic.Latino, na.rm = TRUE),
    total_percent_minorities = Average.Women + Average.Black +
      Average.Asian + Average.Hispaniclatino
  )
summaryjobs
## # A tibble: 4 × 7
##   job_type     total_workers Average.Women Average.Black Average.Asian
##   <chr>                <dbl>         <dbl>         <dbl>         <dbl>
## 1 computer             17161          28.1          8.85         15.7 
## 2 computer_all         16749          25.7          7.82         18.4 
## 3 professional        129954          57.0          9.57          8.1 
## 4 total               577423          46.8         11.4           5.35
## # ℹ 2 more variables: Average.Hispaniclatino <dbl>,
## #   total_percent_minorities <dbl>
library(NineteenEightyR)
## 
##            ^^                   @@@@@@@@@
##       ^^       ^^            @@@@@@@@@@@@@@@
##                            @@@@@@@@@@@@@@@@@@              ^^
##                           @@@@@@@@@@@@@@@@@@@@
## ~~~~ ~~ ~~~~~ ~~~~~~~~ ~~ &&&&&&&&&&&&&&&&&&&& ~~~~~~~ ~~~~~~~~~~~ ~~~
## ~         ~~   ~  ~       ~~~~~~~~~~~~~~~~~~~~ ~       ~~     ~~ ~
##   ~      ~~      ~~ ~~ ~~  ~~~~~~~~~~~~~ ~~~~  ~     ~~~    ~ ~~~  ~ ~~
##   ~  ~~     ~         ~      ~~~~~~  ~~ ~~~       ~~ ~ ~~  ~~ ~
## ~  ~       ~ ~      ~           ~~ ~~~~~~  ~      ~~  ~             ~~
##       ~             ~        ~      ~      ~~   ~             ~
## 
# Request four colours from sonny() (NineteenEightyR, loaded above). The
# vector is passed to scale_fill_manual() / scale_color_manual() in the plots
# that follow.
# NOTE(review): `palette` is also the name of a grDevices function; this
# assignment creates a same-named variable in the workspace -- consider a more
# specific name if the script grows.
palette <- sonny(n = 4)

# Reshape the "computer_all" summary row into long format: one row per
# demographic group, with the group label in `Minorities` and its averaged
# value in `Percentage`. job_type is kept as an identifier column.
job_computer_all <- summaryjobs %>%
  filter(job_type == "computer_all") %>%
  select(
    job_type,
    Average.Women,
    Average.Asian,
    Average.Black,
    Average.Hispaniclatino
  ) %>%
  pivot_longer(
    cols = -job_type,
    names_to = "Minorities",
    values_to = "Percentage"
  )

# Graph 1: bar chart of the averaged share of each group in the "computer_all"
# category.
# The values come from summaryjobs, which pools every year in the data
# (2005, 2010, 2015, 2020) and holds percentages, so the title says so rather
# than claiming a 2020-only head count.
ggplot(
  job_computer_all,
  aes(x = Minorities, y = Percentage, group = Minorities, fill = Minorities)
) +
  # geom_col() draws bar heights straight from `y`.
  geom_col() +
  labs(
    title = "Average Percentage of Minorities in All Computer Jobs (2005-2020)",
    # The legend belongs to the fill aesthetic; no colour aesthetic is mapped.
    fill = "Minorities"
  ) +
  scale_fill_manual(values = palette)

# Per-year average of the combined minority percentage.
# Step 1: add the four percentage columns row by row (NA if any one is NA).
# Step 2: average that sum within each year, skipping the NA rows.
# NOTE(review): no job_type filter is applied, so the aggregate rows ("total",
# "professional", "computer_all") are averaged together with the individual
# occupations -- confirm this mix is intended.
percentyear <- jobdata %>%
  mutate(
    Total.Percent.Minorities = Women + Black + Asian + Hispanic.Latino
  ) %>%
  group_by(year) %>%
  summarise(
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    Average.Percent.Minorities = mean(Total.Percent.Minorities, na.rm = TRUE)
  )

# Graph 2: one bar per year showing the average combined minority percentage.
# `year` is mapped to fill as well as x, so the bars are also shaded by year.
ggplot(
  percentyear,
  aes(x = year, y = Average.Percent.Minorities, group = year, fill = year)
) +
  # geom_col() draws bar heights straight from `y`.
  geom_col() +
  labs(
    x = "Year",
    y = "Average Percent of Minorities",
    # Title spelling corrected ("Minorites" -> "Minorities").
    title = "Average Percent of Total Minorities Each Year"
  )

# Per-year mean of each group's percentage, reshaped to long format for a
# multi-line chart: one row per (year, group) with the group label in
# `Minorities` and the mean in `Percentage`. Rows with NA are ignored.
groupedyears <- jobdata %>%
  group_by(year) %>%
  summarise(
    # Spell out TRUE: `T` is an ordinary variable that can be reassigned.
    Average.Women = mean(Women, na.rm = TRUE),
    Average.Black = mean(Black, na.rm = TRUE),
    Average.Asian = mean(Asian, na.rm = TRUE),
    Average.Hispaniclatino = mean(Hispanic.Latino, na.rm = TRUE)
  ) %>%
  pivot_longer(-year, names_to = "Minorities", values_to = "Percentage")

# Graph 3: one line per demographic group showing its average percentage
# across the survey years.
ggplot(
  groupedyears,
  aes(x = year, y = Percentage, group = Minorities, color = Minorities)
) +
  # `linewidth` replaces the `size` aesthetic for lines, which is deprecated
  # since ggplot2 3.4.0 and triggers a lifecycle warning.
  geom_line(linewidth = 1.5) +
  labs(
    y = "Minority Percentage",
    x = "Year",
    title = "Percent of Each Minority Over the Years",
    color = "Minority"
  ) +
  scale_color_manual(values = palette)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

From this analysis, the table summarizes the four job categories, giving for each one the total number of employees and the average percentage of employees belonging to each minority group; these figures are later used to create the graphs. In the table, you can easily compare which minority group has the highest percentage for each job type. You can also see which job type has the largest share of minorities, which is an easy way to gauge inclusion within different subfields.

In conclusion, the table shows that women are the most highly represented minority group in these types of jobs, and graph 1 shows this specifically for computer jobs. In graph 2, you can see that, as time goes on, more minorities have been employed. Finally, graph 3 shows that Asian employment has increased the most, while the percentages of women, Black, and Hispanic/Latino employees have remained relatively the same, with little to no increase.