# NOTE(review): hard-coded, machine-specific working directory. The relative
# path later passed to read.csv() is resolved against this folder, so the
# script only runs as-is on a machine where this exact path exists.
setwd("C:/Users/Abigail/OneDrive - Binghamton University/Dida")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
# Load the occupation data (one row per occupation per survey year) from the
# current working directory, then echo the full data frame for inspection.
jobdata <- read.csv(file = "occupation_gender_race.csv")
jobdata
## job_type description year
## 1 total Total, 16 years and over 2020
## 2 professional Professional and related occupations 2020
## 3 computer_all Computer and mathematical occupations 2020
## 4 computer Computer and information research scientists 2020
## 5 computer Computer systems analysts 2020
## 6 computer Information security analysts 2020
## 7 computer Computer programmers 2020
## 8 computer Software developers 2020
## 9 computer Software quality assurance analysts and testers 2020
## 10 computer Web developers 2020
## 11 computer Web and digital interface designers 2020
## 12 computer Computer support specialists 2020
## 13 computer Database administrators and architects 2020
## 14 computer Network and computer systems administrators 2020
## 15 computer Computer network architects 2020
## 16 computer Computer occupations, all other 2020
## 17 computer Actuaries 2020
## 18 computer Mathematicians 2020
## 19 computer Operations research analysts 2020
## 20 computer Statisticians 2020
## 21 computer Other mathematical science occupations 2020
## 22 total Total, 16 years and over 2015
## 23 professional Professional and related occupations 2015
## 24 computer_all Computer and mathematical occupations 2015
## 25 computer Computer and information research scientists 2015
## 26 computer Computer systems analysts 2015
## 27 computer Information security analysts 2015
## 28 computer Computer programmers 2015
## 29 computer Software developers, applications and systems software 2015
## 30 computer Web developers 2015
## 31 computer Computer support specialists 2015
## 32 computer Database administrators 2015
## 33 computer Network and computer systems administrators 2015
## 34 computer Computer network architects 2015
## 35 computer Computer occupations, all other 2015
## 36 computer Actuaries 2015
## 37 computer Mathematicians 2015
## 38 computer Operations research analysts 2015
## 39 computer Statisticians 2015
## 40 computer Miscellaneous mathematical science occupations 2015
## 41 computer Computer hardware engineers 2015
## 42 computer Other mathematical science occupations 2015
## 43 total Total, 16 years and over 2010
## 44 professional Professional and related occupations 2010
## 45 computer_all Computer and mathematical occupations 2010
## 46 computer Computer scientists and systems analysts 2010
## 47 computer Computer programmers 2010
## 48 computer Computer software engineers 2010
## 49 computer Computer support specialists 2010
## 50 computer Database administrators 2010
## 51 computer Network and computer systems administrators 2010
## 52 computer Network systems and data communications analysts 2010
## 53 computer Actuaries 2010
## 54 computer Mathematicians 2010
## 55 computer Operations research analysts 2010
## 56 computer Statisticians 2010
## 57 computer Miscellaneous mathematical science occupations 2010
## 58 computer Computer hardware engineers 2010
## 59 total Total, 16 years and over 2005
## 60 professional Professional and related occupations 2005
## 61 computer_all Computer and mathematical occupations 2005
## 62 computer Computer scientists and systems analysts 2005
## 63 computer Computer programmers 2005
## 64 computer Computer software engineers 2005
## 65 computer Computer support specialists 2005
## 66 computer Database administrators 2005
## 67 computer Network and computer systems administrators 2005
## 68 computer Network systems and data communications analysts 2005
## 69 computer Operations research analysts 2005
## 70 computer Computer hardware engineers 2005
## All Women Black Asian Hispanic.Latino
## 1 147795 46.8 12.1 6.4 17.6
## 2 36502 57.0 10.5 10.1 10.1
## 3 5603 25.2 9.1 23.0 8.4
## 4 42 NA NA NA NA
## 5 594 35.6 9.7 18.7 8.1
## 6 137 11.4 11.9 6.9 15.8
## 7 417 21.1 6.3 28.3 6.6
## 8 1883 19.4 6.2 34.1 5.9
## 9 82 25.1 12.0 29.6 9.2
## 10 104 27.8 3.7 16.2 5.9
## 11 70 44.8 5.9 9.9 15.8
## 12 660 25.9 13.0 10.7 11.6
## 13 121 28.8 5.9 30.1 5.6
## 14 238 19.8 9.0 11.8 7.8
## 15 107 9.3 14.8 15.5 16.1
## 16 792 28.0 13.3 15.8 11.2
## 17 32 NA NA NA NA
## 18 3 NA NA NA NA
## 19 156 42.9 13.6 8.6 9.5
## 20 61 50.3 7.9 28.0 2.0
## 21 103 37.8 9.3 25.7 6.2
## 22 148834 46.8 11.7 5.8 16.4
## 23 33852 57.2 9.8 8.7 8.8
## 24 4369 24.7 8.6 19.9 6.8
## 25 24 NA NA NA NA
## 26 552 34.2 9.6 19.6 6.9
## 27 70 19.7 3.0 3.4 5.2
## 28 480 21.0 7.0 18.9 6.9
## 29 1353 17.9 5.0 33.8 5.4
## 30 204 34.3 9.1 9.6 6.2
## 31 475 26.4 12.5 9.6 8.4
## 32 93 38.0 6.2 9.6 2.1
## 33 218 15.9 11.5 10.4 9.4
## 34 114 12.1 8.9 16.5 6.5
## 35 547 24.3 11.8 12.0 9.5
## 36 21 NA NA NA NA
## 37 6 NA NA NA NA
## 38 123 50.7 15.3 10.2 9.3
## 39 86 52.9 12.4 11.4 3.7
## 40 4 NA NA NA NA
## 41 72 12.8 12.1 23.4 11.0
## 42 245 49.2 12.3 21.6 7.9
## 43 139064 47.2 10.8 4.8 14.3
## 44 30805 57.4 9.2 7.0 7.1
## 45 3531 25.8 6.7 16.1 5.5
## 46 784 30.5 7.3 14.9 5.1
## 47 470 22.0 5.1 12.4 6.5
## 48 1026 20.9 5.1 28.0 3.9
## 49 388 27.6 11.3 7.9 6.9
## 50 101 36.4 9.0 11.8 8.6
## 51 229 16.5 5.6 9.4 6.0
## 52 366 26.2 6.6 7.4 6.7
## 53 25 NA NA NA NA
## 54 5 NA NA NA NA
## 55 107 46.2 10.7 5.8 8.4
## 56 28 NA NA NA NA
## 57 3 NA NA NA NA
## 58 70 10.3 3.1 26.7 7.3
## 59 141730 46.4 10.8 4.4 13.1
## 60 28795 56.3 8.8 6.6 6.4
## 61 3246 27.0 6.9 14.7 5.3
## 62 745 30.3 8.3 11.4 4.8
## 63 581 26.0 4.6 18.0 5.7
## 64 832 21.9 5.0 24.6 3.9
## 65 334 33.2 9.1 5.5 9.7
## 66 89 32.3 13.1 6.3 5.1
## 67 200 18.4 4.9 10.4 4.8
## 68 322 24.6 9.3 9.4 6.6
## 69 86 50.5 10.1 6.2 2.3
## 70 81 10.8 8.4 23.1 5.5
# Summarise every job category over all survey years present in `jobdata`.
#   total_workers            - `All` summed over every row (and therefore every
#                              year) in the category.
#   Average.*                - mean of the percentage column, ignoring rows
#                              where the share is NA.
#   total_percent_minorities - sum of the four averaged shares.
# NOTE(review): the four groups are presumably not mutually exclusive (e.g. a
# worker can be counted under both Women and Asian), so the combined figure is
# an index rather than a true share of the workforce - confirm before quoting.
summaryjobs <- jobdata %>%
  filter(
    job_type %in% c("total", "computer_all", "professional", "computer")
  ) %>%
  group_by(job_type) %>%
  summarise(
    total_workers = sum(All),
    Average.Women = mean(Women, na.rm = TRUE),
    Average.Black = mean(Black, na.rm = TRUE),
    Average.Asian = mean(Asian, na.rm = TRUE),
    Average.Hispaniclatino = mean(Hispanic.Latino, na.rm = TRUE),
    total_percent_minorities = Average.Women + Average.Black +
      Average.Asian + Average.Hispaniclatino
  )
summaryjobs
## # A tibble: 4 × 7
## job_type total_workers Average.Women Average.Black Average.Asian
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 computer 17161 28.1 8.85 15.7
## 2 computer_all 16749 25.7 7.82 18.4
## 3 professional 129954 57.0 9.57 8.1
## 4 total 577423 46.8 11.4 5.35
## # ℹ 2 more variables: Average.Hispaniclatino <dbl>,
## # total_percent_minorities <dbl>
library(NineteenEightyR)
##
## ^^ @@@@@@@@@
## ^^ ^^ @@@@@@@@@@@@@@@
## @@@@@@@@@@@@@@@@@@ ^^
## @@@@@@@@@@@@@@@@@@@@
## ~~~~ ~~ ~~~~~ ~~~~~~~~ ~~ &&&&&&&&&&&&&&&&&&&& ~~~~~~~ ~~~~~~~~~~~ ~~~
## ~ ~~ ~ ~ ~~~~~~~~~~~~~~~~~~~~ ~ ~~ ~~ ~
## ~ ~~ ~~ ~~ ~~ ~~~~~~~~~~~~~ ~~~~ ~ ~~~ ~ ~~~ ~ ~~
## ~ ~~ ~ ~ ~~~~~~ ~~ ~~~ ~~ ~ ~~ ~~ ~
## ~ ~ ~ ~ ~ ~~ ~~~~~~ ~ ~~ ~ ~~
## ~ ~ ~ ~ ~~ ~ ~
##
# Four-colour palette from NineteenEightyR, shared by the plots that follow.
palette <- sonny(n = 4)

# Graph 1: average share of each group within the aggregate "computer_all"
# category. The values come from `summaryjobs`, which averages over every
# survey year in the data (2005, 2010, 2015, 2020), so the title states that
# range rather than a single year, and "percentage" rather than "number".
job_computer_all <- summaryjobs %>%
  filter(job_type == "computer_all") %>%
  select(
    job_type, Average.Women, Average.Asian, Average.Black,
    Average.Hispaniclatino
  ) %>%
  pivot_longer(-job_type, names_to = "Minorities", values_to = "Percentage")

ggplot(
  job_computer_all,
  aes(x = Minorities, y = Percentage, group = Minorities, fill = Minorities)
) +
  # geom_col() draws bars whose heights are the data values themselves.
  geom_col() +
  labs(
    title = "Average Percentage of Each Group in All Computer Jobs (2005-2020)",
    # The mapped aesthetic is `fill`, so that is the legend to label.
    fill = "Minorities"
  ) +
  scale_fill_manual(values = palette)
# Graph 2: for each survey year, the mean over all occupation rows of the
# combined (Women + Black + Asian + Hispanic/Latino) percentage.
# A row with NA in any of the four shares produces an NA sum, which
# `na.rm = TRUE` then drops from that year's mean.
# NOTE(review): the four groups presumably overlap, so the combined value can
# double-count workers - confirm this is the intended measure.
percentyear <- jobdata %>%
  mutate(
    Total.Percent.Minorities = Women + Black + Asian + Hispanic.Latino
  ) %>%
  group_by(year) %>%
  summarise(
    Average.Percent.Minorities = mean(Total.Percent.Minorities, na.rm = TRUE)
  )

ggplot(
  percentyear,
  aes(x = year, y = Average.Percent.Minorities, group = year, fill = year)
) +
  # geom_col() draws bars whose heights are the data values themselves.
  geom_col() +
  labs(
    x = "Year", y = "Average Percent of Minorities",
    title = "Average Percent of Total Minorities Each Year"
  )
# Graph 3: per-year mean share of each group across all occupation rows,
# reshaped to long form (one row per year/group) so each group is one line.
groupedyears <- jobdata %>%
  group_by(year) %>%
  summarise(
    Average.Women = mean(Women, na.rm = TRUE),
    Average.Black = mean(Black, na.rm = TRUE),
    Average.Asian = mean(Asian, na.rm = TRUE),
    Average.Hispaniclatino = mean(Hispanic.Latino, na.rm = TRUE)
  ) %>%
  pivot_longer(-year, names_to = "Minorities", values_to = "Percentage")

ggplot(
  groupedyears,
  aes(x = year, y = Percentage, group = Minorities, color = Minorities)
) +
  # `linewidth` replaces `size` for line thickness as of ggplot2 3.4.0.
  geom_line(linewidth = 1.5) +
  labs(
    y = "Minority Percentage", x = "Year",
    title = "Percent of Each Minority Over the Years",
    color = "Minority"
  ) +
  scale_color_manual(values = palette)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
From this analysis, the table summarizes the four job categories by the average percentage of each minority group among all employees; this summary is later used to create the graphs. In the table, you can easily compare which minority group has the highest percentage for each job type. You can also see which job type has the largest share of minorities, which is an easy way to gauge inclusion within different subfields.
In conclusion, the table shows that women are the most-represented minority group in these types of jobs, and graph 1 shows the same specifically for computer jobs. Graph 2 shows that, as time goes on, more minorities have been employed. Finally, graph 3 shows that Asian employment has increased the most, while the shares of women, Black, and Hispanic/Latino workers have remained relatively the same, with little to no increase.