library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
# Load the occupation data set. The "UTF-8-BOM" encoding drops a leading
# byte-order mark, if present, so it does not end up in the first column name.
# Spell out FALSE: the shorthand `F` is an ordinary variable that can be
# reassigned.
employment <- read.csv(
  "occupation_gender_race.csv",
  stringsAsFactors = FALSE,
  fileEncoding = "UTF-8-BOM"
)
# Per-job-type averages (ignoring NAs) of the All column and of each group
# column, plus a combined ratio of the four group means to the overall mean.
sumstats <- employment %>%
  group_by(job_type) %>%
  summarise(
    mean_all = mean(All, na.rm = TRUE),
    mean_women = mean(Women, na.rm = TRUE),
    mean_black = mean(Black, na.rm = TRUE),
    mean_asian = mean(Asian, na.rm = TRUE),
    mean_hispanic = mean(Hispanic.Latino, na.rm = TRUE)
  ) %>%
  # The sum must be parenthesised before dividing: `/` binds tighter than `+`,
  # so the earlier form divided only mean_hispanic by mean_all.
  # NOTE(review): the group columns look like percentages while All looks like
  # a head count -- confirm this ratio is the measure you actually want.
  mutate(
    minoritypercent =
      (mean_women + mean_black + mean_asian + mean_hispanic) / mean_all
  )
sumstats
## # A tibble: 4 × 7
## job_type mean_all mean_women mean_black mean_asian mean_hispanic
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 computer 296. 28.1 8.85 15.7 7.31
## 2 computer_all 4187. 25.7 7.82 18.4 6.5
## 3 professional 32488. 57.0 9.57 8.1 8.1
## 4 total 144356. 46.8 11.4 5.35 15.4
## # ℹ 1 more variable: minoritypercent <dbl>
# Scatter plot: one point per row, positioned by year and job type and
# coloured by the value in the Women column.
ggplot(
  data = employment,
  mapping = aes(x = year, y = job_type, color = Women)
) +
  geom_point() +
  ggtitle("Women Employed in Each Field") +
  xlab("Year") +
  ylab("Job Type") +
  labs(color = "People")
# Keep only the year and the three race columns, renaming them while
# selecting (new_name = old_name).
wide_data <- employment %>%
  select(
    Year = year,
    Asian,
    Black,
    Hispanic = Hispanic.Latino
  )
wide_data
## Year Asian Black Hispanic
## 1 2020 6.4 12.1 17.6
## 2 2020 10.1 10.5 10.1
## 3 2020 23.0 9.1 8.4
## 4 2020 NA NA NA
## 5 2020 18.7 9.7 8.1
## 6 2020 6.9 11.9 15.8
## 7 2020 28.3 6.3 6.6
## 8 2020 34.1 6.2 5.9
## 9 2020 29.6 12.0 9.2
## 10 2020 16.2 3.7 5.9
## 11 2020 9.9 5.9 15.8
## 12 2020 10.7 13.0 11.6
## 13 2020 30.1 5.9 5.6
## 14 2020 11.8 9.0 7.8
## 15 2020 15.5 14.8 16.1
## 16 2020 15.8 13.3 11.2
## 17 2020 NA NA NA
## 18 2020 NA NA NA
## 19 2020 8.6 13.6 9.5
## 20 2020 28.0 7.9 2.0
## 21 2020 25.7 9.3 6.2
## 22 2015 5.8 11.7 16.4
## 23 2015 8.7 9.8 8.8
## 24 2015 19.9 8.6 6.8
## 25 2015 NA NA NA
## 26 2015 19.6 9.6 6.9
## 27 2015 3.4 3.0 5.2
## 28 2015 18.9 7.0 6.9
## 29 2015 33.8 5.0 5.4
## 30 2015 9.6 9.1 6.2
## 31 2015 9.6 12.5 8.4
## 32 2015 9.6 6.2 2.1
## 33 2015 10.4 11.5 9.4
## 34 2015 16.5 8.9 6.5
## 35 2015 12.0 11.8 9.5
## 36 2015 NA NA NA
## 37 2015 NA NA NA
## 38 2015 10.2 15.3 9.3
## 39 2015 11.4 12.4 3.7
## 40 2015 NA NA NA
## 41 2015 23.4 12.1 11.0
## 42 2015 21.6 12.3 7.9
## 43 2010 4.8 10.8 14.3
## 44 2010 7.0 9.2 7.1
## 45 2010 16.1 6.7 5.5
## 46 2010 14.9 7.3 5.1
## 47 2010 12.4 5.1 6.5
## 48 2010 28.0 5.1 3.9
## 49 2010 7.9 11.3 6.9
## 50 2010 11.8 9.0 8.6
## 51 2010 9.4 5.6 6.0
## 52 2010 7.4 6.6 6.7
## 53 2010 NA NA NA
## 54 2010 NA NA NA
## 55 2010 5.8 10.7 8.4
## 56 2010 NA NA NA
## 57 2010 NA NA NA
## 58 2010 26.7 3.1 7.3
## 59 2005 4.4 10.8 13.1
## 60 2005 6.6 8.8 6.4
## 61 2005 14.7 6.9 5.3
## 62 2005 11.4 8.3 4.8
## 63 2005 18.0 4.6 5.7
## 64 2005 24.6 5.0 3.9
## 65 2005 5.5 9.1 9.7
## 66 2005 6.3 13.1 5.1
## 67 2005 10.4 4.9 4.8
## 68 2005 9.4 9.3 6.6
## 69 2005 6.2 10.1 2.3
## 70 2005 23.1 8.4 5.5
# Mean of each race column per year, ignoring missing values.
# `across()` applies the same summary to all three columns and `.names`
# reproduces the AsianMean / BlackMean / HispanicMean column names.
try1 <- wide_data %>%
  group_by(Year) %>%
  summarise(
    across(
      c(Asian, Black, Hispanic),
      function(col) mean(col, na.rm = TRUE),
      .names = "{.col}Mean"
    )
  )
try1
## # A tibble: 4 × 4
## Year AsianMean BlackMean HispanicMean
## <int> <dbl> <dbl> <dbl>
## 1 2005 11.7 8.28 6.1
## 2 2010 12.7 7.54 7.19
## 3 2015 14.4 9.81 7.67
## 4 2020 18.3 9.68 9.63
# Take the 2020 row of the yearly means and reshape it to long form:
# one row per race column, with its name in Race and its value in Percentage.
long_data <- try1 %>%
  filter(Year == 2020) %>%
  pivot_longer(
    cols = -Year,
    names_to = "Race",
    values_to = "Percentage"
  )
long_data
## # A tibble: 3 × 3
## Year Race Percentage
## <int> <chr> <dbl>
## 1 2020 AsianMean 18.3
## 2 2020 BlackMean 9.68
## 3 2020 HispanicMean 9.63
# Means of the three race columns for the 2020 rows only, ignoring missing
# values. `.names` reproduces the MeanAsian / MeanBlack / MeanHispanic names.
year1 <- wide_data %>%
  filter(Year == 2020) %>%
  summarise(
    across(
      c(Asian, Black, Hispanic),
      function(col) mean(col, na.rm = TRUE),
      .names = "Mean{.col}"
    )
  )
year1
## MeanAsian MeanBlack MeanHispanic
## 1 18.3 9.677778 9.633333
# Bar chart of the values in long_data, one bar per Race entry.
# geom_col() draws bar heights straight from the y values, which is what
# geom_bar(stat = "identity") was doing. The former `color = "red"` entry in
# labs() only set a legend title for a colour aesthetic that is never mapped,
# so it had no visible effect and is dropped; the explicit `group` was
# likewise redundant with the discrete x/fill mapping.
ggplot(long_data, aes(x = Race, y = Percentage, fill = Race)) +
  geom_col() +
  labs(
    y = "Percentage Employed",
    x = "Minority",
    title = "Percentage of Minorities Employed"
  )
The data describes minority representation within four job types: computer, total, professional, and computer all. It gives details on all the employed people in each job type and then focuses on the women, Asian, Black, and Hispanic workers within it. From the data, I can see whether minority representation has increased over the years, remained the same, or potentially even decreased. In my first graph, you can see how the points change over the years for women in each job type. For example, women densely populated the computer field in 2005 and 2010, but in 2015 the number of women employed decreased. Then, in 2020, it increased once again. The other job types seem to have remained relatively consistent in their employment of women. In my second graph, you can see the employment of minorities by race in 2020 across all the fields. Asian people make up a much larger percentage of the workforce than Black and Hispanic-Latino people. This can tell us about the bias surrounding Asian people and how people can believe them to be “smarter” than Hispanic-Latino and Black people. This data helps give insight into the employment of minorities, by both gender and race, in the workforce.