Patel_Midterm

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(tidyr)
library(ggplot2)

# Load the occupation data set. The "UTF-8-BOM" encoding drops a leading
# byte-order mark so the first column name is read cleanly.
employment <- read.csv(
  file = "occupation_gender_race.csv",
  fileEncoding = "UTF-8-BOM",
  stringsAsFactors = FALSE
)

# Per-job-type averages of total employment and of each demographic column,
# skipping missing values, plus a combined "minoritypercent" ratio.
sumstats <- employment %>%
  group_by(job_type) %>%
  summarise(
    mean_all = mean(All, na.rm = TRUE),
    mean_women = mean(Women, na.rm = TRUE),
    mean_black = mean(Black, na.rm = TRUE),
    mean_asian = mean(Asian, na.rm = TRUE),
    mean_hispanic = mean(Hispanic.Latino, na.rm = TRUE)
  ) %>%
  # `/` binds tighter than `+`, so the sum must be parenthesised for the
  # whole numerator (not just mean_hispanic) to be divided by mean_all.
  # NOTE(review): the demographic columns look like percentage shares rather
  # than head counts -- confirm that dividing by mean_all is the metric wanted.
  mutate(
    minoritypercent =
      (mean_women + mean_black + mean_asian + mean_hispanic) / mean_all
  )

sumstats

## # A tibble: 4 × 7
##   job_type     mean_all mean_women mean_black mean_asian mean_hispanic
##   <chr>           <dbl>      <dbl>      <dbl>      <dbl>         <dbl>
## 1 computer         296.       28.1       8.85      15.7           7.31
## 2 computer_all    4187.       25.7       7.82      18.4           6.5 
## 3 professional   32488.       57.0       9.57       8.1           8.1 
## 4 total         144356.       46.8      11.4        5.35         15.4 
## # ℹ 1 more variable: minoritypercent <dbl>

# Dot plot of job type against year; each point is coloured by the Women
# column of the same row.
# NOTE(review): the legend is titled "People" -- confirm that matches the
# unit stored in Women.
ggplot(data = employment, mapping = aes(x = year, y = job_type)) +
  geom_point(mapping = aes(color = Women)) +
  labs(
    title = "Women Employed in Each Field",
    x = "Year",
    y = "Job Type",
    color = "People"
  )

# Keep the year plus the three race/ethnicity columns, renaming them while
# selecting so no separate colnames() step is needed.
wide_data <- employment %>%
  select(Year = year, Asian, Black, Hispanic = Hispanic.Latino)
wide_data

##    Year Asian Black Hispanic
## 1  2020   6.4  12.1     17.6
## 2  2020  10.1  10.5     10.1
## 3  2020  23.0   9.1      8.4
## 4  2020    NA    NA       NA
## 5  2020  18.7   9.7      8.1
## 6  2020   6.9  11.9     15.8
## 7  2020  28.3   6.3      6.6
## 8  2020  34.1   6.2      5.9
## 9  2020  29.6  12.0      9.2
## 10 2020  16.2   3.7      5.9
## 11 2020   9.9   5.9     15.8
## 12 2020  10.7  13.0     11.6
## 13 2020  30.1   5.9      5.6
## 14 2020  11.8   9.0      7.8
## 15 2020  15.5  14.8     16.1
## 16 2020  15.8  13.3     11.2
## 17 2020    NA    NA       NA
## 18 2020    NA    NA       NA
## 19 2020   8.6  13.6      9.5
## 20 2020  28.0   7.9      2.0
## 21 2020  25.7   9.3      6.2
## 22 2015   5.8  11.7     16.4
## 23 2015   8.7   9.8      8.8
## 24 2015  19.9   8.6      6.8
## 25 2015    NA    NA       NA
## 26 2015  19.6   9.6      6.9
## 27 2015   3.4   3.0      5.2
## 28 2015  18.9   7.0      6.9
## 29 2015  33.8   5.0      5.4
## 30 2015   9.6   9.1      6.2
## 31 2015   9.6  12.5      8.4
## 32 2015   9.6   6.2      2.1
## 33 2015  10.4  11.5      9.4
## 34 2015  16.5   8.9      6.5
## 35 2015  12.0  11.8      9.5
## 36 2015    NA    NA       NA
## 37 2015    NA    NA       NA
## 38 2015  10.2  15.3      9.3
## 39 2015  11.4  12.4      3.7
## 40 2015    NA    NA       NA
## 41 2015  23.4  12.1     11.0
## 42 2015  21.6  12.3      7.9
## 43 2010   4.8  10.8     14.3
## 44 2010   7.0   9.2      7.1
## 45 2010  16.1   6.7      5.5
## 46 2010  14.9   7.3      5.1
## 47 2010  12.4   5.1      6.5
## 48 2010  28.0   5.1      3.9
## 49 2010   7.9  11.3      6.9
## 50 2010  11.8   9.0      8.6
## 51 2010   9.4   5.6      6.0
## 52 2010   7.4   6.6      6.7
## 53 2010    NA    NA       NA
## 54 2010    NA    NA       NA
## 55 2010   5.8  10.7      8.4
## 56 2010    NA    NA       NA
## 57 2010    NA    NA       NA
## 58 2010  26.7   3.1      7.3
## 59 2005   4.4  10.8     13.1
## 60 2005   6.6   8.8      6.4
## 61 2005  14.7   6.9      5.3
## 62 2005  11.4   8.3      4.8
## 63 2005  18.0   4.6      5.7
## 64 2005  24.6   5.0      3.9
## 65 2005   5.5   9.1      9.7
## 66 2005   6.3  13.1      5.1
## 67 2005  10.4   4.9      4.8
## 68 2005   9.4   9.3      6.6
## 69 2005   6.2  10.1      2.3
## 70 2005  23.1   8.4      5.5

# Yearly mean of each race/ethnicity column, ignoring missing values.
# `.names` appends "Mean" to each source column name
# (AsianMean, BlackMean, HispanicMean).
try1 <- wide_data %>%
  group_by(Year) %>%
  summarise(
    across(
      c(Asian, Black, Hispanic),
      ~ mean(.x, na.rm = TRUE),
      .names = "{.col}Mean"
    )
  )
try1

## # A tibble: 4 × 4
##    Year AsianMean BlackMean HispanicMean
##   <int>     <dbl>     <dbl>        <dbl>
## 1  2005      11.7      8.28         6.1 
## 2  2010      12.7      7.54         7.19
## 3  2015      14.4      9.81         7.67
## 4  2020      18.3      9.68         9.63

# Take the 2020 row of the yearly means and reshape it to long form:
# one row per race column, with its mean in "Percentage".
long_data <- try1 %>%
  filter(Year == 2020) %>%
  pivot_longer(cols = !Year, names_to = "Race", values_to = "Percentage")
long_data

## # A tibble: 3 × 3
##    Year Race         Percentage
##   <int> <chr>             <dbl>
## 1  2020 AsianMean         18.3 
## 2  2020 BlackMean          9.68
## 3  2020 HispanicMean       9.63

# Means of the three race/ethnicity columns for 2020 only, computed directly
# from the row-level data (missing values skipped).
year1 <- wide_data %>%
  filter(Year == 2020) %>%
  summarise(
    across(
      c(Asian, Black, Hispanic),
      ~ mean(.x, na.rm = TRUE),
      .names = "Mean{.col}"
    )
  )
year1

##   MeanAsian MeanBlack MeanHispanic
## 1      18.3  9.677778     9.633333

# Bar chart of the 2020 mean percentage for each race column.
# geom_col() draws bar heights straight from the y values, the same as
# geom_bar(stat = "identity"). Mapping `fill` to Race already gives one bar
# per Race, so no explicit `group` is needed.
ggplot(long_data, aes(x = Race, y = Percentage, fill = Race)) +
  geom_col() +
  # No colour aesthetic is mapped, so a `color` label would have no effect;
  # bar colours come from the `fill` mapping above.
  labs(
    y = "Percentage Employed",
    x = "Minority",
    title = "Percentage of Minorities Employed"
  )

The data describes minority representation within four job types: computer, total, professional, and computer all. It gives details on all the people employed in these jobs and then focuses on the women, Asians, Blacks, and Hispanics within each job. From the data, I can see whether the share of minorities has increased throughout the years, remained the same, or potentially even decreased. In my first graph, you can see how the points change throughout the years for women in each job type. For example, women densely populated the computer field in 2005 and 2010, but then in 2015 the amount of women employed decreased. Then in 2020, it increased once again. The other job types seem to have remained relatively consistent in their employment of women. For my second graph, you can see the employment of minorities by race in 2020 across all the fields. Asian people make up a much larger percentage of the workforce compared to Black and Hispanic-Latino people. This can tell us about the bias surrounding Asian people and how people can believe them to be “smarter” compared to Hispanic-Latino and Black people. This data helps give insight into the employment of minorities, by both gender and race, in the workforce.

Patel_Midterm

Diya Patel

2023-10-02