### DS Labs homework assignment using the admissions data set from dslabs

# install.packages("dslabs")  # these are data science labs
library("dslabs")
## Warning: package 'dslabs' was built under R version 4.2.3
# Show every data set that ships with the dslabs package.
data(package = "dslabs")
# List the scripts bundled with dslabs (the "script" directory of the
# installed package); the captured output follows.
list.files(path = system.file("script", package = "dslabs"))
##  [1] "make-admissions.R"
##  [2] "make-brca.R"
##  [3] "make-brexit_polls.R"
##  [4] "make-death_prob.R"
##  [5] "make-divorce_margarine.R"
##  [6] "make-gapminder-rdas.R"
##  [7] "make-greenhouse_gases.R"
##  [8] "make-historic_co2.R"
##  [9] "make-mnist_27.R"
## [10] "make-movielens.R"
## [11] "make-murders-rda.R"
## [12] "make-na_example-rda.R"
## [13] "make-nyc_regents_scores.R"
## [14] "make-olive.R"
## [15] "make-outlier_example.R"
## [16] "make-polls_2008.R"
## [17] "make-polls_us_election_2016.R"
## [18] "make-reported_heights-rda.R"
## [19] "make-research_funding_rates.R"
## [20] "make-stars.R"
## [21] "make-temp_carbon.R"
## [22] "make-tissue-gene-expression.R"
## [23] "make-trump_tweets.R"
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"
# Load the `admissions` data set into the workspace.
data(list = "admissions")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.0     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.1.8
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.2.3
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.2.3
# Inspect the structure of the admissions data before summarising it.
str(admissions)
## 'data.frame':    12 obs. of  4 variables:
##  $ major     : chr  "A" "B" "C" "D" ...
##  $ gender    : chr  "men" "men" "men" "men" ...
##  $ admitted  : num  62 63 37 33 28 6 82 68 34 35 ...
##  $ applicants: num  825 560 325 417 191 373 108 25 593 375 ...
# NOTE: `admitted` is the PERCENT of applicants admitted to each major, not a
# head count (one row above has admitted = 68 but only 25 applicants). It must
# be converted to a number of admitted students before aggregating.

# Overall admission rate per gender: total admitted students divided by total
# applicants, i.e. the applicant-weighted mean of the per-major rates.
admissions_avg <- admissions %>%
  group_by(gender) %>%
  summarise(
    avg_rate = sum(admitted / 100 * applicants) / sum(applicants)
  )

# Plot one labelled point per gender; the label shows the rate as a percentage.
ggplot(
  admissions_avg,
  aes(
    x = gender,
    y = avg_rate,
    color = gender,
    label = sprintf("%.1f%%", avg_rate * 100)
  )
) +
  geom_point(size = 3) +
  geom_text_repel() +
  theme_minimal() +
  xlab("Gender") +
  ylab("Average Admission Rate") +
  ggtitle("Average Admission Rate by Gender") +
  # Named values pin each colour to its gender regardless of level order.
  scale_color_manual(values = c(men = "blue", women = "pink")) +
  # Centre the title and drop the legend title (the x axis already names it).
  theme(
    plot.title = element_text(hjust = 0.5),
    legend.title = element_blank()
  )

# The scatter plot shows the overall admission rate by gender for graduate
# school admissions at UC Berkeley. Once the per-major admission percentages
# are converted to numbers of admitted students, men have the higher overall
# rate: about 44.5% versus about 30.3% for women, a gap of roughly 14
# percentage points. Taken at face value, this suggests that there might be
# gender bias in the admissions process at UC Berkeley. However, it is
# important to note that this analysis only considers the aggregate admission
# rate by gender and does not account for other potential factors that may
# contribute to differences in admission rates. The most obvious one is the
# major each applicant applied to: the per-major rates shown above range from
# 6% to 82%, so the mix of majors chosen by each group can drive the aggregate
# gap on its own.