library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.5 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.0.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(rvest)
##
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
##
## guess_encoding
library(httr)
library(purrr)
library(stringr)
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
## The following object is masked from 'package:purrr':
##
## compact
library(ggplot2)
library(Rmisc)
## Loading required package: lattice
library(dat)
## To use dplyr as backend set 'options(dat.use.dplyr = TRUE)'.
##
## Attaching package: 'dat'
## The following object is masked from 'package:purrr':
##
## map
## The following object is masked from 'package:tidyr':
##
## extract
## The following object is masked from 'package:base':
##
## replace
# Read the pulse2021_puf_39.csv file into a data frame.
# NOTE(review): the absolute path is specific to one machine.
pulse39 <- read.csv(
  file = "C:\\Users\\Bryan\\Desktop\\US Census data\\pulse2021_puf_39.csv"
)
Null hypothesis: No association between sex at birth (IV) and sexual orientation (DV).
Alternative hypothesis: There is an association between sex at birth (IV) and sexual orientation (DV).
The key assumptions are that the data are normally distributed and unbiased.
# Build a combined key by concatenating the sex-at-birth code and the
# sexual-orientation code with no separator (e.g. 1 and 2 -> "12",
# 1 and -99 -> "1-99"). paste0() is the idiomatic form of paste(..., sep = "").
pulse39$subgroup <- paste0(
  pulse39$EGENID_BIRTH,
  pulse39$SEXUAL_ORIENTATION
)
# One-way frequency table (n and proportion) of the combined key.
tabyl(pulse39, subgroup)
## subgroup n percent
## 1-99 520 0.009112575
## 11 1121 0.019644610
## 12 20711 0.362943362
## 13 451 0.007903407
## 14 297 0.005204682
## 15 358 0.006273658
## 2-99 744 0.013037992
## 21 722 0.012652460
## 22 29768 0.521659891
## 23 1409 0.024691574
## 24 508 0.008902285
## 25 455 0.007973503
# Collapse the combined sex-at-birth/orientation key into labelled categories.
# Only the six keys listed in `recodes` are kept; every other key (the -99
# codes and orientation codes 4 and 5) becomes NA through `else=NA`.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the result type.
pulse39$subgroupcat <- car::Recode(
  pulse39$subgroup,
  recodes = " '11' = 'Born male and is gay'; '12' = 'Born male and is straight'; '21' = 'Born female and lesbian'; '22' = 'Born female and straight'; '23' = 'Born female and identifies as bisexual'; '13' = 'Born male and is bisexual'; else=NA",
  as.factor = TRUE
)
# One-way frequency table of the recoded categories; rows recoded to NA are
# reported separately, alongside a valid-percent column.
tabyl(pulse39, subgroupcat)
## subgroupcat n percent valid_percent
## Born female and identifies as bisexual 1409 0.024691574 0.026004946
## Born female and lesbian 722 0.012652460 0.013325459
## Born female and straight 29768 0.521659891 0.549407552
## Born male and is bisexual 451 0.007903407 0.008323798
## Born male and is gay 1121 0.019644610 0.020689528
## Born male and is straight 20711 0.362943362 0.382248717
## <NA> 2882 0.050504696 NA
# Crosstab of sex at birth (rows) by recoded category (columns), with NA
# values excluded. Cells show the row percentage followed by the count.
pulse39 %>%
  tabyl(
    EGENID_BIRTH, subgroupcat,
    show_missing_levels = FALSE, # spelled out: `F` can be reassigned
    show_na = FALSE
  ) %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(digits = 2) %>%
  adorn_ns() %>%
  knitr::kable()
| EGENID_BIRTH | Born female and identifies as bisexual | Born female and lesbian | Born female and straight | Born male and is bisexual | Born male and is gay | Born male and is straight |
|---|---|---|---|---|---|---|
| 1 | 0.00% (0) | 0.00% (0) | 0.00% (0) | 2.02% (451) | 5.03% (1121) | 92.95% (20711) |
| 2 | 4.42% (1409) | 2.26% (722) | 93.32% (29768) | 0.00% (0) | 0.00% (0) | 0.00% (0) |
The result is statistically significant: the p-value rounds to .00.
library(vcd)
## Loading required package: grid
# Chi-square tests and association measures for sex at birth by the raw
# sexual-orientation codes.
# NOTE(review): the raw column still contains the -99 code, so it enters the
# test as a category of its own (presumably a non-response code; confirm
# against the codebook).
assocstats(
  table(
    pulse39[["EGENID_BIRTH"]],
    pulse39[["SEXUAL_ORIENTATION"]]
  )
)
## X^2 df P(> X^2)
## Likelihood Ratio 532.02 5 0
## Pearson 523.27 5 0
##
## Phi-Coefficient : NA
## Contingency Coeff.: 0.095
## Cramer's V : 0.096
From the table we can see that the p-value is less than the significance level of .05. Therefore, we can reject the null hypothesis and conclude that there is an association between sexual orientation and sex at birth.
# Raw cell counts behind the test: sex at birth (rows) by sexual-orientation
# code (columns), including the -99 code.
table(
  pulse39[["EGENID_BIRTH"]],
  pulse39[["SEXUAL_ORIENTATION"]]
)
##
## -99 1 2 3 4 5
## 1 520 1121 20711 451 297 358
## 2 744 722 29768 1409 508 455
# Row-percentage crosstab of sex at birth by recoded category with NA values
# excluded; cells show the row percentage followed by the count.
pulse39 %>%
  tabyl(
    EGENID_BIRTH, subgroupcat,
    show_missing_levels = FALSE, # spelled out: `F` can be reassigned
    show_na = FALSE
  ) %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(digits = 2) %>%
  adorn_ns() %>%
  knitr::kable()
| EGENID_BIRTH | Born female and identifies as bisexual | Born female and lesbian | Born female and straight | Born male and is bisexual | Born male and is gay | Born male and is straight |
|---|---|---|---|---|---|---|
| 1 | 0.00% (0) | 0.00% (0) | 0.00% (0) | 2.02% (451) | 5.03% (1121) | 92.95% (20711) |
| 2 | 4.42% (1409) | 2.26% (722) | 93.32% (29768) | 0.00% (0) | 0.00% (0) | 0.00% (0) |
# Row-percentage crosstab of sex at birth by recoded category, this time
# keeping the NA column so the share of unclassified rows is visible.
pulse39 %>%
  tabyl(EGENID_BIRTH, subgroupcat) %>%
  adorn_percentages(denominator = "row") %>%
  adorn_pct_formatting(digits = 2) %>%
  adorn_ns() %>%
  knitr::kable()
| EGENID_BIRTH | Born female and identifies as bisexual | Born female and lesbian | Born female and straight | Born male and is bisexual | Born male and is gay | Born male and is straight | NA_ |
|---|---|---|---|---|---|---|---|
| 1 | 0.00% (0) | 0.00% (0) | 0.00% (0) | 1.92% (451) | 4.78% (1121) | 88.29% (20711) | 5.01% (1175) |
| 2 | 4.19% (1409) | 2.15% (722) | 88.58% (29768) | 0.00% (0) | 0.00% (0) | 0.00% (0) | 5.08% (1707) |
The percentages for the bisexual categories dropped when missing data was included, but the output did not change enough to alter the interpretation.
library(vcd)
# Mosaic plot of sex at birth by raw sexual-orientation code. The first
# variable is split vertically and the second horizontally; shade = TRUE
# turns on vcd's residual-based shading of the tiles.
mosaic(
  ~ EGENID_BIRTH + SEXUAL_ORIENTATION,
  data = pulse39,
  direction = c("v", "h"),
  shade = TRUE
)
The numbers of men who are gay and of women who identify as bisexual were larger than expected. According to the data, there were fewer men who identify as bisexual and fewer women who identify as lesbian than expected. These departures from the expected counts reflect the significant association between sex at birth and sexual orientation.