library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(rvest)
library(httr)
library(purrr)
library(stringr)
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(ggplot2)
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
## 
##     compact
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:purrr':
## 
##     some
## The following object is masked from 'package:dplyr':
## 
##     recode
# Read the survey extract into `pulse39`.
# NOTE(review): presumably the Census Household Pulse Survey public-use file
# for week 39 (inferred from the file name) -- confirm against the codebook.
# NOTE(review): this is an absolute path on one user's machine; the script
# will not run elsewhere until it is replaced by a project-relative path.
pulse39 <- read.csv(
  "C:\\Users\\Bryan\\Desktop\\US Census data\\pulse2021_puf_39.csv"
)

# Combine the two gender columns into a single code per respondent by pasting
# them together with no separator (e.g. EGENID_BIRTH 2 + GENID_DESCRIBE 3 gives
# "23"; a GENID_DESCRIBE of -99 gives codes such as "1-99").
pulse39$subgroup <- paste0(pulse39$EGENID_BIRTH, pulse39$GENID_DESCRIBE)

# Frequency table of the combined codes.
pulse39 %>%
  tabyl(subgroup)
##  subgroup     n     percent
##      1-99   417 0.007307584
##        11 22652 0.396957802
##        12    66 0.001156596
##        13    60 0.001051451
##        14   263 0.004608860
##      2-99   544 0.009533156
##        21    73 0.001279265
##        22 32522 0.569921492
##        23    79 0.001384410
##        24   388 0.006799383
# Age in years, computed as 2021 minus the reported birth year (the reference
# year is hard-coded).
pulse39 <- transform(pulse39, age = 2021 - TBIRTH_YEAR)

# Flag the two transgender subgroups; every other subgroup code becomes NA.
# NOTE(review): presumably "23" = female at birth / identifies as transgender
# and "13" = male at birth / identifies as transgender -- confirm the coding.
# `TRUE` is spelled out because `T` is an ordinary, reassignable variable.
pulse39$trans <- car::Recode(
  pulse39$subgroup,
  recodes = "'23' ='transman' ; '13' ='transwoman' ; else=NA",
  as.factor = TRUE
)

# Frequency table of the new factor, including the NA group.
pulse39 %>%
  tabyl(trans)
##       trans     n     percent valid_percent
##    transman    79 0.001384410     0.5683453
##  transwoman    60 0.001051451     0.4316547
##        <NA> 56925 0.997564139            NA
# Flag the two cisgender subgroups ("11" and "22"); every other subgroup code
# becomes NA. `TRUE` is spelled out because `T` is a reassignable variable.
pulse39$cis <- car::Recode(
  pulse39$subgroup,
  recodes = "'11' = 'cisman';'22' ='ciswoman'; else=NA",
  as.factor = TRUE
)

# Frequency table of the new factor, including the NA group.
pulse39 %>%
  tabyl(cis)
##       cis     n    percent valid_percent
##    cisman 22652 0.39695780     0.4105557
##  ciswoman 32522 0.56992149     0.5894443
##      <NA>  1890 0.03312071            NA
# Mean age for each level of `trans` (the NA row is every other respondent).
# Uses summarise() instead of the superseded summarise_at(). The dplyr::
# prefix is required because plyr, attached after dplyr via Rmisc, masks
# summarise(). The result column keeps the name `name` so the printed table
# is unchanged.
pulse39 %>%
  group_by(trans) %>%
  dplyr::summarise(name = mean(age))
## # A tibble: 3 x 2
##   trans       name
##   <fct>      <dbl>
## 1 transman    33.6
## 2 transwoman  44.0
## 3 <NA>        53.9

According to the data, transmen (mean age 33.6) are younger on average than transwomen (mean age 44.0).

# Mean age for each level of `cis` (the NA row is every other respondent).
# Uses summarise() instead of the superseded summarise_at(). The dplyr::
# prefix is required because plyr, attached after dplyr via Rmisc, masks
# summarise(). The result column keeps the name `name` so the printed table
# is unchanged.
pulse39 %>%
  group_by(cis) %>%
  dplyr::summarise(name = mean(age))
## # A tibble: 3 x 2
##   cis       name
##   <fct>    <dbl>
## 1 cisman    55.0
## 2 ciswoman  53.1
## 3 <NA>      52.9

According to the data, cismen are on average 1.94 years older than ciswomen.

# Standard deviation of age for each level of `cis` (the NA row is every
# other respondent). Uses summarise() instead of the superseded
# summarise_at(). The dplyr:: prefix is required because plyr, attached after
# dplyr via Rmisc, masks summarise(). The result column keeps the name `name`
# so the printed table is unchanged.
pulse39 %>%
  group_by(cis) %>%
  dplyr::summarise(name = sd(age))
## # A tibble: 3 x 2
##   cis       name
##   <fct>    <dbl>
## 1 cisman    16.3
## 2 ciswoman  15.5
## 3 <NA>      18.2
# Welch two-sample t-test (unequal variances) comparing mean age between the
# two levels of `trans`; rows where `trans` is NA are not part of either group.
t.test(
  age ~ trans,
  data = pulse39,
  var.equal = FALSE
)
## 
##  Welch Two Sample t-test
## 
## data:  age by trans
## t = -3.7457, df = 129.47, p-value = 0.0002697
## alternative hypothesis: true difference in means between group transman and group transwoman is not equal to 0
## 95 percent confidence interval:
##  -15.971303  -4.930807
## sample estimates:
##   mean in group transman mean in group transwoman 
##                 33.58228                 44.03333

According to the data, we can be 95% confident that transmen are, on average, between about 5 and 16 years younger than transwomen. Because this interval excludes zero (p < 0.001), the null hypothesis of no difference in mean age can be rejected.

The negative sign of the t statistic does not indicate the strength of the relationship; it only reflects the order in which R subtracts the group means in its calculation (transman minus transwoman).

# Welch two-sample t-test (unequal variances) comparing mean age between the
# two levels of `cis`; rows where `cis` is NA are not part of either group.
t.test(
  age ~ cis,
  data = pulse39,
  var.equal = FALSE
)
## 
##  Welch Two Sample t-test
## 
## data:  age by cis
## t = 14.056, df = 47056, p-value < 2.2e-16
## alternative hypothesis: true difference in means between group cisman and group ciswoman is not equal to 0
## 95 percent confidence interval:
##  1.672666 2.214754
## sample estimates:
##   mean in group cisman mean in group ciswoman 
##               55.04441               53.10070

The data show a practically meaningful difference in age between trans men and trans women: more than ten years on average. The difference in age between cis men and cis women, although statistically significant, is less than 2 years and is therefore of less practical importance.