Introduction

In this analysis, we will investigate the mental and physical risks of being a non-citizen in the dairy workforce, focusing on wage disparities. The analysis will include descriptive statistics, regression models, and contingency tables.

Basic R Markdown Formatting

Write the text in here to document what you are doing. But if you want to have paragraphs, you need to add a line break and white space.

Here is an example of text with a line break and white space.

You can also add horizontal line by placing three asterisks (or three hyphens) next to each other:



R Markdown Learning Resources

R Markdown cheat sheet

More information on R Markdown can be found here: R Markdown

Heading1

Heading2

Heading3

Heading4

Heading5
Heading6

Italic

Bald

Bald Italic

  1. list item 1
  2. list item 2
  • bullet 1
  • bullet 2

Add links to document:

For example, here is a link to my website

Inserting code

# Add this before install.packages()
options(repos = c(CRAN = "https://cloud.r-project.org"))  # Official R project mirror

# Then install
install.packages("tidyverse")
## 
## The downloaded binary packages are in
##  /var/folders/wk/m0j53x291xq5n0wkvv2q0nqh0000gn/T//Rtmp0cy4oS/downloaded_packages
install.packages ("tidyverse")
## 
## The downloaded binary packages are in
##  /var/folders/wk/m0j53x291xq5n0wkvv2q0nqh0000gn/T//Rtmp0cy4oS/downloaded_packages
install.packages ("readr")
## 
## The downloaded binary packages are in
##  /var/folders/wk/m0j53x291xq5n0wkvv2q0nqh0000gn/T//Rtmp0cy4oS/downloaded_packages
install.packages("haven")
## 
## The downloaded binary packages are in
##  /var/folders/wk/m0j53x291xq5n0wkvv2q0nqh0000gn/T//Rtmp0cy4oS/downloaded_packages
install.packages("magrittr")
## 
## The downloaded binary packages are in
##  /var/folders/wk/m0j53x291xq5n0wkvv2q0nqh0000gn/T//Rtmp0cy4oS/downloaded_packages
install.packages("dplyr")
## 
## The downloaded binary packages are in
##  /var/folders/wk/m0j53x291xq5n0wkvv2q0nqh0000gn/T//Rtmp0cy4oS/downloaded_packages
install.packages("arsenal")
## 
## The downloaded binary packages are in
##  /var/folders/wk/m0j53x291xq5n0wkvv2q0nqh0000gn/T//Rtmp0cy4oS/downloaded_packages
install.packages("labelled")
## 
## The downloaded binary packages are in
##  /var/folders/wk/m0j53x291xq5n0wkvv2q0nqh0000gn/T//Rtmp0cy4oS/downloaded_packages
install.packages("janitor")
## 
## The downloaded binary packages are in
##  /var/folders/wk/m0j53x291xq5n0wkvv2q0nqh0000gn/T//Rtmp0cy4oS/downloaded_packages
library (tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(arsenal)
## 
## Attaching package: 'arsenal'
## 
## The following object is masked from 'package:lubridate':
## 
##     is.Date
library (readr)
library(haven)
library(magrittr)
## 
## Attaching package: 'magrittr'
## 
## The following object is masked from 'package:arsenal':
## 
##     set_attr
## 
## The following object is masked from 'package:purrr':
## 
##     set_names
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
library(dplyr)
library(labelled)
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
Dairy_worker_data <- read_sav("/Users/valeriaalvarado/Documents/Dairy Worker Data/Dairy_works.sav")
View(Dairy_worker_data)
names (Dairy_worker_data)
##   [1] "RT"        "SERIALNO"  "DIVISION"  "SPORDER"   "PUMA"      "REGION"   
##   [7] "STATE"     "ADJINC"    "PWGTP"     "AGEP"      "CIT"       "CITWP"    
##  [13] "COW"       "DDRS"      "DEAR"      "DEYE"      "DOUT"      "DPHY"     
##  [19] "DRAT"      "DRATX"     "DREM"      "ENG"       "FER"       "GCL"      
##  [25] "GCM"       "GCR"       "HIMRKS"    "HINS1"     "HINS2"     "HINS3"    
##  [31] "HINS4"     "HINS5"     "HINS6"     "HINS7"     "INTP"      "JWMNP"    
##  [37] "JWRIP"     "JWTRNS"    "LANX"      "MAR"       "MARHD"     "MARHM"    
##  [43] "MARHT"     "MARHW"     "MARHYP"    "MIG"       "MIL"       "MLPA"     
##  [49] "MLPB"      "MLPCD"     "MLPE"      "MLPFG"     "MLPH"      "MLPIK"    
##  [55] "MLPJ"      "NWAB"      "NWAV"      "NWLA"      "NWLK"      "NWRE"     
##  [61] "OIP"       "PAP"       "RELSHIPP"  "RETP"      "SCH"       "SCHG"     
##  [67] "SCHL"      "SEMP"      "SEX"       "SSIP"      "SSP"       "WAGP"     
##  [73] "WKHP"      "WKL"       "WKWN"      "WRK"       "YOEP"      "ANC"      
##  [79] "ANC1P"     "ANC2P"     "DECADE"    "DIS"       "DRIVESP"   "ESP"      
##  [85] "ESR"       "FOD1P"     "FOD2P"     "HICOV"     "HISP"      "INDP"     
##  [91] "JWAP"      "JWDP"      "LANP"      "MIGPUMA"   "MIGSP"     "MSP"      
##  [97] "NAICSP"    "NATIVITY"  "NOP"       "OC"        "OCCP"      "PAOC"     
## [103] "PERNP"     "PINCP"     "POBP"      "POVPIP"    "POWPUMA"   "POWSP"    
## [109] "PRIVCOV"   "PUBCOV"    "QTRBIR"    "RAC1P"     "RAC2P"     "RAC3P"    
## [115] "RACAIAN"   "RACASN"    "RACBLK"    "RACNH"     "RACNUM"    "RACPI"    
## [121] "RACSOR"    "RACWHT"    "RC"        "SCIENGP"   "SCIENGRLP" "SFN"      
## [127] "SFR"       "SOCP"      "VPS"       "WAOB"      "FAGEP"     "FANCP"    
## [133] "FCITP"     "FCITWP"    "FCOWP"     "FDDRSP"    "FDEARP"    "FDEYEP"   
## [139] "FDISP"     "FDOUTP"    "FDPHYP"    "FDRATP"    "FDRATXP"   "FDREMP"   
## [145] "FENGP"     "FESRP"     "FFERP"     "FFODP"     "FGCLP"     "FGCMP"    
## [151] "FGCRP"     "FHICOVP"   "FHIMRKSP"  "FHINS1P"   "FHINS2P"   "FHINS3C"  
## [157] "FHINS3P"   "FHINS4C"   "FHINS4P"   "FHINS5C"   "FHINS5P"   "FHINS6P"  
## [163] "FHINS7P"   "FHISP"     "FINDP"     "FINTP"     "FJWDP"     "FJWMNP"   
## [169] "FJWRIP"    "FJWTRNSP"  "FLANP"     "FLANXP"    "FMARP"     "FMARHDP"  
## [175] "FMARHMP"   "FMARHTP"   "FMARHWP"   "FMARHYP"   "FMIGP"     "FMIGSP"   
## [181] "FMILPP"    "FMILSP"    "FOCCP"     "FOIP"      "FPAP"      "FPERNP"   
## [187] "FPINCP"    "FPOBP"     "FPOWSP"    "FPRIVCOVP" "FPUBCOVP"  "FRACP"    
## [193] "FRELSHIPP" "FRETP"     "FSCHGP"    "FSCHLP"    "FSCHP"     "FSEMP"    
## [199] "FSEXP"     "FSSIP"     "FSSP"      "FWAGP"     "FWKHP"     "FWKLP"    
## [205] "FWKWNP"    "FWRKP"     "FYOEP"     "PWGTP1"    "PWGTP2"    "PWGTP3"   
## [211] "PWGTP4"    "PWGTP5"    "PWGTP6"    "PWGTP7"    "PWGTP8"    "PWGTP9"   
## [217] "PWGTP10"   "PWGTP11"   "PWGTP12"   "PWGTP13"   "PWGTP14"   "PWGTP15"  
## [223] "PWGTP16"   "PWGTP17"   "PWGTP18"   "PWGTP19"   "PWGTP20"   "PWGTP21"  
## [229] "PWGTP22"   "PWGTP23"   "PWGTP24"   "PWGTP25"   "PWGTP26"   "PWGTP27"  
## [235] "PWGTP28"   "PWGTP29"   "PWGTP30"   "PWGTP31"   "PWGTP32"   "PWGTP33"  
## [241] "PWGTP34"   "PWGTP35"   "PWGTP36"   "PWGTP37"   "PWGTP38"   "PWGTP39"  
## [247] "PWGTP40"   "PWGTP41"   "PWGTP42"   "PWGTP43"   "PWGTP44"   "PWGTP45"  
## [253] "PWGTP46"   "PWGTP47"   "PWGTP48"   "PWGTP49"   "PWGTP50"   "PWGTP51"  
## [259] "PWGTP52"   "PWGTP53"   "PWGTP54"   "PWGTP55"   "PWGTP56"   "PWGTP57"  
## [265] "PWGTP58"   "PWGTP59"   "PWGTP60"   "PWGTP61"   "PWGTP62"   "PWGTP63"  
## [271] "PWGTP64"   "PWGTP65"   "PWGTP66"   "PWGTP67"   "PWGTP68"   "PWGTP69"  
## [277] "PWGTP70"   "PWGTP71"   "PWGTP72"   "PWGTP73"   "PWGTP74"   "PWGTP75"  
## [283] "PWGTP76"   "PWGTP77"   "PWGTP78"   "PWGTP79"   "PWGTP80"   "filter_$"
Dairy_worker_data_2<-Dairy_worker_data %>% select("DEAR", "DEYE", "CIT", "LANX", "DOUT", "NATIVITY", "AGEP", "SEX", "WAGP", "DIS")
View(Dairy_worker_data_2)
library(readxl)
Dairy_worker_data_2_1_ <- read_excel("~/Downloads/Dairy_worker_data_2 (1).xlsx")
View(Dairy_worker_data_2_1_)

Dairy_worker_data_2<-Dairy_worker_data %>% mutate(CIT_2=ifelse(CIT>4, 2, 1))

table(Dairy_worker_data_2$CIT_2)
## 
##    1    2 
## 1176   67

Plot

You can create code snippet by using plot()

plot(Dairy_worker_data_2$WAGP ~ Dairy_worker_data_2$CIT_2)

plot(Dairy_worker_data_2$WAGP)

class(Dairy_worker_data_2$CIT_2)
## [1] "numeric"
glimpse(Dairy_worker_data_2$CIT_2)
##  num [1:1243] 1 1 1 1 1 1 1 1 1 1 ...
unique(Dairy_worker_data_2$CIT_2) 
## [1] 1 2
  # male is a dummy variable; and there is no missing value in male

# Dependent variable: WAGE
# check data type: WAGE
class(Dairy_worker_data_2$WAGP)
## [1] "numeric"
glimpse(Dairy_worker_data_2$WAGP)
##  num [1:1243] 32000 31700 100000 110000 50000 105000 28000 40000 48000 86000 ...
##  - attr(*, "format.spss")= chr "F6.0"
unique(Dairy_worker_data_2$WAGP)
##   [1]  32000  31700 100000 110000  50000 105000  28000  40000  48000  86000
##  [11]  60000  55000  24000 150000  52000  45500  30400  90000      0 497000
##  [21]  65000  70000  10000  75000 120000  88000  57000  58000   7000  37000
##  [31]  64000  36000  45000   3300  48200  49000  35000  30000  80000  18800
##  [41] 112000  25000  42000  41600   9400  21800  22800  84000  59000  46000
##  [51]  79000  29200 140000  14000  38000  19200  72000   9000  81000  85000
##  [61]   4000  34000 130000  31200  32200  20000   5000 160000 118000  99000
##  [71]  49300 106000    500  37400  47200  63000   9200 127000  47000 347000
##  [81]  36200  56000  22000  33000  21000 114000  53000  92000  48700  38500
##  [91] 101000 770000   7300  61000 190000  31000  37500  18500 200000 170000
## [101]  54000  82000  98000  69000  17000  66000   8000  62000  43000  39500
## [111]  10400  12000   5400 109000    400  23000  26000  93000  74000  22500
## [121]   6500 240000  68000  51000   7200  13200  11200    800  46600  39400
## [131]  27300  44000   3200  87000  11000  15000  46900 506000  27000  12100
## [141]   6000  48500  34500  37200  73000 565000  42800 566000  71000 108000
## [151] 123000  67000   2400 131000   6700  77000  28300  19000 250000  76000
## [161]  83000  95000 260000    900  78000   2000 135000 189000  28500  18000
## [171]   2200   2500  16000   1000  89000   8900  36600 117000  28600 300000
## [181] 220000  12500 241000  43300  49100 113000 116000  37800 103000 156000
## [191] 155000  27600   9600  10500  46800  41500  15200 153000  31800  29500
## [201]  42100  38300 172000  39000  41000   3000   4400 265000  47800 180000
## [211]  26400  12800  30900 499000   1200  32400   1600 210000 115000  30800
## [221]  13000  31500  48600  42900  42500  47400  42700  32300  22400   4500
## [231]  23800  44600   8500
# salary is a numeric variable; and there is no missing value in salary

## Step 1 & 2: Construct a summary table of variables in the sample
# See all the variables and their class in the dataset
str(Dairy_worker_data_2)
## tibble [1,243 × 289] (S3: tbl_df/tbl/data.frame)
##  $ RT       : chr [1:1243] "P" "P" "P" "P" ...
##   ..- attr(*, "format.spss")= chr "A1"
##   ..- attr(*, "display_width")= int 1
##  $ SERIALNO : chr [1:1243] "2023HU0023418" "2023HU0045887" "2023HU0059978" "2023HU0085718" ...
##   ..- attr(*, "format.spss")= chr "A13"
##   ..- attr(*, "display_width")= int 13
##  $ DIVISION : num [1:1243] 4 4 4 4 4 4 4 4 4 4 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ SPORDER  : num [1:1243] 1 2 2 2 1 1 2 1 2 2 ...
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ PUMA     : num [1:1243] 2701 2900 2900 800 2702 ...
##   ..- attr(*, "format.spss")= chr "F5.0"
##  $ REGION   : num [1:1243] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ STATE    : num [1:1243] 29 29 29 29 29 29 29 29 29 29 ...
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ ADJINC   : num [1:1243] 1019518 1019518 1019518 1019518 1019518 ...
##   ..- attr(*, "format.spss")= chr "F7.0"
##  $ PWGTP    : num [1:1243] 41 862 30 28 98 72 67 44 59 54 ...
##   ..- attr(*, "format.spss")= chr "F4.0"
##  $ AGEP     : num [1:1243] 32 55 42 43 66 53 65 34 61 62 ...
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ CIT      : num [1:1243] 1 1 1 1 1 1 1 4 1 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ CITWP    : num [1:1243] NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F4.0"
##  $ COW      : num [1:1243] 1 1 1 1 1 1 1 5 1 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ DDRS     : num [1:1243] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ DEAR     : num [1:1243] 2 2 2 2 2 2 2 2 2 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ DEYE     : num [1:1243] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ DOUT     : num [1:1243] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ DPHY     : num [1:1243] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ DRAT     : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ DRATX    : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ DREM     : num [1:1243] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ ENG      : num [1:1243] NA NA NA NA NA NA NA 1 NA NA ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ FER      : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ GCL      : num [1:1243] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ GCM      : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ GCR      : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ HIMRKS   : num [1:1243] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ HINS1    : num [1:1243] 1 1 1 1 1 1 2 1 1 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ HINS2    : num [1:1243] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ HINS3    : num [1:1243] 2 2 2 2 2 2 1 2 2 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ HINS4    : num [1:1243] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ HINS5    : num [1:1243] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ HINS6    : num [1:1243] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ HINS7    : num [1:1243] 2 2 2 2 2 2 2 2 2 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ INTP     : num [1:1243] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "format.spss")= chr "F6.0"
##  $ JWMNP    : num [1:1243] 30 30 20 55 15 25 25 40 20 10 ...
##   ..- attr(*, "format.spss")= chr "F3.0"
##  $ JWRIP    : num [1:1243] 1 1 1 2 1 1 1 1 2 1 ...
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ JWTRNS   : num [1:1243] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ LANX     : num [1:1243] 2 2 2 2 2 2 2 1 2 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ MAR      : num [1:1243] 3 5 1 1 5 1 1 3 1 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ MARHD    : num [1:1243] 1 NA 2 2 NA 2 2 2 2 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ MARHM    : num [1:1243] 2 NA 2 2 NA 1 2 2 2 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ MARHT    : num [1:1243] 1 NA 2 1 NA 3 1 1 1 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ MARHW    : num [1:1243] 2 NA 2 2 NA 2 2 2 2 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ MARHYP   : num [1:1243] 2013 NA 2020 2017 NA ...
##   ..- attr(*, "format.spss")= chr "F4.0"
##  $ MIG      : num [1:1243] 3 1 1 1 3 3 1 1 1 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ MIL      : num [1:1243] 4 4 4 4 4 4 4 4 4 4 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ MLPA     : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ MLPB     : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ MLPCD    : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ MLPE     : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ MLPFG    : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ MLPH     : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ MLPIK    : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ MLPJ     : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ NWAB     : num [1:1243] 3 3 3 3 2 3 2 3 3 3 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ NWAV     : num [1:1243] 5 5 5 5 5 5 5 5 5 5 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ NWLA     : num [1:1243] 3 3 3 3 2 3 2 3 3 3 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ NWLK     : num [1:1243] 3 3 3 3 2 3 2 3 3 3 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ NWRE     : num [1:1243] 3 3 3 3 3 3 3 3 3 3 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ OIP      : num [1:1243] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "format.spss")= chr "F5.0"
##  $ PAP      : num [1:1243] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "format.spss")= chr "F5.0"
##  $ RELSHIPP : num [1:1243] 20 25 21 21 20 20 21 20 21 21 ...
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ RETP     : num [1:1243] 11000 0 0 0 0 0 0 18000 0 0 ...
##   ..- attr(*, "format.spss")= chr "F6.0"
##  $ SCH      : num [1:1243] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ SCHG     : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ SCHL     : num [1:1243] 16 18 17 17 18 19 19 14 18 16 ...
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ SEMP     : num [1:1243] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "format.spss")= chr "F6.0"
##  $ SEX      : num [1:1243] 1 2 1 1 1 1 2 1 1 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ SSIP     : num [1:1243] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "format.spss")= chr "F5.0"
##  $ SSP      : num [1:1243] 0 0 0 0 0 0 0 0 0 0 ...
##   ..- attr(*, "format.spss")= chr "F5.0"
##  $ WAGP     : num [1:1243] 32000 31700 100000 110000 50000 105000 28000 40000 48000 86000 ...
##   ..- attr(*, "format.spss")= chr "F6.0"
##  $ WKHP     : num [1:1243] 40 84 60 45 40 45 40 40 40 48 ...
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ WKL      : num [1:1243] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ WKWN     : num [1:1243] 52 52 52 52 52 52 52 52 52 52 ...
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ WRK      : num [1:1243] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ YOEP     : num [1:1243] NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F4.0"
##  $ ANC      : num [1:1243] 4 4 1 4 1 1 2 1 1 2 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ ANC1P    : num [1:1243] 999 999 939 999 51 32 50 226 32 22 ...
##   ..- attr(*, "format.spss")= chr "F3.0"
##  $ ANC2P    : num [1:1243] 999 999 999 999 999 999 21 999 999 26 ...
##   ..- attr(*, "format.spss")= chr "F3.0"
##  $ DECADE   : num [1:1243] NA NA NA NA NA NA NA 7 NA NA ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ DIS      : num [1:1243] 2 2 2 2 2 2 2 2 2 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ DRIVESP  : num [1:1243] 1 1 1 2 1 1 1 1 2 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ ESP      : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ ESR      : num [1:1243] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ FOD1P    : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F4.0"
##  $ FOD2P    : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F4.0"
##  $ HICOV    : num [1:1243] 1 1 1 1 1 1 1 1 1 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ HISP     : num [1:1243] 1 1 1 1 1 1 1 11 1 1 ...
##   ..- attr(*, "format.spss")= chr "F2.0"
##  $ INDP     : num [1:1243] 1170 1170 1170 1170 1170 1170 1170 1170 1170 1170 ...
##   ..- attr(*, "format.spss")= chr "F4.0"
##  $ JWAP     : num [1:1243] 67 58 200 87 94 90 93 66 67 75 ...
##   ..- attr(*, "format.spss")= chr "F3.0"
##  $ JWDP     : num [1:1243] 22 16 118 37 52 46 49 19 24 34 ...
##   ..- attr(*, "format.spss")= chr "F3.0"
##  $ LANP     : num [1:1243] NA NA NA NA NA NA NA 1200 NA NA ...
##   ..- attr(*, "format.spss")= chr "F4.0"
##  $ MIGPUMA  : num [1:1243] 2704 NA NA NA 2704 ...
##   ..- attr(*, "format.spss")= chr "F5.0"
##  $ MIGSP    : num [1:1243] 29 NA NA NA 29 29 NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F3.0"
##  $ MSP      : num [1:1243] 4 6 1 1 6 1 1 4 1 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ NAICSP   : chr [1:1243] "3115" "3115" "3115" "3115" ...
##   ..- attr(*, "format.spss")= chr "A7"
##   ..- attr(*, "display_width")= int 7
##  $ NATIVITY : num [1:1243] 1 1 1 1 1 1 1 2 1 1 ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##  $ NOP      : num [1:1243] NA NA NA NA NA NA NA NA NA NA ...
##   ..- attr(*, "format.spss")= chr "F1.0"
##   [list output truncated]
table
## function (..., exclude = if (useNA == "no") c(NA, NaN), useNA = c("no", 
##     "ifany", "always"), dnn = list.names(...), deparse.level = 1) 
## {
##     list.names <- function(...) {
##         l <- as.list(substitute(list(...)))[-1L]
##         if (length(l) == 1L && is.list(..1) && !is.null(nm <- names(..1))) 
##             return(nm)
##         nm <- names(l)
##         fixup <- if (is.null(nm)) 
##             seq_along(l)
##         else nm == ""
##         dep <- vapply(l[fixup], function(x) switch(deparse.level + 
##             1, "", if (is.symbol(x)) as.character(x) else "", 
##             deparse(x, nlines = 1)[1L]), "")
##         if (is.null(nm)) 
##             dep
##         else {
##             nm[fixup] <- dep
##             nm
##         }
##     }
##     miss.use <- missing(useNA)
##     miss.exc <- missing(exclude)
##     useNA <- if (miss.use && !miss.exc && !match(NA, exclude, 
##         nomatch = 0L)) 
##         "ifany"
##     else match.arg(useNA)
##     doNA <- useNA != "no"
##     if (!miss.use && !miss.exc && doNA && match(NA, exclude, 
##         nomatch = 0L)) 
##         warning("'exclude' containing NA and 'useNA' != \"no\"' are a bit contradicting")
##     args <- list(...)
##     if (length(args) == 1L && is.list(args[[1L]])) {
##         args <- args[[1L]]
##         if (length(dnn) != length(args)) 
##             dnn <- paste(dnn[1L], seq_along(args), sep = ".")
##     }
##     if (!length(args)) 
##         stop("nothing to tabulate")
##     bin <- 0L
##     lens <- NULL
##     dims <- integer()
##     pd <- 1L
##     dn <- NULL
##     for (a in args) {
##         if (is.null(lens)) 
##             lens <- length(a)
##         else if (length(a) != lens) 
##             stop("all arguments must have the same length")
##         fact.a <- is.factor(a)
##         if (doNA) 
##             aNA <- anyNA(a)
##         if (!fact.a) {
##             a0 <- a
##             op <- options(warn = 2)
##             on.exit(options(op))
##             a <- factor(a, exclude = exclude)
##             options(op)
##         }
##         add.na <- doNA
##         if (add.na) {
##             ifany <- (useNA == "ifany")
##             anNAc <- anyNA(a)
##             add.na <- if (!ifany || anNAc) {
##                 ll <- levels(a)
##                 if (add.ll <- !anyNA(ll)) {
##                   ll <- c(ll, NA)
##                   TRUE
##                 }
##                 else if (!ifany && !anNAc) 
##                   FALSE
##                 else TRUE
##             }
##             else FALSE
##         }
##         if (add.na) 
##             a <- factor(a, levels = ll, exclude = NULL)
##         else ll <- levels(a)
##         a <- as.integer(a)
##         if (fact.a && !miss.exc) {
##             ll <- ll[keep <- which(match(ll, exclude, nomatch = 0L) == 
##                 0L)]
##             a <- match(a, keep)
##         }
##         else if (!fact.a && add.na) {
##             if (ifany && !aNA && add.ll) {
##                 ll <- ll[!is.na(ll)]
##                 is.na(a) <- match(a0, c(exclude, NA), nomatch = 0L) > 
##                   0L
##             }
##             else {
##                 is.na(a) <- match(a0, exclude, nomatch = 0L) > 
##                   0L
##             }
##         }
##         nl <- length(ll)
##         dims <- c(dims, nl)
##         if (prod(dims) > .Machine$integer.max) 
##             stop("attempt to make a table with >= 2^31 elements")
##         dn <- c(dn, list(ll))
##         bin <- bin + pd * (a - 1L)
##         pd <- pd * nl
##     }
##     names(dn) <- dnn
##     bin <- bin[!is.na(bin)]
##     if (length(bin)) 
##         bin <- bin + 1L
##     y <- array(tabulate(bin, pd), dims, dimnames = dn)
##     class(y) <- "table"
##     y
## }
## <bytecode: 0x1478bf748>
## <environment: namespace:base>
range(Dairy_worker_data_2$WAGP)
## [1]      0 770000
# Mean 
mean(Dairy_worker_data_2$WAGP)
## [1] 51758
# SD
sd(Dairy_worker_data_2$WAGP)
## [1] 54936.59
# Create summary table for wage
WAGP_summary <- Dairy_worker_data_2 %>%
  summarise(
    mean_WAGP = mean(WAGP, na.rm = TRUE),  # Mean salary
    sd_WAGP = sd(WAGP, na.rm = TRUE),  # Standard deviation
    WAGP_min = min(WAGP, na.rm = TRUE), # Min salary
    p25_WAGP = quantile(WAGP, 0.25, na.rm = TRUE),  # 25th percentile
    median_WAGP = quantile(WAGP, 0.50, na.rm = TRUE),  # 50th percentile (median)
    p75_WAGP = quantile(WAGP, 0.75, na.rm = TRUE),  # 75th percentile
    WAGP_max = max(WAGP, na.rm = TRUE), # Max salary
    WAGP_range = max(WAGP, na.rm = TRUE) - min(WAGP, na.rm = TRUE)  # Range
  )

# Print the summary table
print(WAGP_summary)
## # A tibble: 1 × 8
##   mean_WAGP sd_WAGP WAGP_min p25_WAGP median_WAGP p75_WAGP WAGP_max WAGP_range
##       <dbl>   <dbl>    <dbl>    <dbl>       <dbl>    <dbl>    <dbl>      <dbl>
## 1    51758.  54937.        0    25000       47800    64000   770000     770000
num_vars <- c("WAGP", "AGEP")

# Create descriptive table
# Note: In the following codes, I removed obs with missing values
descriptive_table_num <- Dairy_worker_data_2 %>%
  summarise(
    across(all_of(num_vars), list( # ensures that only columns listed in vars are selected for transformation
      mean = ~mean(.x, na.rm = TRUE), # the .x is a placeholder for each variable being processed inside the across() function
      sd = ~sd(.x, na.rm = TRUE),
      min = ~min(.x, na.rm = TRUE),
      p25 = ~quantile(.x, 0.25, na.rm = TRUE),
      median = ~median(.x, na.rm = TRUE),
      p75 = ~quantile(.x, 0.75, na.rm = TRUE),
      max = ~max(.x, na.rm = TRUE),
      range = ~max(.x, na.rm = TRUE) - min(.x, na.rm = TRUE)
    ), 
    .names = "{.col}_{.fn}") # ensures that the column names are structured as "variable_stat"
  ) %>%
  pivot_longer(cols = everything(),  # converts the wide format (one row for all variables) into a long format
               names_to = c("Variable", ".value"), # assigns the statistic type (e.g., "mean", "sd", "min", etc.) as new column names
               names_sep = "_") %>% # specifies that the column names are separated by an underscore (_)
  select(Variable, mean, sd, min, p25, median, p75, max, range) # reorder the variables

# Print the formatted table
print(descriptive_table_num)
## # A tibble: 2 × 9
##   Variable    mean      sd   min   p25 median   p75    max  range
##   <chr>      <dbl>   <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl>  <dbl>
## 1 WAGP     51758.  54937.      0 25000  47800 64000 770000 770000
## 2 AGEP        45.7    15.1    16    33     47    59     95     79
# Round all numeric columns to two decimal places
descriptive_table_num <- descriptive_table_num %>%
  mutate(across(where(is.numeric), ~round(.x, 2)))

# Print the rounded table
print(descriptive_table_num)
## # A tibble: 2 × 9
##   Variable    mean      sd   min   p25 median   p75    max  range
##   <chr>      <dbl>   <dbl> <dbl> <dbl>  <dbl> <dbl>  <dbl>  <dbl>
## 1 WAGP     51758   54937.      0 25000  47800 64000 770000 770000
## 2 AGEP        45.7    15.1    16    33     47    59     95     79
cat_vars <- c("DEAR", "DEYE", "CIT_2", "LANX", "DOUT", "NATIVITY", "SEX", "DIS")

get_mode <- function(x) {
  uniq_vals <- unique(x[!is.na(x)])  # Remove NA values
  uniq_vals[which.max(tabulate(match(x, uniq_vals)))]  # Get the most frequent value
}

# Create descriptive table
descriptive_table_cat <- Dairy_worker_data_2 %>%
  select(all_of(cat_vars)) %>%
  pivot_longer(everything(), names_to = "Variable", values_to = "Category") %>%
  group_by(Variable, Category) %>%
  summarise(
    Count = n(),  # Count occurrences
    .groups = "drop"  # Remove grouping after summarization
  ) %>%
  group_by(Variable) %>%
  mutate(
    Total = sum(Count),  # Get total count for each Variable
    Percentage = round((Count / Total) * 100, 2)  # Correct percentage calculation
  ) %>%
  ungroup() %>%  # Remove grouping for proper mode calculation
  mutate(
    Mode = sapply(Variable, function(var) get_mode(Dairy_worker_data_2[[var]]))  # Correct mode calculation for each variable
  ) %>%
  select(-Total) %>%  # Remove unnecessary column
  arrange(Variable, desc(Count))  # Sort by variable and count


# Print the summary table
print(descriptive_table_cat)
## # A tibble: 16 × 5
##    Variable Category Count Percentage  Mode
##    <chr>       <dbl> <int>      <dbl> <dbl>
##  1 CIT_2           1  1176      94.6      1
##  2 CIT_2           2    67       5.39     1
##  3 DEAR            2  1199      96.5      2
##  4 DEAR            1    44       3.54     2
##  5 DEYE            2  1223      98.4      2
##  6 DEYE            1    20       1.61     2
##  7 DIS             2  1131      91.0      2
##  8 DIS             1   112       9.01     2
##  9 DOUT            2  1224      98.5      2
## 10 DOUT            1    19       1.53     2
## 11 LANX            2  1104      88.8      2
## 12 LANX            1   139      11.2      2
## 13 NATIVITY        1  1138      91.6      1
## 14 NATIVITY        2   105       8.45     1
## 15 SEX             1   839      67.5      1
## 16 SEX             2   404      32.5      1
summary_table <- Dairy_worker_data_2 %>%
  summarise(
    # Descriptive statistics for male (count and percentage)
    DEYE_count_0 = sum(DEYE == 0, na.rm = TRUE),
    DEYE_count_1 = sum(DEYE == 1, na.rm = TRUE),
    DEYE_percent_0 = round((DEYE_count_0 / n()) * 100, 2),
    DEYE_percent_1 = round((DEYE_count_1 / n()) * 100, 2),
    
    # Descriptive statistics for salary (mean, median, sd, min, max)
    WAGP_mean = mean(WAGP, na.rm = TRUE),
    WAGP_median = median(WAGP, na.rm = TRUE),
    WAGP_sd = sd(WAGP, na.rm = TRUE),
    WAGP_min = min(WAGP, na.rm = TRUE),
    WAGP_max = max(WAGP, na.rm = TRUE)
)
print(summary_table)
## # A tibble: 1 × 9
##   DEYE_count_0 DEYE_count_1 DEYE_percent_0 DEYE_percent_1 WAGP_mean WAGP_median
##          <int>        <int>          <dbl>          <dbl>     <dbl>       <dbl>
## 1            0           20              0           1.61    51758.       47800
## # ℹ 3 more variables: WAGP_sd <dbl>, WAGP_min <dbl>, WAGP_max <dbl>
contingency_table <- Dairy_worker_data_2 %>%
  group_by(CIT_2) %>%  # Group by male (0 and 1)
  summarise(
    Count = n(),  # Count of observations per male group
    Mean_CIT_2 = round(mean(CIT_2, na.rm = TRUE), 2),  # Mean salary (rounded to 2 decimals)
    SD_dis = round(sd(CIT_2, na.rm = TRUE), 2),  # Standard deviation of salary (rounded to 2 decimals)
    Min_Salary = min(CIT_2, na.rm = TRUE),  # Minimum salary
    Max_Salary = max(CIT_2, na.rm = TRUE),  # Maximum salary
    Median_Salary = median(CIT_2, na.rm = TRUE)  # Median salary
  ) 


# Print the summary table
print(contingency_table)
## # A tibble: 2 × 7
##   CIT_2 Count Mean_CIT_2 SD_dis Min_Salary Max_Salary Median_Salary
##   <dbl> <int>      <dbl>  <dbl>      <dbl>      <dbl>         <dbl>
## 1     1  1176          1      0          1          1             1
## 2     2    67          2      0          2          2             2

Change Themes for the HTML Output

Click the gear symbol, and then click output options.In Apply theme, you can change the style of the HTML output.

You can also change the code chunk style by choosing syntax highlighting.

In the same panel, you can add a table of content for easier navigation. Check the box of Include table of contents.

Notice that the heading of this R markdown document also changed after making edits to the HTML output style.

You can play around with R to see other features you can apply to your HTML output.

Conclude Your R Markdown Document

You can print out the current R version that you use to create this R Markdown. This helps to keep a record in case that R have newer versions in the future and some of your current code won’t run in the same way as it is now.

sessionInfo()
## R version 4.4.2 (2024-10-31)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sequoia 15.3
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: America/New_York
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] readxl_1.4.5    janitor_2.2.1   labelled_2.14.0 magrittr_2.0.3 
##  [5] haven_2.5.4     arsenal_3.6.3   lubridate_1.9.4 forcats_1.0.0  
##  [9] stringr_1.5.1   dplyr_1.1.4     purrr_1.0.4     readr_2.1.5    
## [13] tidyr_1.3.1     tibble_3.2.1    ggplot2_3.5.2   tidyverse_2.0.0
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6      jsonlite_1.9.0    compiler_4.4.2    tidyselect_1.2.1 
##  [5] snakecase_0.11.1  jquerylib_0.1.4   scales_1.3.0      yaml_2.3.10      
##  [9] fastmap_1.2.0     R6_2.6.1          generics_0.1.3    knitr_1.49       
## [13] munsell_0.5.1     bslib_0.9.0       pillar_1.10.1     tzdb_0.4.0       
## [17] rlang_1.1.5       utf8_1.2.4        cachem_1.1.0      stringi_1.8.4    
## [21] xfun_0.51         sass_0.4.9        timechange_0.3.0  cli_3.6.4        
## [25] withr_3.0.2       digest_0.6.37     grid_4.4.2        rstudioapi_0.17.1
## [29] hms_1.1.3         lifecycle_1.0.4   vctrs_0.6.5       evaluate_1.0.3   
## [33] glue_1.8.0        cellranger_1.1.0  colorspace_2.1-1  rmarkdown_2.29   
## [37] tools_4.4.2       pkgconfig_2.0.3   htmltools_0.5.8.1