{
  library(readxl)
  library(writexl)
  library(gtsummary)
  library(dplyr)
library(plyr)
library(parameters)
  
  df = read_xlsx("CRC+Health+Survey_May+28,+2025_09.42/CRC Health Survey_May 28, 2025_09.42.xlsx", sheet = "Updated")
  dictionary = names(df)
  names(df) = unlist(df[1,])
  df = df[-1,]
  
  #age
  df$age = 2025 - as.numeric(df$Q8)
  
  # Let's work on CRC ever and up-to-date status
  # table(df$Q20, df$Q21, useNA = "ifany")
  # table(df$Q22, df$Q23, useNA = "ifany")
  # Assuming Don't know/missing are "No"
  df$ever = "No"
  df$ever[df$Q20 %in% "Yes" | df$Q22 %in% "Yes"] = "Yes"
  df$uptodate = "No"
  df$uptodate[(df$Q20 %in% "Yes" & df$Q21 %in% "Less than 10 years age") |
                df$Q22 %in% "Yes" & df$Q23 %in% "Less than 1 year ago"] = "Yes"
  
  df$Q14 = factor(df$Q14, levels = c("Less than $10,000", "$15,000 to under $20,000",
                                     "$50,000 to under $75,000", "$75,000 to under $100,000",
                                     "$100,000 to under $200,000"))
  df$Q11 = factor(df$Q11, levels = c("8 through 11 years", "12 years or completed high school", "College graduate"))
  
  df$Q17 = factor(df$Q17, levels = c(c("Employed, full time", "Employed, part time", "Self employed", "Homemaker", "Student", "Unemployed", "Disabled")))
  
  df$checkup = df$Q34
  df$checkup[df$checkup %in% c("More than 2 up to 5 years ago", "More than 5 years ago")] = "More than 2 years ago"
  df$checkup = factor(df$checkup, levels = c("One year ago or less", "More than 1 up to 2 years ago", "More than 2 years ago"))
  
  df$generalhealth = df$Q35
  df$generalhealth[df$generalhealth %in% c("Excellent", "Very good")] = c("Very good/Excellent")
  df$generalhealth[df$generalhealth %in% c("Poor", "Fair")] = c("Poor/Fair")
  df$generalhealth = factor(df$generalhealth, levels = c("Poor/Fair", "Good", "Very good/Excellent"))

  }
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:gtsummary':
## 
##     mutate
## New names:
## • `` -> `...12`
## • `Have you ever been diagnosed with one of the following? - Selected Choice`
##   -> `Have you ever been diagnosed with one of the following? - Selected
##   Choice...71`
## • `Have you ever been diagnosed with one of the following? - Selected Choice`
##   -> `Have you ever been diagnosed with one of the following? - Selected
##   Choice...75`
df$Q16[df$Q15 %in% "No"] = "None"

df$healthinsurance = df$Q16

df$healthinsurance = mapvalues(df$healthinsurance, from = c("A plan purchased through an employer or union (includes plans purchased through another person's employer)",
                                                            "Alaska Native, Indian Health Service, Tribal Health Services",
                                                            "Medicaid or other state program", "Medicare", "None"),
                               to = c("Private", "Public","Public", "Public", "None"))

df$healthinsurance = factor(df$healthinsurance, levels = c("None", "Private", "Public"))

df$Q42 = factor(df$Q42, levels = c("No", "Yes", "Don't know"))

#county
table(df$Q4, useNA = "ifany")
## 
##      Amador       Butte      Fresno      Merced  Sacramento San Joaquin 
##           3          11          30          21          16          14 
##        Yolo 
##           5
tab1_columns = c("age", "Q10","Q9", "Q12",
     "Q11","Q14", "Q17", "ever")
dftab1 = df[tab1_columns]
tab1_names = c("Age", "Sex", "Country", "Language spoken at home", "Education", "Household income", "Occupation", "ever")
names(dftab1) = tab1_names
dftab1|>
  gtsummary::tbl_summary(
    by = "ever",
    missing_text = "Missing"
  ) |>
  gtsummary::add_p() |>
  gtsummary::add_overall() |>
  gtsummary::bold_labels() |> 
  gtsummary::separate_p_footnotes()
Characteristic Overall
N = 100
1
No
N = 57
1
Yes
N = 43
1
p-value
Age 54 (50, 63) 54 (50, 60) 54 (50, 65) 0.72
Sex


0.23
    Female 63 (63%) 33 (58%) 30 (70%)
    Male 37 (37%) 24 (42%) 13 (30%)
Country


<0.0014
    Laos 53 (53%) 39 (68%) 14 (33%)
    Thailand 36 (36%) 15 (26%) 21 (49%)
    USA 11 (11%) 3 (5.3%) 8 (19%)
Language spoken at home


<0.0013
    English 17 (17%) 15 (26%) 2 (4.7%)
    English,Hmong 44 (44%) 15 (26%) 29 (67%)
    Hmong 39 (39%) 27 (47%) 12 (28%)
Education


0.64
    8 through 11 years 29 (29%) 15 (26%) 14 (33%)
    12 years or completed high school 63 (63%) 36 (63%) 27 (63%)
    College graduate 8 (8.0%) 6 (11%) 2 (4.7%)
Household income


<0.0014
    Less than $10,000 26 (32%) 15 (38%) 11 (26%)
    $15,000 to under $20,000 18 (22%) 8 (21%) 10 (23%)
    $50,000 to under $75,000 15 (18%) 4 (10%) 11 (26%)
    $75,000 to under $100,000 14 (17%) 3 (7.7%) 11 (26%)
    $100,000 to under $200,000 9 (11%) 9 (23%) 0 (0%)
    Missing 18 18 0
Occupation


<0.0014
    Employed, full time 29 (29%) 13 (23%) 16 (37%)
    Employed, part time 15 (15%) 5 (8.8%) 10 (23%)
    Self employed 6 (6.0%) 0 (0%) 6 (14%)
    Homemaker 10 (10%) 5 (8.8%) 5 (12%)
    Student 6 (6.0%) 6 (11%) 0 (0%)
    Unemployed 30 (30%) 24 (42%) 6 (14%)
    Disabled 4 (4.0%) 4 (7.0%) 0 (0%)
1 Median (Q1, Q3); n (%)
2 Wilcoxon rank sum test
3 Pearson’s Chi-squared test
4 Fisher’s exact test
tab2_columns = c("healthinsurance","generalhealth","Q18", "Q36",
              "checkup","Q26","Q42","ever")
dftab2 = df[tab2_columns]
tab2_names = c("Health insurance","General health","Getting medical advice", "Medical delay",
                  "Routine check-up", "Smoked >100 cigarettes", "Family history of cancer", "ever")
names(dftab2) = tab2_names
dftab2|>
  gtsummary::tbl_summary(
    by = "ever",
    missing_text = "Missing"
  ) |>
  gtsummary::add_p() |>
  gtsummary::add_overall() |>
  gtsummary::bold_labels() |> 
  gtsummary::separate_p_footnotes()
Characteristic Overall
N = 100
1
No
N = 57
1
Yes
N = 43
1
p-value
Health insurance


0.62
    None 9 (10%) 6 (13%) 3 (7.0%)
    Private 25 (28%) 14 (30%) 11 (26%)
    Public 56 (62%) 27 (57%) 29 (67%)
    Missing 10 10 0
General health


0.0163
    Poor/Fair 45 (45%) 23 (40%) 22 (51%)
    Good 38 (38%) 28 (49%) 10 (23%)
    Very good/Excellent 17 (17%) 6 (11%) 11 (26%)
Getting medical advice


0.93
    A little confident 37 (37%) 20 (35%) 17 (40%)
    Somewhat confident 43 (43%) 25 (44%) 18 (42%)
    Very confident 20 (20%) 12 (21%) 8 (19%)
Medical delay


0.0662
    Don't know 3 (3.0%) 3 (5.3%) 0 (0%)
    No 38 (38%) 17 (30%) 21 (49%)
    Yes 59 (59%) 37 (65%) 22 (51%)
Routine check-up


0.23
    One year ago or less 31 (31%) 18 (32%) 13 (30%)
    More than 1 up to 2 years ago 47 (47%) 23 (40%) 24 (56%)
    More than 2 years ago 22 (22%) 16 (28%) 6 (14%)
Smoked >100 cigarettes 36 (36%) 12 (21%) 24 (56%) <0.0013
Family history of cancer


0.43
    No 47 (47%) 28 (49%) 19 (44%)
    Yes 29 (29%) 18 (32%) 11 (26%)
    Don't know 24 (24%) 11 (19%) 13 (30%)
1 n (%)
2 Fisher’s exact test
3 Pearson’s Chi-squared test
dfglm = df[c(tab1_columns[-length(tab1_columns)], tab2_columns)]
names(dfglm) = c(tab1_names[-length(tab1_names)], tab2_names)

dfglm$`Family history of cancer` = factor(dfglm$`Family history of cancer`)

dfglm$ever = ifelse(dftab1$ever %in% "Yes", 1, 0)
mod1 = glm(ever~Age+Sex+Education+Country+`Health insurance` +`Routine check-up`+`Family history of cancer`,family = binomial(link = "logit"), data = dfglm)

mod1 |>
  parameters(exponentiate = TRUE) |>
  print_md()
Parameter Odds Ratio SE 95% CI z p
(Intercept) 0.08 0.30 (4.73e-05, 140.47) -0.67 0.500
Age 1.01 0.05 (0.92, 1.11) 0.19 0.846
Sex (Male) 0.81 0.64 (0.16, 3.74) -0.27 0.790
Education (12 years or completed high school) 0.46 0.37 (0.09, 2.15) -0.98 0.327
Education (College graduate) 0.27 0.35 (0.02, 3.22) -1.01 0.311
Country (Thailand) 39.15 42.18 (6.16, 465.88) 3.40 < .001
Country (USA) 11.41 14.95 (0.94, 174.09) 1.86 0.063
Health insurance (Private) 4.54 7.27 (0.20, 134.20) 0.95 0.344
Health insurance (Public) 0.63 0.77 (0.05, 7.54) -0.38 0.703
Routine check-up (More than 1 up to 2 years ago) 10.79 12.78 (1.23, 142.52) 2.01 0.045
Routine check-up (More than 2 years ago) 3.91 5.11 (0.32, 60.18) 1.04 0.296
Family history of cancer (Yes) 0.11 0.13 (8.78e-03, 1.02) -1.84 0.066
Family history of cancer (Don’t know) 1.05 1.03 (0.15, 7.56) 0.05 0.960