# Point the session at the project folder, then load the EMR extract.
# NOTE(review): the absolute, user-specific path makes this chunk
# non-portable; consider a project-relative path instead of setwd().
setwd(
  "/Users/michelesantana/Documents/IGPA/Sage Kim Lung Cancer/Rcode"
)
lung <- read_csv(file = 'lungEMRChicago-3-18-2023-Alex.csv')

1 Question 1: What percentage of patients undergoing diagnostic screening/testing met eligibility for LDCT lung cancer screening?

Eligibility criteria: Aged 50 to 80, 20 pack year smoking history, and no prior history of lung cancer

# Number of participants, kept as a one-row tibble (column `n`) so it can be
# used directly as a denominator for other count() results.
all_n <- count(select(lung, PATIENT_ID))
all_n

# Screening-eligible subset. Rows whose historyca is the "#NULL!" placeholder
# are kept, i.e. missing values are assumed to mean no history of lung cancer.
n_elig <- lung %>%
  filter(agecat >= 2) %>%
  filter(packyear20 == "1.00") %>%
  filter(historyca %in% c("0.00", "#NULL!"))

# Eligible current/former smokers as a percentage of all participants
count(filter(n_elig, smokingstatus >= 2)) / all_n * 100

n_elig %>% count()
# n = 304

# Flag screening-eligible patients on the main data frame (1 = eligible,
# 0 = not eligible), using the same three criteria as the n_elig subset.
lung <- mutate(
  lung,
  screen_elig = ifelse(
    (historyca == "#NULL!" | historyca == "0.00") &
      packyear20 == "1.00" &
      agecat >= 2,
    1,
    0
  )
)

# Sanity check: the flagged rows should equal the size of the filtered subset
count(filter(lung, screen_elig == 1))
# n = 304, which matches the filtered subset, so the flag is built correctly

The total count of participants in the study is 7,198 patients. 304 patients (4.22%) undergoing diagnostic screening/testing met the eligibility requirements for LDCT lung cancer screening.

2 Question 2: Are there racial/ethnic and gender differences in meeting eligibility criteria?

Expectation 1 = Yes, there are racial differences. Expectation 2 = Yes, there is an interaction with gender, with Black men and women less likely to meet screening eligibility criteria.

2.1 Create race/ethnicity as a categorical variable

# Text label for the numeric race/ethnicity code: 1 -> "White", 2 -> "Black",
# any other non-missing code -> "Hispanic". Missing codes stay missing.
lung <- lung %>%
  mutate(
    raceethnic_cat = case_when(
      is.na(raceethnic) ~ NA_character_,
      raceethnic == 1 ~ "White",
      raceethnic == 2 ~ "Black",
      TRUE ~ "Hispanic"
    )
  )

# Patients per category
lung %>%
  group_by(raceethnic_cat) %>%
  count() %>%
  print()
## # A tibble: 3 × 2
## # Groups:   raceethnic_cat [3]
##   raceethnic_cat     n
##   <chr>          <int>
## 1 Black           4622
## 2 Hispanic        1619
## 3 White            957

2.1.1 Change Hispanic to Latinx

# Relabel the "Hispanic" category as "Latinx"; all other values are untouched
lung$raceethnic_cat[lung$raceethnic_cat %in% "Hispanic"] <- "Latinx"

2.2 Eligible Patients by Gender

# Screening-eligible counts and rates by gender (Female/Male rows only)
elig_gender_table <- lung %>%
  filter(gender %in% c("Female", "Male")) %>%
  group_by(gender) %>%
  summarise(
    n_patients = n(),
    n_eligible = sum(screen_elig == 1),
    pct_eligible = n_eligible / n_patients * 100
  )

# Presentation headers for the rendered table
names(elig_gender_table) <- c(
  "Gender",
  "Total Patients",
  "Screening Eligible Count",
  "Percent Screening Eligible"
)

kable(
  elig_gender_table,
  align = "lllll",
  caption = "Lung Cancer Screening Eligible Patients by Gender",
  digits = 2,
  format.args = list(big.mark = ",")
) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Lung Cancer Screening Eligible Patients by Gender
Gender Total Patients Screening Eligible Count Percent Screening Eligible
Female 4,044 146 3.61
Male 3,151 158 5.01

2.3 Eligible Patients by Race

# Screening-eligible counts and rates by race/ethnicity
elig_race_table <- lung %>%
  group_by(raceethnic_cat) %>%
  summarise(
    n_patients = n(),
    n_eligible = sum(screen_elig == 1),
    pct_eligible = n_eligible / n_patients * 100
  )

# Presentation headers for the rendered table
names(elig_race_table) <- c(
  "Race/Ethnicity",
  "Total Patients",
  "Screening Eligible Count",
  "Percent Screening Eligible"
)

kable(
  elig_race_table,
  align = "lllll",
  caption = "Lung Cancer Screening Eligible Patients by Race",
  digits = 2,
  format.args = list(big.mark = ",")
) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Lung Cancer Screening Eligible Patients by Race
Race/Ethnicity Total Patients Screening Eligible Count Percent Screening Eligible
Black 4,622 212 4.59
Latinx 1,619 39 2.41
White 957 53 5.54

2.4 Eligible Patients by Gender & Race

# Screening-eligible counts and rates for each gender x race/ethnicity cell
# (Female/Male rows only)
elig_racegen_table <- lung %>%
  filter(gender %in% c("Female", "Male")) %>%
  group_by(gender, raceethnic_cat) %>%
  summarise(
    n_patients = n(),
    n_eligible = sum(screen_elig == 1),
    pct_eligible = n_eligible / n_patients * 100
  )

# Presentation headers for the rendered table
names(elig_racegen_table) <- c(
  "Gender",
  "Race/Ethnicity",
  "Total Patients",
  "Screening Eligible Count",
  "Percent Screening Eligible"
)

# Rendered table
kable(
  elig_racegen_table,
  align = "lllll",
  caption = "Lung Cancer Screening Eligible Patients by Gender and Race",
  digits = 2,
  format.args = list(big.mark = ",")
) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Lung Cancer Screening Eligible Patients by Gender and Race
Gender Race/Ethnicity Total Patients Screening Eligible Count Percent Screening Eligible
Female Black 2,808 114 4.06
Female Latinx 805 11 1.37
Female White 431 21 4.87
Male Black 1,812 98 5.41
Male Latinx 813 28 3.44
Male White 526 32 6.08

2.5 Patients who are Current or Former Smokers

# Data frame of current or former smokers (smokingstatus >= 2)
n_smoker <- filter(lung, smokingstatus >= 2)
n_smoker %>% count()
## # A tibble: 1 × 1
##       n
##   <int>
## 1  4138
# n = 4,138

# Current/former smokers as a percentage of all patients
(n_smoker %>% count()) / (lung %>% count()) * 100
##          n
## 1 57.48819

Of the 7,198 patients in the study, 4,138 (57.49%) are current or former smokers.

2.5.1 Patients who are Current or Former Smokers By Gender

Counts of current or former smokers by gender

# Current/former smokers (smokingstatus >= 2) by gender, Female/Male rows only
smoker_gender_table <- lung %>%
  filter(gender %in% c("Female", "Male")) %>%
  group_by(gender) %>%
  summarise(
    n_patients = n(),
    n_smokers = sum(smokingstatus >= 2),
    pct_smokers = n_smokers / n_patients * 100
  )

# Presentation headers for the rendered table
names(smoker_gender_table) <- c(
  "Gender",
  "Total Patients",
  "Current or Former Smoker Count",
  "Percent Smokers"
)

# Rendered table
kable(
  smoker_gender_table,
  align = "lllll",
  caption = "Patients who are Current or Former Smokers by Gender",
  digits = 2,
  format.args = list(big.mark = ",")
) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Patients who are Current or Former Smokers by Gender
Gender Total Patients Current or Former Smoker Count Percent Smokers
Female 4,044 2,076 51.34
Male 3,151 2,061 65.41

What percentage of current or former smokers within each gender group are eligible for the lung cancer screening?

# Among current/former smokers only: how many, and what share, are screening
# eligible within each gender (Female/Male rows only)
smoker_gender_per_table <- lung %>%
  filter(smokingstatus >= 2, gender %in% c("Female", "Male")) %>%
  group_by(gender) %>%
  summarise(
    n_smokers = n(),
    n_eligible = sum(screen_elig == 1, na.rm = TRUE),
    pct_eligible = mean(screen_elig == 1, na.rm = TRUE) * 100
  )

# Presentation headers for the rendered table
names(smoker_gender_per_table) <- c(
  "Gender",
  "Current or Former Smoker Count",
  "Smokers Screening Eligible Count",
  "Percent Smokers Screening Eligible"
)

# Rendered table
kable(
  smoker_gender_per_table,
  align = "lllll",
  caption = "Percentage of Current and Former Smokers Eligible for the Lung Cancer Screening by Gender",
  digits = 2,
  format.args = list(big.mark = ",")
) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Percentage of Current and Former Smokers Eligible for the Lung Cancer Screening by Gender
Gender Current or Former Smoker Count Smokers Screening Eligible Count Percent Smokers Screening Eligible
Female 2,076 146 7.03
Male 2,061 158 7.67

2.5.2 Patients who are Current or Former Smokers By Race

Counts of current or former smokers by race

# Current/former smokers (smokingstatus >= 2) by race/ethnicity
smoker_race_table <- lung %>%
  group_by(raceethnic_cat) %>%
  summarise(
    n_patients = n(),
    n_smokers = sum(smokingstatus >= 2),
    pct_smokers = n_smokers / n_patients * 100
  )

# Presentation headers for the rendered table
names(smoker_race_table) <- c(
  "Race/Ethnicity",
  "Total Patients",
  "Current or Former Smoker Count",
  "Percent Smokers"
)

# Rendered table
kable(
  smoker_race_table,
  align = "lllll",
  caption = "Patients who are Current or Former Smokers by Race",
  digits = 2,
  format.args = list(big.mark = ",")
) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Patients who are Current or Former Smokers by Race
Race/Ethnicity Total Patients Current or Former Smoker Count Percent Smokers
Black 4,622 2,850 61.66
Latinx 1,619 717 44.29
White 957 571 59.67

What percentage of current or former smokers within each racial/ethnic group are eligible for the lung cancer screening?

# Share of current/former smokers who are screening eligible, by
# race/ethnicity. All patients are kept so the table can also show the total
# patient count per group.
smoker_race_per_table <- lung %>%
  group_by(raceethnic_cat) %>%
  summarise(
    Total = n(),
    Smoker_Count = sum(smokingstatus >= 2),
    # The numerator must be limited to smokers so that it matches the smoker
    # denominator; counting every eligible patient would overstate the rate
    # whenever an eligible patient is not flagged as a smoker.
    Smoker_Eligible_Count = sum(
      screen_elig == 1 & smokingstatus >= 2,
      na.rm = TRUE
    ),
    per_eligible = Smoker_Eligible_Count / Smoker_Count * 100
  )

# Presentation headers for the rendered table
colnames(smoker_race_per_table) <- c(
  "Race/Ethnicity",
  "Total Patients",
  "Current or Former Smoker Count",
  "Smokers Screening Eligible Count",
  "Percent Smokers Screening Eligible"
)

# table
smoker_race_per_table %>%
  kable(
    align = "lllll",
    caption = "Percentage of Current and Former Smokers Eligible for the Lung Cancer Screening by Race",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Percentage of Current and Former Smokers Eligible for the Lung Cancer Screening by Race
Race/Ethnicity Total Patients Current or Former Smoker Count Smokers Screening Eligible Count Percent Smokers Screening Eligible
Black 4,622 2,850 212 7.44
Latinx 1,619 717 39 5.44
White 957 571 53 9.28

2.5.3 By Gender & Race

Counts of current or former smokers by gender and race

# Current/former smokers for each gender x race/ethnicity cell
# (Female/Male rows only)
smoker_racegen_table <- lung %>%
  filter(gender %in% c("Female", "Male")) %>%
  group_by(gender, raceethnic_cat) %>%
  summarise(
    n_patients = n(),
    n_smokers = sum(smokingstatus >= 2),
    pct_smokers = n_smokers / n_patients * 100
  )

# Presentation headers for the rendered table
names(smoker_racegen_table) <- c(
  "Gender",
  "Race/Ethnicity",
  "Total Patients",
  "Current or Former Smoker Count",
  "Percent Smokers"
)

# Rendered table
kable(
  smoker_racegen_table,
  align = "lllll",
  caption = "Patients who are Current or Former Smokers by Gender and Race",
  digits = 2,
  format.args = list(big.mark = ",")
) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Patients who are Current or Former Smokers by Gender and Race
Gender Race/Ethnicity Total Patients Current or Former Smoker Count Percent Smokers
Female Black 2,808 1,582 56.34
Female Latinx 805 255 31.68
Female White 431 239 55.45
Male Black 1,812 1,267 69.92
Male Latinx 813 462 56.83
Male White 526 332 63.12

What percentage of current or former smokers within each racial/ethnic and gender group are eligible for the lung cancer screening?

# Share of current/former smokers who are screening eligible, for each
# gender x race/ethnicity cell (Female/Male rows only).
smoker_racegen_per_table <- lung %>%
  filter(gender %in% c("Female", "Male")) %>%
  group_by(gender, raceethnic_cat) %>%
  summarize(
    Total = n(),
    Smoker_Count = sum(smokingstatus >= 2),
    # The numerator must be limited to smokers so that it matches the smoker
    # denominator; counting every eligible patient would overstate the rate
    # whenever an eligible patient is not flagged as a smoker.
    Smoker_Eligible_Count = sum(
      screen_elig == 1 & smokingstatus >= 2,
      na.rm = TRUE
    ),
    per_eligible = Smoker_Eligible_Count / Smoker_Count * 100
  )

# Presentation headers for the rendered table
colnames(smoker_racegen_per_table) <- c(
  "Gender",
  "Race/Ethnicity",
  "Total Patients",
  "Current or Former Smoker Count",
  "Smokers Screening Eligible Count",
  "Percent Smokers Screening Eligible"
)

# table
smoker_racegen_per_table %>%
  kable(
    align = "lllll",
    caption = "Percentage of Current and Former Smokers Eligible for the Lung Cancer Screening by Gender & Race",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Percentage of Current and Former Smokers Eligible for the Lung Cancer Screening by Gender & Race
Gender Race/Ethnicity Total Patients Current or Former Smoker Count Smokers Screening Eligible Count Percent Smokers Screening Eligible
Female Black 2,808 1,582 114 7.21
Female Latinx 805 255 11 4.31
Female White 431 239 21 8.79
Male Black 1,812 1,267 98 7.73
Male Latinx 813 462 28 6.06
Male White 526 332 32 9.64

3 Question 3: What percentage of patients who met screening guidelines were diagnosed with lung cancer?

# Diagnosis indicator: malignanto (0 = no; 1 = yes)
# Keep only the patients diagnosed with lung cancer, then count them
diag_n <- filter(lung, malignanto == 1)
diag_n %>% count()
## # A tibble: 1 × 1
##       n
##   <int>
## 1   707
# in total, 707 patients were diagnosed with lung cancer

# Patients who were both screening eligible and diagnosed with lung cancer
count(filter(lung, screen_elig == 1, malignanto == 1))
## # A tibble: 1 × 1
##       n
##   <int>
## 1    33
# 33 of the 304 screening-eligible patients were diagnosed with lung cancer,
# i.e. 33 / 304 = 10.86% of patients who met the screening guidelines.

In total, 707 patients were diagnosed with lung cancer. Of the 304 patients eligible for the screening, 33 (10.86%) were diagnosed with lung cancer.

4 Question 4: Are there racial/ethnic and gender differences in the relationship between meeting screening eligibility criteria and the development of lung cancer?

Counts of patients who met screening guidelines who were diagnosed with lung cancer.

4.1 Lung Cancer Diagnosis of Screening Eligible Patients by Gender

# Lung cancer diagnoses among screening-eligible patients, by gender.
# Restricted to Female/Male rows so that this table uses the same gender
# categories as the other by-gender tables in this report.
diag_eligg_table <- lung %>%
  filter(screen_elig == 1, gender %in% c("Female", "Male")) %>%
  group_by(gender) %>%
  summarize(
    Total = n(),
    diag_screenelig = sum(malignanto == 1, na.rm = TRUE),
    per_diagse = diag_screenelig / Total * 100
  )

# Presentation headers for the rendered table
colnames(diag_eligg_table) <- c(
  "Gender",
  "Screening Eligible Count",
  "Diagnosed Screening Eligible Count",
  "Percent Screening Eligible Diagnosed"
)

# table
diag_eligg_table %>%
  kable(
    align = "lllll",
    caption = "Screening Eligible Patients Diagnosed with Lung Cancer by Gender",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Screening Eligible Patients Diagnosed with Lung Cancer by Gender
Gender Screening Eligible Count Diagnosed Screening Eligible Count Percent Screening Eligible Diagnosed
Female 146 19 13.01
Male 158 14 8.86

4.2 Lung Cancer Diagnosis of Screening Eligible Patients by Race

# Lung cancer diagnoses among screening-eligible patients, by race/ethnicity
diag_eligr_table <- lung %>%
  filter(screen_elig == 1) %>%
  group_by(raceethnic_cat) %>%
  summarise(
    n_eligible = n(),
    n_eligible_dx = sum(malignanto == 1, na.rm = TRUE),
    pct_eligible_dx = n_eligible_dx / n_eligible * 100
  )

# Presentation headers for the rendered table
names(diag_eligr_table) <- c(
  "Race/Ethnicity",
  "Screening Eligible Count",
  "Diagnosed Screening Eligible Count",
  "Percent Screening Eligible Diagnosed"
)

kable(
  diag_eligr_table,
  align = "lllll",
  caption = "Screening Eligible Patients Diagnosed with Lung Cancer by Race",
  digits = 2,
  format.args = list(big.mark = ",")
) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Screening Eligible Patients Diagnosed with Lung Cancer by Race
Race/Ethnicity Screening Eligible Count Diagnosed Screening Eligible Count Percent Screening Eligible Diagnosed
Black 212 25 11.79
Latinx 39 5 12.82
White 53 3 5.66

4.3 Lung Cancer Diagnosis of Screening Eligible Patients by Gender & Race

# Lung cancer diagnoses among screening-eligible patients, for each
# gender x race/ethnicity cell. Restricted to Female/Male rows so that this
# table uses the same gender categories as the other by-gender tables.
diag_elig_genra_table <- lung %>%
  filter(screen_elig == 1, gender %in% c("Female", "Male")) %>%
  group_by(gender, raceethnic_cat) %>%
  summarize(
    Total = n(),
    diag_screenelig = sum(malignanto == 1, na.rm = TRUE),
    per_diagse = diag_screenelig / Total * 100
  )

# Presentation headers for the rendered table
colnames(diag_elig_genra_table) <- c(
  "Gender",
  "Race/Ethnicity",
  "Screening Eligible Count",
  "Diagnosed Screening Eligible Count",
  "Percent Screening Eligible Diagnosed"
)

diag_elig_genra_table %>%
  kable(
    align = "lllll",
    caption = "Screening Eligible Patients Diagnosed with Lung Cancer by Gender & Race",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Screening Eligible Patients Diagnosed with Lung Cancer by Gender & Race
Gender Race/Ethnicity Screening Eligible Count Diagnosed Screening Eligible Count Percent Screening Eligible Diagnosed
Female Black 114 14 12.28
Female Latinx 11 3 27.27
Female White 21 2 9.52
Male Black 98 11 11.22
Male Latinx 28 2 7.14
Male White 32 1 3.12

5 Question 5: What % of patients who did not meet the criteria were diagnosed with lung cancer?

Count of patients who are current or former smokers who did not meet screening guidelines who were diagnosed with lung cancer.

# Current/former smokers who were NOT screening eligible but were diagnosed
# with lung cancer
n_inelig_diag <- lung %>%
  filter(screen_elig == 0, smokingstatus >= 2, malignanto == 1)
n_inelig_diag %>% count()
## # A tibble: 1 × 1
##       n
##   <int>
## 1   492

492 current or former smoker patients who did not meet the screening criteria were diagnosed with lung cancer.

Percentage of current or former smokers who did not meet the screening criteria and were diagnosed with lung cancer:

# Percentage of screening-INELIGIBLE current/former smokers who were diagnosed.
# The denominator is limited to smokers who did not meet the screening
# criteria; dividing by all smokers (n_smoker) would understate the rate.
count(n_inelig_diag) /
  count(filter(lung, smokingstatus >= 2, screen_elig == 0)) * 100
# Using the counts in the section 6.1.2 table: 492 / 3,834 = 12.83%
# (re-knit the document to refresh the printed output).

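The 492 diagnosed patients are 11.89% of all 4,138 current or former smokers. Measured against only the 3,834 current or former smokers who did NOT meet the screening eligibility guidelines, 12.83% were diagnosed with lung cancer.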

6 Question 6: Are there differences in the rate of diagnosis of lung cancer between Black people and Whites who do not meet screening guidelines? Do gender and exposure to violence play a role in these relationships?

6.1 Differences by Race

6.1.1 Screening Ineligible Smokers by Race/Ethnicity

# Screening-ineligible current/former smokers by race/ethnicity
inelig_smoker_race_count <- lung %>%
  filter(smokingstatus >= 2) %>%
  group_by(raceethnic_cat) %>%
  summarise(
    n_smokers = n(),
    n_inelig = sum(screen_elig == 0, na.rm = TRUE),
    pct_inelig = n_inelig / n_smokers * 100
  )

# Presentation headers for the rendered table
names(inelig_smoker_race_count) <- c(
  "Race/Ethnicity",
  "Smokers Count",
  "Screening Ineligible Current or Former Smoker Count",
  "Percent Screening Ineligible Smokers"
)

kable(
  inelig_smoker_race_count,
  align = "lllll",
  caption = "Screening Ineligible Current or Former Smokers by Race",
  digits = 2,
  format.args = list(big.mark = ",")
) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Screening Ineligible Current or Former Smokers by Race
Race/Ethnicity Smokers Count Screening Ineligible Current or Former Smoker Count Percent Screening Ineligible Smokers
Black 2,850 2,638 92.56
Latinx 717 678 94.56
White 571 518 90.72

6.1.2 Screening Ineligible Smokers Diagnosed with Lung Cancer by Race/Ethnicity

# Lung cancer diagnoses among current/former smokers by race/ethnicity,
# overall and for the screening-ineligible subset
ineligible_cancerr_count <- lung %>%
  filter(smokingstatus >= 2) %>%
  group_by(raceethnic_cat) %>%
  summarise(
    n_smokers = n(),
    n_dx = sum(malignanto == 1, na.rm = TRUE),
    pct_dx = (n_dx / n_smokers * 100),
    # every remaining row already has smokingstatus >= 2, so only the
    # eligibility flag (and diagnosis) needs testing here
    n_inelig = sum(screen_elig == 0, na.rm = TRUE),
    n_inelig_dx = sum(screen_elig == 0 & malignanto == 1, na.rm = TRUE),
    pct_inelig_dx = n_inelig_dx / n_inelig * 100
  )

# Presentation headers for the rendered table
names(ineligible_cancerr_count) <- c(
  "Race/Ethnicity",
  "Smoker Count",
  "Smokers Diagnosed with Lung Cancer Count",
  "Percent Diagnosed",
  "Screening Ineligible Smoker Count",
  "Screening Ineligible Smokers Diagnosed",
  "Percent Screening Ineligible Smokers Diagnosed"
)

kable(
  ineligible_cancerr_count,
  align = "lllll",
  caption = "Screening Ineligible Current or Former Smokers Diagnosed with Lung Cancer by Race",
  digits = 2,
  format.args = list(big.mark = ",")
) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Screening Ineligible Current or Former Smokers Diagnosed with Lung Cancer by Race
Race/Ethnicity Smoker Count Smokers Diagnosed with Lung Cancer Count Percent Diagnosed Screening Ineligible Smoker Count Screening Ineligible Smokers Diagnosed Percent Screening Ineligible Smokers Diagnosed
Black 2,850 387 13.58 2,638 362 13.72
Latinx 717 59 8.23 678 54 7.96
White 571 79 13.84 518 76 14.67

6.2 Differences by Gender

6.2.1 Screening Ineligible Smokers by Gender

# Screening-ineligible current/former smokers by gender (Female/Male only)
inelig_smoker_g_count <- lung %>%
  filter(smokingstatus >= 2, gender %in% c("Female", "Male")) %>%
  group_by(gender) %>%
  summarise(
    n_smokers = n(),
    n_inelig = sum(screen_elig == 0, na.rm = TRUE),
    pct_inelig = n_inelig / n_smokers * 100
  )

# Presentation headers for the rendered table
names(inelig_smoker_g_count) <- c(
  "Gender",
  "Smokers Count",
  "Screening Ineligible Current or Former Smoker Count",
  "Percent Screening Ineligible Smokers"
)

kable(
  inelig_smoker_g_count,
  align = "lllll",
  caption = "Screening Ineligible Current or Former Smokers by Gender",
  digits = 2,
  format.args = list(big.mark = ",")
) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Screening Ineligible Current or Former Smokers by Gender
Gender Smokers Count Screening Ineligible Current or Former Smoker Count Percent Screening Ineligible Smokers
Female 2,076 1,930 92.97
Male 2,061 1,903 92.33

6.2.2 Screening Ineligible Smokers Diagnosed with Lung Cancer by Gender

# Lung cancer diagnoses among current/former smokers by gender (Female/Male
# only), overall and for the screening-ineligible subset
ineligible_cancerg_count <- lung %>%
  filter(smokingstatus >= 2, gender %in% c("Female", "Male")) %>%
  group_by(gender) %>%
  summarise(
    n_smokers = n(),
    n_dx = sum(malignanto == 1, na.rm = TRUE),
    pct_dx = (n_dx / n_smokers * 100),
    # every remaining row already has smokingstatus >= 2, so only the
    # eligibility flag (and diagnosis) needs testing here
    n_inelig = sum(screen_elig == 0, na.rm = TRUE),
    n_inelig_dx = sum(screen_elig == 0 & malignanto == 1, na.rm = TRUE),
    pct_inelig_dx = n_inelig_dx / n_inelig * 100
  )

# Presentation headers for the rendered table
names(ineligible_cancerg_count) <- c(
  "Gender",
  "Smoker Count",
  "Smokers Diagnosed with Lung Cancer Count",
  "Percent Smokers Diagnosed",
  "Screening Ineligible Smokers Count",
  "Screening Ineligible Smokers Diagnosed",
  "Percent Screening Ineligible Smokers Diagnosed"
)

kable(
  ineligible_cancerg_count,
  align = "lllll",
  caption = "Screening Ineligible Current or Former Smokers Diagnosed with Lung Cancer by Gender",
  digits = 2,
  format.args = list(big.mark = ",")
) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Screening Ineligible Current or Former Smokers Diagnosed with Lung Cancer by Gender
Gender Smoker Count Smokers Diagnosed with Lung Cancer Count Percent Smokers Diagnosed Screening Ineligible Smokers Count Screening Ineligible Smokers Diagnosed Percent Screening Ineligible Smokers Diagnosed
Female 2,076 264 12.72 1,930 245 12.69
Male 2,061 261 12.66 1,903 247 12.98

6.3 Differences by Gender & Race

6.3.1 Screening Ineligible Smokers by Gender & Race

# Screening-ineligible current/former smokers for each gender x race/ethnicity
# cell (Female/Male rows only)
inelig_smoker_rg_count <- lung %>%
  filter(smokingstatus >= 2, gender %in% c("Female", "Male")) %>%
  group_by(gender, raceethnic_cat) %>%
  summarise(
    n_smokers = n(),
    # every remaining row already has smokingstatus >= 2, so only the
    # eligibility flag needs testing here
    n_inelig = sum(screen_elig == 0, na.rm = TRUE),
    pct_inelig = n_inelig / n_smokers * 100
  )

# Presentation headers for the rendered table
names(inelig_smoker_rg_count) <- c(
  "Gender",
  "Race/Ethnicity",
  "Smoker Count",
  "Screening Ineligible Smokers Count",
  "Percent Screening Ineligible Smokers"
)

kable(
  inelig_smoker_rg_count,
  align = "lllll",
  caption = "Screening Ineligible Current or Former Smokers by Gender & Race",
  digits = 2,
  format.args = list(big.mark = ",")
) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Screening Ineligible Current or Former Smokers by Gender & Race
Gender Race/Ethnicity Smoker Count Screening Ineligible Smokers Count Percent Screening Ineligible Smokers
Female Black 1,582 1,468 92.79
Female Latinx 255 244 95.69
Female White 239 218 91.21
Male Black 1,267 1,169 92.27
Male Latinx 462 434 93.94
Male White 332 300 90.36

6.3.2 Screening Ineligible Smokers Diagnosed with Lung Cancer by Gender & Race

# Lung cancer diagnoses among current/former smokers for each gender x
# race/ethnicity cell (Female/Male only), overall and for the
# screening-ineligible subset.
# NOTE(review): this assigns to ineligible_cancerg_count, the same name used
# for the gender-only table in section 6.2.2, so that table is overwritten.
ineligible_cancerg_count <- lung %>%
  filter(smokingstatus >= 2, gender %in% c("Female", "Male")) %>%
  group_by(gender, raceethnic_cat) %>%
  summarise(
    n_smokers = n(),
    n_dx = sum(malignanto == 1, na.rm = TRUE),
    pct_dx = (n_dx / n_smokers * 100),
    # every remaining row already has smokingstatus >= 2, so only the
    # eligibility flag (and diagnosis) needs testing here
    n_inelig = sum(screen_elig == 0, na.rm = TRUE),
    n_inelig_dx = sum(screen_elig == 0 & malignanto == 1, na.rm = TRUE),
    pct_inelig_dx = n_inelig_dx / n_inelig * 100
  )

# Presentation headers for the rendered table
names(ineligible_cancerg_count) <- c(
  "Gender",
  "Race/Ethnicity",
  "Smoker Count",
  "Smokers Diagnosed with Lung Cancer Count",
  "Percent Smokers Diagnosed",
  "Screening Ineligible Smoker Count",
  "Screening Ineligible Smokers Diagnosed",
  "Percent Screening Ineligible Smokers Diagnosed"
)

kable(
  ineligible_cancerg_count,
  align = "lllll",
  caption = "Screening Ineligible Current or Former Smokers Diagnosed with Lung Cancer by Gender & Race",
  digits = 2,
  format.args = list(big.mark = ",")
) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Screening Ineligible Current or Former Smokers Diagnosed with Lung Cancer by Gender & Race
Gender Race/Ethnicity Smoker Count Smokers Diagnosed with Lung Cancer Count Percent Smokers Diagnosed Screening Ineligible Smoker Count Screening Ineligible Smokers Diagnosed Percent Screening Ineligible Smokers Diagnosed
Female Black 1,582 206 13.02 1,468 192 13.08
Female Latinx 255 24 9.41 244 21 8.61
Female White 239 34 14.23 218 32 14.68
Male Black 1,267 181 14.29 1,169 170 14.54
Male Latinx 462 35 7.58 434 33 7.60
Male White 332 45 13.55 300 44 14.67

6.4 Differences by Exposure to Violence

6.5 Race & Low Exposure to Violence

6.5.1 Screening eligible current or former smokers diagnosed with lung cancer by race and low exposure to violence

# Current/former smokers by race/ethnicity: diagnoses overall, among the
# screening eligible, among rows with homicidegtmean2 == "0" (reported below
# as low exposure to violence), and among rows meeting both conditions.
elig_smoker_lowvio_r_count <- lung %>%
  filter(smokingstatus >= 2) %>%
  group_by(raceethnic_cat) %>%
  summarize(
    # all smokers
    Total_smoker = n(),
    Total_smoker_diag = sum(malignanto == 1, na.rm = TRUE),
    per_diag = (Total_smoker_diag / Total_smoker * 100),
    # screening-eligible smokers (na.rm added to match the other counts)
    total_smoker_elig = sum(screen_elig == 1, na.rm = TRUE),
    smoker_elig_diag = sum(screen_elig == 1 & malignanto == 1, na.rm = TRUE),
    per_Sdiag = (smoker_elig_diag / total_smoker_elig * 100),
    # smokers with homicidegtmean2 == "0"
    lowexpose_vio = sum(homicidegtmean2 == "0", na.rm = TRUE),
    lowexpose_vio_diag = sum(
      homicidegtmean2 == "0" & malignanto == 1,
      na.rm = TRUE
    ),
    per_ldiag = (lowexpose_vio_diag / lowexpose_vio * 100),
    # screening-eligible smokers with homicidegtmean2 == "0"
    lowelig_vio = sum(screen_elig == 1 & homicidegtmean2 == "0", na.rm = TRUE),
    elig_expose_lowvio_diag = sum(
      screen_elig == 1 & homicidegtmean2 == "0" & malignanto == 1,
      na.rm = TRUE
    ),
    per_diag_lowelig_vio = elig_expose_lowvio_diag / lowelig_vio * 100
  )

# Presentation headers. The last label is built with paste() so that it no
# longer contains the line break and indentation of a multi-line literal.
colnames(elig_smoker_lowvio_r_count) <- c(
  "Race/Ethnicity",
  "Current or Former Smokers",
  "Smokers Diagnosed with Lung Cancer",
  "% Smokers Diagnosed",
  "Smokers Screening Eligible",
  "Smokers Screening Eligible Diagnosed",
  "% Eligible Smokers Diagnosed",
  "Smokers with Low Exposure to Violence",
  "Smokers with Low Exposure to Violence Diagnosed",
  "% Smokers with Low Exposure to Violence Diagnosed",
  "Smokers Screening Eligible with Low Exposure to Violence",
  "Smokers Screening Eligible with Low Exposure to Violence Diagnosed",
  paste(
    "% Smokers Screening Eligible with Low Exposure to Violence",
    "Diagnosed"
  )
)

# table
elig_smoker_lowvio_r_count %>%
  kable(
    align = "lllll",
    caption = "Screening Eligible Current or Former Smokers Diagnosed with Lung Cancer by Race with Low Exposure to Violence",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Screening Eligible Current or Former Smokers Diagnosed with Lung Cancer by Race with Low Exposure to Violence
Race/Ethnicity Current or Former Smokers Smokers Diagnosed with Lung Cancer % Smokers Diagnosed Smokers Screening Eligible Smokers Screening Eligible Diagnosed % Eligible Smokers Diagnosed Smokers with Low Exposure to Violence Smokers with Low Exposure to Violence Diagnosed % Smokers with Low Exposure to Violence Diagnosed Smokers Screening Eligible with Low Exposure to Violence Smokers Screening Eligible with Low Exposure to Violence Diagnosed % Smokers Screening Eligible with Low Exposure to Violence Diagnosed
Black 2,850 387 13.58 212 25 11.79 1,283 148 11.54 101 8 7.92
Latinx 717 59 8.23 39 5 12.82 662 55 8.31 37 4 10.81
White 571 79 13.84 53 3 5.66 532 73 13.72 51 3 5.88

6.5.2 Race & High Exposure to Violence

6.5.3 Screening ineligible current or former smokers diagnosed with lung cancer by race and high exposure to violence

# Current/former smokers by race/ethnicity: diagnoses overall, among the
# screening ineligible, among rows with homicidegtmean2 == "1" (reported below
# as high exposure to violence), and among rows meeting both conditions.
inelig_smokervio_race_count <- lung %>%
  filter(smokingstatus >= 2) %>%
  group_by(raceethnic_cat) %>%
  summarize(
    # all smokers
    Total_smoker = n(),
    Total_smoker_diag = sum(malignanto == 1, na.rm = TRUE),
    per_diag = (Total_smoker_diag / Total_smoker * 100),
    # screening-ineligible smokers
    total_smoker_inelig = sum(screen_elig == 0, na.rm = TRUE),
    smoker_inelig_diag = sum(screen_elig == 0 & malignanto == 1, na.rm = TRUE),
    per_diag2 = (smoker_inelig_diag / total_smoker_inelig * 100),
    # smokers with homicidegtmean2 == "1"
    expose_vio = sum(homicidegtmean2 == "1", na.rm = TRUE),
    expose_vio_diag = sum(
      homicidegtmean2 == "1" & malignanto == 1,
      na.rm = TRUE
    ),
    per_ldiag = (expose_vio_diag / expose_vio * 100),
    # screening-ineligible smokers with homicidegtmean2 == "1"
    inelig_vio = sum(screen_elig == 0 & homicidegtmean2 == "1", na.rm = TRUE),
    inelig_expose_vio_diag = sum(
      screen_elig == 0 & homicidegtmean2 == "1" & malignanto == 1,
      na.rm = TRUE
    ),
    per_diag_inelig_vio = inelig_expose_vio_diag / inelig_vio * 100
  )

# Presentation headers. The last label is built with paste() so that it no
# longer contains the line break and indentation of a multi-line literal.
colnames(inelig_smokervio_race_count) <- c(
  "Race/Ethnicity",
  "Current or Former Smokers",
  "Smokers Diagnosed with Lung Cancer",
  "% Smokers Diagnosed",
  "Smokers Screening Ineligible",
  "Smokers Screening Ineligible Diagnosed",
  "% Ineligible Smokers Diagnosed",
  "Smokers with High Exposure to Violence",
  "Smokers with High Exposure to Violence Diagnosed",
  "% Smokers with High Exposure to Violence Diagnosed",
  "Smokers Screening Ineligible with High Exposure to Violence",
  "Smokers Screening Ineligible with High Exposure to Violence Diagnosed",
  paste(
    "% Smokers Screening Ineligible with High Exposure to Violence",
    "Diagnosed"
  )
)

# table
inelig_smokervio_race_count %>%
  kable(
    align = "lllll",
    caption = "Screening Ineligible Current or Former Smokers Diagnosed with Lung Cancer by Race with High Exposure to Violence",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(
    bootstrap_options = "striped",
    full_width = FALSE,
    position = "left"
  )
Screening Ineligible Current or Former Smokers Diagnosed with Lung Cancer by Race with High Exposure to Violence
Race/Ethnicity Current or Former Smokers Smokers Diagnosed with Lung Cancer % Smokers Diagnosed Smokers Screening Ineligible Smokers Screening Ineligible Diagnosed % Ineligible Smokers Diagnosed Smokers with High Exposure to Violence Smokers with High Exposure to Violence Diagnosed % Smokers with High Exposure to Violence Diagnosed Smokers Screening Ineligible with High Exposure to Violence Smokers Screening Ineligible with High Exposure to Violence Diagnosed % Smokers Screening Ineligible with High Exposure to Violence Diagnosed
Black 2,850 387 13.58 2,638 362 13.72 1,565 239 15.27 1,454 222 15.27
Latinx 717 59 8.23 678 54 7.96 54 4 7.41 52 3 5.77
White 571 79 13.84 518 76 14.67 36 6 16.67 35 6 17.14

6.6 Gender & Low Exposure to Violence

6.6.1 Screening eligible current or former smokers diagnosed with lung cancer by gender and low exposure to violence

# Screening-eligible current/former smokers by gender and low exposure to
# violence. One row per gender (Female/Male only), holding four count /
# diagnosed / percent triplets: all smokers, eligible smokers
# (screen_elig == 1), smokers with homicidegtmean2 == "0" (labelled
# "Low Exposure to Violence" in the headers), and the intersection.
# "Diagnosed" means malignanto == 1. A percentage is NaN when its
# denominator is zero.
elig_smoker_lowvio_g_count <-
  lung %>%
  filter(smokingstatus >= 2, gender %in% c("Female", "Male")) %>%
  group_by(gender) %>%
  summarize(
    smokers = n(),
    smokers_dx = sum(malignanto == 1, na.rm = TRUE),
    pct_smokers_dx = smokers_dx / smokers * 100,
    elig = sum(screen_elig == 1, na.rm = TRUE),
    elig_dx = sum(screen_elig == 1 & malignanto == 1, na.rm = TRUE),
    pct_elig_dx = elig_dx / elig * 100,
    low_vio = sum(homicidegtmean2 == "0", na.rm = TRUE),
    low_vio_dx = sum(homicidegtmean2 == "0" & malignanto == 1, na.rm = TRUE),
    pct_low_vio_dx = low_vio_dx / low_vio * 100,
    elig_low_vio = sum(screen_elig == 1 & homicidegtmean2 == "0", na.rm = TRUE),
    elig_low_vio_dx = sum(
      screen_elig == 1 & homicidegtmean2 == "0" & malignanto == 1,
      na.rm = TRUE),
    pct_elig_low_vio_dx = elig_low_vio_dx / elig_low_vio * 100)

# Display headers. Each header stays on a single line: a string literal
# wrapped across lines would embed the newline and indentation in the name.
colnames(elig_smoker_lowvio_g_count) <- c(
  "Gender",
  "Current or Former Smokers",
  "Smokers Diagnosed with Lung Cancer",
  "% Smokers Diagnosed",
  "Smokers Screening Eligible",
  "Smokers Screening Eligible Diagnosed",
  "% Eligible Smokers Diagnosed",
  "Smokers with Low Exposure to Violence",
  "Smokers with Low Exposure to Violence Diagnosed",
  "% Smokers with Low Exposure to Violence Diagnosed",
  "Smokers Screening Eligible with Low Exposure to Violence",
  "Smokers Screening Eligible with Low Exposure to Violence Diagnosed",
  "% Smokers Screening Eligible with Low Exposure to Violence Diagnosed")

# table (one left-alignment letter per column)
elig_smoker_lowvio_g_count %>%
  kable(
    align = rep("l", ncol(elig_smoker_lowvio_g_count)),
    caption = "Screening Eligible Current or Former Smokers Diagnosed with Lung Cancer by Gender with Low Exposure to Violence",
    digits = 2,
    format.args = list(big.mark = ",")) %>%
  kable_classic(font_size = 15,
                full_width = FALSE,
                html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Screening Eligible Current or Former Smokers Diagnosed with Lung Cancer by Gender with Low Exposure to Violence
Gender Current or Former Smokers Smokers Diagnosed with Lung Cancer % Smokers Diagnosed Smokers Screening Eligible Smokers Screening Eligible Diagnosed % Eligible Smokers Diagnosed Smokers with Low Exposure to Violence Smokers with Low Exposure to Violence Diagnosed % Smokers with Low Exposure to Violence Diagnosed Smokers Screening Eligible with Low Exposure to Violence Smokers Screening Eligible with Low Exposure to Violence Diagnosed % Smokers Screening Eligible with Low Exposure to Violence Diagnosed
Female 2,076 264 12.72 146 19 13.01 1,154 126 10.92 84 8 9.52
Male 2,061 261 12.66 158 14 8.86 1,322 150 11.35 105 7 6.67

6.7 Gender & High Exposure to Violence

6.7.1 Screening ineligible current or former smokers diagnosed with lung cancer by gender and high exposure to violence

# Screening-ineligible current/former smokers by gender and high exposure to
# violence. One row per gender (Female/Male only), holding four count /
# diagnosed / percent triplets: all smokers, ineligible smokers
# (screen_elig == 0), smokers with homicidegtmean2 == "1" (labelled
# "High Exposure to Violence" in the headers), and the intersection.
# "Diagnosed" means malignanto == 1. A percentage is NaN when its
# denominator is zero.
inelig_smokervio_gender_count <-
  lung %>%
  filter(smokingstatus >= 2, gender %in% c("Female", "Male")) %>%
  group_by(gender) %>%
  summarize(
    smokers = n(),
    smokers_dx = sum(malignanto == 1, na.rm = TRUE),
    pct_smokers_dx = smokers_dx / smokers * 100,
    inelig = sum(screen_elig == 0, na.rm = TRUE),
    inelig_dx = sum(screen_elig == 0 & malignanto == 1, na.rm = TRUE),
    pct_inelig_dx = inelig_dx / inelig * 100,
    high_vio = sum(homicidegtmean2 == "1", na.rm = TRUE),
    high_vio_dx = sum(homicidegtmean2 == "1" & malignanto == 1, na.rm = TRUE),
    pct_high_vio_dx = high_vio_dx / high_vio * 100,
    inelig_high_vio = sum(screen_elig == 0 & homicidegtmean2 == "1", na.rm = TRUE),
    inelig_high_vio_dx = sum(
      screen_elig == 0 & homicidegtmean2 == "1" & malignanto == 1,
      na.rm = TRUE),
    pct_inelig_high_vio_dx = inelig_high_vio_dx / inelig_high_vio * 100)

# Display headers. Each header stays on a single line: a string literal
# wrapped across lines would embed the newline and indentation in the name.
colnames(inelig_smokervio_gender_count) <- c(
  "Gender",
  "Current or Former Smokers",
  "Smokers Diagnosed with Lung Cancer",
  "% Smokers Diagnosed",
  "Smokers Screening Ineligible",
  "Smokers Screening Ineligible Diagnosed",
  "% Ineligible Smokers Diagnosed",
  "Smokers with High Exposure to Violence",
  "Smokers with High Exposure to Violence Diagnosed",
  "% Smokers with High Exposure to Violence Diagnosed",
  "Smokers Screening Ineligible with High Exposure to Violence",
  "Smokers Screening Ineligible with High Exposure to Violence Diagnosed",
  "% Smokers Screening Ineligible with High Exposure to Violence Diagnosed")

# table (one left-alignment letter per column)
inelig_smokervio_gender_count %>%
  kable(
    align = rep("l", ncol(inelig_smokervio_gender_count)),
    caption = "Screening Ineligible Current or Former Smokers Diagnosed with Lung Cancer by Gender with High Exposure to Violence",
    digits = 2,
    format.args = list(big.mark = ",")) %>%
  kable_classic(font_size = 15,
                full_width = FALSE,
                html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Screening Ineligible Current or Former Smokers Diagnosed with Lung Cancer by Gender with High Exposure to Violence
Gender Current or Former Smokers Smokers Diagnosed with Lung Cancer % Smokers Diagnosed Smokers Screening Ineligible Smokers Screening Ineligible Diagnosed % Ineligible Smokers Diagnosed Smokers with High Exposure to Violence Smokers with High Exposure to Violence Diagnosed % Smokers with High Exposure to Violence Diagnosed Smokers Screening Ineligible with High Exposure to Violence Smokers Screening Ineligible with High Exposure to Violence Diagnosed % Smokers Screening Ineligible with High Exposure to Violence Diagnosed
Female 2,076 264 12.72 1,930 245 12.69 920 138 15.0 858 127 14.80
Male 2,061 261 12.66 1,903 247 12.98 735 111 15.1 683 104 15.23

6.8 Gender, Race & Low Exposure to Violence

6.8.1 Screening eligible current or former smokers diagnosed with lung cancer by gender, race, and low exposure to violence

# Screening-eligible current/former smokers by gender, race and low exposure
# to violence. One row per gender x race/ethnicity (Female/Male only),
# holding four count / diagnosed / percent triplets: all smokers, eligible
# smokers (screen_elig == 1), smokers with homicidegtmean2 == "0" (labelled
# "Low Exposure to Violence" in the headers), and the intersection.
# "Diagnosed" means malignanto == 1. A percentage is NaN when its
# denominator is zero.
elig_smoker_lowvio_gr_count <-
  lung %>%
  filter(smokingstatus >= 2, gender %in% c("Female", "Male")) %>%
  group_by(gender, raceethnic_cat) %>%
  summarize(
    smokers = n(),
    smokers_dx = sum(malignanto == 1, na.rm = TRUE),
    pct_smokers_dx = smokers_dx / smokers * 100,
    elig = sum(screen_elig == 1, na.rm = TRUE),
    elig_dx = sum(screen_elig == 1 & malignanto == 1, na.rm = TRUE),
    pct_elig_dx = elig_dx / elig * 100,
    low_vio = sum(homicidegtmean2 == "0", na.rm = TRUE),
    low_vio_dx = sum(homicidegtmean2 == "0" & malignanto == 1, na.rm = TRUE),
    pct_low_vio_dx = low_vio_dx / low_vio * 100,
    elig_low_vio = sum(screen_elig == 1 & homicidegtmean2 == "0", na.rm = TRUE),
    elig_low_vio_dx = sum(
      screen_elig == 1 & homicidegtmean2 == "0" & malignanto == 1,
      na.rm = TRUE),
    pct_elig_low_vio_dx = elig_low_vio_dx / elig_low_vio * 100,
    # Return a plain tibble rather than one still grouped by gender.
    .groups = "drop")

# Display headers. Each header stays on a single line: a string literal
# wrapped across lines would embed the newline and indentation in the name.
colnames(elig_smoker_lowvio_gr_count) <- c(
  "Gender",
  "Race/Ethnicity",
  "Current or Former Smokers",
  "Smokers Diagnosed with Lung Cancer",
  "% Smokers Diagnosed",
  "Smokers Screening Eligible",
  "Smokers Screening Eligible Diagnosed",
  "% Eligible Smokers Diagnosed",
  "Smokers with Low Exposure to Violence",
  "Smokers with Low Exposure to Violence Diagnosed",
  "% Smokers with Low Exposure to Violence Diagnosed",
  "Smokers Screening Eligible with Low Exposure to Violence",
  "Smokers Screening Eligible with Low Exposure to Violence Diagnosed",
  "% Smokers Screening Eligible with Low Exposure to Violence Diagnosed")

# table (one left-alignment letter per column)
elig_smoker_lowvio_gr_count %>%
  kable(
    align = rep("l", ncol(elig_smoker_lowvio_gr_count)),
    caption = "Screening Eligible Current or Former Smokers Diagnosed with Lung Cancer by Gender & Race with Low Exposure to Violence",
    digits = 2,
    format.args = list(big.mark = ",")) %>%
  kable_classic(font_size = 15,
                full_width = FALSE,
                html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Screening Eligible Current or Former Smokers Diagnosed with Lung Cancer by Gender & Race with Low Exposure to Violence
Gender Race/Ethnicity Current or Former Smokers Smokers Diagnosed with Lung Cancer % Smokers Diagnosed Smokers Screening Eligible Smokers Screening Eligible Diagnosed % Eligible Smokers Diagnosed Smokers with Low Exposure to Violence Smokers with Low Exposure to Violence Diagnosed % Smokers with Low Exposure to Violence Diagnosed Smokers Screening Eligible with Low Exposure to Violence Smokers Screening Eligible with Low Exposure to Violence Diagnosed % Smokers Screening Eligible with Low Exposure to Violence Diagnosed
Female Black 1,582 206 13.02 114 14 12.28 693 72 10.39 53 4 7.55
Female Latinx 255 24 9.41 11 3 27.27 235 21 8.94 10 2 20.00
Female White 239 34 14.23 21 2 9.52 226 33 14.60 21 2 9.52
Male Black 1,267 181 14.29 98 11 11.22 589 76 12.90 48 4 8.33
Male Latinx 462 35 7.58 28 2 7.14 427 34 7.96 27 2 7.41
Male White 332 45 13.55 32 1 3.12 306 40 13.07 30 1 3.33

6.9 Gender, Race & High Exposure to Violence

6.9.1 Screening eligible current or former smokers diagnosed with lung cancer by gender and high exposure to violence

# Screening-eligible current/former smokers by gender and high exposure to
# violence. One row per gender (Female/Male only), holding four count /
# diagnosed / percent triplets: all smokers, eligible smokers
# (screen_elig == 1), smokers with homicidegtmean2 == "1" (labelled
# "High Exposure to Violence" in the headers), and the intersection.
# "Diagnosed" means malignanto == 1. A percentage is NaN when its
# denominator is zero.
elig_smokervio_gender_count <-
  lung %>%
  filter(smokingstatus >= 2, gender %in% c("Female", "Male")) %>%
  group_by(gender) %>%
  summarize(
    smokers = n(),
    smokers_dx = sum(malignanto == 1, na.rm = TRUE),
    pct_smokers_dx = smokers_dx / smokers * 100,
    elig = sum(screen_elig == 1, na.rm = TRUE),
    elig_dx = sum(screen_elig == 1 & malignanto == 1, na.rm = TRUE),
    pct_elig_dx = elig_dx / elig * 100,
    high_vio = sum(homicidegtmean2 == "1", na.rm = TRUE),
    high_vio_dx = sum(homicidegtmean2 == "1" & malignanto == 1, na.rm = TRUE),
    pct_high_vio_dx = high_vio_dx / high_vio * 100,
    elig_high_vio = sum(screen_elig == 1 & homicidegtmean2 == "1", na.rm = TRUE),
    elig_high_vio_dx = sum(
      screen_elig == 1 & homicidegtmean2 == "1" & malignanto == 1,
      na.rm = TRUE),
    pct_elig_high_vio_dx = elig_high_vio_dx / elig_high_vio * 100)

# Display headers. Each header stays on a single line: a string literal
# wrapped across lines would embed the newline and indentation in the name.
colnames(elig_smokervio_gender_count) <- c(
  "Gender",
  "Current or Former Smokers",
  "Smokers Diagnosed with Lung Cancer",
  "% Smokers Diagnosed",
  "Smokers Screening Eligible",
  "Smokers Screening Eligible Diagnosed",
  "% Eligible Smokers Diagnosed",
  "Smokers with High Exposure to Violence",
  "Smokers with High Exposure to Violence Diagnosed",
  "% Smokers with High Exposure to Violence Diagnosed",
  "Smokers Screening Eligible with High Exposure to Violence",
  "Smokers Screening Eligible with High Exposure to Violence Diagnosed",
  "% Smokers Screening Eligible with High Exposure to Violence Diagnosed")

# table (one left-alignment letter per column)
elig_smokervio_gender_count %>%
  kable(
    align = rep("l", ncol(elig_smokervio_gender_count)),
    caption = "Screening Eligible Current or Former Smokers Diagnosed with Lung Cancer by Gender with High Exposure to Violence",
    digits = 2,
    format.args = list(big.mark = ",")) %>%
  kable_classic(font_size = 15,
                full_width = FALSE,
                html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Screening Eligible Current or Former Smokers Diagnosed with Lung Cancer by Gender with High Exposure to Violence
Gender Current or Former Smokers Smokers Diagnosed with Lung Cancer % Smokers Diagnosed Smokers Screening Eligible Smokers Screening Eligible Diagnosed % Eligible Smokers Diagnosed Smokers with High Exposure to Violence Smokers with High Exposure to Violence Diagnosed % Smokers with High Exposure to Violence Diagnosed Smokers Screening Eligible with High Exposure to Violence Smokers Screening Eligible with High Exposure to Violence Diagnosed % Smokers Screening Eligible with High Exposure to Violence Diagnosed
Female 2,076 264 12.72 146 19 13.01 920 138 15.0 62 11 17.74
Male 2,061 261 12.66 158 14 8.86 735 111 15.1 52 7 13.46

6.9.2 Screening eligible current or former smokers diagnosed with lung cancer by race and high exposure to violence

# Screening-eligible current/former smokers by race and high exposure to
# violence. One row per race/ethnicity, holding four count / diagnosed /
# percent triplets: all smokers, eligible smokers (screen_elig == 1),
# smokers with homicidegtmean2 == "1" (labelled "High Exposure to Violence"
# in the headers), and the intersection of the last two.
# "Diagnosed" means malignanto == 1. A percentage is NaN when its
# denominator is zero.
elig_smokervio_race_count <-
  lung %>%
  filter(smokingstatus >= 2) %>%
  group_by(raceethnic_cat) %>%
  summarize(
    smokers = n(),
    smokers_dx = sum(malignanto == 1, na.rm = TRUE),
    pct_smokers_dx = smokers_dx / smokers * 100,
    elig = sum(screen_elig == 1, na.rm = TRUE),
    elig_dx = sum(screen_elig == 1 & malignanto == 1, na.rm = TRUE),
    pct_elig_dx = elig_dx / elig * 100,
    high_vio = sum(homicidegtmean2 == "1", na.rm = TRUE),
    high_vio_dx = sum(homicidegtmean2 == "1" & malignanto == 1, na.rm = TRUE),
    pct_high_vio_dx = high_vio_dx / high_vio * 100,
    elig_high_vio = sum(screen_elig == 1 & homicidegtmean2 == "1", na.rm = TRUE),
    elig_high_vio_dx = sum(
      screen_elig == 1 & homicidegtmean2 == "1" & malignanto == 1,
      na.rm = TRUE),
    pct_elig_high_vio_dx = elig_high_vio_dx / elig_high_vio * 100)

# Display headers. Each header stays on a single line: a string literal
# wrapped across lines would embed the newline and indentation in the name.
colnames(elig_smokervio_race_count) <- c(
  "Race/Ethnicity",
  "Current or Former Smokers",
  "Smokers Diagnosed with Lung Cancer",
  "% Smokers Diagnosed",
  "Smokers Screening Eligible",
  "Smokers Screening Eligible Diagnosed",
  "% Eligible Smokers Diagnosed",
  "Smokers with High Exposure to Violence",
  "Smokers with High Exposure to Violence Diagnosed",
  "% Smokers with High Exposure to Violence Diagnosed",
  "Smokers Screening Eligible with High Exposure to Violence",
  "Smokers Screening Eligible with High Exposure to Violence Diagnosed",
  "% Smokers Screening Eligible with High Exposure to Violence Diagnosed")

# table (one left-alignment letter per column)
elig_smokervio_race_count %>%
  kable(
    align = rep("l", ncol(elig_smokervio_race_count)),
    caption = "Screening Eligible Current or Former Smokers Diagnosed with Lung Cancer by Race with High Exposure to Violence",
    digits = 2,
    format.args = list(big.mark = ",")) %>%
  kable_classic(font_size = 15,
                full_width = FALSE,
                html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Screening Eligible Current or Former Smokers Diagnosed with Lung Cancer by Race with High Exposure to Violence
Race/Ethnicity Current or Former Smokers Smokers Diagnosed with Lung Cancer % Smokers Diagnosed Smokers Screening Eligible Smokers Screening Eligible Diagnosed % Eligible Smokers Diagnosed Smokers with High Exposure to Violence Smokers with High Exposure to Violence Diagnosed % Smokers with High Exposure to Violence Diagnosed Smokers Screening Eligible with High Exposure to Violence Smokers Screening Eligible with High Exposure to Violence Diagnosed % Smokers Screening Eligible with High Exposure to Violence Diagnosed
Black 2,850 387 13.58 212 25 11.79 1,565 239 15.27 111 17 15.32
Latinx 717 59 8.23 39 5 12.82 54 4 7.41 2 1 50.00
White 571 79 13.84 53 3 5.66 36 6 16.67 1 0 0.00

6.9.3 Screening eligible current or former smokers diagnosed with lung cancer by gender, race and high exposure to violence

# Screening-eligible current/former smokers by gender, race and high exposure
# to violence. One row per gender x race/ethnicity (Female/Male only),
# holding four count / diagnosed / percent triplets: all smokers, eligible
# smokers (screen_elig == 1), smokers with homicidegtmean2 == "1" (labelled
# "High Exposure to Violence" in the headers), and the intersection.
# "Diagnosed" means malignanto == 1. A percentage is NaN when its
# denominator is zero (small cells make this likely here).
elig_smokervio_genderr_count <-
  lung %>%
  filter(smokingstatus >= 2, gender %in% c("Female", "Male")) %>%
  group_by(gender, raceethnic_cat) %>%
  summarize(
    smokers = n(),
    smokers_dx = sum(malignanto == 1, na.rm = TRUE),
    pct_smokers_dx = smokers_dx / smokers * 100,
    elig = sum(screen_elig == 1, na.rm = TRUE),
    elig_dx = sum(screen_elig == 1 & malignanto == 1, na.rm = TRUE),
    pct_elig_dx = elig_dx / elig * 100,
    high_vio = sum(homicidegtmean2 == "1", na.rm = TRUE),
    high_vio_dx = sum(homicidegtmean2 == "1" & malignanto == 1, na.rm = TRUE),
    pct_high_vio_dx = high_vio_dx / high_vio * 100,
    elig_high_vio = sum(screen_elig == 1 & homicidegtmean2 == "1", na.rm = TRUE),
    elig_high_vio_dx = sum(
      screen_elig == 1 & homicidegtmean2 == "1" & malignanto == 1,
      na.rm = TRUE),
    pct_elig_high_vio_dx = elig_high_vio_dx / elig_high_vio * 100,
    # Return a plain tibble rather than one still grouped by gender.
    .groups = "drop")

# Display headers. Each header stays on a single line: a string literal
# wrapped across lines would embed the newline and indentation in the name.
colnames(elig_smokervio_genderr_count) <- c(
  "Gender",
  "Race/Ethnicity",
  "Current or Former Smokers",
  "Smokers Diagnosed with Lung Cancer",
  "% Smokers Diagnosed",
  "Smokers Screening Eligible",
  "Smokers Screening Eligible Diagnosed",
  "% Eligible Smokers Diagnosed",
  "Smokers with High Exposure to Violence",
  "Smokers with High Exposure to Violence Diagnosed",
  "% Smokers with High Exposure to Violence Diagnosed",
  "Smokers Screening Eligible with High Exposure to Violence",
  "Smokers Screening Eligible with High Exposure to Violence Diagnosed",
  "% Smokers Screening Eligible with High Exposure to Violence Diagnosed")

# table (one left-alignment letter per column)
elig_smokervio_genderr_count %>%
  kable(
    align = rep("l", ncol(elig_smokervio_genderr_count)),
    caption = "Screening Eligible Current or Former Smokers Diagnosed with Lung Cancer by Gender & Race with High Exposure to Violence",
    digits = 2,
    format.args = list(big.mark = ",")) %>%
  kable_classic(font_size = 15,
                full_width = FALSE,
                html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Screening Eligible Current or Former Smokers Diagnosed with Lung Cancer by Gender & Race with High Exposure to Violence
Gender Race/Ethnicity Current or Former Smokers Smokers Diagnosed with Lung Cancer % Smokers Diagnosed Smokers Screening Eligible Smokers Screening Eligible Diagnosed % Eligible Smokers Diagnosed Smokers with High Exposure to Violence Smokers with High Exposure to Violence Diagnosed % Smokers with High Exposure to Violence Diagnosed Smokers Screening Eligible with High Exposure to Violence Smokers Screening Eligible with High Exposure to Violence Diagnosed % Smokers Screening Eligible with High Exposure to Violence Diagnosed
Female Black 1,582 206 13.02 114 14 12.28 888 134 15.09 61 10 16.39
Female Latinx 255 24 9.41 11 3 27.27 19 3 15.79 1 1 100.00
Female White 239 34 14.23 21 2 9.52 13 1 7.69 0 0 NaN
Male Black 1,267 181 14.29 98 11 11.22 677 105 15.51 50 7 14.00
Male Latinx 462 35 7.58 28 2 7.14 35 1 2.86 1 0 0.00
Male White 332 45 13.55 32 1 3.12 23 5 21.74 1 0 0.00

6.9.4 Counts of screening ineligible current or former smokers diagnosed with lung cancer by gender, race and high exposure to violence

# Screening-ineligible current/former smokers by gender, race and high
# exposure to violence. One row per gender x race/ethnicity (Female/Male
# only), holding four count / diagnosed / percent triplets: all smokers,
# ineligible smokers (screen_elig == 0), smokers with homicidegtmean2 == "1"
# (labelled "High Exposure to Violence" in the headers), and the
# intersection. "Diagnosed" means malignanto == 1. A percentage is NaN when
# its denominator is zero.
inelig_smokervio_genderr_count <-
  lung %>%
  filter(smokingstatus >= 2, gender %in% c("Female", "Male")) %>%
  group_by(gender, raceethnic_cat) %>%
  summarize(
    smokers = n(),
    smokers_dx = sum(malignanto == 1, na.rm = TRUE),
    pct_smokers_dx = smokers_dx / smokers * 100,
    inelig = sum(screen_elig == 0, na.rm = TRUE),
    inelig_dx = sum(screen_elig == 0 & malignanto == 1, na.rm = TRUE),
    pct_inelig_dx = inelig_dx / inelig * 100,
    high_vio = sum(homicidegtmean2 == "1", na.rm = TRUE),
    high_vio_dx = sum(homicidegtmean2 == "1" & malignanto == 1, na.rm = TRUE),
    pct_high_vio_dx = high_vio_dx / high_vio * 100,
    inelig_high_vio = sum(screen_elig == 0 & homicidegtmean2 == "1", na.rm = TRUE),
    inelig_high_vio_dx = sum(
      screen_elig == 0 & homicidegtmean2 == "1" & malignanto == 1,
      na.rm = TRUE),
    pct_inelig_high_vio_dx = inelig_high_vio_dx / inelig_high_vio * 100,
    # Return a plain tibble rather than one still grouped by gender.
    .groups = "drop")

# Display headers. Each header stays on a single line: a string literal
# wrapped across lines would embed the newline and indentation in the name.
colnames(inelig_smokervio_genderr_count) <- c(
  "Gender",
  "Race/Ethnicity",
  "Current or Former Smokers",
  "Smokers Diagnosed with Lung Cancer",
  "% Smokers Diagnosed",
  "Smokers Screening Ineligible",
  "Smokers Screening Ineligible Diagnosed",
  "% Ineligible Smokers Diagnosed",
  "Smokers with High Exposure to Violence",
  "Smokers with High Exposure to Violence Diagnosed",
  "% Smokers with High Exposure to Violence Diagnosed",
  "Smokers Screening Ineligible with High Exposure to Violence",
  "Smokers Screening Ineligible with High Exposure to Violence Diagnosed",
  "% Smokers Screening Ineligible with High Exposure to Violence Diagnosed")

# table (one left-alignment letter per column)
inelig_smokervio_genderr_count %>%
  kable(
    align = rep("l", ncol(inelig_smokervio_genderr_count)),
    caption = "Screening Ineligible Current or Former Smokers Diagnosed with Lung Cancer by Gender & Race with High Exposure to Violence",
    digits = 2,
    format.args = list(big.mark = ",")) %>%
  kable_classic(font_size = 15,
                full_width = FALSE,
                html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Screening Ineligible Current or Former Smokers Diagnosed with Lung Cancer by Gender & Race with High Exposure to Violence
Gender Race/Ethnicity Current or Former Smokers Smokers Diagnosed with Lung Cancer % Smokers Diagnosed Smokers Screening Ineligible Smokers Screening Ineligible Diagnosed % Ineligible Smokers Diagnosed Smokers with High Exposure to Violence Smokers with High Exposure to Violence Diagnosed % Smokers with High Exposure to Violence Diagnosed Smokers Screening Ineligible with High Exposure to Violence Smokers Screening Ineligible with High Exposure to Violence Diagnosed % Smokers Screening Ineligible with High Exposure to Violence Diagnosed
Female Black 1,582 206 13.02 1,468 192 13.08 888 134 15.09 827 124 14.99
Female Latinx 255 24 9.41 244 21 8.61 19 3 15.79 18 2 11.11
Female White 239 34 14.23 218 32 14.68 13 1 7.69 13 1 7.69
Male Black 1,267 181 14.29 1,169 170 14.54 677 105 15.51 627 98 15.63
Male Latinx 462 35 7.58 434 33 7.60 35 1 2.86 34 1 2.94
Male White 332 45 13.55 300 44 14.67 23 5 21.74 22 5 22.73

7 Question 7: What percentage of nonsmokers were diagnosed with lung cancer?

By race/ethnicity and by homicide

7.1 Nonsmokers diagnosed by race/ethnicity

# Nonsmokers (smokingstatus == "1.00") by race/ethnicity: group size, number
# diagnosed (malignanto == 1), and percent diagnosed.
nonsmoker_diag_r <-
  lung %>%
  # filter() has no na.rm argument and already drops rows whose condition
  # evaluates to NA, so only the condition is passed.
  filter(smokingstatus == "1.00") %>%
  group_by(raceethnic_cat) %>%
  summarize(
    nonsmokers = n(),
    nonsmokers_dx = sum(malignanto == 1, na.rm = TRUE),
    pct_nonsmokers_dx = nonsmokers_dx / nonsmokers * 100)

# Display headers for the report table
colnames(nonsmoker_diag_r) <- c(
  "Race/Ethnicity",
  "Nonsmoker Count",
  "Nonsmokers Diagnosed with Lung Cancer",
  "Percent Nonsmokers Diagnosed")

# table (one left-alignment letter per column)
nonsmoker_diag_r %>%
  kable(
    align = rep("l", ncol(nonsmoker_diag_r)),
    caption = "Nonsmokers Diagnosed with Lung Cancer by Race",
    digits = 2,
    format.args = list(big.mark = ",")) %>%
  kable_classic(font_size = 15,
                full_width = FALSE,
                html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Nonsmokers Diagnosed with Lung Cancer by Race
Race/Ethnicity Nonsmoker Count Nonsmokers Diagnosed with Lung Cancer Percent Nonsmokers Diagnosed
Black 1,268 53 4.18
Latinx 692 27 3.90
White 257 10 3.89

7.2 Nonsmokers diagnosed by race/ethnicity and exposure to violence

# Nonsmokers (smokingstatus == "1.00") by race/ethnicity and exposure to
# violence. One row per race/ethnicity, holding three count / diagnosed /
# percent triplets: all nonsmokers, nonsmokers with homicidegtmean2 == "0"
# (labelled "Low Exposure to Violence" in the headers) and nonsmokers with
# homicidegtmean2 == "1" (labelled "High Exposure to Violence").
# "Diagnosed" means malignanto == 1. A percentage is NaN when its
# denominator is zero.
nonsmoker_diag_rvio <-
  lung %>%
  # filter() has no na.rm argument and already drops rows whose condition
  # evaluates to NA, so only the condition is passed.
  filter(smokingstatus == "1.00") %>%
  group_by(raceethnic_cat) %>%
  summarize(
    nonsmokers = n(),
    nonsmokers_dx = sum(malignanto == 1, na.rm = TRUE),
    pct_nonsmokers_dx = nonsmokers_dx / nonsmokers * 100,
    low_vio = sum(homicidegtmean2 == "0", na.rm = TRUE),
    low_vio_dx = sum(homicidegtmean2 == "0" & malignanto == 1, na.rm = TRUE),
    pct_low_vio_dx = low_vio_dx / low_vio * 100,
    high_vio = sum(homicidegtmean2 == "1", na.rm = TRUE),
    high_vio_dx = sum(homicidegtmean2 == "1" & malignanto == 1, na.rm = TRUE),
    pct_high_vio_dx = high_vio_dx / high_vio * 100)

# Display headers for the report table
colnames(nonsmoker_diag_rvio) <- c(
  "Race/Ethnicity",
  "Nonsmoker Count",
  "Nonsmokers Diagnosed with Lung Cancer",
  "% Nonsmokers Diagnosed",
  "Nonsmokers with Low Exposure to Violence",
  "Nonsmokers with Low Exposure to Violence Diagnosed",
  "% Nonsmokers with Low Exposure to Violence Diagnosed",
  "Nonsmokers with High Exposure to Violence",
  "Nonsmokers with High Exposure to Violence Diagnosed",
  "% Nonsmokers with High Exposure to Violence Diagnosed")

# table (one left-alignment letter per column)
nonsmoker_diag_rvio %>%
  kable(
    align = rep("l", ncol(nonsmoker_diag_rvio)),
    caption = "Nonsmokers Diagnosed with Lung Cancer by Race Considering Exposure to Violence",
    digits = 2,
    format.args = list(big.mark = ",")) %>%
  kable_classic(font_size = 15,
                full_width = FALSE,
                html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Nonsmokers Diagnosed with Lung Cancer by Race Considering Exposure to Violence
Race/Ethnicity Nonsmoker Count Nonsmokers Diagnosed with Lung Cancer % Nonsmokers Diagnosed Nonsmokers with Low Exposure to Violence Nonsmokers with Low Exposure to Violence Diagnosed % Nonsmokers with Low Exposure to Violence Diagnosed Nonsmokers with High Exposure to Violence Nonsmokers with High Exposure to Violence Diagnosed % Nonsmokers with High Exposure to Violence Diagnosed
Black 1,268 53 4.18 578 22 3.81 689 31 4.50
Latinx 692 27 3.90 652 25 3.83 40 2 5.00
White 257 10 3.89 243 8 3.29 12 1 8.33

8 Question 8: Differences in Smoking Status of Patients

Looking at smoking never (0) versus smoking ever (1)

# Derive a never (0) / ever (1) smoker flag from smokingstatus.
# NOTE(review): smokingstatus prints as <chr> in the check output, so `>= 2`
# is presumably a string comparison rather than a numeric one -- confirm that
# every code (including any missing-value marker) lands in the intended group.
lung <- lung %>%
  mutate(smoker_status_binary = ifelse(smokingstatus >= 2, 1, 0))
# Spot-check the new flag next to its source column.
lung %>%
  select(smokingstatus, smoker_status_binary) %>%
  group_by(smoker_status_binary)
## # A tibble: 7,198 × 2
## # Groups:   smoker_status_binary [2]
##    smokingstatus smoker_status_binary
##    <chr>                        <dbl>
##  1 1.00                             0
##  2 1.00                             0
##  3 3.00                             1
##  4 2.00                             1
##  5 1.00                             0
##  6 3.00                             1
##  7 3.00                             1
##  8 1.00                             0
##  9 1.00                             0
## 10 2.00                             1
## # ℹ 7,188 more rows
# looks good

8.1 Differences in lung cancer diagnosis by smoking status

8.1.1 Smoking status and diagnosis by race/ethnicity

# Among patients with malignanto == 1, count never smokers
# (smoker_status_binary == 0) and ever smokers (== 1) per race/ethnicity
# group and express each as a share of that group's row count.
smokerstat_diag_r <-
  lung %>%
  # filter() has no na.rm argument; rows whose condition evaluates to NA are
  # dropped automatically.
  filter(malignanto == 1) %>%
  group_by(raceethnic_cat) %>%
  summarise(
    Total = n(),
    nonsmoker_count = sum(smoker_status_binary == 0, na.rm = TRUE),
    # Percentages reuse the NA-safe counts so a single missing smoking status
    # cannot turn a whole group's percentage into NA.
    nonsmoker_percent = nonsmoker_count / Total * 100,
    smoker_count = sum(smoker_status_binary == 1, na.rm = TRUE),
    smoker_percent = smoker_count / Total * 100
  )

# Presentation labels, assigned by position (same order as the summarise()
# columns above).
colnames(smokerstat_diag_r) <- c(
  "Race/Ethnicity",
  "Patients Diagnosed with Lung Cancer",
  "Never Smoked Diagnosed Count",
  "% Never Smoked Diagnosed with Lung Cancer",
  "Ever Smoked Diagnosed Count",
  "% Ever Smoked Diagnosed with Lung Cancer"
)

# table
smokerstat_diag_r %>%
  kable(
    align = "lllll",
    caption = "Smoking Status and Lung Cancer Diagnosis by Race/Ethnicity",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Smoking Status and Lung Cancer Diagnosis by Race/Ethnicity
Race/Ethnicity Patients Diagnosed with Lung Cancer Never Smoked Diagnosed Count % Never Smoked Diagnosed with Lung Cancer Ever Smoked Diagnosed Count % Ever Smoked Diagnosed with Lung Cancer
Black 509 122 23.97 387 76.03
Latinx 99 40 40.40 59 59.60
White 99 20 20.20 79 79.80

8.1.2 Smoking status and diagnosis by gender

# Among patients with malignanto == 1, count never smokers
# (smoker_status_binary == 0) and ever smokers (== 1) per gender and express
# each as a share of that gender's row count.
smokerstat_diag_g <-
  lung %>%
  # filter() has no na.rm argument; rows whose condition evaluates to NA are
  # dropped automatically.
  filter(malignanto == 1) %>%
  group_by(gender) %>%
  summarise(
    Total = n(),
    nonsmoker_count = sum(smoker_status_binary == 0, na.rm = TRUE),
    # Percentages reuse the NA-safe counts so a single missing smoking status
    # cannot turn a whole group's percentage into NA.
    nonsmoker_percent = nonsmoker_count / Total * 100,
    smoker_count = sum(smoker_status_binary == 1, na.rm = TRUE),
    smoker_percent = smoker_count / Total * 100
  )

# Presentation labels, assigned by position (same order as the summarise()
# columns above).
colnames(smokerstat_diag_g) <- c(
  "Gender",
  "Patients Diagnosed with Lung Cancer",
  "Never Smoked Diagnosed Count",
  "% Never Smoked Diagnosed with Lung Cancer",
  "Ever Smoked Diagnosed Count",
  "% Ever Smoked Diagnosed with Lung Cancer"
)

# table
smokerstat_diag_g %>%
  kable(
    align = "lllll",
    caption = "Smoking Status and Lung Cancer Diagnosis by Gender",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Smoking Status and Lung Cancer Diagnosis by Gender
Gender Patients Diagnosed with Lung Cancer Never Smoked Diagnosed Count % Never Smoked Diagnosed with Lung Cancer Ever Smoked Diagnosed Count % Ever Smoked Diagnosed with Lung Cancer
Female 380 116 30.53 264 69.47
Male 327 66 20.18 261 79.82

8.1.3 Smoking status and diagnosis by gender and race

# Among patients with malignanto == 1 and gender recorded as Female or Male,
# count never smokers (smoker_status_binary == 0) and ever smokers (== 1) per
# gender x race/ethnicity cell and express each as a share of the cell.
smokerstat_diag_gr <-
  lung %>%
  # filter() has no na.rm argument; rows whose condition evaluates to NA are
  # dropped automatically. Both row filters run before grouping.
  filter(malignanto == 1) %>%
  filter(gender %in% c("Female", "Male")) %>%
  group_by(gender, raceethnic_cat) %>%
  summarise(
    Total = n(),
    nonsmoker_count = sum(smoker_status_binary == 0, na.rm = TRUE),
    # Percentages reuse the NA-safe counts so a single missing smoking status
    # cannot turn a whole cell's percentage into NA.
    nonsmoker_percent = nonsmoker_count / Total * 100,
    smoker_count = sum(smoker_status_binary == 1, na.rm = TRUE),
    smoker_percent = smoker_count / Total * 100
  )

# Presentation labels, assigned by position (grouping columns first, then the
# summarise() columns in the order written above).
colnames(smokerstat_diag_gr) <- c(
  "Gender",
  "Race/Ethnicity",
  "Patients Diagnosed with Lung Cancer",
  "Never Smoked Diagnosed Count",
  "% Never Smoked Diagnosed with Lung Cancer",
  "Ever Smoked Diagnosed Count",
  "% Ever Smoked Diagnosed with Lung Cancer"
)

# table
smokerstat_diag_gr %>%
  kable(
    align = "lllll",
    caption = "Smoking Status and Lung Cancer Diagnosis by Gender & Race",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Smoking Status and Lung Cancer Diagnosis by Gender & Race
Gender Race/Ethnicity Patients Diagnosed with Lung Cancer Never Smoked Diagnosed Count % Never Smoked Diagnosed with Lung Cancer Ever Smoked Diagnosed Count % Ever Smoked Diagnosed with Lung Cancer
Female Black 283 77 27.21 206 72.79
Female Latinx 51 27 52.94 24 47.06
Female White 46 12 26.09 34 73.91
Male Black 226 45 19.91 181 80.09
Male Latinx 48 13 27.08 35 72.92
Male White 53 8 15.09 45 84.91

9 Question 9: Look at “packyear” 5-year and 10-year ranges for variables

# Convert packyear to numeric; as.numeric() yields NA for entries it cannot
# parse, so no separate NA assignment is needed.
lung$packyear2 <- as.numeric(lung$packyear)
# Bin pack-years into 10-year ranges. Intervals are left-closed
# ([-Inf, 10), [10, 20), [20, Inf]) so a value of exactly 10 or 20 starts the
# next range and values just below a boundary (e.g. 9.99995) stay in the lower
# one. Labels are kept as-is because later filters match on them.
lung$packyear_range <- cut(
  lung$packyear2,
  breaks = c(-Inf, 10, 20, Inf),
  labels = c("0-10", "10-20", "20+"),
  right = FALSE,
  include.lowest = TRUE
)
# Spot-check the binned values next to their source columns.
lung %>%
  group_by(smokingstatus) %>%
  select(smokingstatus, packyear2, packyear_range)
## # A tibble: 7,198 × 3
## # Groups:   smokingstatus [4]
##    smokingstatus packyear2 packyear_range
##    <chr>             <dbl> <fct>         
##  1 1.00               NA   <NA>          
##  2 1.00               NA   <NA>          
##  3 3.00                9.2 0-10          
##  4 2.00               21   20+           
##  5 1.00               NA   <NA>          
##  6 3.00                3.3 0-10          
##  7 3.00               13.3 10-20         
##  8 1.00               NA   <NA>          
##  9 1.00               NA   <NA>          
## 10 2.00               NA   <NA>          
## # ℹ 7,188 more rows
# looks good
# Bin pack-years into 5-year ranges. Intervals are left-closed
# ([-Inf, 5), [5, 10), [10, 15), [15, 20), [20, Inf]) so a value exactly on a
# boundary starts the next range and values just below a boundary
# (e.g. 4.99995) stay in the lower one.
lung$packyear_range5 <- cut(
  lung$packyear2,
  breaks = c(-Inf, 5, 10, 15, 20, Inf),
  labels = c("0-5", "5-10", "10-15", "15-20", "20+"),
  right = FALSE,
  include.lowest = TRUE
)
# Spot-check the binned values next to their source columns.
lung %>%
  group_by(smokingstatus) %>%
  select(smokingstatus, packyear2, packyear_range5)
## # A tibble: 7,198 × 3
## # Groups:   smokingstatus [4]
##    smokingstatus packyear2 packyear_range5
##    <chr>             <dbl> <fct>          
##  1 1.00               NA   <NA>           
##  2 1.00               NA   <NA>           
##  3 3.00                9.2 5-10           
##  4 2.00               21   20+            
##  5 1.00               NA   <NA>           
##  6 3.00                3.3 0-5            
##  7 3.00               13.3 10-15          
##  8 1.00               NA   <NA>           
##  9 1.00               NA   <NA>           
## 10 2.00               NA   <NA>           
## # ℹ 7,188 more rows
# looks good

9.1 By Gender

9.1.1 Screening Eligible Packyear count ranges by gender of patients

# For rows passing the smokingstatus >= 2 filter, with gender Female/Male and
# a non-missing pack-year range: count patients per gender x pack-year range
# and report how many have screen_elig == 1.
py_screenelig_gender_table <-
  lung %>%
  # filter() has no na.rm argument; rows whose condition evaluates to NA are
  # dropped automatically.
  filter(smokingstatus >= 2) %>%
  filter(gender %in% c("Female", "Male")) %>%
  # Keeping only the three labelled ranges also drops rows with NA range.
  filter(packyear_range %in% c("0-10", "10-20", "20+")) %>%
  group_by(gender, packyear_range) %>%
  summarize(
    "Patient Count" = n(),
    "Screening Eligible Count" = sum(screen_elig == 1, na.rm = TRUE),
    "Percent Screening Eligible" = mean(screen_elig == 1, na.rm = TRUE) * 100
  )

# Presentation labels, assigned by position.
colnames(py_screenelig_gender_table) <- c(
  "Gender",
  "Pack Year Range",
  "Patient Count",
  "Screening Eligible Count",
  "Percent Screening Eligible"
)

# table
py_screenelig_gender_table %>%
  kable(
    align = "lllll",
    caption = "Percentage of Smokers Eligible for the Lung Cancer Screening by Gender by Pack Year Avg",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Percentage of Smokers Eligible for the Lung Cancer Screening by Gender by Pack Year Avg
Gender Pack Year Range Patient Count Screening Eligible Count Percent Screening Eligible
Female 0-10 250 0 0.00
Female 10-20 123 0 0.00
Female 20+ 167 146 87.43
Male 0-10 221 0 0.00
Male 10-20 127 0 0.00
Male 20+ 181 158 87.29

9.1.2 Screening Ineligible Packyear count ranges by gender of patients

# For rows passing the smokingstatus >= 2 filter, with gender Female/Male and
# a non-missing pack-year range: count patients per gender x pack-year range
# and report how many have screen_elig == 0 (screening ineligible).
py_screeninelig_gender_table <-
  lung %>%
  # filter() has no na.rm argument; rows whose condition evaluates to NA are
  # dropped automatically.
  filter(smokingstatus >= 2) %>%
  filter(gender %in% c("Female", "Male")) %>%
  # Keeping only the three labelled ranges also drops rows with NA range.
  filter(packyear_range %in% c("0-10", "10-20", "20+")) %>%
  group_by(gender, packyear_range) %>%
  # Column names say "Ineligible" because these count screen_elig == 0.
  summarize(
    "Patient Count" = n(),
    "Screening Ineligible Count" = sum(screen_elig == 0, na.rm = TRUE),
    "Percent Screening Ineligible" = mean(screen_elig == 0, na.rm = TRUE) * 100
  )

# Presentation labels, assigned by position.
colnames(py_screeninelig_gender_table) <- c(
  "Gender",
  "Pack Year Range",
  "Patient Count",
  "Screening Ineligible Count",
  "Percent Screening Ineligible"
)

# table
py_screeninelig_gender_table %>%
  kable(
    align = "lllll",
    caption = "Percentage of Smokers Ineligible for the Lung Cancer Screening by Gender by Pack Year Avg",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Percentage of Smokers Ineligible for the Lung Cancer Screening by Gender by Pack Year Avg
Gender Pack Year Range Patient Count Screening Ineligible Count Percent Screening Ineligible
Female 0-10 250 250 100.00
Female 10-20 123 123 100.00
Female 20+ 167 21 12.57
Male 0-10 221 221 100.00
Male 10-20 127 127 100.00
Male 20+ 181 23 12.71

9.1.3 Patient Counts, Screening Eligibility and Lung Cancer Diagnosis by Gender and Pack Year Avg Ranges

# Per gender x pack-year range (rows passing the smokingstatus >= 2 filter):
# patient count, screening-eligible count and share, diagnosed count
# (malignanto == 1) and share, and diagnosed among the screening eligible.
py_screeneligdiag_gender_table <-
  lung %>%
  # filter() has no na.rm argument; rows whose condition evaluates to NA are
  # dropped automatically.
  filter(smokingstatus >= 2) %>%
  filter(gender %in% c("Female", "Male")) %>%
  # Keeping only the three labelled ranges also drops rows with NA range.
  filter(packyear_range %in% c("0-10", "10-20", "20+")) %>%
  group_by(gender, packyear_range) %>%
  summarize(
    Total_smoker = n(),
    total_smoker_elig = sum(screen_elig == 1, na.rm = TRUE),
    per_screen_elig = mean(screen_elig == 1, na.rm = TRUE) * 100,
    Total_smoker_diag = sum(malignanto == 1, na.rm = TRUE),
    per_diag = (Total_smoker_diag / Total_smoker * 100),
    smoker_elig_diag = sum(screen_elig == 1 & malignanto == 1, na.rm = TRUE),
    # 0 / 0 yields NaN for cells with no screening-eligible patients.
    per_Sdiag = (smoker_elig_diag / total_smoker_elig * 100)
  )

# Presentation labels, assigned by position.
colnames(py_screeneligdiag_gender_table) <- c(
  "Gender",
  "Pack Year Range",
  "Patient Count",
  "Screening Eligible",
  "% Screening Eligible",
  "Diagnosed with Lung Cancer",
  "% Diagnosed with Lung Cancer",
  "Screening Eligible Diagnosed",
  "% Screening Eligible Diagnosed"
)

# table
py_screeneligdiag_gender_table %>%
  kable(
    align = "lllll",
    caption = "Patient Counts, Screening Eligibility and Lung Cancer Diagnosis by Gender and Pack Year Avg Ranges",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Patient Counts, Screening Eligibility and Lung Cancer Diagnosis by Gender and Pack Year Avg Ranges
Gender Pack Year Range Patient Count Screening Eligible % Screening Eligible Diagnosed with Lung Cancer % Diagnosed with Lung Cancer Screening Eligible Diagnosed % Screening Eligible Diagnosed
Female 0-10 250 0 0.00 27 10.80 0 NaN
Female 10-20 123 0 0.00 12 9.76 0 NaN
Female 20+ 167 146 87.43 29 17.37 19 13.01
Male 0-10 221 0 0.00 14 6.33 0 NaN
Male 10-20 127 0 0.00 23 18.11 0 NaN
Male 20+ 181 158 87.29 27 14.92 14 8.86

9.1.4 Patient Counts, Screening Ineligibility and Lung Cancer Diagnosis by Gender and Pack Year Avg Ranges

# Per gender x pack-year range (rows passing the smokingstatus >= 2 filter):
# patient count, screening-ineligible count and share (screen_elig == 0),
# diagnosed count (malignanto == 1) and share, and diagnosed among the
# screening ineligible.
py_screenieligdiag_gender_table <-
  lung %>%
  # filter() has no na.rm argument; rows whose condition evaluates to NA are
  # dropped automatically.
  filter(smokingstatus >= 2) %>%
  filter(gender %in% c("Female", "Male")) %>%
  # Keeping only the three labelled ranges also drops rows with NA range.
  filter(packyear_range %in% c("0-10", "10-20", "20+")) %>%
  group_by(gender, packyear_range) %>%
  # Working names use "inelig" because these columns count screen_elig == 0.
  summarize(
    Total_smoker = n(),
    total_smoker_inelig = sum(screen_elig == 0, na.rm = TRUE),
    per_screen_inelig = mean(screen_elig == 0, na.rm = TRUE) * 100,
    Total_smoker_diag = sum(malignanto == 1, na.rm = TRUE),
    per_diag = (Total_smoker_diag / Total_smoker * 100),
    smoker_inelig_diag = sum(screen_elig == 0 & malignanto == 1, na.rm = TRUE),
    per_Sdiag = (smoker_inelig_diag / total_smoker_inelig * 100)
  )

# Presentation labels, assigned by position.
colnames(py_screenieligdiag_gender_table) <- c(
  "Gender",
  "Pack Year Range",
  "Patient Count",
  "Screening Ineligible",
  "% Screening Ineligible",
  "Diagnosed with Lung Cancer",
  "% Diagnosed with Lung Cancer",
  "Screening Ineligible Diagnosed",
  "% Screening Ineligible Diagnosed"
)

# table
py_screenieligdiag_gender_table %>%
  kable(
    align = "lllll",
    caption = "Patient Counts, Screening Ineligibility and Lung Cancer Diagnosis by Gender and Pack Year Avg Ranges",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Patient Counts, Screening Ineligibility and Lung Cancer Diagnosis by Gender and Pack Year Avg Ranges
Gender Pack Year Range Patient Count Screening Ineligible % Screening Ineligible Diagnosed with Lung Cancer % Diagnosed with Lung Cancer Screening Ineligible Diagnosed % Screening Ineligible Diagnosed
Female 0-10 250 250 100.00 27 10.80 27 10.80
Female 10-20 123 123 100.00 12 9.76 12 9.76
Female 20+ 167 21 12.57 29 17.37 10 47.62
Male 0-10 221 221 100.00 14 6.33 14 6.33
Male 10-20 127 127 100.00 23 18.11 23 18.11
Male 20+ 181 23 12.71 27 14.92 13 56.52

9.2 By Race/Ethnicity

9.2.1 Screening Eligible Packyear count ranges by race/ethnicity of patients

# Per race/ethnicity x pack-year range (rows passing the smokingstatus >= 2
# filter): patient count, screening-eligible count and share, plus each
# range's share of its race/ethnicity group's patients.
py_screenelig_r_table <-
  lung %>%
  # filter() has no na.rm argument; rows whose condition evaluates to NA are
  # dropped automatically.
  filter(smokingstatus >= 2) %>%
  # Keeping only the three labelled ranges also drops rows with NA range.
  filter(packyear_range %in% c("0-10", "10-20", "20+")) %>%
  group_by(raceethnic_cat, packyear_range) %>%
  summarize(
    "Patient Count" = n(),
    "Screening Eligible Patient Count" = sum(screen_elig == 1, na.rm = TRUE),
    "Percent Screening Eligible" = mean(screen_elig == 1, na.rm = TRUE) * 100
  ) %>%
  # summarize() drops the last grouping level by default, so this share is
  # computed within each race/ethnicity group.
  mutate("Percent Patients in Pack Year Range" = prop.table(`Patient Count`) * 100)

# Presentation labels, assigned by position.
colnames(py_screenelig_r_table) <- c(
  "Race/Ethnicity",
  "Pack Year Range",
  "Patient Count",
  "Screening Eligible Patient Count",
  "Percent Screening Eligible",
  "Percent Patients in Pack Year Range by Race/Ethnicity"
)

# Move the within-group share next to the patient count for display.
col_order <- c(
  "Race/Ethnicity",
  "Pack Year Range",
  "Patient Count",
  "Percent Patients in Pack Year Range by Race/Ethnicity",
  "Screening Eligible Patient Count",
  "Percent Screening Eligible"
)
py_screenelig_r_table <- py_screenelig_r_table[, col_order]

# table
py_screenelig_r_table %>%
  kable(
    align = "llllll",
    caption = "Percentage of Smokers Eligible for the Lung Cancer Screening by Race/Ethnicity and Pack Year Avg Range",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Percentage of Smokers Eligible for the Lung Cancer Screening by Race/Ethnicity and Pack Year Avg Range
Race/Ethnicity Pack Year Range Patient Count Percent Patients in Pack Year Range by Race/Ethnicity Screening Eligible Patient Count Percent Screening Eligible
Black 0-10 355 46.53 0 0.00
Black 10-20 170 22.28 0 0.00
Black 20+ 238 31.19 212 89.08
Latinx 0-10 78 49.37 0 0.00
Latinx 10-20 36 22.78 0 0.00
Latinx 20+ 44 27.85 39 88.64
White 0-10 38 25.68 0 0.00
White 10-20 44 29.73 0 0.00
White 20+ 66 44.59 53 80.30

9.2.2 Screening Ineligible Packyear count ranges by race/ethnicity of patients

# Per race/ethnicity x pack-year range (rows passing the smokingstatus >= 2
# filter): patient count, screening-ineligible count and share
# (screen_elig == 0), plus each range's share of its race/ethnicity group's
# patients.
py_screenielig_r_table <-
  lung %>%
  # filter() has no na.rm argument; rows whose condition evaluates to NA are
  # dropped automatically.
  filter(smokingstatus >= 2) %>%
  # Keeping only the three labelled ranges also drops rows with NA range.
  filter(packyear_range %in% c("0-10", "10-20", "20+")) %>%
  group_by(raceethnic_cat, packyear_range) %>%
  summarize(
    "Patient Count" = n(),
    "Screening Ineligible Patient Count" = sum(screen_elig == 0, na.rm = TRUE),
    "Percent Screening Ineligible" = mean(screen_elig == 0, na.rm = TRUE) * 100
  ) %>%
  # summarize() drops the last grouping level by default, so this share is
  # computed within each race/ethnicity group.
  mutate("Percent Patients in Pack Year Range" = prop.table(`Patient Count`) * 100)

# Presentation labels, assigned by position.
colnames(py_screenielig_r_table) <- c(
  "Race/Ethnicity",
  "Pack Year Range",
  "Patient Count",
  "Screening Ineligible Patient Count",
  "Percent Screening Ineligible",
  "Percent Patients in Pack Year Range by Race/Ethnicity"
)

# Move the within-group share next to the patient count for display.
col_order2 <- c(
  "Race/Ethnicity",
  "Pack Year Range",
  "Patient Count",
  "Percent Patients in Pack Year Range by Race/Ethnicity",
  "Screening Ineligible Patient Count",
  "Percent Screening Ineligible"
)
py_screenielig_r_table <- py_screenielig_r_table[, col_order2]

# table
py_screenielig_r_table %>%
  kable(
    align = "llllll",
    caption = "Percentage of Smokers Ineligible for the Lung Cancer Screening by Race/Ethnicity and Pack Year Avg Range",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Percentage of Smokers Ineligible for the Lung Cancer Screening by Race/Ethnicity and Pack Year Avg Range
Race/Ethnicity Pack Year Range Patient Count Percent Patients in Pack Year Range by Race/Ethnicity Screening Ineligible Patient Count Percent Screening Ineligible
Black 0-10 355 46.53 355 100.00
Black 10-20 170 22.28 170 100.00
Black 20+ 238 31.19 26 10.92
Latinx 0-10 78 49.37 78 100.00
Latinx 10-20 36 22.78 36 100.00
Latinx 20+ 44 27.85 5 11.36
White 0-10 38 25.68 38 100.00
White 10-20 44 29.73 44 100.00
White 20+ 66 44.59 13 19.70

9.2.3 Patient Counts, Screening Eligibility and Lung Cancer Diagnosis by Race/Ethnicity and Pack Year Avg Ranges

# Per race/ethnicity x pack-year range (rows passing the smokingstatus >= 2
# filter): patient count, screening-eligible count and share, diagnosed count
# (malignanto == 1) and share, diagnosed among the screening eligible, and
# each range's share of its race/ethnicity group's patients.
py_screeneligdiag_race_table <-
  lung %>%
  # filter() has no na.rm argument; rows whose condition evaluates to NA are
  # dropped automatically.
  filter(smokingstatus >= 2) %>%
  # Keeping only the three labelled ranges also drops rows with NA range.
  filter(packyear_range %in% c("0-10", "10-20", "20+")) %>%
  group_by(raceethnic_cat, packyear_range) %>%
  summarize(
    Total_smoker = n(),
    total_smoker_elig = sum(screen_elig == 1, na.rm = TRUE),
    per_screen_elig = mean(screen_elig == 1, na.rm = TRUE) * 100,
    Total_smoker_diag = sum(malignanto == 1, na.rm = TRUE),
    per_diag = (Total_smoker_diag / Total_smoker * 100),
    smoker_elig_diag = sum(screen_elig == 1 & malignanto == 1, na.rm = TRUE),
    per_Sdiag = (smoker_elig_diag / total_smoker_elig * 100)
  ) %>%
  # summarize() drops the last grouping level by default, so this share is
  # computed within each race/ethnicity group.
  mutate("% Patients in Pack Year Range by Race/Ethnicity" = Total_smoker / sum(Total_smoker) * 100)

# Presentation labels, assigned by position.
colnames(py_screeneligdiag_race_table) <- c(
  "Race/Ethnicity",
  "Pack Year Range",
  "Patient Count",
  "Screening Eligible Patients",
  "% Screening Eligible",
  "Patients Diagnosed with Lung Cancer",
  "% Diagnosed with Lung Cancer",
  "Screening Eligible Diagnosed",
  "% Screening Eligible Diagnosed",
  "% Patients in Pack Year Range by Race/Ethnicity"
)

# Reorder columns: move the within-group share next to the patient count.
col_order3 <- c(
  "Race/Ethnicity",
  "Pack Year Range",
  "Patient Count",
  "% Patients in Pack Year Range by Race/Ethnicity",
  "Screening Eligible Patients",
  "% Screening Eligible",
  "Patients Diagnosed with Lung Cancer",
  "% Diagnosed with Lung Cancer",
  "Screening Eligible Diagnosed",
  "% Screening Eligible Diagnosed"
)
py_screeneligdiag_race_table <- py_screeneligdiag_race_table[, col_order3]

# Replace non-finite values (NaN from 0 / 0 when a cell has no eligible
# patients) with 0 in the "% Screening Eligible Diagnosed" column.
py_screeneligdiag_race_table$`% Screening Eligible Diagnosed` <- replace(
  py_screeneligdiag_race_table$`% Screening Eligible Diagnosed`,
  !is.finite(py_screeneligdiag_race_table$`% Screening Eligible Diagnosed`),
  0
)

py_screeneligdiag_race_table %>%
  kable(
    align = "llllllll",
    caption = "Patient Counts, Screening Eligibility and Lung Cancer Diagnosis by Race/Ethnicity and Pack Year Avg Ranges",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Patient Counts, Screening Eligibility and Lung Cancer Diagnosis by Race/Ethnicity and Pack Year Avg Ranges
Race/Ethnicity Pack Year Range Patient Count % Patients in Pack Year Range by Race/Ethnicity Screening Eligible Patients % Screening Eligible Patients Diagnosed with Lung Cancer % Diagnosed with Lung Cancer Screening Eligible Diagnosed % Screening Eligible Diagnosed
Black 0-10 355 46.53 0 0.00 37 10.42 0 0.00
Black 10-20 170 22.28 0 0.00 23 13.53 0 0.00
Black 20+ 238 31.19 212 89.08 42 17.65 25 11.79
Latinx 0-10 78 49.37 0 0.00 3 3.85 0 0.00
Latinx 10-20 36 22.78 0 0.00 5 13.89 0 0.00
Latinx 20+ 44 27.85 39 88.64 7 15.91 5 12.82
White 0-10 38 25.68 0 0.00 1 2.63 0 0.00
White 10-20 44 29.73 0 0.00 7 15.91 0 0.00
White 20+ 66 44.59 53 80.30 7 10.61 3 5.66

9.2.4 Smoker Patient Counts, Screening Ineligibility and Lung Cancer Diagnosis by Race/Ethnicity and Pack Year Avg Ranges

# Per race/ethnicity x pack-year range (rows passing the smokingstatus >= 2
# filter): smoker count, screening-ineligible count and share
# (screen_elig == 0), diagnosed count (malignanto == 1) and share, diagnosed
# among the screening ineligible, and each range's share of its
# race/ethnicity group's smokers.
py_screenieligdiag_race_table <-
  lung %>%
  # filter() has no na.rm argument; rows whose condition evaluates to NA are
  # dropped automatically.
  filter(smokingstatus >= 2) %>%
  # Keeping only the three labelled ranges also drops rows with NA range.
  filter(packyear_range %in% c("0-10", "10-20", "20+")) %>%
  group_by(raceethnic_cat, packyear_range) %>%
  summarize(
    Total_smoker = n(),
    total_smoker_inelig = sum(screen_elig == 0, na.rm = TRUE),
    per_screen_inelig = mean(screen_elig == 0, na.rm = TRUE) * 100,
    # Count diagnosed smokers (malignanto == 1) so the value matches its
    # "Smokers Diagnosed with Lung Cancer" label; counting malignanto == 0
    # here would report the smokers who were NOT diagnosed.
    # NOTE: tables rendered from the previous version must be regenerated.
    Total_smoker_diag = sum(malignanto == 1, na.rm = TRUE),
    per_diag = (Total_smoker_diag / Total_smoker * 100),
    smoker_inelig_diag = sum(screen_elig == 0 & malignanto == 1, na.rm = TRUE),
    per_Sdiag = (smoker_inelig_diag / total_smoker_inelig * 100)
  ) %>%
  # summarize() drops the last grouping level by default, so this share is
  # computed within each race/ethnicity group.
  mutate("% Smokers in Pack Year Range by Race/Ethnicity" = Total_smoker / sum(Total_smoker) * 100)

# Presentation labels, assigned by position.
colnames(py_screenieligdiag_race_table) <- c(
  "Race/Ethnicity",
  "Pack Year Range",
  "Former or Current Smoker Count",
  "Screening Ineligible Smoker Count",
  "% Screening Ineligible Smokers",
  "Smokers Diagnosed with Lung Cancer",
  "% Smokers Diagnosed with Lung Cancer",
  "Screening Ineligible Smokers Diagnosed",
  "% Screening Ineligible Smokers Diagnosed",
  "% Smokers in Pack Year Range by Race/Ethnicity"
)

# Reorder columns: move the within-group share next to the smoker count.
col_order4 <- c(
  "Race/Ethnicity",
  "Pack Year Range",
  "Former or Current Smoker Count",
  "% Smokers in Pack Year Range by Race/Ethnicity",
  "Screening Ineligible Smoker Count",
  "% Screening Ineligible Smokers",
  "Smokers Diagnosed with Lung Cancer",
  "% Smokers Diagnosed with Lung Cancer",
  "Screening Ineligible Smokers Diagnosed",
  "% Screening Ineligible Smokers Diagnosed"
)

py_screenieligdiag_race_table <- py_screenieligdiag_race_table[, col_order4]


py_screenieligdiag_race_table %>%
  kable(
    align = "llllllll",
    caption = "Patient Counts, Screening Ineligibility and Lung Cancer Diagnosis by Race/Ethnicity and Pack Year Avg Ranges",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Patient Counts, Screening Ineligibility and Lung Cancer Diagnosis by Race/Ethnicity and Pack Year Avg Ranges
Race/Ethnicity Pack Year Range Former or Current Smoker Count % Smokers in Pack Year Range by Race/Ethnicity Screening Ineligible Smoker Count % Screening Ineligible Smokers Smokers Diagnosed with Lung Cancer % Smokers Diagnosed with Lung Cancer Screening Ineligible Smokers Diagnosed % Screening Ineligible Smokers Diagnosed
Black 0-10 355 46.53 355 100.00 318 89.58 37 10.42
Black 10-20 170 22.28 170 100.00 147 86.47 23 13.53
Black 20+ 238 31.19 26 10.92 196 82.35 17 65.38
Latinx 0-10 78 49.37 78 100.00 75 96.15 3 3.85
Latinx 10-20 36 22.78 36 100.00 31 86.11 5 13.89
Latinx 20+ 44 27.85 5 11.36 37 84.09 2 40.00
White 0-10 38 25.68 38 100.00 37 97.37 1 2.63
White 10-20 44 29.73 44 100.00 37 84.09 7 15.91
White 20+ 66 44.59 13 19.70 59 89.39 4 30.77

9.3 Screening eligibility, diagnosis, and exposure to violence by race/ethnicity and avg pack year range

9.3.1 Screening eligible smokers diagnosed with lung cancer by race/ethnicity and high exposure to violence by avg pack year range

# Per race/ethnicity x pack-year range (rows passing the smokingstatus >= 2
# filter): smoker, screening-eligible and diagnosed (malignanto == 1) counts
# and shares, repeated for the homicidegtmean2 == "1" subgroup (labelled
# high exposure to violence in this report) and for screening-eligible
# smokers within that subgroup.
py_screenelig_grhivio_table <-
  lung %>%
  # filter() has no na.rm argument; rows whose condition evaluates to NA are
  # dropped automatically.
  filter(smokingstatus >= 2) %>%
  # Keeping only the three labelled ranges also drops rows with NA range.
  filter(packyear_range %in% c("0-10", "10-20", "20+")) %>%
  group_by(raceethnic_cat, packyear_range) %>%
  summarize(
    Total_smoker = n(),
    total_smoker_elig = sum(screen_elig == 1, na.rm = TRUE),
    per_screen_elig = mean(screen_elig == 1, na.rm = TRUE) * 100,
    Total_smoker_diag = sum(malignanto == 1, na.rm = TRUE),
    per_diag = (Total_smoker_diag / Total_smoker * 100),
    smoker_elig_diag = sum(screen_elig == 1 & malignanto == 1, na.rm = TRUE),
    per_Sdiag = (smoker_elig_diag / total_smoker_elig * 100),
    expose_vio = sum(homicidegtmean2 == "1", na.rm = TRUE),
    expose_vio_diag = sum(homicidegtmean2 == "1" & malignanto == 1, na.rm = TRUE),
    per_hdiag = (expose_vio_diag / expose_vio * 100),
    elig_vio = sum(screen_elig == 1 & homicidegtmean2 == "1", na.rm = TRUE),
    elig_expose_vio_diag = sum(screen_elig == 1 & homicidegtmean2 == "1" & malignanto == 1, na.rm = TRUE),
    per_diag_elig_vio = elig_expose_vio_diag / elig_vio * 100
  ) %>%
  # summarize() drops the last grouping level by default, so this share is
  # computed within each race/ethnicity group.
  mutate("% Smokers in Pack Year Range by Race/Ethnicity" = Total_smoker / sum(Total_smoker) * 100)

# Presentation labels, assigned by position.
colnames(py_screenelig_grhivio_table) <- c(
  "Race/Ethnicity",
  "Pack Year Range",
  "Smoker Count",
  "Screening Eligible Smokers",
  "% Screening Eligible Smokers",
  "Smokers Diagnosed with Lung Cancer",
  "% Smokers Diagnosed with Lung Cancer",
  "Smokers Screening Eligible Diagnosed",
  "% Eligible Smokers Diagnosed",
  "Smokers with High Exposure to Violence",
  "Smokers with High Exposure to Violence Diagnosed",
  "% Smokers with High Exposure to Violence Diagnosed",
  "Screening Eligible Smokers High Exposure to Violence",
  "Screening Eligible Smokers High Exposure to Violence Diagnosed",
  "% Screening Eligible Smokers High Exposure to Violence Diagnosed",
  "% Smokers in Pack Year Range by Race/Ethnicity"
)

# Move the within-group share next to the smoker count for display.
col_order5 <- c(
  "Race/Ethnicity",
  "Pack Year Range",
  "Smoker Count",
  "% Smokers in Pack Year Range by Race/Ethnicity",
  "Screening Eligible Smokers",
  "% Screening Eligible Smokers",
  "Smokers Diagnosed with Lung Cancer",
  "% Smokers Diagnosed with Lung Cancer",
  "Smokers Screening Eligible Diagnosed",
  "% Eligible Smokers Diagnosed",
  "Smokers with High Exposure to Violence",
  "Smokers with High Exposure to Violence Diagnosed",
  "% Smokers with High Exposure to Violence Diagnosed",
  "Screening Eligible Smokers High Exposure to Violence",
  "Screening Eligible Smokers High Exposure to Violence Diagnosed",
  "% Screening Eligible Smokers High Exposure to Violence Diagnosed"
)
py_screenelig_grhivio_table <- py_screenelig_grhivio_table[, col_order5]

# Replace non-finite values (NaN from 0 / 0) with 0 in the
# "% Eligible Smokers Diagnosed" column.
py_screenelig_grhivio_table$`% Eligible Smokers Diagnosed` <- replace(
  py_screenelig_grhivio_table$`% Eligible Smokers Diagnosed`,
  !is.finite(py_screenelig_grhivio_table$`% Eligible Smokers Diagnosed`),
  0
)

# Replace non-finite values (NaN from 0 / 0) with 0 in the
# "% Screening Eligible Smokers High Exposure to Violence Diagnosed" column.
py_screenelig_grhivio_table$`% Screening Eligible Smokers High Exposure to Violence Diagnosed` <- replace(
  py_screenelig_grhivio_table$`% Screening Eligible Smokers High Exposure to Violence Diagnosed`,
  !is.finite(py_screenelig_grhivio_table$`% Screening Eligible Smokers High Exposure to Violence Diagnosed`),
  0
)

# table
py_screenelig_grhivio_table %>%
  kable(
    align = "lllll",
    caption = "Exploring Screening Eligible Smokers Diagnosed with Lung Cancer by Race/Ethnicity with High Exposure to Violence (by avg pack year range)",
    digits = 2,
    format.args = list(big.mark = ",")
  ) %>%
  kable_classic(font_size = 15, full_width = FALSE, html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Exploring Screening Eligible Smokers Diagnosed with Lung Cancer by Race/Ethnicity with High Exposure to Violence (by avg pack year range)
Race/Ethnicity Pack Year Range Smoker Count % Smokers in Pack Year Range by Race/Ethnicity Screening Eligible Smokers % Screening Eligible Smokers Smokers Diagnosed with Lung Cancer % Smokers Diagnosed with Lung Cancer Smokers Screening Eligible Diagnosed % Eligible Smokers Diagnosed Smokers with High Exposure to Violence Smokers with High Exposure to Violence Diagnosed % Smokers with High Exposure to Violence Diagnosed Screening Eligible Smokers High Exposure to Violence Screening Eligible Smokers High Exposure to Violence Diagnosed % Screening Eligible Smokers High Exposure to Violence Diagnosed
Black 0-10 355 46.53 0 0.00 37 10.42 0 0.00 194 20 10.31 0 0 0.00
Black 10-20 170 22.28 0 0.00 23 13.53 0 0.00 95 13 13.68 0 0 0.00
Black 20+ 238 31.19 212 89.08 42 17.65 25 11.79 130 27 20.77 111 17 15.32
Latinx 0-10 78 49.37 0 0.00 3 3.85 0 0.00 9 1 11.11 0 0 0.00
Latinx 10-20 36 22.78 0 0.00 5 13.89 0 0.00 2 1 50.00 0 0 0.00
Latinx 20+ 44 27.85 39 88.64 7 15.91 5 12.82 3 1 33.33 2 1 50.00
White 0-10 38 25.68 0 0.00 1 2.63 0 0.00 3 0 0.00 0 0 0.00
White 10-20 44 29.73 0 0.00 7 15.91 0 0.00 3 1 33.33 0 0 0.00
White 20+ 66 44.59 53 80.30 7 10.61 3 5.66 3 0 0.00 1 0 0.00

9.3.2 Screening eligible smokers diagnosed with lung cancer by race/ethnicity and LOW exposure to violence by avg pack year range

# Summary of smokers (smokingstatus >= 2) with a pack-year range of 0-10,
# 10-20 or 20+, by race/ethnicity and pack-year range. For each cell it gives
# counts and percentages for: screening eligibility (screen_elig == 1), lung
# cancer diagnosis (malignanto == 1), low exposure to violence
# (homicidegtmean2 == "0"), and their combinations.
# Percentages whose denominator is 0 come out as NaN here; they are replaced
# further down.
py_screenelig_grlowvio_table <-
  lung %>%
  # filter() has no na.rm argument: rows where the condition is NA are always
  # dropped, so the stray `na.rm = TRUE` has been removed.
  filter(smokingstatus >= 2) %>%
  filter(packyear_range %in% c("0-10", "10-20", "20+")) %>%
  group_by(raceethnic_cat, packyear_range) %>%
  summarize(Total_smoker = n(),
            total_smoker_elig = sum(screen_elig == 1, na.rm = TRUE),
            per_screen_elig = mean(screen_elig == 1, na.rm = TRUE) * 100,
            Total_smoker_diag = sum(malignanto == 1, na.rm = TRUE),
            per_diag = (Total_smoker_diag / Total_smoker * 100),
            smoker_elig_diag = sum(screen_elig == 1 & malignanto == 1, na.rm = TRUE),
            per_Sdiag = (smoker_elig_diag / total_smoker_elig * 100),
            expose_vio = sum(homicidegtmean2 == "0", na.rm = TRUE),
            expose_vio_diag = sum(homicidegtmean2 == "0" & malignanto == 1, na.rm = TRUE),
            per_hdiag = (expose_vio_diag / expose_vio * 100),
            elig_vio = sum(screen_elig == 1 & homicidegtmean2 == "0", na.rm = TRUE),
            elig_expose_vio_diag = sum(screen_elig == 1 & homicidegtmean2 == "0" & malignanto == 1, na.rm = TRUE),
            per_diag_elig_vio = elig_expose_vio_diag / elig_vio * 100) %>%
  # summarize() drops only the last grouping level, so the result is still
  # grouped by raceethnic_cat and this sum() is the smoker total per
  # race/ethnicity group.
  mutate("% Smokers in Pack Year Range by Race/Ethnicity" = Total_smoker / sum(Total_smoker) * 100)

# Label the columns in the order summarize()/mutate() produced them.
# Column 8 is sum(screen_elig == 1 & malignanto == 1), i.e. screening-ELIGIBLE
# smokers who were diagnosed, so it is labelled "Eligible" (it was previously
# mislabelled "Ineligible").
colnames(py_screenelig_grlowvio_table) <- c(
  "Race/Ethnicity",
  "Pack Year Range",
  "Smoker Count",
  "Screening Eligible Smokers",
  "% Screening Eligible Smokers",
  "Smokers Diagnosed with Lung Cancer",
  "% Smokers Diagnosed with Lung Cancer",
  "Smokers Screening Eligible Diagnosed",
  "% Eligible Smokers Diagnosed",
  "Smokers with Low Exposure to Violence",
  "Smokers with Low Exposure to Violence Diagnosed",
  "% Smokers with Low Exposure to Violence Diagnosed",
  "Screening Eligible Smokers Low Exposure to Violence",
  "Screening Eligible Smokers Low Exposure to Violence Diagnosed",
  "% Screening Eligible Smokers Low Exposure to Violence Diagnosed",
  "% Smokers in Pack Year Range by Race/Ethnicity")

# Display order: move the within-race percentage (computed last, column 16)
# next to the smoker count. Deriving the order from the labels above keeps the
# two lists from drifting apart.
col_order6 <- colnames(py_screenelig_grlowvio_table)[c(1:3, 16, 4:15)]
py_screenelig_grlowvio_table <- py_screenelig_grlowvio_table[, col_order6]

# Set every non-finite entry (e.g. NaN from a 0 / 0 percentage) to 0 in the
# "% Eligible Smokers Diagnosed" column
py_screenelig_grlowvio_table[["% Eligible Smokers Diagnosed"]][
  !is.finite(py_screenelig_grlowvio_table[["% Eligible Smokers Diagnosed"]])
] <- 0

# Same treatment for the
# "% Screening Eligible Smokers Low Exposure to Violence Diagnosed" column
py_screenelig_grlowvio_table[["% Screening Eligible Smokers Low Exposure to Violence Diagnosed"]][
  !is.finite(py_screenelig_grlowvio_table[["% Screening Eligible Smokers Low Exposure to Violence Diagnosed"]])
] <- 0

# Display the low-violence / screening-eligible summary as a formatted table
kable(py_screenelig_grlowvio_table,
      caption = "Exploring Screening Eligible Smokers Diagnosed with Lung Cancer by Race/Ethnicity with Low Exposure to Violence (by avg pack year range)",
      align = "lllll",
      digits = 2,
      format.args = list(big.mark = ",")) %>%
  kable_classic(html_font = "Cambria",
                font_size = 15,
                full_width = FALSE) %>%
  kable_styling(bootstrap_options = "striped",
                full_width = FALSE,
                position = "left")
Exploring Screening Eligible Smokers Diagnosed with Lung Cancer by Race/Ethnicity with Low Exposure to Violence (by avg pack year range)
Race/Ethnicity Pack Year Range Smoker Count % Smokers in Pack Year Range by Race/Ethnicity Screening Eligible Smokers % Screening Eligible Smokers Smokers Diagnosed with Lung Cancer % Smokers Diagnosed with Lung Cancer Smokers Screening Eligible Diagnosed % Eligible Smokers Diagnosed Smokers with Low Exposure to Violence Smokers with Low Exposure to Violence Diagnosed % Smokers with Low Exposure to Violence Diagnosed Screening Eligible Smokers Low Exposure to Violence Screening Eligible Smokers Low Exposure to Violence Diagnosed % Screening Eligible Smokers Low Exposure to Violence Diagnosed
Black 0-10 355 46.53 0 0.00 37 10.42 0 0.00 161 17 10.56 0 0 0.00
Black 10-20 170 22.28 0 0.00 23 13.53 0 0.00 75 10 13.33 0 0 0.00
Black 20+ 238 31.19 212 89.08 42 17.65 25 11.79 108 15 13.89 101 8 7.92
Latinx 0-10 78 49.37 0 0.00 3 3.85 0 0.00 69 2 2.90 0 0 0.00
Latinx 10-20 36 22.78 0 0.00 5 13.89 0 0.00 34 4 11.76 0 0 0.00
Latinx 20+ 44 27.85 39 88.64 7 15.91 5 12.82 41 6 14.63 37 4 10.81
White 0-10 38 25.68 0 0.00 1 2.63 0 0.00 35 1 2.86 0 0 0.00
White 10-20 44 29.73 0 0.00 7 15.91 0 0.00 40 6 15.00 0 0 0.00
White 20+ 66 44.59 53 80.30 7 10.61 3 5.66 62 7 11.29 51 3 5.88

9.4 Screening ineligibility, diagnosis, and exposure to violence by race/ethnicity and avg pack year range

9.4.1 Screening ineligible smokers diagnosed with lung cancer by race/ethnicity and HIGH exposure to violence by avg pack year range

# Summary of smokers (smokingstatus >= 2) with a pack-year range of 0-10,
# 10-20 or 20+, by race/ethnicity and pack-year range. For each cell it gives
# counts and percentages for: screening INeligibility (screen_elig == 0), lung
# cancer diagnosis (malignanto == 1), high exposure to violence
# (homicidegtmean2 == "1"), and their combinations.
# Percentages whose denominator is 0 come out as NaN here; they are replaced
# further down.
py_screeninelig_grhivio_table <-
  lung %>%
  # filter() has no na.rm argument: rows where the condition is NA are always
  # dropped, so the stray `na.rm = TRUE` has been removed.
  filter(smokingstatus >= 2) %>%
  filter(packyear_range %in% c("0-10", "10-20", "20+")) %>%
  group_by(raceethnic_cat, packyear_range) %>%
  summarize(Total_smoker = n(),
            total_smoker_inelig = sum(screen_elig == 0, na.rm = TRUE),
            per_screen_inelig = mean(screen_elig == 0, na.rm = TRUE) * 100,
            Total_smoker_diag = sum(malignanto == 1, na.rm = TRUE),
            per_diag = (Total_smoker_diag / Total_smoker * 100),
            smoker_inelig_diag = sum(screen_elig == 0 & malignanto == 1, na.rm = TRUE),
            per_Sdiag = (smoker_inelig_diag / total_smoker_inelig * 100),
            expose_vio = sum(homicidegtmean2 == "1", na.rm = TRUE),
            expose_vio_diag = sum(homicidegtmean2 == "1" & malignanto == 1, na.rm = TRUE),
            per_hdiag = (expose_vio_diag / expose_vio * 100),
            inelig_vio = sum(screen_elig == 0 & homicidegtmean2 == "1", na.rm = TRUE),
            inelig_expose_vio_diag = sum(screen_elig == 0 & homicidegtmean2 == "1" & malignanto == 1, na.rm = TRUE),
            per_diag_inelig_vio = inelig_expose_vio_diag / inelig_vio * 100) %>%
  # summarize() drops only the last grouping level, so the result is still
  # grouped by raceethnic_cat and this sum() is the smoker total per
  # race/ethnicity group. This column is appended LAST (position 16).
  mutate("% Smokers in Pack Year Range by Race/Ethnicity" = Total_smoker / sum(Total_smoker) * 100)

# Label the columns in the order summarize()/mutate() produced them. The
# "% Smokers in Pack Year Range by Race/Ethnicity" column is created by the
# trailing mutate(), so it is the LAST column and its label must come last
# here. Listing it 4th (as before) shifted every label from column 4 onward
# onto the wrong data.
colnames(py_screeninelig_grhivio_table) <- c(
  "Race/Ethnicity",
  "Pack Year Range",
  "Smoker Count",
  "Screening Ineligible Smokers",
  "% Screening Ineligible Smokers",
  "Smokers Diagnosed with Lung Cancer",
  "% Smokers Diagnosed with Lung Cancer",
  "Smokers Screening Ineligible Diagnosed",
  "% Ineligible Smokers Diagnosed",
  "Smokers with High Exposure to Violence",
  "Smokers with High Exposure to Violence Diagnosed",
  "% Smokers with High Exposure to Violence Diagnosed",
  "Screening Ineligible Smokers High Exposure to Violence",
  "Screening Ineligible Smokers High Exposure to Violence Diagnosed",
  "% Screening Ineligible Smokers High Exposure to Violence Diagnosed",
  "% Smokers in Pack Year Range by Race/Ethnicity")

# Display order: move the within-race percentage (column 16) next to the
# smoker count and keep all 16 columns. The previous hand-written list left out
# "Screening Ineligible Smokers High Exposure to Violence Diagnosed", which
# silently dropped that column from the table.
col_order7 <- colnames(py_screeninelig_grhivio_table)[c(1:3, 16, 4:15)]
py_screeninelig_grhivio_table <- py_screeninelig_grhivio_table[, col_order7]

# Set every non-finite entry (e.g. NaN from a 0 / 0 percentage) to 0 in the
# "% Ineligible Smokers Diagnosed" column
py_screeninelig_grhivio_table[["% Ineligible Smokers Diagnosed"]][
  !is.finite(py_screeninelig_grhivio_table[["% Ineligible Smokers Diagnosed"]])
] <- 0

# Same treatment for the
# "% Screening Ineligible Smokers High Exposure to Violence Diagnosed" column
py_screeninelig_grhivio_table[["% Screening Ineligible Smokers High Exposure to Violence Diagnosed"]][
  !is.finite(py_screeninelig_grhivio_table[["% Screening Ineligible Smokers High Exposure to Violence Diagnosed"]])
] <- 0

# Display the high-violence / screening-INELIGIBLE summary.
# This previously piped py_screenelig_grhivio_table (the eligible table) into
# kable(), so the eligible numbers were shown under the "Ineligible" caption.
py_screeninelig_grhivio_table %>%
  kable(
        align = "lllll",
        caption = "Exploring Screening Ineligible Smokers Diagnosed with Lung Cancer by Race/Ethnicity with High Exposure to Violence (by avg pack year range)",
        digits = 2,
        format.args = list(big.mark = ",")) %>%
  kable_classic(font_size = 15,
                full_width = FALSE,
                html_font = "Cambria") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE, position = "left")
Exploring Screening Ineligible Smokers Diagnosed with Lung Cancer by Race/Ethnicity with High Exposure to Violence (by avg pack year range)
Race/Ethnicity Pack Year Range Smoker Count % Smokers in Pack Year Range by Race/Ethnicity Screening Eligible Smokers % Screening Eligible Smokers Smokers Diagnosed with Lung Cancer % Smokers Diagnosed with Lung Cancer Smokers Screening Eligible Diagnosed % Eligible Smokers Diagnosed Smokers with High Exposure to Violence Smokers with High Exposure to Violence Diagnosed % Smokers with High Exposure to Violence Diagnosed Screening Eligible Smokers High Exposure to Violence Screening Eligible Smokers High Exposure to Violence Diagnosed % Screening Eligible Smokers High Exposure to Violence Diagnosed
Black 0-10 355 46.53 0 0.00 37 10.42 0 0.00 194 20 10.31 0 0 0.00
Black 10-20 170 22.28 0 0.00 23 13.53 0 0.00 95 13 13.68 0 0 0.00
Black 20+ 238 31.19 212 89.08 42 17.65 25 11.79 130 27 20.77 111 17 15.32
Latinx 0-10 78 49.37 0 0.00 3 3.85 0 0.00 9 1 11.11 0 0 0.00
Latinx 10-20 36 22.78 0 0.00 5 13.89 0 0.00 2 1 50.00 0 0 0.00
Latinx 20+ 44 27.85 39 88.64 7 15.91 5 12.82 3 1 33.33 2 1 50.00
White 0-10 38 25.68 0 0.00 1 2.63 0 0.00 3 0 0.00 0 0 0.00
White 10-20 44 29.73 0 0.00 7 15.91 0 0.00 3 1 33.33 0 0 0.00
White 20+ 66 44.59 53 80.30 7 10.61 3 5.66 3 0 0.00 1 0 0.00

9.4.2 Screening ineligible smokers diagnosed with lung cancer by race/ethnicity and LOW exposure to violence by avg pack year range

# Summary of smokers (smokingstatus >= 2) with a pack-year range of 0-10,
# 10-20 or 20+, by race/ethnicity and pack-year range. For each cell it gives
# counts and percentages for: screening INeligibility (screen_elig == 0), lung
# cancer diagnosis (malignanto == 1), low exposure to violence
# (homicidegtmean2 == "0"), and their combinations.
# Percentages whose denominator is 0 come out as NaN.
py_screeninelig_grlvio_table <-
  lung %>%
  # filter() has no na.rm argument: rows where the condition is NA are always
  # dropped, so the stray `na.rm = TRUE` has been removed.
  filter(smokingstatus >= 2) %>%
  filter(packyear_range %in% c("0-10", "10-20", "20+")) %>%
  group_by(raceethnic_cat, packyear_range) %>%
  summarize(Total_smoker = n(),
            total_smoker_inelig = sum(screen_elig == 0, na.rm = TRUE),
            per_screen_inelig = mean(screen_elig == 0, na.rm = TRUE) * 100,
            Total_smoker_diag = sum(malignanto == 1, na.rm = TRUE),
            per_diag = (Total_smoker_diag / Total_smoker * 100),
            smoker_inelig_diag = sum(screen_elig == 0 & malignanto == 1, na.rm = TRUE),
            per_Sdiag = (smoker_inelig_diag / total_smoker_inelig * 100),
            expose_vio = sum(homicidegtmean2 == "0", na.rm = TRUE),
            expose_vio_diag = sum(homicidegtmean2 == "0" & malignanto == 1, na.rm = TRUE),
            per_hdiag = (expose_vio_diag / expose_vio * 100),
            inelig_vio = sum(screen_elig == 0 & homicidegtmean2 == "0", na.rm = TRUE),
            inelig_expose_vio_diag = sum(screen_elig == 0 & homicidegtmean2 == "0" & malignanto == 1, na.rm = TRUE),
            per_diag_inelig_vio = inelig_expose_vio_diag / inelig_vio * 100) %>%
  # summarize() drops only the last grouping level, so the result is still
  # grouped by raceethnic_cat and this sum() is the smoker total per
  # race/ethnicity group. This column is appended LAST (position 16).
  mutate("% Smokers in Pack Year Range by Race/Ethnicity" = Total_smoker / sum(Total_smoker) * 100)

# Label the columns in the order summarize()/mutate() produced them; the
# within-race percentage comes from the trailing mutate() and is therefore last.
colnames(py_screeninelig_grlvio_table) <- c(
  "Race/Ethnicity",
  "Pack Year Range",
  "Smoker Count",
  "Screening Ineligible Smokers",
  "% Screening Ineligible Smokers",
  "Smokers Diagnosed with Lung Cancer",
  "% Smokers Diagnosed with Lung Cancer",
  "Smokers Screening Ineligible Diagnosed",
  "% Ineligible Smokers Diagnosed",
  "Smokers with Low Exposure to Violence",
  "Smokers with Low Exposure to Violence Diagnosed",
  "% Smokers with Low Exposure to Violence Diagnosed",
  "Screening Ineligible Smokers Low Exposure to Violence",
  "Screening Ineligible Smokers Low Exposure to Violence Diagnosed",
  "% Screening Ineligible Smokers Low Exposure to Violence Diagnosed",
  "% Smokers in Pack Year Range by Race/Ethnicity")

# Display order: move the within-race percentage (column 16) next to the
# smoker count; all 16 columns are kept.
col_order8 <- colnames(py_screeninelig_grlvio_table)[c(1:3, 16, 4:15)]
py_screeninelig_grlvio_table <- py_screeninelig_grlvio_table[, col_order8]

# Match the three sibling tables in this section: a percentage whose
# denominator is 0 (no ineligible smokers in the cell) is NaN, so show it as 0.
py_screeninelig_grlvio_table$`% Ineligible Smokers Diagnosed` <- replace(
  py_screeninelig_grlvio_table$`% Ineligible Smokers Diagnosed`,
  !is.finite(py_screeninelig_grlvio_table$`% Ineligible Smokers Diagnosed`),
  0)
py_screeninelig_grlvio_table$`% Screening Ineligible Smokers Low Exposure to Violence Diagnosed` <- replace(
  py_screeninelig_grlvio_table$`% Screening Ineligible Smokers Low Exposure to Violence Diagnosed`,
  !is.finite(py_screeninelig_grlvio_table$`% Screening Ineligible Smokers Low Exposure to Violence Diagnosed`),
  0)



# Display the low-violence / screening-ineligible summary as a formatted table
kable(py_screeninelig_grlvio_table,
      caption = "Exploring Screening Ineligible Smokers Diagnosed with Lung Cancer by Race/Ethnicity with Low Exposure to Violence (by avg pack year range)",
      align = "lllll",
      digits = 2,
      format.args = list(big.mark = ",")) %>%
  kable_classic(html_font = "Cambria",
                font_size = 15,
                full_width = FALSE) %>%
  kable_styling(bootstrap_options = "striped",
                full_width = FALSE,
                position = "left")
Exploring Screening Ineligible Smokers Diagnosed with Lung Cancer by Race/Ethnicity with Low Exposure to Violence (by avg pack year range)
Race/Ethnicity Pack Year Range Smoker Count % Smokers in Pack Year Range by Race/Ethnicity Screening Ineligible Smokers % Screening Ineligible Smokers Smokers Diagnosed with Lung Cancer % Smokers Diagnosed with Lung Cancer Smokers Screening Ineligible Diagnosed % Ineligible Smokers Diagnosed Smokers with Low Exposure to Violence Smokers with Low Exposure to Violence Diagnosed % Smokers with Low Exposure to Violence Diagnosed Screening Ineligible Smokers Low Exposure to Violence Screening Ineligible Smokers Low Exposure to Violence Diagnosed % Screening Ineligible Smokers Low Exposure to Violence Diagnosed
Black 0-10 355 46.53 355 100.00 37 10.42 37 10.42 161 17 10.56 161 17 10.56
Black 10-20 170 22.28 170 100.00 23 13.53 23 13.53 75 10 13.33 75 10 13.33
Black 20+ 238 31.19 26 10.92 42 17.65 17 65.38 108 15 13.89 7 7 100.00
Latinx 0-10 78 49.37 78 100.00 3 3.85 3 3.85 69 2 2.90 69 2 2.90
Latinx 10-20 36 22.78 36 100.00 5 13.89 5 13.89 34 4 11.76 34 4 11.76
Latinx 20+ 44 27.85 5 11.36 7 15.91 2 40.00 41 6 14.63 4 2 50.00
White 0-10 38 25.68 38 100.00 1 2.63 1 2.63 35 1 2.86 35 1 2.86
White 10-20 44 29.73 44 100.00 7 15.91 7 15.91 40 6 15.00 40 6 15.00
White 20+ 66 44.59 13 19.70 7 10.61 4 30.77 62 7 11.29 11 4 36.36

10 Question 10: Simple Tables

10.1 Ineligible Smokers Dx

# Screening-ineligible smokers per 5-pack-year band: how many there are, how
# many of them were diagnosed with lung cancer, and the diagnosed share (%).
inelig_smokerdx <- lung %>%
  filter(
    smokingstatus >= 2,
    packyear_range5 %in% c("0-5", "5-10", "10-15", "15-20", "20+")
  ) %>%
  group_by(packyear_range5) %>%
  summarize(
    n_inelig = sum(screen_elig == 0, na.rm = TRUE),
    n_inelig_dx = sum(screen_elig == 0 & malignanto == 1, na.rm = TRUE),
    pct_inelig_dx = n_inelig_dx / n_inelig * 100
  )

# Presentation labels, in column order
names(inelig_smokerdx) <- c("Pack Year Range",
                            "Patient Count",
                            "Dx Lung Cancer",
                            "% Dx Lung Cancer")

# Display the table
kable(inelig_smokerdx,
      caption = "Screening Ineligible Smokers Dx Lung Cancer",
      align = "lllll",
      digits = 2,
      format.args = list(big.mark = ",")) %>%
  kable_classic(html_font = "Cambria",
                font_size = 15,
                full_width = FALSE) %>%
  kable_styling(bootstrap_options = "striped",
                full_width = FALSE,
                position = "left")
Screening Ineligible Smokers Dx Lung Cancer
Pack Year Range Patient Count Dx Lung Cancer % Dx Lung Cancer
0-5 279 24 8.60
5-10 192 17 8.85
10-15 143 18 12.59
15-20 107 17 15.89
20+ 44 23 52.27

10.2 Screening eligible Smokers Dx

# Screening-eligible smokers in the "20+" pack-year band: how many there are,
# how many of them were diagnosed with lung cancer, and the diagnosed share (%).
elig_smokerdx <- lung %>%
  filter(
    smokingstatus >= 2,
    packyear_range5 %in% "20+"
  ) %>%
  group_by(packyear_range5) %>%
  summarize(
    n_elig = sum(screen_elig == 1, na.rm = TRUE),
    n_elig_dx = sum(screen_elig == 1 & malignanto == 1, na.rm = TRUE),
    pct_elig_dx = n_elig_dx / n_elig * 100
  )

# Presentation labels, in column order
names(elig_smokerdx) <- c("Pack Year Range",
                          "Patient Count",
                          "Dx Lung Cancer",
                          "% Dx Lung Cancer")

# Display the table
kable(elig_smokerdx,
      caption = "Screening Eligible Smokers Dx Lung Cancer",
      align = "lllll",
      digits = 2,
      format.args = list(big.mark = ",")) %>%
  kable_classic(html_font = "Cambria",
                font_size = 15,
                full_width = FALSE) %>%
  kable_styling(bootstrap_options = "striped",
                full_width = FALSE,
                position = "left")
Screening Eligible Smokers Dx Lung Cancer
Pack Year Range Patient Count Dx Lung Cancer % Dx Lung Cancer
20+ 304 33 10.86

10.3 Ineligible Smokers Dx by Race/Ethnicity

# Screening-ineligible smokers by race/ethnicity and 5-pack-year band: count,
# number diagnosed with lung cancer, and the diagnosed share (%).
inelig_smokerdx5 <- lung %>%
  filter(
    smokingstatus >= 2,
    packyear_range5 %in% c("0-5", "5-10", "10-15", "15-20", "20+")
  ) %>%
  group_by(raceethnic_cat, packyear_range5) %>%
  summarize(
    n_inelig = sum(screen_elig == 0, na.rm = TRUE),
    n_inelig_dx = sum(screen_elig == 0 & malignanto == 1, na.rm = TRUE),
    pct_inelig_dx = n_inelig_dx / n_inelig * 100
  )

# Presentation labels, in column order
names(inelig_smokerdx5) <- c("Race/Ethnicity",
                             "Pack Year Range",
                             "Patient Count",
                             "Dx Lung Cancer",
                             "% Dx Lung Cancer")

# Display the table
kable(inelig_smokerdx5,
      caption = "Screening Ineligible Smokers Dx Lung Cancer by Race/Ethnicity",
      align = "lllll",
      digits = 2,
      format.args = list(big.mark = ",")) %>%
  kable_classic(html_font = "Cambria",
                font_size = 15,
                full_width = FALSE) %>%
  kable_styling(bootstrap_options = "striped",
                full_width = FALSE,
                position = "left")
Screening Ineligible Smokers Dx Lung Cancer by Race/Ethnicity
Race/Ethnicity Pack Year Range Patient Count Dx Lung Cancer % Dx Lung Cancer
Black 0-5 203 20 9.85
Black 5-10 152 17 11.18
Black 10-15 99 10 10.10
Black 15-20 71 13 18.31
Black 20+ 26 17 65.38
Latinx 0-5 52 3 5.77
Latinx 5-10 26 0 0.00
Latinx 10-15 24 3 12.50
Latinx 15-20 12 2 16.67
Latinx 20+ 5 2 40.00
White 0-5 24 1 4.17
White 5-10 14 0 0.00
White 10-15 20 5 25.00
White 15-20 24 2 8.33
White 20+ 13 4 30.77

10.4 Screening eligible Smokers Dx by Race/Ethnicity

# Screening-eligible smokers in the "20+" pack-year band by race/ethnicity:
# count, number diagnosed with lung cancer, and the diagnosed share (%).
elig_smokerdx5 <- lung %>%
  filter(
    smokingstatus >= 2,
    packyear_range5 %in% "20+"
  ) %>%
  group_by(raceethnic_cat, packyear_range5) %>%
  summarize(
    n_elig = sum(screen_elig == 1, na.rm = TRUE),
    n_elig_dx = sum(screen_elig == 1 & malignanto == 1, na.rm = TRUE),
    pct_elig_dx = n_elig_dx / n_elig * 100
  )

# Presentation labels, in column order
names(elig_smokerdx5) <- c("Race/Ethnicity",
                           "Pack Year Range",
                           "Patient Count",
                           "Dx Lung Cancer",
                           "% Dx Lung Cancer")

# Display the table
kable(elig_smokerdx5,
      caption = "Screening Eligible Smokers Dx Lung Cancer by Race/Ethnicity",
      align = "lllll",
      digits = 2,
      format.args = list(big.mark = ",")) %>%
  kable_classic(html_font = "Cambria",
                font_size = 15,
                full_width = FALSE) %>%
  kable_styling(bootstrap_options = "striped",
                full_width = FALSE,
                position = "left")
Screening Eligible Smokers Dx Lung Cancer by Race/Ethnicity
Race/Ethnicity Pack Year Range Patient Count Dx Lung Cancer % Dx Lung Cancer
Black 20+ 212 25 11.79
Latinx 20+ 39 5 12.82
White 20+ 53 3 5.66

10.5 High Violence, Ineligible Smokers Dx by Race/Ethnicity

# Screening-ineligible smokers with high exposure to violence
# (homicidegtmean2 == "1") by race/ethnicity and 5-pack-year band: count,
# number diagnosed with lung cancer, and the diagnosed share (%).
inelig_smokerhvio_race_count_table <- lung %>%
  filter(
    smokingstatus >= 2,
    packyear_range5 %in% c("0-5", "5-10", "10-15", "15-20", "20+")
  ) %>%
  group_by(raceethnic_cat, packyear_range5) %>%
  summarize(
    n_inelig_hi = sum(screen_elig == 0 & homicidegtmean2 == "1", na.rm = TRUE),
    n_inelig_hi_dx = sum(screen_elig == 0 & homicidegtmean2 == "1" & malignanto == 1, na.rm = TRUE),
    pct_inelig_hi_dx = n_inelig_hi_dx / n_inelig_hi * 100
  )

# Presentation labels, in column order
names(inelig_smokerhvio_race_count_table) <- c("Race/Ethnicity",
                                               "Pack Year Range",
                                               "Patient Count",
                                               "Dx Lung Cancer",
                                               "% Dx Lung Cancer")

# Display the table
kable(inelig_smokerhvio_race_count_table,
      caption = "Screening Ineligible Smokers with High Violence Exposure Dx Lung Cancer by Race/Ethnicity",
      align = "lllll",
      digits = 2,
      format.args = list(big.mark = ",")) %>%
  kable_classic(html_font = "Cambria",
                font_size = 15,
                full_width = FALSE) %>%
  kable_styling(bootstrap_options = "striped",
                full_width = FALSE,
                position = "left")
Screening Ineligible Smokers with High Violence Exposure Dx Lung Cancer by Race/Ethnicity
Race/Ethnicity Pack Year Range Patient Count Dx Lung Cancer % Dx Lung Cancer
Black 0-5 112 11 9.82
Black 5-10 82 9 10.98
Black 10-15 64 6 9.38
Black 15-20 31 7 22.58
Black 20+ 19 10 52.63
Latinx 0-5 7 1 14.29
Latinx 5-10 2 0 0.00
Latinx 10-15 1 0 0.00
Latinx 15-20 1 1 100.00
Latinx 20+ 1 0 0.00
White 0-5 2 0 0.00
White 5-10 1 0 0.00
White 10-15 2 1 50.00
White 15-20 1 0 0.00
White 20+ 2 0 0.00

10.6 High Violence, Eligible Smokers Dx by Race/Ethnicity

# Screening-eligible smokers with high exposure to violence
# (homicidegtmean2 == "1") in the "20+" pack-year band by race/ethnicity:
# count, number diagnosed with lung cancer, and the diagnosed share (%).
elig_smokerhvio_race_count_table <- lung %>%
  filter(
    smokingstatus >= 2,
    packyear_range5 %in% "20+"
  ) %>%
  group_by(raceethnic_cat, packyear_range5) %>%
  summarize(
    n_elig_hi = sum(screen_elig == 1 & homicidegtmean2 == "1", na.rm = TRUE),
    n_elig_hi_dx = sum(screen_elig == 1 & homicidegtmean2 == "1" & malignanto == 1, na.rm = TRUE),
    pct_elig_hi_dx = n_elig_hi_dx / n_elig_hi * 100
  )

# Presentation labels, in column order
names(elig_smokerhvio_race_count_table) <- c("Race/Ethnicity",
                                             "Pack Year Range",
                                             "Patient Count",
                                             "Dx Lung Cancer",
                                             "% Dx Lung Cancer")

# Display the table
kable(elig_smokerhvio_race_count_table,
      caption = "Screening Eligible Smokers with High Violence Exposure Dx Lung Cancer by Race/Ethnicity",
      align = "lllll",
      digits = 2,
      format.args = list(big.mark = ",")) %>%
  kable_classic(html_font = "Cambria",
                font_size = 15,
                full_width = FALSE) %>%
  kable_styling(bootstrap_options = "striped",
                full_width = FALSE,
                position = "left")
Screening Eligible Smokers with High Violence Exposure Dx Lung Cancer by Race/Ethnicity
Race/Ethnicity Pack Year Range Patient Count Dx Lung Cancer % Dx Lung Cancer
Black 20+ 111 17 15.32
Latinx 20+ 2 1 50.00
White 20+ 1 0 0.00

10.7 Low Violence, Ineligible Smokers Dx by Race/Ethnicity

# Screening-ineligible smokers with low exposure to violence
# (homicidegtmean2 == "0") by race/ethnicity and 5-pack-year band: count,
# number diagnosed with lung cancer, and the diagnosed share (%).
inelig_smokerlow_race_count_table <- lung %>%
  filter(
    smokingstatus >= 2,
    packyear_range5 %in% c("0-5", "5-10", "10-15", "15-20", "20+")
  ) %>%
  group_by(raceethnic_cat, packyear_range5) %>%
  summarize(
    n_inelig_lo = sum(screen_elig == 0 & homicidegtmean2 == "0", na.rm = TRUE),
    n_inelig_lo_dx = sum(screen_elig == 0 & homicidegtmean2 == "0" & malignanto == 1, na.rm = TRUE),
    pct_inelig_lo_dx = n_inelig_lo_dx / n_inelig_lo * 100
  )

# Presentation labels, in column order
names(inelig_smokerlow_race_count_table) <- c("Race/Ethnicity",
                                              "Pack Year Range",
                                              "Patient Count",
                                              "Dx Lung Cancer",
                                              "% Dx Lung Cancer")

# Display the table
kable(inelig_smokerlow_race_count_table,
      caption = "Screening Ineligible Smokers with Low Violence Exposure Dx Lung Cancer by Race/Ethnicity",
      align = "lllll",
      digits = 2,
      format.args = list(big.mark = ",")) %>%
  kable_classic(html_font = "Cambria",
                font_size = 15,
                full_width = FALSE) %>%
  kable_styling(bootstrap_options = "striped",
                full_width = FALSE,
                position = "left")
Screening Ineligible Smokers with Low Violence Exposure Dx Lung Cancer by Race/Ethnicity
Race/Ethnicity Pack Year Range Patient Count Dx Lung Cancer % Dx Lung Cancer
Black 0-5 91 9 9.89
Black 5-10 70 8 11.43
Black 10-15 35 4 11.43
Black 15-20 40 6 15.00
Black 20+ 7 7 100.00
Latinx 0-5 45 2 4.44
Latinx 5-10 24 0 0.00
Latinx 10-15 23 3 13.04
Latinx 15-20 11 1 9.09
Latinx 20+ 4 2 50.00
White 0-5 22 1 4.55
White 5-10 13 0 0.00
White 10-15 17 4 23.53
White 15-20 23 2 8.70
White 20+ 11 4 36.36

10.8 Low Violence, Eligible Smokers Dx by Race/Ethnicity

# Screening-eligible smokers with low exposure to violence
# (homicidegtmean2 == "0") in the "20+" pack-year band by race/ethnicity:
# count, number diagnosed with lung cancer, and the diagnosed share (%).
elig_smokerlowvio_race_count_table <- lung %>%
  filter(
    smokingstatus >= 2,
    packyear_range5 %in% "20+"
  ) %>%
  group_by(raceethnic_cat, packyear_range5) %>%
  summarize(
    n_elig_lo = sum(screen_elig == 1 & homicidegtmean2 == "0", na.rm = TRUE),
    n_elig_lo_dx = sum(screen_elig == 1 & homicidegtmean2 == "0" & malignanto == 1, na.rm = TRUE),
    pct_elig_lo_dx = n_elig_lo_dx / n_elig_lo * 100
  )

# Presentation labels, in column order
names(elig_smokerlowvio_race_count_table) <- c("Race/Ethnicity",
                                               "Pack Year Range",
                                               "Patient Count",
                                               "Dx Lung Cancer",
                                               "% Dx Lung Cancer")

# Display the table
kable(elig_smokerlowvio_race_count_table,
      caption = "Screening Eligible Smokers with Low Violence Exposure Dx Lung Cancer by Race/Ethnicity",
      align = "lllll",
      digits = 2,
      format.args = list(big.mark = ",")) %>%
  kable_classic(html_font = "Cambria",
                font_size = 15,
                full_width = FALSE) %>%
  kable_styling(bootstrap_options = "striped",
                full_width = FALSE,
                position = "left")
Screening Eligible Smokers with Low Violence Exposure Dx Lung Cancer by Race/Ethnicity
Race/Ethnicity Pack Year Range Patient Count Dx Lung Cancer % Dx Lung Cancer
Black 20+ 101 8 7.92
Latinx 20+ 37 4 10.81
White 20+ 51 3 5.88