1. Population Estimate

n1 <- sum(df$encounter_history %in% c("10", "11"))
n2 <- sum(df$encounter_history %in% c("01", "11"))
m2 <- sum(df$encounter_history == "11")
N_est <- (((n1 + 1) * (n2 + 1)) / (m2 + 1)) - 1
variance <- ((n1 + 1) * (n2 + 1) * (n1 - m2) * (n2 - m2)) / (((m2 + 1)^2) * (m2 + 2))
se <- sqrt(variance)

tab_pop <- data.frame(
  Metric = c("Marked (Day 1)", "Resighted (Day 2)", "Matches", "Estimated Pop (N)", "Lower 95% CI", "Upper 95% CI"),
  Value = c(n1, n2, m2, round(N_est), round(N_est - 1.96*se), round(N_est + 1.96*se))
)
kable(tab_pop, caption = "Chapman Mark-Resight Population Estimate")
Chapman Mark-Resight Population Estimate
Metric Value
Marked (Day 1) 45
Resighted (Day 2) 68
Matches 26
Estimated Pop (N) 117
Lower 95% CI 95
Upper 95% CI 138
# 2. Age and Sex distribution
tab_age_sex <- df %>% filter(Sex != "Unknown", Age != "Unknown") %>%
tabyl(Age, Sex) %>% adorn_totals(c("row", "col")) %>%
adorn_percentages("all") %>% adorn_pct_formatting(digits = 1) %>%
adorn_ns()

kable(tab_age_sex, caption = "Age and Sex Demographics")
Age and Sex Demographics
Age Female Male Total
Adult 33.8% (27) 26.2% (21) 60.0% (48)
Juvenile 18.8% (15) 3.8% (3) 22.5% (18)
Pup 8.8% (7) 8.8% (7) 17.5% (14)
Total 61.3% (49) 38.8% (31) 100.0% (80)
ggplot(df %>% filter(Sex != "Unknown", Age != "Unknown"), aes(x = Age,
fill = Sex)) + geom_bar(position = "dodge", color = "black") +
scale_fill_manual(values = c("Female" = "#FF9999", "Male" =
"#99CCFF")) + theme_minimal() + labs(title = "Age and Sex Distribution",
x = "Age Group", y = "Count")

# 3. Coat patterns and colour distribution
tab_coat <- df %>% tabyl(coat_patrn, prm_ct_clr) %>%
adorn_totals(c("row", "col")) 
kable(tab_coat, caption = "Primary Coat
Colour by Pattern")
Primary Coat Colour by Pattern
coat_patrn Black Brown other White NA_ Total
biclrd 16 16 1 9 0 42
solid 2 27 3 8 0 40
triclrd 2 1 0 1 0 4
NA 0 0 0 0 1 1
Total 20 44 4 18 1 87
ggplot(df, aes(x = coat_patrn, fill = prm_ct_clr)) + geom_bar(position =
"fill", color="black") + scale_y_continuous(labels = scales::percent) +
theme_minimal() + labs(title = "Primary Coat Colour Distribution by Coat
Pattern", x = "Coat Pattern", y = "Proportion", fill="Primary Color")

# 4. Size vs Sex

tab_size <- df %>% filter(Sex != "Unknown") %>% tabyl(size, Sex) %>%
adorn_totals("row") 
kable(tab_size, caption = "Dog Size Distribution by
Sex")
Dog Size Distribution by Sex
size Female Male
larg 14 15
medium 27 10
small 8 6
Total 49 31
# Run the test
chi_sq_size_sex <- chisq.test(table(df$size, df$Sex), simulate.p.value = TRUE)
# Create a formal reporting table
stat_tab_size <- data.frame(
  "Test" = "Chi-Square (Simulated)",
  "Variables" = "Size vs Sex",
  "Chi_Square_Value" = round(chi_sq_size_sex$statistic, 3),
  "P_Value" = format.pval(chi_sq_size_sex$p.value, eps = 0.001)
)
# Print it to the report
kable(stat_tab_size, caption = "Table 4a: Statistical Correlation for Size and Sex", row.names = FALSE)
Table 4a: Statistical Correlation for Size and Sex
Test Variables Chi_Square_Value P_Value
Chi-Square (Simulated) Size vs Sex 5.317 0.26187
# 5. Sterilisation coverage

tab_abc_sex <- df %>% filter(Sex != "Unknown", !is.na(ear_ntch)) %>%
tabyl(Sex, ear_ntch) %>% adorn_totals("row") %>%
adorn_percentages("row") %>% adorn_pct_formatting(digits = 1) %>%
adorn_ns()

kable(tab_abc_sex, caption = "Sterilisation (Ear Notch) Coverage by
Sex")
Sterilisation (Ear Notch) Coverage by Sex
Sex absnt not_ver
Female 100.0% (49) 0.0% (0)
Male 96.7% (29) 3.3% (1)
Total 98.7% (78) 1.3% (1)
ggplot(df %>% filter(Sex != "Unknown", !is.na(ear_ntch)), aes(x = Sex,
fill = ear_ntch)) + geom_bar(position = "fill", color="black") +
scale_fill_manual(values = c("absnt" = "#EF5350", "prsnt" =
"#66BB6A")) + scale_y_continuous(labels = scales::percent) +
theme_minimal() + labs(title = "Sterilisation Coverage by Sex", x =
"Sex", y = "Percentage", fill="Ear Notch")

## 6. Sterilisation by Body Condition & Correlation


# 1. Create a filtered dataset for accurate testing
df_body_abc <- df %>% filter(!is.na(bdy_cdtn), !is.na(ear_ntch), bdy_cdtn != "", ear_ntch != "")

# 2. Generate the summary table
tab_abc_body <- df_body_abc %>% 
  tabyl(bdy_cdtn, ear_ntch) %>% 
  adorn_percentages("row") %>% 
  adorn_pct_formatting(digits = 1) %>% 
  adorn_ns()

kable(tab_abc_body, caption = "Sterilisation Status vs Body Condition")
Sterilisation Status vs Body Condition
bdy_cdtn absnt not_ver
fair 100.0% (10) 0.0% (0)
good 97.1% (66) 2.9% (2)
poor 100.0% (7) 0.0% (0)
# 3. Perform the Chi-Square test for correlation
chi_sq_body_abc <- chisq.test(table(df_body_abc$bdy_cdtn, df_body_abc$ear_ntch))
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
stat_tab_body <- data.frame(
  "Test" = "Chi-Square (Simulated)",
  "Variables" = "Body Condition vs Sterilisation",
  "Chi_Square_Value" = round(chi_sq_body_abc$statistic, 3),
  "P_Value" = format.pval(chi_sq_body_abc$p.value, eps = 0.001)
)
kable(stat_tab_body, caption = "Table 6a: Statistical Correlation for Body Condition vs Sterilisation", row.names = FALSE)
Table 6a: Statistical Correlation for Body Condition vs Sterilisation
Test Variables Chi_Square_Value P_Value
Chi-Square (Simulated) Body Condition vs Sterilisation 0.512 0.77412
# 4. Generate the plot
ggplot(df_body_abc, aes(x = bdy_cdtn, fill = ear_ntch)) +
  geom_bar(position = "fill", color="black") +
  scale_fill_manual(values = c("absnt" = "#EF5350", "prsnt" = "#66BB6A")) +
  theme_minimal() +
  labs(title = "Sterilisation Coverage vs Body Condition", x = "Body Condition", y = "Proportion", fill="Ear Notch")

## 7. Proximity to Resources (Spatial Dependency)
tab_prox <- df %>%
  select(starts_with("prxm_")) %>%
  summarize(across(everything(), ~sum(. == TRUE, na.rm = TRUE))) %>%
  pivot_longer(cols = everything(), names_to = "Resource", values_to = "Dog_Count") %>%
  filter(Dog_Count > 0) %>% # Filter out resources with zero dogs for an accurate statistical test
  arrange(desc(Dog_Count))

# Clean up the resource names for the chart
tab_prox$Resource <- gsub("prxm_", "", tab_prox$Resource)
tab_prox$Resource <- gsub("_20m?", " (<20m)", tab_prox$Resource) 

kable(tab_prox, caption = "Proximity to Resources")
Proximity to Resources
Resource Dog_Count
no (<20m) 38
other 37
eatry (<20m) 28
tmple (<20m) 9
garb (<20m) 7
# Perform Chi-Square Goodness of Fit test
chi_sq_prox <- chisq.test(tab_prox$Dog_Count)
stat_tab_prox <- data.frame(
  "Test" = "Chi-Square Goodness-of-Fit (Simulated)",
  "Variable" = "Resource Proximity",
  "Chi_Square_Value" = round(chi_sq_prox$statistic, 3),
  "P_Value" = format.pval(chi_sq_prox$p.value, eps = 0.001)
)
kable(stat_tab_prox, caption = "Table 7a: Statistical Goodness-of-Fit for Resource Preference", row.names = FALSE)
Table 7a: Statistical Goodness-of-Fit for Resource Preference
Test Variable Chi_Square_Value P_Value
Chi-Square Goodness-of-Fit (Simulated) Resource Proximity 37.597 < 0.001
ggplot(tab_prox, aes(x = reorder(Resource, Dog_Count), y = Dog_Count)) +
  geom_col(fill = "#FFA726", color = "black") +
  coord_flip() +
  theme_minimal() +
  labs(title = "Proximity to Resources", x = "Resource Category", y = "Number of Dogs Observed")

8. Advanced Demographic & Health Correlations

# Helper function to calculate Cramér's V (Strength of Correlation)
get_cramers_v <- function(tbl) {
  chi_sq <- suppressWarnings(chisq.test(tbl)$statistic)
  n <- sum(tbl)
  v <- sqrt(chi_sq / (n * min(nrow(tbl) - 1, ncol(tbl) - 1)))
  return(round(v, 3))
}

# 1. Body Condition vs Sex
df_bc_sex <- df %>% filter(Sex != "Unknown", !is.na(bdy_cdtn), bdy_cdtn != "")
tbl_bc_sex <- table(df_bc_sex$bdy_cdtn, df_bc_sex$Sex)
test_bc_sex <- chisq.test(tbl_bc_sex, simulate.p.value = TRUE)

# 2. Body Condition vs Age
df_bc_age <- df %>% filter(Age != "Unknown", !is.na(bdy_cdtn), bdy_cdtn != "")
tbl_bc_age <- table(df_bc_age$bdy_cdtn, df_bc_age$Age)
test_bc_age <- chisq.test(tbl_bc_age, simulate.p.value = TRUE)

# 3. Sterilisation vs Sex
df_abc_sex <- df %>% filter(Sex != "Unknown", !is.na(ear_ntch), ear_ntch != "")
tbl_abc_sex <- table(df_abc_sex$ear_ntch, df_abc_sex$Sex)
test_abc_sex <- chisq.test(tbl_abc_sex, simulate.p.value = TRUE)

# 4. Coat Color vs Pattern
df_coat <- df %>% filter(!is.na(prm_ct_clr), prm_ct_clr != "", !is.na(coat_patrn), coat_patrn != "")
tbl_coat <- table(df_coat$prm_ct_clr, df_coat$coat_patrn)
test_coat <- chisq.test(tbl_coat, simulate.p.value = TRUE)

# Combine into a formal table
adv_health_stats <- data.frame(
  "Variables_Tested" = c("Body Condition vs Sex", "Body Condition vs Age", "Sterilisation vs Sex", "Coat Color vs Pattern"),
  "Chi_Square_Value" = c(round(test_bc_sex$statistic, 2), round(test_bc_age$statistic, 2), round(test_abc_sex$statistic, 2), round(test_coat$statistic, 2)),
  "P_Value" = c(format.pval(test_bc_sex$p.value, eps = 0.001), format.pval(test_bc_age$p.value, eps = 0.001), format.pval(test_abc_sex$p.value, eps = 0.001), format.pval(test_coat$p.value, eps = 0.001)),
  "Cramers_V_Strength" = c(get_cramers_v(tbl_bc_sex), get_cramers_v(tbl_bc_age), get_cramers_v(tbl_abc_sex), get_cramers_v(tbl_coat))
)

kable(adv_health_stats, caption = "Table 8: Demographic & Health Correlations", row.names = FALSE)
Table 8: Demographic & Health Correlations
Variables_Tested Chi_Square_Value P_Value Cramers_V_Strength
Body Condition vs Sex 0.80 0.72364 0.100
Body Condition vs Age 2.64 0.67116 0.124
Sterilisation vs Sex 1.65 0.36932 0.028
Coat Color vs Pattern 16.37 0.014993 0.308
get_cramers_v <- function(tbl) {
  chi_sq <- suppressWarnings(chisq.test(tbl)$statistic)
  n <- sum(tbl)
  v <- sqrt(chi_sq / (n * min(nrow(tbl) - 1, ncol(tbl) - 1)))
  return(round(v, 3))
}
# Create a specialized dataset to handle multiple proximity tags per dog
df_prox_long <- df %>%
  mutate(Dog_ID = row_number()) %>%
  pivot_longer(cols = starts_with("prxm_"), names_to = "Resource", values_to = "Present") %>%
  filter(Present == TRUE) %>%
  mutate(Resource = gsub("prxm_", "", Resource)) %>%
  mutate(Resource = gsub("_20m?", " (<20m)", Resource))

# 1. Proximity vs Body Condition
df_prox_bc <- df_prox_long %>% filter(!is.na(bdy_cdtn), bdy_cdtn != "")
tbl_prox_bc <- table(df_prox_bc$Resource, df_prox_bc$bdy_cdtn)
test_prox_bc <- chisq.test(tbl_prox_bc, simulate.p.value = TRUE)

# 2. Proximity vs Sex
df_prox_sex <- df_prox_long %>% filter(Sex != "Unknown")
tbl_prox_sex <- table(df_prox_sex$Resource, df_prox_sex$Sex)
test_prox_sex <- chisq.test(tbl_prox_sex, simulate.p.value = TRUE)

# 3. Proximity vs Age
df_prox_age <- df_prox_long %>% filter(Age != "Unknown")
tbl_prox_age <- table(df_prox_age$Resource, df_prox_age$Age)
test_prox_age <- chisq.test(tbl_prox_age, simulate.p.value = TRUE)

# 4. Proximity vs Sterilisation
df_prox_abc <- df_prox_long %>% filter(!is.na(ear_ntch), ear_ntch != "")
tbl_prox_abc <- table(df_prox_abc$Resource, df_prox_abc$ear_ntch)
test_prox_abc <- chisq.test(tbl_prox_abc, simulate.p.value = TRUE)

# Combine into a formal table
adv_prox_stats <- data.frame(
  "Variables_Tested" = c("Proximity vs Body Condition", "Proximity vs Sex", "Proximity vs Age", "Proximity vs Sterilisation"),
  "Chi_Square_Value" = c(round(test_prox_bc$statistic, 2), round(test_prox_sex$statistic, 2), round(test_prox_age$statistic, 2), round(test_prox_abc$statistic, 2)),
  "P_Value" = c(format.pval(test_prox_bc$p.value, eps = 0.001), format.pval(test_prox_sex$p.value, eps = 0.001), format.pval(test_prox_age$p.value, eps = 0.001), format.pval(test_prox_abc$p.value, eps = 0.001)),
  "Cramers_V_Strength" = c(get_cramers_v(tbl_prox_bc), get_cramers_v(tbl_prox_sex), get_cramers_v(tbl_prox_age), get_cramers_v(tbl_prox_abc))
)

kable(adv_prox_stats, caption = "Table 9: Resource Proximity Correlations", row.names = FALSE)
Table 9: Resource Proximity Correlations
Variables_Tested Chi_Square_Value P_Value Cramers_V_Strength
Proximity vs Body Condition 9.58 0.29385 0.201
Proximity vs Sex 3.02 0.57771 0.165
Proximity vs Age 15.65 0.030485 0.258
Proximity vs Sterilisation 4.58 0.3913 0.198

8. Body Condition vs Sterilisation (Journal Format)

# Filter for clean data
df_clean <- df %>% filter(!is.na(bdy_cdtn), bdy_cdtn != "", !is.na(ear_ntch), ear_ntch != "")

# Create the journal-ready table
df_clean %>%
  select(bdy_cdtn, ear_ntch) %>%
  tbl_cross(
    row = bdy_cdtn, 
    col = ear_ntch, 
    percent = "row",
    label = list(bdy_cdtn ~ "Body Condition", ear_ntch ~ "Sterilisation (Ear Notch)")
  ) %>%
  add_p() %>% # This automatically runs the Chi-Square test and adds the P-value!
  bold_labels()
Sterilisation (Ear Notch)
Total p-value1
absnt not_ver
Body Condition


>0.9
    fair 10 (100%) 0 (0%) 10 (100%)
    good 66 (97%) 2 (2.9%) 68 (100%)
    poor 7 (100%) 0 (0%) 7 (100%)
Total 83 (98%) 2 (2.4%) 85 (100%)
1 Fisher’s exact test

8. Association Analysis: Health & Demographics

#These tables show the actual distribution of dogs across categories. The P-value (Pearson's Chi-square with Monte Carlo simulation) indicates if the relationship is statistically significant.
# 1. Body Condition vs Sterilisation Status
df %>%
  filter(!is.na(bdy_cdtn), bdy_cdtn != "", !is.na(ear_ntch), ear_ntch != "") %>%
  select(bdy_cdtn, ear_ntch) %>%
  tbl_cross(
    row = bdy_cdtn, 
    col = ear_ntch, 
    percent = "row",
    label = list(bdy_cdtn ~ "Body Condition", ear_ntch ~ "Ear Notch (ABC Status)")
  ) %>%
  add_p(test = "chisq.test.no.correct") %>% # Simplified test call
  bold_labels()
## The following warnings were returned during `add_p()`:
## ! For variable `bdy_cdtn` (`ear_ntch`) and "statistic", "p.value", and
##   "parameter" statistics: Chi-squared approximation may be incorrect
Ear Notch (ABC Status)
Total p-value1
absnt not_ver
Body Condition


0.8
    fair 10 (100%) 0 (0%) 10 (100%)
    good 66 (97%) 2 (2.9%) 68 (100%)
    poor 7 (100%) 0 (0%) 7 (100%)
Total 83 (98%) 2 (2.4%) 85 (100%)
1 Pearson’s Chi-squared test
# 2. Body Condition vs Sex
df %>%
  filter(!is.na(bdy_cdtn), bdy_cdtn != "", Sex != "Unknown") %>%
  select(bdy_cdtn, Sex) %>%
  tbl_cross(
    row = bdy_cdtn, 
    col = Sex, 
    percent = "row",
    label = list(bdy_cdtn ~ "Body Condition", Sex ~ "Sex")
  ) %>%
  add_p(test = "chisq.test.no.correct") %>%
  bold_labels()
## The following warnings were returned during `add_p()`:
## ! For variable `bdy_cdtn` (`Sex`) and "statistic", "p.value", and "parameter"
##   statistics: Chi-squared approximation may be incorrect
Sex
Total p-value1
Female Male
Body Condition


0.7
    fair 7 (70%) 3 (30%) 10 (100%)
    good 37 (59%) 26 (41%) 63 (100%)
    poor 5 (71%) 2 (29%) 7 (100%)
Total 49 (61%) 31 (39%) 80 (100%)
1 Pearson’s Chi-squared test
# 3. Size vs Sex
df %>%
  filter(!is.na(size), size != "", Sex != "Unknown") %>%
  select(size, Sex) %>%
  tbl_cross(
    row = size, 
    col = Sex, 
    percent = "row",
    label = list(size ~ "Body Size", Sex ~ "Sex")
  ) %>%
  add_p(test = "chisq.test.no.correct") %>%
  bold_labels()
Sex
Total p-value1
Female Male
Body Size


0.12
    larg 14 (48%) 15 (52%) 29 (100%)
    medium 27 (73%) 10 (27%) 37 (100%)
    small 8 (57%) 6 (43%) 14 (100%)
Total 49 (61%) 31 (39%) 80 (100%)
1 Pearson’s Chi-squared test
# 1. Resource Type vs Body Condition
df_prox_long %>%
  filter(!is.na(bdy_cdtn), bdy_cdtn != "") %>%
  select(Resource, bdy_cdtn) %>%
  tbl_cross(
    row = Resource, 
    col = bdy_cdtn, 
    percent = "row",
    label = list(Resource ~ "Resource Category", bdy_cdtn ~ "Body Condition Score")
  ) %>%
  add_p(test = "chisq.test.no.correct") %>%
  bold_labels()
## The following warnings were returned during `add_p()`:
## ! For variable `Resource` (`bdy_cdtn`) and "statistic", "p.value", and
##   "parameter" statistics: Chi-squared approximation may be incorrect
Body Condition Score
Total p-value1
fair good poor
Resource Category



0.3
    eatry (<20m) 4 (15%) 22 (81%) 1 (3.7%) 27 (100%)
    garb (<20m) 1 (14%) 6 (86%) 0 (0%) 7 (100%)
    no (<20m) 4 (11%) 31 (82%) 3 (7.9%) 38 (100%)
    other 5 (14%) 28 (76%) 4 (11%) 37 (100%)
    tmple (<20m) 2 (22%) 4 (44%) 3 (33%) 9 (100%)
Total 16 (14%) 91 (77%) 11 (9.3%) 118 (100%)
1 Pearson’s Chi-squared test
# 2. Resource Type vs Sex
df_prox_long %>%
  filter(Sex != "Unknown") %>%
  select(Resource, Sex) %>%
  tbl_cross(
    row = Resource, 
    col = Sex, 
    percent = "row",
    label = list(Resource ~ "Resource Category", Sex ~ "Sex")
  ) %>%
  add_p(test = "chisq.test.no.correct") %>%
  bold_labels()
## The following warnings were returned during `add_p()`:
## ! For variable `Resource` (`Sex`) and "statistic", "p.value", and "parameter"
##   statistics: Chi-squared approximation may be incorrect
Sex
Total p-value1
Female Male
Resource Category


0.6
    eatry (<20m) 19 (73%) 7 (27%) 26 (100%)
    garb (<20m) 5 (71%) 2 (29%) 7 (100%)
    no (<20m) 20 (56%) 16 (44%) 36 (100%)
    other 18 (55%) 15 (45%) 33 (100%)
    tmple (<20m) 5 (56%) 4 (44%) 9 (100%)
Total 67 (60%) 44 (40%) 111 (100%)
1 Pearson’s Chi-squared test