1. Population Estimate
n1 <- sum(df$encounter_history %in% c("10", "11"))
n2 <- sum(df$encounter_history %in% c("01", "11"))
m2 <- sum(df$encounter_history == "11")
N_est <- (((n1 + 1) * (n2 + 1)) / (m2 + 1)) - 1
variance <- ((n1 + 1) * (n2 + 1) * (n1 - m2) * (n2 - m2)) / (((m2 + 1)^2) * (m2 + 2))
se <- sqrt(variance)
tab_pop <- data.frame(
Metric = c("Marked (Day 1)", "Resighted (Day 2)", "Matches", "Estimated Pop (N)", "Lower 95% CI", "Upper 95% CI"),
Value = c(n1, n2, m2, round(N_est), round(N_est - 1.96*se), round(N_est + 1.96*se))
)
kable(tab_pop, caption = "Chapman Mark-Resight Population Estimate")
Chapman Mark-Resight Population Estimate
| Marked (Day 1) |
45 |
| Resighted (Day 2) |
68 |
| Matches |
26 |
| Estimated Pop (N) |
117 |
| Lower 95% CI |
95 |
| Upper 95% CI |
138 |
# 2. Age and Sex distribution
tab_age_sex <- df %>% filter(Sex != "Unknown", Age != "Unknown") %>%
tabyl(Age, Sex) %>% adorn_totals(c("row", "col")) %>%
adorn_percentages("all") %>% adorn_pct_formatting(digits = 1) %>%
adorn_ns()
kable(tab_age_sex, caption = "Age and Sex Demographics")
Age and Sex Demographics
| Adult |
33.8% (27) |
26.2% (21) |
60.0% (48) |
| Juvenile |
18.8% (15) |
3.8% (3) |
22.5% (18) |
| Pup |
8.8% (7) |
8.8% (7) |
17.5% (14) |
| Total |
61.3% (49) |
38.8% (31) |
100.0% (80) |
ggplot(df %>% filter(Sex != "Unknown", Age != "Unknown"), aes(x = Age,
fill = Sex)) + geom_bar(position = "dodge", color = "black") +
scale_fill_manual(values = c("Female" = "#FF9999", "Male" =
"#99CCFF")) + theme_minimal() + labs(title = "Age and Sex Distribution",
x = "Age Group", y = "Count")

# 3. Coat patterns and colour distribution
tab_coat <- df %>% tabyl(coat_patrn, prm_ct_clr) %>%
adorn_totals(c("row", "col"))
kable(tab_coat, caption = "Primary Coat
Colour by Pattern")
Primary Coat Colour by Pattern
| biclrd |
16 |
16 |
1 |
9 |
0 |
42 |
| solid |
2 |
27 |
3 |
8 |
0 |
40 |
| triclrd |
2 |
1 |
0 |
1 |
0 |
4 |
| NA |
0 |
0 |
0 |
0 |
1 |
1 |
| Total |
20 |
44 |
4 |
18 |
1 |
87 |
ggplot(df, aes(x = coat_patrn, fill = prm_ct_clr)) + geom_bar(position =
"fill", color="black") + scale_y_continuous(labels = scales::percent) +
theme_minimal() + labs(title = "Primary Coat Colour Distribution by Coat
Pattern", x = "Coat Pattern", y = "Proportion", fill="Primary Color")

# 4. Size vs Sex
tab_size <- df %>% filter(Sex != "Unknown") %>% tabyl(size, Sex) %>%
adorn_totals("row")
kable(tab_size, caption = "Dog Size Distribution by
Sex")
Dog Size Distribution by Sex
| larg |
14 |
15 |
| medium |
27 |
10 |
| small |
8 |
6 |
| Total |
49 |
31 |
# Run the test
chi_sq_size_sex <- chisq.test(table(df$size, df$Sex), simulate.p.value = TRUE)
# Create a formal reporting table
stat_tab_size <- data.frame(
"Test" = "Chi-Square (Simulated)",
"Variables" = "Size vs Sex",
"Chi_Square_Value" = round(chi_sq_size_sex$statistic, 3),
"P_Value" = format.pval(chi_sq_size_sex$p.value, eps = 0.001)
)
# Print it to the report
kable(stat_tab_size, caption = "Table 4a: Statistical Correlation for Size and Sex", row.names = FALSE)
Table 4a: Statistical Correlation for Size and Sex
| Chi-Square (Simulated) |
Size vs Sex |
5.317 |
0.26187 |
# 5. Sterilisation coverage
tab_abc_sex <- df %>% filter(Sex != "Unknown", !is.na(ear_ntch)) %>%
tabyl(Sex, ear_ntch) %>% adorn_totals("row") %>%
adorn_percentages("row") %>% adorn_pct_formatting(digits = 1) %>%
adorn_ns()
kable(tab_abc_sex, caption = "Sterilisation (Ear Notch) Coverage by
Sex")
Sterilisation (Ear Notch) Coverage by Sex
| Female |
100.0% (49) |
0.0% (0) |
| Male |
96.7% (29) |
3.3% (1) |
| Total |
98.7% (78) |
1.3% (1) |
ggplot(df %>% filter(Sex != "Unknown", !is.na(ear_ntch)), aes(x = Sex,
fill = ear_ntch)) + geom_bar(position = "fill", color="black") +
scale_fill_manual(values = c("absnt" = "#EF5350", "prsnt" =
"#66BB6A")) + scale_y_continuous(labels = scales::percent) +
theme_minimal() + labs(title = "Sterilisation Coverage by Sex", x =
"Sex", y = "Percentage", fill="Ear Notch")

## 6. Sterilisation by Body Condition & Correlation
# 1. Create a filtered dataset for accurate testing
df_body_abc <- df %>% filter(!is.na(bdy_cdtn), !is.na(ear_ntch), bdy_cdtn != "", ear_ntch != "")
# 2. Generate the summary table
tab_abc_body <- df_body_abc %>%
tabyl(bdy_cdtn, ear_ntch) %>%
adorn_percentages("row") %>%
adorn_pct_formatting(digits = 1) %>%
adorn_ns()
kable(tab_abc_body, caption = "Sterilisation Status vs Body Condition")
Sterilisation Status vs Body Condition
| fair |
100.0% (10) |
0.0% (0) |
| good |
97.1% (66) |
2.9% (2) |
| poor |
100.0% (7) |
0.0% (0) |
# 3. Perform the Chi-Square test for correlation
chi_sq_body_abc <- chisq.test(table(df_body_abc$bdy_cdtn, df_body_abc$ear_ntch))
## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect
stat_tab_body <- data.frame(
"Test" = "Chi-Square (Simulated)",
"Variables" = "Body Condition vs Sterilisation",
"Chi_Square_Value" = round(chi_sq_body_abc$statistic, 3),
"P_Value" = format.pval(chi_sq_body_abc$p.value, eps = 0.001)
)
kable(stat_tab_body, caption = "Table 6a: Statistical Correlation for Body Condition vs Sterilisation", row.names = FALSE)
Table 6a: Statistical Correlation for Body Condition vs
Sterilisation
| Chi-Square (Simulated) |
Body Condition vs Sterilisation |
0.512 |
0.77412 |
# 4. Generate the plot
ggplot(df_body_abc, aes(x = bdy_cdtn, fill = ear_ntch)) +
geom_bar(position = "fill", color="black") +
scale_fill_manual(values = c("absnt" = "#EF5350", "prsnt" = "#66BB6A")) +
theme_minimal() +
labs(title = "Sterilisation Coverage vs Body Condition", x = "Body Condition", y = "Proportion", fill="Ear Notch")

## 7. Proximity to Resources (Spatial Dependency)
tab_prox <- df %>%
select(starts_with("prxm_")) %>%
summarize(across(everything(), ~sum(. == TRUE, na.rm = TRUE))) %>%
pivot_longer(cols = everything(), names_to = "Resource", values_to = "Dog_Count") %>%
filter(Dog_Count > 0) %>% # Filter out resources with zero dogs for an accurate statistical test
arrange(desc(Dog_Count))
# Clean up the resource names for the chart
tab_prox$Resource <- gsub("prxm_", "", tab_prox$Resource)
tab_prox$Resource <- gsub("_20m?", " (<20m)", tab_prox$Resource)
kable(tab_prox, caption = "Proximity to Resources")
Proximity to Resources
| no (<20m) |
38 |
| other |
37 |
| eatry (<20m) |
28 |
| tmple (<20m) |
9 |
| garb (<20m) |
7 |
# Perform Chi-Square Goodness of Fit test
chi_sq_prox <- chisq.test(tab_prox$Dog_Count)
stat_tab_prox <- data.frame(
"Test" = "Chi-Square Goodness-of-Fit (Simulated)",
"Variable" = "Resource Proximity",
"Chi_Square_Value" = round(chi_sq_prox$statistic, 3),
"P_Value" = format.pval(chi_sq_prox$p.value, eps = 0.001)
)
kable(stat_tab_prox, caption = "Table 7a: Statistical Goodness-of-Fit for Resource Preference", row.names = FALSE)
Table 7a: Statistical Goodness-of-Fit for Resource
Preference
| Chi-Square Goodness-of-Fit (Simulated) |
Resource Proximity |
37.597 |
< 0.001 |
ggplot(tab_prox, aes(x = reorder(Resource, Dog_Count), y = Dog_Count)) +
geom_col(fill = "#FFA726", color = "black") +
coord_flip() +
theme_minimal() +
labs(title = "Proximity to Resources", x = "Resource Category", y = "Number of Dogs Observed")

8. Advanced Demographic & Health Correlations
# Helper function to calculate Cramér's V (Strength of Correlation)
get_cramers_v <- function(tbl) {
chi_sq <- suppressWarnings(chisq.test(tbl)$statistic)
n <- sum(tbl)
v <- sqrt(chi_sq / (n * min(nrow(tbl) - 1, ncol(tbl) - 1)))
return(round(v, 3))
}
# 1. Body Condition vs Sex
df_bc_sex <- df %>% filter(Sex != "Unknown", !is.na(bdy_cdtn), bdy_cdtn != "")
tbl_bc_sex <- table(df_bc_sex$bdy_cdtn, df_bc_sex$Sex)
test_bc_sex <- chisq.test(tbl_bc_sex, simulate.p.value = TRUE)
# 2. Body Condition vs Age
df_bc_age <- df %>% filter(Age != "Unknown", !is.na(bdy_cdtn), bdy_cdtn != "")
tbl_bc_age <- table(df_bc_age$bdy_cdtn, df_bc_age$Age)
test_bc_age <- chisq.test(tbl_bc_age, simulate.p.value = TRUE)
# 3. Sterilisation vs Sex
df_abc_sex <- df %>% filter(Sex != "Unknown", !is.na(ear_ntch), ear_ntch != "")
tbl_abc_sex <- table(df_abc_sex$ear_ntch, df_abc_sex$Sex)
test_abc_sex <- chisq.test(tbl_abc_sex, simulate.p.value = TRUE)
# 4. Coat Color vs Pattern
df_coat <- df %>% filter(!is.na(prm_ct_clr), prm_ct_clr != "", !is.na(coat_patrn), coat_patrn != "")
tbl_coat <- table(df_coat$prm_ct_clr, df_coat$coat_patrn)
test_coat <- chisq.test(tbl_coat, simulate.p.value = TRUE)
# Combine into a formal table
adv_health_stats <- data.frame(
"Variables_Tested" = c("Body Condition vs Sex", "Body Condition vs Age", "Sterilisation vs Sex", "Coat Color vs Pattern"),
"Chi_Square_Value" = c(round(test_bc_sex$statistic, 2), round(test_bc_age$statistic, 2), round(test_abc_sex$statistic, 2), round(test_coat$statistic, 2)),
"P_Value" = c(format.pval(test_bc_sex$p.value, eps = 0.001), format.pval(test_bc_age$p.value, eps = 0.001), format.pval(test_abc_sex$p.value, eps = 0.001), format.pval(test_coat$p.value, eps = 0.001)),
"Cramers_V_Strength" = c(get_cramers_v(tbl_bc_sex), get_cramers_v(tbl_bc_age), get_cramers_v(tbl_abc_sex), get_cramers_v(tbl_coat))
)
kable(adv_health_stats, caption = "Table 8: Demographic & Health Correlations", row.names = FALSE)
Table 8: Demographic & Health Correlations
| Body Condition vs Sex |
0.80 |
0.72364 |
0.100 |
| Body Condition vs Age |
2.64 |
0.67116 |
0.124 |
| Sterilisation vs Sex |
1.65 |
0.36932 |
0.028 |
| Coat Color vs Pattern |
16.37 |
0.014993 |
0.308 |
get_cramers_v <- function(tbl) {
chi_sq <- suppressWarnings(chisq.test(tbl)$statistic)
n <- sum(tbl)
v <- sqrt(chi_sq / (n * min(nrow(tbl) - 1, ncol(tbl) - 1)))
return(round(v, 3))
}
# Create a specialized dataset to handle multiple proximity tags per dog
df_prox_long <- df %>%
mutate(Dog_ID = row_number()) %>%
pivot_longer(cols = starts_with("prxm_"), names_to = "Resource", values_to = "Present") %>%
filter(Present == TRUE) %>%
mutate(Resource = gsub("prxm_", "", Resource)) %>%
mutate(Resource = gsub("_20m?", " (<20m)", Resource))
# 1. Proximity vs Body Condition
df_prox_bc <- df_prox_long %>% filter(!is.na(bdy_cdtn), bdy_cdtn != "")
tbl_prox_bc <- table(df_prox_bc$Resource, df_prox_bc$bdy_cdtn)
test_prox_bc <- chisq.test(tbl_prox_bc, simulate.p.value = TRUE)
# 2. Proximity vs Sex
df_prox_sex <- df_prox_long %>% filter(Sex != "Unknown")
tbl_prox_sex <- table(df_prox_sex$Resource, df_prox_sex$Sex)
test_prox_sex <- chisq.test(tbl_prox_sex, simulate.p.value = TRUE)
# 3. Proximity vs Age
df_prox_age <- df_prox_long %>% filter(Age != "Unknown")
tbl_prox_age <- table(df_prox_age$Resource, df_prox_age$Age)
test_prox_age <- chisq.test(tbl_prox_age, simulate.p.value = TRUE)
# 4. Proximity vs Sterilisation
df_prox_abc <- df_prox_long %>% filter(!is.na(ear_ntch), ear_ntch != "")
tbl_prox_abc <- table(df_prox_abc$Resource, df_prox_abc$ear_ntch)
test_prox_abc <- chisq.test(tbl_prox_abc, simulate.p.value = TRUE)
# Combine into a formal table
adv_prox_stats <- data.frame(
"Variables_Tested" = c("Proximity vs Body Condition", "Proximity vs Sex", "Proximity vs Age", "Proximity vs Sterilisation"),
"Chi_Square_Value" = c(round(test_prox_bc$statistic, 2), round(test_prox_sex$statistic, 2), round(test_prox_age$statistic, 2), round(test_prox_abc$statistic, 2)),
"P_Value" = c(format.pval(test_prox_bc$p.value, eps = 0.001), format.pval(test_prox_sex$p.value, eps = 0.001), format.pval(test_prox_age$p.value, eps = 0.001), format.pval(test_prox_abc$p.value, eps = 0.001)),
"Cramers_V_Strength" = c(get_cramers_v(tbl_prox_bc), get_cramers_v(tbl_prox_sex), get_cramers_v(tbl_prox_age), get_cramers_v(tbl_prox_abc))
)
kable(adv_prox_stats, caption = "Table 9: Resource Proximity Correlations", row.names = FALSE)
Table 9: Resource Proximity Correlations
| Proximity vs Body Condition |
9.58 |
0.29385 |
0.201 |
| Proximity vs Sex |
3.02 |
0.57771 |
0.165 |
| Proximity vs Age |
15.65 |
0.030485 |
0.258 |
| Proximity vs Sterilisation |
4.58 |
0.3913 |
0.198 |
8. Body Condition vs Sterilisation (Journal Format)
# Filter for clean data
df_clean <- df %>% filter(!is.na(bdy_cdtn), bdy_cdtn != "", !is.na(ear_ntch), ear_ntch != "")
# Create the journal-ready table
df_clean %>%
select(bdy_cdtn, ear_ntch) %>%
tbl_cross(
row = bdy_cdtn,
col = ear_ntch,
percent = "row",
label = list(bdy_cdtn ~ "Body Condition", ear_ntch ~ "Sterilisation (Ear Notch)")
) %>%
add_p() %>% # This automatically runs the Chi-Square test and adds the P-value!
bold_labels()
|
Sterilisation (Ear Notch)
|
Total |
p-value |
| absnt |
not_ver |
| Body Condition |
|
|
|
>0.9 |
| fair |
10 (100%) |
0 (0%) |
10 (100%) |
|
| good |
66 (97%) |
2 (2.9%) |
68 (100%) |
|
| poor |
7 (100%) |
0 (0%) |
7 (100%) |
|
| Total |
83 (98%) |
2 (2.4%) |
85 (100%) |
|
8. Association Analysis: Health & Demographics
#These tables show the actual distribution of dogs across categories. The P-value (Pearson's Chi-square with Monte Carlo simulation) indicates if the relationship is statistically significant.
# 1. Body Condition vs Sterilisation Status
df %>%
filter(!is.na(bdy_cdtn), bdy_cdtn != "", !is.na(ear_ntch), ear_ntch != "") %>%
select(bdy_cdtn, ear_ntch) %>%
tbl_cross(
row = bdy_cdtn,
col = ear_ntch,
percent = "row",
label = list(bdy_cdtn ~ "Body Condition", ear_ntch ~ "Ear Notch (ABC Status)")
) %>%
add_p(test = "chisq.test.no.correct") %>% # Simplified test call
bold_labels()
## The following warnings were returned during `add_p()`:
## ! For variable `bdy_cdtn` (`ear_ntch`) and "statistic", "p.value", and
## "parameter" statistics: Chi-squared approximation may be incorrect
|
Ear Notch (ABC Status)
|
Total |
p-value |
| absnt |
not_ver |
| Body Condition |
|
|
|
0.8 |
| fair |
10 (100%) |
0 (0%) |
10 (100%) |
|
| good |
66 (97%) |
2 (2.9%) |
68 (100%) |
|
| poor |
7 (100%) |
0 (0%) |
7 (100%) |
|
| Total |
83 (98%) |
2 (2.4%) |
85 (100%) |
|
# 2. Body Condition vs Sex
df %>%
filter(!is.na(bdy_cdtn), bdy_cdtn != "", Sex != "Unknown") %>%
select(bdy_cdtn, Sex) %>%
tbl_cross(
row = bdy_cdtn,
col = Sex,
percent = "row",
label = list(bdy_cdtn ~ "Body Condition", Sex ~ "Sex")
) %>%
add_p(test = "chisq.test.no.correct") %>%
bold_labels()
## The following warnings were returned during `add_p()`:
## ! For variable `bdy_cdtn` (`Sex`) and "statistic", "p.value", and "parameter"
## statistics: Chi-squared approximation may be incorrect
|
Sex
|
Total |
p-value |
| Female |
Male |
| Body Condition |
|
|
|
0.7 |
| fair |
7 (70%) |
3 (30%) |
10 (100%) |
|
| good |
37 (59%) |
26 (41%) |
63 (100%) |
|
| poor |
5 (71%) |
2 (29%) |
7 (100%) |
|
| Total |
49 (61%) |
31 (39%) |
80 (100%) |
|
# 3. Size vs Sex
df %>%
filter(!is.na(size), size != "", Sex != "Unknown") %>%
select(size, Sex) %>%
tbl_cross(
row = size,
col = Sex,
percent = "row",
label = list(size ~ "Body Size", Sex ~ "Sex")
) %>%
add_p(test = "chisq.test.no.correct") %>%
bold_labels()
|
Sex
|
Total |
p-value |
| Female |
Male |
| Body Size |
|
|
|
0.12 |
| larg |
14 (48%) |
15 (52%) |
29 (100%) |
|
| medium |
27 (73%) |
10 (27%) |
37 (100%) |
|
| small |
8 (57%) |
6 (43%) |
14 (100%) |
|
| Total |
49 (61%) |
31 (39%) |
80 (100%) |
|
# 1. Resource Type vs Body Condition
df_prox_long %>%
filter(!is.na(bdy_cdtn), bdy_cdtn != "") %>%
select(Resource, bdy_cdtn) %>%
tbl_cross(
row = Resource,
col = bdy_cdtn,
percent = "row",
label = list(Resource ~ "Resource Category", bdy_cdtn ~ "Body Condition Score")
) %>%
add_p(test = "chisq.test.no.correct") %>%
bold_labels()
## The following warnings were returned during `add_p()`:
## ! For variable `Resource` (`bdy_cdtn`) and "statistic", "p.value", and
## "parameter" statistics: Chi-squared approximation may be incorrect
|
Body Condition Score
|
Total |
p-value |
| fair |
good |
poor |
| Resource Category |
|
|
|
|
0.3 |
| eatry (<20m) |
4 (15%) |
22 (81%) |
1 (3.7%) |
27 (100%) |
|
| garb (<20m) |
1 (14%) |
6 (86%) |
0 (0%) |
7 (100%) |
|
| no (<20m) |
4 (11%) |
31 (82%) |
3 (7.9%) |
38 (100%) |
|
| other |
5 (14%) |
28 (76%) |
4 (11%) |
37 (100%) |
|
| tmple (<20m) |
2 (22%) |
4 (44%) |
3 (33%) |
9 (100%) |
|
| Total |
16 (14%) |
91 (77%) |
11 (9.3%) |
118 (100%) |
|
# 2. Resource Type vs Sex
df_prox_long %>%
filter(Sex != "Unknown") %>%
select(Resource, Sex) %>%
tbl_cross(
row = Resource,
col = Sex,
percent = "row",
label = list(Resource ~ "Resource Category", Sex ~ "Sex")
) %>%
add_p(test = "chisq.test.no.correct") %>%
bold_labels()
## The following warnings were returned during `add_p()`:
## ! For variable `Resource` (`Sex`) and "statistic", "p.value", and "parameter"
## statistics: Chi-squared approximation may be incorrect
|
Sex
|
Total |
p-value |
| Female |
Male |
| Resource Category |
|
|
|
0.6 |
| eatry (<20m) |
19 (73%) |
7 (27%) |
26 (100%) |
|
| garb (<20m) |
5 (71%) |
2 (29%) |
7 (100%) |
|
| no (<20m) |
20 (56%) |
16 (44%) |
36 (100%) |
|
| other |
18 (55%) |
15 (45%) |
33 (100%) |
|
| tmple (<20m) |
5 (56%) |
4 (44%) |
9 (100%) |
|
| Total |
67 (60%) |
44 (40%) |
111 (100%) |
|