Report_kottakkal

1. Population Estimate

n1 <- sum(df$encounter_history %in% c("10", "11"))
n2 <- sum(df$encounter_history %in% c("01", "11"))
m2 <- sum(df$encounter_history == "11")
N_est <- (((n1 + 1) * (n2 + 1)) / (m2 + 1)) - 1
variance <- ((n1 + 1) * (n2 + 1) * (n1 - m2) * (n2 - m2)) / (((m2 + 1)^2) * (m2 + 2))
se <- sqrt(variance)

tab_pop <- data.frame(
  Metric = c("Marked (Day 1)", "Resighted (Day 2)", "Matches", "Estimated Pop (N)", "Lower 95% CI", "Upper 95% CI"),
  Value = c(n1, n2, m2, round(N_est), round(N_est - 1.96*se), round(N_est + 1.96*se))
)
kable(tab_pop, caption = "Chapman Mark-Resight Population Estimate")

Chapman Mark-Resight Population Estimate
Metric	Value
Marked (Day 1)	45
Resighted (Day 2)	68
Matches	26
Estimated Pop (N)	117
Lower 95% CI	95
Upper 95% CI	138

# 2. Age and Sex distribution
tab_age_sex <- df %>% filter(Sex != "Unknown", Age != "Unknown") %>%
tabyl(Age, Sex) %>% adorn_totals(c("row", "col")) %>%
adorn_percentages("all") %>% adorn_pct_formatting(digits = 1) %>%
adorn_ns()

kable(tab_age_sex, caption = "Age and Sex Demographics")

Age and Sex Demographics
Age	Female	Male	Total
Adult	33.8% (27)	26.2% (21)	60.0% (48)
Juvenile	18.8% (15)	3.8% (3)	22.5% (18)
Pup	8.8% (7)	8.8% (7)	17.5% (14)
Total	61.3% (49)	38.8% (31)	100.0% (80)

ggplot(df %>% filter(Sex != "Unknown", Age != "Unknown"), aes(x = Age,
fill = Sex)) + geom_bar(position = "dodge", color = "black") +
scale_fill_manual(values = c("Female" = "#FF9999", "Male" =
"#99CCFF")) + theme_minimal() + labs(title = "Age and Sex Distribution",
x = "Age Group", y = "Count")

# 3. Coat patterns and colour distribution
tab_coat <- df %>% tabyl(coat_patrn, prm_ct_clr) %>%
adorn_totals(c("row", "col")) 
kable(tab_coat, caption = "Primary Coat
Colour by Pattern")

Primary Coat Colour by Pattern
coat_patrn	Black	Brown	other	White	NA_	Total
biclrd	16	16	1	9	0	42
solid	2	27	3	8	0	40
triclrd	2	1	0	1	0	4
NA	0	0	0	0	1	1
Total	20	44	4	18	1	87

ggplot(df, aes(x = coat_patrn, fill = prm_ct_clr)) + geom_bar(position =
"fill", color="black") + scale_y_continuous(labels = scales::percent) +
theme_minimal() + labs(title = "Primary Coat Colour Distribution by Coat
Pattern", x = "Coat Pattern", y = "Proportion", fill="Primary Color")

# 4. Size vs Sex

tab_size <- df %>% filter(Sex != "Unknown") %>% tabyl(size, Sex) %>%
adorn_totals("row") 
kable(tab_size, caption = "Dog Size Distribution by
Sex")

Dog Size Distribution by Sex
size	Female	Male
larg	14	15
medium	27	10
small	8	6
Total	49	31

# Run the test
chi_sq_size_sex <- chisq.test(table(df$size, df$Sex), simulate.p.value = TRUE)
# Create a formal reporting table
stat_tab_size <- data.frame(
  "Test" = "Chi-Square (Simulated)",
  "Variables" = "Size vs Sex",
  "Chi_Square_Value" = round(chi_sq_size_sex$statistic, 3),
  "P_Value" = format.pval(chi_sq_size_sex$p.value, eps = 0.001)
)
# Print it to the report
kable(stat_tab_size, caption = "Table 4a: Statistical Correlation for Size and Sex", row.names = FALSE)

Table 4a: Statistical Correlation for Size and Sex
Test	Variables	Chi_Square_Value	P_Value
Chi-Square (Simulated)	Size vs Sex	5.317	0.26187

# 5. Sterilisation coverage

tab_abc_sex <- df %>% filter(Sex != "Unknown", !is.na(ear_ntch)) %>%
tabyl(Sex, ear_ntch) %>% adorn_totals("row") %>%
adorn_percentages("row") %>% adorn_pct_formatting(digits = 1) %>%
adorn_ns()

kable(tab_abc_sex, caption = "Sterilisation (Ear Notch) Coverage by
Sex")

Sterilisation (Ear Notch) Coverage by Sex
Sex	absnt	not_ver
Female	100.0% (49)	0.0% (0)
Male	96.7% (29)	3.3% (1)
Total	98.7% (78)	1.3% (1)

ggplot(df %>% filter(Sex != "Unknown", !is.na(ear_ntch)), aes(x = Sex,
fill = ear_ntch)) + geom_bar(position = "fill", color="black") +
scale_fill_manual(values = c("absnt" = "#EF5350", "prsnt" =
"#66BB6A")) + scale_y_continuous(labels = scales::percent) +
theme_minimal() + labs(title = "Sterilisation Coverage by Sex", x =
"Sex", y = "Percentage", fill="Ear Notch")

## 6. Sterilisation by Body Condition & Correlation


# 1. Create a filtered dataset for accurate testing
df_body_abc <- df %>% filter(!is.na(bdy_cdtn), !is.na(ear_ntch), bdy_cdtn != "", ear_ntch != "")

# 2. Generate the summary table
tab_abc_body <- df_body_abc %>% 
  tabyl(bdy_cdtn, ear_ntch) %>% 
  adorn_percentages("row") %>% 
  adorn_pct_formatting(digits = 1) %>% 
  adorn_ns()

kable(tab_abc_body, caption = "Sterilisation Status vs Body Condition")

Sterilisation Status vs Body Condition
bdy_cdtn	absnt	not_ver
fair	100.0% (10)	0.0% (0)
good	97.1% (66)	2.9% (2)
poor	100.0% (7)	0.0% (0)

# 3. Perform the Chi-Square test for correlation
chi_sq_body_abc <- chisq.test(table(df_body_abc$bdy_cdtn, df_body_abc$ear_ntch))

## Warning in stats::chisq.test(x, y, ...): Chi-squared approximation may be
## incorrect

stat_tab_body <- data.frame(
  "Test" = "Chi-Square (Simulated)",
  "Variables" = "Body Condition vs Sterilisation",
  "Chi_Square_Value" = round(chi_sq_body_abc$statistic, 3),
  "P_Value" = format.pval(chi_sq_body_abc$p.value, eps = 0.001)
)
kable(stat_tab_body, caption = "Table 6a: Statistical Correlation for Body Condition vs Sterilisation", row.names = FALSE)

Table 6a: Statistical Correlation for Body Condition vs Sterilisation
Test	Variables	Chi_Square_Value	P_Value
Chi-Square (Simulated)	Body Condition vs Sterilisation	0.512	0.77412

# 4. Generate the plot
ggplot(df_body_abc, aes(x = bdy_cdtn, fill = ear_ntch)) +
  geom_bar(position = "fill", color="black") +
  scale_fill_manual(values = c("absnt" = "#EF5350", "prsnt" = "#66BB6A")) +
  theme_minimal() +
  labs(title = "Sterilisation Coverage vs Body Condition", x = "Body Condition", y = "Proportion", fill="Ear Notch")

## 7. Proximity to Resources (Spatial Dependency)
tab_prox <- df %>%
  select(starts_with("prxm_")) %>%
  summarize(across(everything(), ~sum(. == TRUE, na.rm = TRUE))) %>%
  pivot_longer(cols = everything(), names_to = "Resource", values_to = "Dog_Count") %>%
  filter(Dog_Count > 0) %>% # Filter out resources with zero dogs for an accurate statistical test
  arrange(desc(Dog_Count))

# Clean up the resource names for the chart
tab_prox$Resource <- gsub("prxm_", "", tab_prox$Resource)
tab_prox$Resource <- gsub("_20m?", " (<20m)", tab_prox$Resource) 

kable(tab_prox, caption = "Proximity to Resources")

Proximity to Resources
Resource	Dog_Count
no (<20m)	38
other	37
eatry (<20m)	28
tmple (<20m)	9
garb (<20m)	7

# Perform Chi-Square Goodness of Fit test
chi_sq_prox <- chisq.test(tab_prox$Dog_Count)
stat_tab_prox <- data.frame(
  "Test" = "Chi-Square Goodness-of-Fit (Simulated)",
  "Variable" = "Resource Proximity",
  "Chi_Square_Value" = round(chi_sq_prox$statistic, 3),
  "P_Value" = format.pval(chi_sq_prox$p.value, eps = 0.001)
)
kable(stat_tab_prox, caption = "Table 7a: Statistical Goodness-of-Fit for Resource Preference", row.names = FALSE)

Table 7a: Statistical Goodness-of-Fit for Resource Preference
Test	Variable	Chi_Square_Value	P_Value
Chi-Square Goodness-of-Fit (Simulated)	Resource Proximity	37.597	< 0.001

ggplot(tab_prox, aes(x = reorder(Resource, Dog_Count), y = Dog_Count)) +
  geom_col(fill = "#FFA726", color = "black") +
  coord_flip() +
  theme_minimal() +
  labs(title = "Proximity to Resources", x = "Resource Category", y = "Number of Dogs Observed")

8. Advanced Demographic & Health Correlations

# Helper function to calculate Cramér's V (Strength of Correlation)
get_cramers_v <- function(tbl) {
  chi_sq <- suppressWarnings(chisq.test(tbl)$statistic)
  n <- sum(tbl)
  v <- sqrt(chi_sq / (n * min(nrow(tbl) - 1, ncol(tbl) - 1)))
  return(round(v, 3))
}

# 1. Body Condition vs Sex
df_bc_sex <- df %>% filter(Sex != "Unknown", !is.na(bdy_cdtn), bdy_cdtn != "")
tbl_bc_sex <- table(df_bc_sex$bdy_cdtn, df_bc_sex$Sex)
test_bc_sex <- chisq.test(tbl_bc_sex, simulate.p.value = TRUE)

# 2. Body Condition vs Age
df_bc_age <- df %>% filter(Age != "Unknown", !is.na(bdy_cdtn), bdy_cdtn != "")
tbl_bc_age <- table(df_bc_age$bdy_cdtn, df_bc_age$Age)
test_bc_age <- chisq.test(tbl_bc_age, simulate.p.value = TRUE)

# 3. Sterilisation vs Sex
df_abc_sex <- df %>% filter(Sex != "Unknown", !is.na(ear_ntch), ear_ntch != "")
tbl_abc_sex <- table(df_abc_sex$ear_ntch, df_abc_sex$Sex)
test_abc_sex <- chisq.test(tbl_abc_sex, simulate.p.value = TRUE)

# 4. Coat Color vs Pattern
df_coat <- df %>% filter(!is.na(prm_ct_clr), prm_ct_clr != "", !is.na(coat_patrn), coat_patrn != "")
tbl_coat <- table(df_coat$prm_ct_clr, df_coat$coat_patrn)
test_coat <- chisq.test(tbl_coat, simulate.p.value = TRUE)

# Combine into a formal table
adv_health_stats <- data.frame(
  "Variables_Tested" = c("Body Condition vs Sex", "Body Condition vs Age", "Sterilisation vs Sex", "Coat Color vs Pattern"),
  "Chi_Square_Value" = c(round(test_bc_sex$statistic, 2), round(test_bc_age$statistic, 2), round(test_abc_sex$statistic, 2), round(test_coat$statistic, 2)),
  "P_Value" = c(format.pval(test_bc_sex$p.value, eps = 0.001), format.pval(test_bc_age$p.value, eps = 0.001), format.pval(test_abc_sex$p.value, eps = 0.001), format.pval(test_coat$p.value, eps = 0.001)),
  "Cramers_V_Strength" = c(get_cramers_v(tbl_bc_sex), get_cramers_v(tbl_bc_age), get_cramers_v(tbl_abc_sex), get_cramers_v(tbl_coat))
)

kable(adv_health_stats, caption = "Table 8: Demographic & Health Correlations", row.names = FALSE)

Table 8: Demographic & Health Correlations
Variables_Tested	Chi_Square_Value	P_Value	Cramers_V_Strength
Body Condition vs Sex	0.80	0.72364	0.100
Body Condition vs Age	2.64	0.67116	0.124
Sterilisation vs Sex	1.65	0.36932	0.028
Coat Color vs Pattern	16.37	0.014993	0.308

get_cramers_v <- function(tbl) {
  chi_sq <- suppressWarnings(chisq.test(tbl)$statistic)
  n <- sum(tbl)
  v <- sqrt(chi_sq / (n * min(nrow(tbl) - 1, ncol(tbl) - 1)))
  return(round(v, 3))
}
# Create a specialized dataset to handle multiple proximity tags per dog
df_prox_long <- df %>%
  mutate(Dog_ID = row_number()) %>%
  pivot_longer(cols = starts_with("prxm_"), names_to = "Resource", values_to = "Present") %>%
  filter(Present == TRUE) %>%
  mutate(Resource = gsub("prxm_", "", Resource)) %>%
  mutate(Resource = gsub("_20m?", " (<20m)", Resource))

# 1. Proximity vs Body Condition
df_prox_bc <- df_prox_long %>% filter(!is.na(bdy_cdtn), bdy_cdtn != "")
tbl_prox_bc <- table(df_prox_bc$Resource, df_prox_bc$bdy_cdtn)
test_prox_bc <- chisq.test(tbl_prox_bc, simulate.p.value = TRUE)

# 2. Proximity vs Sex
df_prox_sex <- df_prox_long %>% filter(Sex != "Unknown")
tbl_prox_sex <- table(df_prox_sex$Resource, df_prox_sex$Sex)
test_prox_sex <- chisq.test(tbl_prox_sex, simulate.p.value = TRUE)

# 3. Proximity vs Age
df_prox_age <- df_prox_long %>% filter(Age != "Unknown")
tbl_prox_age <- table(df_prox_age$Resource, df_prox_age$Age)
test_prox_age <- chisq.test(tbl_prox_age, simulate.p.value = TRUE)

# 4. Proximity vs Sterilisation
df_prox_abc <- df_prox_long %>% filter(!is.na(ear_ntch), ear_ntch != "")
tbl_prox_abc <- table(df_prox_abc$Resource, df_prox_abc$ear_ntch)
test_prox_abc <- chisq.test(tbl_prox_abc, simulate.p.value = TRUE)

# Combine into a formal table
adv_prox_stats <- data.frame(
  "Variables_Tested" = c("Proximity vs Body Condition", "Proximity vs Sex", "Proximity vs Age", "Proximity vs Sterilisation"),
  "Chi_Square_Value" = c(round(test_prox_bc$statistic, 2), round(test_prox_sex$statistic, 2), round(test_prox_age$statistic, 2), round(test_prox_abc$statistic, 2)),
  "P_Value" = c(format.pval(test_prox_bc$p.value, eps = 0.001), format.pval(test_prox_sex$p.value, eps = 0.001), format.pval(test_prox_age$p.value, eps = 0.001), format.pval(test_prox_abc$p.value, eps = 0.001)),
  "Cramers_V_Strength" = c(get_cramers_v(tbl_prox_bc), get_cramers_v(tbl_prox_sex), get_cramers_v(tbl_prox_age), get_cramers_v(tbl_prox_abc))
)

kable(adv_prox_stats, caption = "Table 9: Resource Proximity Correlations", row.names = FALSE)

Table 9: Resource Proximity Correlations
Variables_Tested	Chi_Square_Value	P_Value	Cramers_V_Strength
Proximity vs Body Condition	9.58	0.29385	0.201
Proximity vs Sex	3.02	0.57771	0.165
Proximity vs Age	15.65	0.030485	0.258
Proximity vs Sterilisation	4.58	0.3913	0.198

8. Body Condition vs Sterilisation (Journal Format)

# Filter for clean data
df_clean <- df %>% filter(!is.na(bdy_cdtn), bdy_cdtn != "", !is.na(ear_ntch), ear_ntch != "")

# Create the journal-ready table
df_clean %>%
  select(bdy_cdtn, ear_ntch) %>%
  tbl_cross(
    row = bdy_cdtn, 
    col = ear_ntch, 
    percent = "row",
    label = list(bdy_cdtn ~ "Body Condition", ear_ntch ~ "Sterilisation (Ear Notch)")
  ) %>%
  add_p() %>% # This automatically runs the Chi-Square test and adds the P-value!
  bold_labels()

	Sterilisation (Ear Notch)		Total	p-value¹
	absnt	not_ver	Total	p-value¹
Body Condition				>0.9
fair	10 (100%)	0 (0%)	10 (100%)
good	66 (97%)	2 (2.9%)	68 (100%)
poor	7 (100%)	0 (0%)	7 (100%)
Total	83 (98%)	2 (2.4%)	85 (100%)
¹ Fisher’s exact test

8. Association Analysis: Health & Demographics

#These tables show the actual distribution of dogs across categories. The P-value (Pearson's Chi-square with Monte Carlo simulation) indicates if the relationship is statistically significant.
# 1. Body Condition vs Sterilisation Status
df %>%
  filter(!is.na(bdy_cdtn), bdy_cdtn != "", !is.na(ear_ntch), ear_ntch != "") %>%
  select(bdy_cdtn, ear_ntch) %>%
  tbl_cross(
    row = bdy_cdtn, 
    col = ear_ntch, 
    percent = "row",
    label = list(bdy_cdtn ~ "Body Condition", ear_ntch ~ "Ear Notch (ABC Status)")
  ) %>%
  add_p(test = "chisq.test.no.correct") %>% # Simplified test call
  bold_labels()

## The following warnings were returned during `add_p()`:
## ! For variable `bdy_cdtn` (`ear_ntch`) and "statistic", "p.value", and
##   "parameter" statistics: Chi-squared approximation may be incorrect

	Ear Notch (ABC Status)		Total	p-value¹
	absnt	not_ver	Total	p-value¹
Body Condition				0.8
fair	10 (100%)	0 (0%)	10 (100%)
good	66 (97%)	2 (2.9%)	68 (100%)
poor	7 (100%)	0 (0%)	7 (100%)
Total	83 (98%)	2 (2.4%)	85 (100%)
¹ Pearson’s Chi-squared test

# 2. Body Condition vs Sex
df %>%
  filter(!is.na(bdy_cdtn), bdy_cdtn != "", Sex != "Unknown") %>%
  select(bdy_cdtn, Sex) %>%
  tbl_cross(
    row = bdy_cdtn, 
    col = Sex, 
    percent = "row",
    label = list(bdy_cdtn ~ "Body Condition", Sex ~ "Sex")
  ) %>%
  add_p(test = "chisq.test.no.correct") %>%
  bold_labels()

## The following warnings were returned during `add_p()`:
## ! For variable `bdy_cdtn` (`Sex`) and "statistic", "p.value", and "parameter"
##   statistics: Chi-squared approximation may be incorrect

	Sex		Total	p-value¹
	Female	Male	Total	p-value¹
Body Condition				0.7
fair	7 (70%)	3 (30%)	10 (100%)
good	37 (59%)	26 (41%)	63 (100%)
poor	5 (71%)	2 (29%)	7 (100%)
Total	49 (61%)	31 (39%)	80 (100%)
¹ Pearson’s Chi-squared test

# 3. Size vs Sex
df %>%
  filter(!is.na(size), size != "", Sex != "Unknown") %>%
  select(size, Sex) %>%
  tbl_cross(
    row = size, 
    col = Sex, 
    percent = "row",
    label = list(size ~ "Body Size", Sex ~ "Sex")
  ) %>%
  add_p(test = "chisq.test.no.correct") %>%
  bold_labels()

	Sex		Total	p-value¹
	Female	Male	Total	p-value¹
Body Size				0.12
larg	14 (48%)	15 (52%)	29 (100%)
medium	27 (73%)	10 (27%)	37 (100%)
small	8 (57%)	6 (43%)	14 (100%)
Total	49 (61%)	31 (39%)	80 (100%)
¹ Pearson’s Chi-squared test

# 1. Resource Type vs Body Condition
df_prox_long %>%
  filter(!is.na(bdy_cdtn), bdy_cdtn != "") %>%
  select(Resource, bdy_cdtn) %>%
  tbl_cross(
    row = Resource, 
    col = bdy_cdtn, 
    percent = "row",
    label = list(Resource ~ "Resource Category", bdy_cdtn ~ "Body Condition Score")
  ) %>%
  add_p(test = "chisq.test.no.correct") %>%
  bold_labels()

## The following warnings were returned during `add_p()`:
## ! For variable `Resource` (`bdy_cdtn`) and "statistic", "p.value", and
##   "parameter" statistics: Chi-squared approximation may be incorrect

	Body Condition Score			Total	p-value¹
	fair	good	poor	Total	p-value¹
Resource Category					0.3
eatry (<20m)	4 (15%)	22 (81%)	1 (3.7%)	27 (100%)
garb (<20m)	1 (14%)	6 (86%)	0 (0%)	7 (100%)
no (<20m)	4 (11%)	31 (82%)	3 (7.9%)	38 (100%)
other	5 (14%)	28 (76%)	4 (11%)	37 (100%)
tmple (<20m)	2 (22%)	4 (44%)	3 (33%)	9 (100%)
Total	16 (14%)	91 (77%)	11 (9.3%)	118 (100%)
¹ Pearson’s Chi-squared test

# 2. Resource Type vs Sex
df_prox_long %>%
  filter(Sex != "Unknown") %>%
  select(Resource, Sex) %>%
  tbl_cross(
    row = Resource, 
    col = Sex, 
    percent = "row",
    label = list(Resource ~ "Resource Category", Sex ~ "Sex")
  ) %>%
  add_p(test = "chisq.test.no.correct") %>%
  bold_labels()

## The following warnings were returned during `add_p()`:
## ! For variable `Resource` (`Sex`) and "statistic", "p.value", and "parameter"
##   statistics: Chi-squared approximation may be incorrect

	Sex		Total	p-value¹
	Female	Male	Total	p-value¹
Resource Category				0.6
eatry (<20m)	19 (73%)	7 (27%)	26 (100%)
garb (<20m)	5 (71%)	2 (29%)	7 (100%)
no (<20m)	20 (56%)	16 (44%)	36 (100%)
other	18 (55%)	15 (45%)	33 (100%)
tmple (<20m)	5 (56%)	4 (44%)	9 (100%)
Total	67 (60%)	44 (40%)	111 (100%)
¹ Pearson’s Chi-squared test

Report_kottakkal_phase1

Dr Narayanan. S. B.

2026-04-29

1. Population Estimate

8. Advanced Demographic & Health Correlations

8. Body Condition vs Sterilisation (Journal Format)

8. Association Analysis: Health & Demographics