library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(dplyr)
library(ggplot2)
library(pastecs)
## 
## Attaching package: 'pastecs'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## The following object is masked from 'package:tidyr':
## 
##     extract

Data Set

# Read the animal intake records from the CSV file (path is relative to the
# working directory).
acs <- read.csv(file = "acs_animals.csv")

# Show each column's type together with its first few values.
str(object = acs)
## 'data.frame':    62235 obs. of  19 variables:
##  $ Intake.Fiscal.Year            : int  2024 2024 2024 2024 2024 2024 2024 2024 2024 2024 ...
##  $ Intake.Fiscal.Period          : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Intake.Date                   : chr  "10/1/2023" "10/1/2023" "10/1/2023" "10/1/2023" ...
##  $ Intake.Section                : chr  "OVER-THE-COUNTER (OTC)" "OVER-THE-COUNTER (OTC)" "FIELD" "FIELD" ...
##  $ Intake.Subsection             : chr  "Quarantine" "Stray" "Stray" "Stray" ...
##  $ Animal.ID                     : chr  "A680497" "A685667" "A689476" "A689477" ...
##  $ Kennel.Identity               : int  545763 545750 545744 545745 545746 545747 545748 545749 545752 545753 ...
##  $ Species                       : chr  "DOG" "CAT" "DOG" "DOG" ...
##  $ Current.Sex                   : chr  "N" "N" "M" "F" ...
##  $ Council.District              : chr  "5" "10" "2" "5" ...
##  $ Zip.Code                      : int  78204 78217 78203 78225 78201 78237 78237 78233 78226 78210 ...
##  $ Animal.Size                   : chr  "MED" "SMALL" "LARGE" "LARGE" ...
##  $ Breed.Group                   : chr  "MASTIFF" "SHORTHAIR" "MASTIFF" "SETTER/RETRIEVE" ...
##  $ Primary.Color                 : chr  "WHITE" "RED" "GRAY" "BLACK" ...
##  $ Animal.Age.Group              : chr  "1 Yr - 5 Yr" "7 wks - 6 Mo" "1 Yr - 5 Yr" "1 Yr - 5 Yr" ...
##  $ Sterilization.Status.at.Intake: chr  "Sterilized at Intake" "Sterilized at Intake" "Intact at Intake" "Intact at Intake" ...
##  $ Owned.Status                  : chr  "Owner Infomation Found" "No Owner Info Found" "No Owner Info Found" "Owner Infomation Found" ...
##  $ Sterilized_DV                 : int  1 1 0 0 0 0 0 0 0 0 ...
##  $ Stray_DV                      : int  0 1 1 0 1 1 1 0 1 0 ...
# Preview the first six rows of the intake data.
head(x = acs, n = 6L)
##   Intake.Fiscal.Year Intake.Fiscal.Period Intake.Date         Intake.Section
## 1               2024                    1   10/1/2023 OVER-THE-COUNTER (OTC)
## 2               2024                    1   10/1/2023 OVER-THE-COUNTER (OTC)
## 3               2024                    1   10/1/2023                  FIELD
## 4               2024                    1   10/1/2023                  FIELD
## 5               2024                    1   10/1/2023                  FIELD
## 6               2024                    1   10/1/2023                  FIELD
##   Intake.Subsection Animal.ID Kennel.Identity Species Current.Sex
## 1        Quarantine   A680497          545763     DOG           N
## 2             Stray   A685667          545750     CAT           N
## 3             Stray   A689476          545744     DOG           M
## 4             Stray   A689477          545745     DOG           F
## 5             Stray   A689478          545746     DOG           N
## 6             Stray   A689480          545747     DOG           M
##   Council.District Zip.Code Animal.Size     Breed.Group Primary.Color
## 1                5    78204         MED         MASTIFF         WHITE
## 2               10    78217       SMALL       SHORTHAIR           RED
## 3                2    78203       LARGE         MASTIFF          GRAY
## 4                5    78225       LARGE SETTER/RETRIEVE         BLACK
## 5                7    78201       LARGE        SHEPHERD           TAN
## 6                5    78237         MED SETTER/RETRIEVE         BLACK
##   Animal.Age.Group Sterilization.Status.at.Intake           Owned.Status
## 1      1 Yr - 5 Yr           Sterilized at Intake Owner Infomation Found
## 2     7 wks - 6 Mo           Sterilized at Intake    No Owner Info Found
## 3      1 Yr - 5 Yr               Intact at Intake    No Owner Info Found
## 4      1 Yr - 5 Yr               Intact at Intake Owner Infomation Found
## 5      1 Yr - 5 Yr               Intact at Intake    No Owner Info Found
## 6      1 Yr - 5 Yr               Intact at Intake    No Owner Info Found
##   Sterilized_DV Stray_DV
## 1             1        0
## 2             1        1
## 3             0        1
## 4             0        0
## 5             0        1
## 6             0        1

Numeric Variables and Correlations

# Columns compared against each other in the correlation matrix.
# NOTE(review): Zip.Code is stored as an integer, but it looks like a postal
# code rather than a measured quantity; confirm that its correlations are
# meant to be interpreted.
vars <- acs[c("Sterilized_DV", "Stray_DV", "Zip.Code")]

# Pearson correlation matrix; rows containing any missing value are dropped
# before the coefficients are computed.
cor_matrix <- cor(vars, method = "pearson", use = "complete.obs")
print(cor_matrix)
##               Sterilized_DV      Stray_DV      Zip.Code
## Sterilized_DV   1.000000000 -0.2444498826 -0.0074377490
## Stray_DV       -0.244449883  1.0000000000  0.0006778959
## Zip.Code       -0.007437749  0.0006778959  1.0000000000

Variable Visualization

# Scatterplot matrix showing every pair of the selected columns.
pairs(x = vars, main = "Pairwise Scatterplots of Selected Variables")

Correlated Variables

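This section tests the relationship between sterilization status and stray status. Because both variables are binary (0/1), a rank-based measure such as Spearman’s method is a reasonable choice; note that for two binary variables Spearman’s ρ is identical to Pearson’s r (the phi coefficient), which is why the estimate below matches the value in the correlation matrix above.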

# Spearman rank correlation between sterilization and stray status.
# Both columns are 0/1 indicators, so the ranks are heavily tied and an exact
# p-value cannot be computed. exact = FALSE requests the asymptotic
# approximation up front instead of letting cor.test() fall back to it with a
# "Cannot compute exact p-value with ties" warning; the estimate, test
# statistic and p-value are the same either way.
cor_test <- cor.test(
  acs$Sterilized_DV, acs$Stray_DV,
  method = "spearman",
  exact = FALSE
)
print(cor_test)
## 
##  Spearman's rank correlation rho
## 
## data:  acs$Sterilized_DV and acs$Stray_DV
## S = 4.9995e+13, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
##        rho 
## -0.2444499

Findings and Justification

The correlation analysis examined the relationship between sterilization status and stray status. Spearman’s method was used because both variables are binary (0/1) and therefore not normally distributed, so a non-parametric, rank-based method is a defensible choice.

The results showed a weak-to-moderate negative correlation (ρ = -0.24, p < 0.001), indicating that stray animals are significantly less likely to be sterilized at intake than owned animals. This finding is consistent with the idea that unowned or community animals contribute more to uncontrolled reproduction in the city, although a correlation alone does not establish that link.

Overall, the use of Spearman’s method was justified given the categorical nature of the data, and the results highlight a meaningful disparity in sterilization rates between stray and owned animal populations.