library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(dplyr)
library(ggplot2)
library(pastecs)
## 
## Attaching package: 'pastecs'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## The following object is masked from 'package:tidyr':
## 
##     extract
library(readr)
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following object is masked from 'package:purrr':
## 
##     some
library(broom)

# Data Set ----

# Load the intake records from the CSV in the working directory
acs <- read.csv(file = "acs_animals.csv")

# Inspect the column names and types of the loaded data frame
str(object = acs)
## 'data.frame':    62235 obs. of  19 variables:
##  $ Intake.Fiscal.Year            : int  2024 2024 2024 2024 2024 2024 2024 2024 2024 2024 ...
##  $ Intake.Fiscal.Period          : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ Intake.Date                   : chr  "10/1/2023" "10/1/2023" "10/1/2023" "10/1/2023" ...
##  $ Intake.Section                : chr  "OVER-THE-COUNTER (OTC)" "OVER-THE-COUNTER (OTC)" "FIELD" "FIELD" ...
##  $ Intake.Subsection             : chr  "Quarantine" "Stray" "Stray" "Stray" ...
##  $ Animal.ID                     : chr  "A680497" "A685667" "A689476" "A689477" ...
##  $ Kennel.Identity               : int  545763 545750 545744 545745 545746 545747 545748 545749 545752 545753 ...
##  $ Species                       : chr  "DOG" "CAT" "DOG" "DOG" ...
##  $ Current.Sex                   : chr  "N" "N" "M" "F" ...
##  $ Council.District              : chr  "5" "10" "2" "5" ...
##  $ Zip.Code                      : int  78204 78217 78203 78225 78201 78237 78237 78233 78226 78210 ...
##  $ Animal.Size                   : chr  "MED" "SMALL" "LARGE" "LARGE" ...
##  $ Breed.Group                   : chr  "MASTIFF" "SHORTHAIR" "MASTIFF" "SETTER/RETRIEVE" ...
##  $ Primary.Color                 : chr  "WHITE" "RED" "GRAY" "BLACK" ...
##  $ Animal.Age.Group              : chr  "1 Yr - 5 Yr" "7 wks - 6 Mo" "1 Yr - 5 Yr" "1 Yr - 5 Yr" ...
##  $ Sterilization.Status.at.Intake: chr  "Sterilized at Intake" "Sterilized at Intake" "Intact at Intake" "Intact at Intake" ...
##  $ Owned.Status                  : chr  "Owner Infomation Found" "No Owner Info Found" "No Owner Info Found" "Owner Infomation Found" ...
##  $ Sterilized_DV                 : int  1 1 0 0 0 0 0 0 0 0 ...
##  $ Stray_DV                      : int  0 1 1 0 1 1 1 0 1 0 ...
# Preview the first six rows (head()'s default count, spelled out)
head(acs, n = 6L)
##   Intake.Fiscal.Year Intake.Fiscal.Period Intake.Date         Intake.Section
## 1               2024                    1   10/1/2023 OVER-THE-COUNTER (OTC)
## 2               2024                    1   10/1/2023 OVER-THE-COUNTER (OTC)
## 3               2024                    1   10/1/2023                  FIELD
## 4               2024                    1   10/1/2023                  FIELD
## 5               2024                    1   10/1/2023                  FIELD
## 6               2024                    1   10/1/2023                  FIELD
##   Intake.Subsection Animal.ID Kennel.Identity Species Current.Sex
## 1        Quarantine   A680497          545763     DOG           N
## 2             Stray   A685667          545750     CAT           N
## 3             Stray   A689476          545744     DOG           M
## 4             Stray   A689477          545745     DOG           F
## 5             Stray   A689478          545746     DOG           N
## 6             Stray   A689480          545747     DOG           M
##   Council.District Zip.Code Animal.Size     Breed.Group Primary.Color
## 1                5    78204         MED         MASTIFF         WHITE
## 2               10    78217       SMALL       SHORTHAIR           RED
## 3                2    78203       LARGE         MASTIFF          GRAY
## 4                5    78225       LARGE SETTER/RETRIEVE         BLACK
## 5                7    78201       LARGE        SHEPHERD           TAN
## 6                5    78237         MED SETTER/RETRIEVE         BLACK
##   Animal.Age.Group Sterilization.Status.at.Intake           Owned.Status
## 1      1 Yr - 5 Yr           Sterilized at Intake Owner Infomation Found
## 2     7 wks - 6 Mo           Sterilized at Intake    No Owner Info Found
## 3      1 Yr - 5 Yr               Intact at Intake    No Owner Info Found
## 4      1 Yr - 5 Yr               Intact at Intake Owner Infomation Found
## 5      1 Yr - 5 Yr               Intact at Intake    No Owner Info Found
## 6      1 Yr - 5 Yr               Intact at Intake    No Owner Info Found
##   Sterilized_DV Stray_DV
## 1             1        0
## 2             1        1
## 3             0        1
## 4             0        0
## 5             0        1
## 6             0        1
# Compact column-wise preview of the loaded data frame. Pass the data frame
# itself: glimpse() on the file-name string only describes that one string
# and never touches the data.
glimpse(acs)
# Clean the dataset: keep only the variables used by the model (the outcome
# Sterilized_DV and the predictors Species and Zip.Code) and drop any row
# that is missing one of them. Column names are left unchanged because the
# model formula refers to them by these exact names.
acs_clean <- acs %>%
  select(Sterilized_DV, Species, Zip.Code) %>%
  filter(
    !is.na(Sterilized_DV),
    !is.na(Species),
    !is.na(Zip.Code)
  )

# Fit a logistic regression (binomial GLM) of the 0/1 sterilization indicator
# on species and zip code.
# NOTE(review): Zip.Code is read in as an integer column, so it enters the
# model as a single numeric slope rather than as per-zip categories. Confirm
# that is intended; otherwise use factor(Zip.Code) in the formula.
model <- glm(
  formula = Sterilized_DV ~ Species + Zip.Code,
  family = binomial,
  data = acs_clean
)

# Print the coefficient table, deviances and AIC for the fitted model
summary(model)
## 
## Call:
## glm(formula = Sterilized_DV ~ Species + Zip.Code, family = binomial, 
##     data = acs_clean)
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -1.150e+00  8.143e-01  -1.413   0.1578    
## SpeciesDOG   1.644e-01  3.472e-02   4.734  2.2e-06 ***
## Zip.Code    -1.886e-05  1.041e-05  -1.812   0.0701 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 33309  on 62234  degrees of freedom
## Residual deviance: 33284  on 62232  degrees of freedom
## AIC: 33290
## 
## Number of Fisher Scoring iterations: 5