library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(dplyr)
library(ggplot2)
library(pastecs)
##
## Attaching package: 'pastecs'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
## The following object is masked from 'package:tidyr':
##
## extract
library(readr)
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(broom)
# Data Set ----
# Load the animal intake records from the CSV file in the working directory.
acs <- read.csv(file = "acs_animals.csv")
# Inspect the column types and the first few values of every column.
str(object = acs)
## 'data.frame': 62235 obs. of 19 variables:
## $ Intake.Fiscal.Year : int 2024 2024 2024 2024 2024 2024 2024 2024 2024 2024 ...
## $ Intake.Fiscal.Period : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Intake.Date : chr "10/1/2023" "10/1/2023" "10/1/2023" "10/1/2023" ...
## $ Intake.Section : chr "OVER-THE-COUNTER (OTC)" "OVER-THE-COUNTER (OTC)" "FIELD" "FIELD" ...
## $ Intake.Subsection : chr "Quarantine" "Stray" "Stray" "Stray" ...
## $ Animal.ID : chr "A680497" "A685667" "A689476" "A689477" ...
## $ Kennel.Identity : int 545763 545750 545744 545745 545746 545747 545748 545749 545752 545753 ...
## $ Species : chr "DOG" "CAT" "DOG" "DOG" ...
## $ Current.Sex : chr "N" "N" "M" "F" ...
## $ Council.District : chr "5" "10" "2" "5" ...
## $ Zip.Code : int 78204 78217 78203 78225 78201 78237 78237 78233 78226 78210 ...
## $ Animal.Size : chr "MED" "SMALL" "LARGE" "LARGE" ...
## $ Breed.Group : chr "MASTIFF" "SHORTHAIR" "MASTIFF" "SETTER/RETRIEVE" ...
## $ Primary.Color : chr "WHITE" "RED" "GRAY" "BLACK" ...
## $ Animal.Age.Group : chr "1 Yr - 5 Yr" "7 wks - 6 Mo" "1 Yr - 5 Yr" "1 Yr - 5 Yr" ...
## $ Sterilization.Status.at.Intake: chr "Sterilized at Intake" "Sterilized at Intake" "Intact at Intake" "Intact at Intake" ...
## $ Owned.Status : chr "Owner Infomation Found" "No Owner Info Found" "No Owner Info Found" "Owner Infomation Found" ...
## $ Sterilized_DV : int 1 1 0 0 0 0 0 0 0 0 ...
## $ Stray_DV : int 0 1 1 0 1 1 1 0 1 0 ...
# Preview the first six rows of the raw data.
head(acs, n = 6L)
## Intake.Fiscal.Year Intake.Fiscal.Period Intake.Date Intake.Section
## 1 2024 1 10/1/2023 OVER-THE-COUNTER (OTC)
## 2 2024 1 10/1/2023 OVER-THE-COUNTER (OTC)
## 3 2024 1 10/1/2023 FIELD
## 4 2024 1 10/1/2023 FIELD
## 5 2024 1 10/1/2023 FIELD
## 6 2024 1 10/1/2023 FIELD
## Intake.Subsection Animal.ID Kennel.Identity Species Current.Sex
## 1 Quarantine A680497 545763 DOG N
## 2 Stray A685667 545750 CAT N
## 3 Stray A689476 545744 DOG M
## 4 Stray A689477 545745 DOG F
## 5 Stray A689478 545746 DOG N
## 6 Stray A689480 545747 DOG M
## Council.District Zip.Code Animal.Size Breed.Group Primary.Color
## 1 5 78204 MED MASTIFF WHITE
## 2 10 78217 SMALL SHORTHAIR RED
## 3 2 78203 LARGE MASTIFF GRAY
## 4 5 78225 LARGE SETTER/RETRIEVE BLACK
## 5 7 78201 LARGE SHEPHERD TAN
## 6 5 78237 MED SETTER/RETRIEVE BLACK
## Animal.Age.Group Sterilization.Status.at.Intake Owned.Status
## 1 1 Yr - 5 Yr Sterilized at Intake Owner Infomation Found
## 2 7 wks - 6 Mo Sterilized at Intake No Owner Info Found
## 3 1 Yr - 5 Yr Intact at Intake No Owner Info Found
## 4 1 Yr - 5 Yr Intact at Intake Owner Infomation Found
## 5 1 Yr - 5 Yr Intact at Intake No Owner Info Found
## 6 1 Yr - 5 Yr Intact at Intake No Owner Info Found
## Sterilized_DV Stray_DV
## 1 1 0
## 2 1 1
## 3 0 1
## 4 0 0
## 5 0 1
## 6 0 1
# Column-wise overview of the loaded data frame. glimpse() has to receive the
# data frame itself; given the file name it only describes a one-element
# character vector instead of the data.
glimpse(acs)
# Clean the dataset: keep only the variables the model uses (the outcome
# Sterilized_DV and the predictors Species and Zip.Code), then drop every row
# that is missing any of them. Column names are left unchanged so they match
# the model formula.
acs_clean <- acs %>%
  select(Sterilized_DV, Species, Zip.Code) %>%
  drop_na(Sterilized_DV, Species, Zip.Code)
# Fit a logistic regression of the sterilization indicator on species and
# zip code, using the cleaned data.
# NOTE(review): Zip.Code is read as an integer column, so it enters the model
# as a single numeric term rather than as a categorical area effect; confirm
# that this is the intended specification.
model <- glm(
  formula = Sterilized_DV ~ Species + Zip.Code,
  family = binomial,
  data = acs_clean
)
# Print the coefficient table and the deviance / AIC fit statistics.
summary(model)
##
## Call:
## glm(formula = Sterilized_DV ~ Species + Zip.Code, family = binomial,
## data = acs_clean)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.150e+00 8.143e-01 -1.413 0.1578
## SpeciesDOG 1.644e-01 3.472e-02 4.734 2.2e-06 ***
## Zip.Code -1.886e-05 1.041e-05 -1.812 0.0701 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 33309 on 62234 degrees of freedom
## Residual deviance: 33284 on 62232 degrees of freedom
## AIC: 33290
##
## Number of Fisher Scoring iterations: 5