Data source

Australia Pets

Load library

library(tidyverse)
library(skimr)
library(RColorBrewer)
library(networkD3)
library(htmlwidgets)
library(waffle)
library(lubridate)
library(Hmisc)
library(viridis)
library(plotly)

Part 1: RSPCA Animal Outcomes

Import data

animal_outcomes <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-21/animal_outcomes.csv')

Summary

dim(animal_outcomes)
## [1] 664  12
ao = animal_outcomes %>% mutate_at(vars(year,animal_type,outcome), list(factor))
skim(ao)
Data summary
Name ao
Number of rows 664
Number of columns 12
_______________________
Column type frequency:
factor 3
numeric 9
________________________
Group variables

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
year 0 1 FALSE 20 200: 35, 200: 35, 200: 35, 200: 35
animal_type 0 1 FALSE 6 Cat: 114, Dog: 114, Hor: 114, Liv: 114
outcome 0 1 FALSE 8 Eut: 120, Oth: 120, Tra: 102, Rec: 100

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
ACT 0 1.00 201.92 354.55 0 1.00 36.0 191.00 1628 ▇▁▁▁▁
NSW 0 1.00 970.56 2182.39 0 9.00 78.5 514.75 13267 ▇▁▁▁▁
NT 11 0.98 210.77 806.20 0 0.00 1.0 60.00 8150 ▇▁▁▁▁
QLD 0 1.00 1289.88 2562.50 0 15.00 198.5 854.00 15690 ▇▁▁▁▁
SA 2 1.00 318.32 662.24 0 2.00 34.5 226.75 4252 ▇▁▁▁▁
TAS 1 1.00 141.28 288.04 0 3.00 21.0 96.00 1974 ▇▁▁▁▁
VIC 0 1.00 992.26 2009.94 0 13.75 108.5 680.75 10567 ▇▁▁▁▁
WA 0 1.00 88.47 319.11 0 0.00 2.0 46.25 6035 ▇▁▁▁▁
Total 3 1.00 4199.37 7693.90 0 162.00 1015.0 2839.00 42731 ▇▁▁▁▁
Hmisc::describe(ao$year)
## ao$year 
##        n  missing distinct 
##      664        0       20 
## 
## lowest : 1999 2000 2001 2002 2003, highest: 2014 2015 2016 2017 2018
##                                                                             
## Value       1999  2000  2001  2002  2003  2004  2005  2006  2007  2008  2009
## Frequency     23    23    23    35    35    35    35    35    35    35    35
## Proportion 0.035 0.035 0.035 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053
##                                                                 
## Value       2010  2011  2012  2013  2014  2015  2016  2017  2018
## Frequency     35    35    35    35    35    35    35    35    35
## Proportion 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053 0.053

Visualization

Animal intake types throughout the years

intake = animal_outcomes %>% group_by(animal_type,year) %>% tally(Total) %>% ggplot(aes(x= year, y = n, group = animal_type, color=animal_type)) + geom_line() + geom_point() + theme_classic() + theme(legend.position="top") + scale_color_brewer(palette="Dark2") + labs(x="Year", y="Intake count", title="Animal intake types throughout the years", subtitle= "RSPCA Australia animal outcomes 1999-2018")
intake

Animal types and outcomes in 2018

Data preparation

ao18 = animal_outcomes %>% filter(year == "2018") %>% as.data.frame
links = subset(ao18, select=-c(ACT,NSW,NT,QLD,SA,TAS,VIC,WA,year))
colnames(links)[1] = "source"
colnames(links)[2] = "target"
colnames(links)[3] = "value"
nodes = data.frame(name=c(as.character(links$source),as.character(links$target)) %>% unique())
links$IDsource <- match(links$source, nodes$name)-1 
links$IDtarget <- match(links$target, nodes$name)-1

Plot

summary18 <- sankeyNetwork(Links = links, Nodes = nodes,
              Source = "IDsource", Target = "IDtarget",
              Value = "value", NodeID = "name", 
              sinksRight=FALSE, fontSize = 14)
summary18 <- htmlwidgets::prependContent(summary18, htmltools::tags$h3("Australia RSPCA animal types and outcomes in 2018"))
summary18                                    

Australia RSPCA animal types and outcomes in 2018

Percentage of euthanized cats and dogs

Subset

cd_int = animal_outcomes %>% filter(animal_type == "Dogs" | animal_type == "Cats") %>% group_by(animal_type,year) %>% tally(Total) %>% mutate_at(vars(year,animal_type), list(factor)) %>% as.data.frame()
cd_eth = animal_outcomes %>% filter(animal_type == "Dogs" | animal_type == "Cats") %>% filter(outcome=="Euthanized") %>% group_by(animal_type,year, outcome) %>% tally(Total) %>% mutate_at(vars(year,animal_type,outcome), list(factor)) %>% as.data.frame() 
cd_reh = animal_outcomes %>% filter(animal_type == "Dogs" | animal_type == "Cats") %>% filter(outcome=="Rehomed") %>% group_by(animal_type,year, outcome) %>% tally(Total) %>% mutate_at(vars(year,animal_type,outcome), list(factor)) %>% as.data.frame()

Get proportion

cd_int$euth_por= ((cd_eth$n/cd_int$n))
head(cd_int)

Plot

cd2 = ggplot(cd_int, aes(x=year, y=euth_por)) +
  geom_segment( aes(x=year, xend=year, y=0, yend=euth_por), color="grey49") +
  geom_point(aes(x=year, y=euth_por, color= animal_type),size=2) + facet_wrap(~animal_type, nrow=2) + scale_color_brewer(palette="Dark2") + 
  theme_light() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.border = element_blank(),
    axis.ticks.x = element_blank()
  ) + theme(axis.text.x = element_text(angle=90)) + scale_y_continuous(labels = scales::percent_format(), limits=c(0,1)) + labs(x="Year", y="Euthanized %", title="Proportion of euthanized dogs and cats intakes", subtitle = "RSPCA Australia animal outcomes 1999-2018")
cd2

Percentage of euthanized cats and dogs

Get proportion

cd_int$reh_por = ((cd_reh$n/cd_int$n))
head(cd_int)

Plot

rp2 = ggplot(cd_int, aes(x=year, y=reh_por)) +
  geom_segment( aes(x=year, xend=year, y=0, yend=reh_por), color="dodgerblue4") +
  geom_point(aes(x=year, y=reh_por, color= animal_type),size=2) + facet_wrap(~animal_type, nrow=2) + scale_color_brewer(palette="Dark2") + 
  theme_light() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.border = element_blank(),
    axis.ticks.x = element_blank()
  ) + theme(axis.text.x = element_text(angle=90)) + scale_y_continuous(labels = scales::percent_format(), limits=c(0,1)) + labs(x="Year", y="Rehomed %", title="Proportion of rehomed dogs and cats intakes", subtitle = "RSPCA Australia animal outcomes 1999-2018")
rp2

Dogs outcome in 2018

dogsOutcome = c('Reclaimed'=13107,'Rehomed'=12872,'Currently In Care'=1941,'Transferred'=1391,'Other'=244,'Euthanized'=4308)
waffle1 = waffle(dogsOutcome/200, rows=5, size =0.5, colors=c("#00509d","#fdc500","#538d22","#f18701","#dad7cd","#2f3e46") ,title="Dogs outcomes in 2018", xlab="1 square = 200 dogs") + theme(legend.position="bottom")
waffle1

Cats outcome in 2018

catsOutcome = c('Reclaimed'=2665,'Rehomed'=31105,'Currently In Care'=3944,'Transferred'=1033,'Other'=683,'Euthanized'=11740)
waffle2 = waffle(catsOutcome/200, rows=5, size =0.7, colors=c("#00509d","#fdc500","#538d22","#f18701","#dad7cd","#2f3e46") ,title="Cats outcomes in 2018", xlab="1 square = 200 catss") + theme(legend.position="bottom")
waffle2

Part 2: Townsville Animal Complaints

Import data

animal_compliants = read.csv("animal_c.csv", header=TRUE)

Change variable type

colnames(animal_compliants)
## [1] "animalType2"       "complaintType"     "dateRecieved"     
## [4] "suburb"            "electoralDivision" "date2"
animal_compliants$date2= as.Date(animal_compliants$date2)
animal_compliants$month = month(animal_compliants$date2)
animal_compliants$year = year(animal_compliants$date2)
ac = animal_compliants %>% mutate_at(vars(animalType2,complaintType,suburb, dateRecieved, electoralDivision),list(factor)) %>% as.data.frame()
str(animal_compliants)
## 'data.frame':    42413 obs. of  8 variables:
##  $ animalType2      : Factor w/ 2 levels "cat","dog": 2 2 2 2 2 2 2 2 2 2 ...
##  $ complaintType    : Factor w/ 6 levels "Aggressive Animal",..: 1 4 4 5 6 2 3 6 3 3 ...
##  $ dateRecieved     : Factor w/ 81 levels "Apr-14","Apr-15",..: 47 47 47 47 47 47 47 47 47 47 ...
##  $ suburb           : Factor w/ 85 levels "Aitkenvale","Alice River",..: 2 2 2 2 2 10 10 10 11 11 ...
##  $ electoralDivision: Factor w/ 11 levels "Division 1","Division 10",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ date2            : Date, format: "2020-06-01" "2020-06-01" ...
##  $ month            : num  6 6 6 6 6 6 6 6 6 6 ...
##  $ year             : num  2020 2020 2020 2020 2020 2020 2020 2020 2020 2020 ...

Visualization

Complaints about dogs (by month and year)

gt1= ac %>% filter(animalType2 == "dog") %>% group_by(year, month) %>% tally() %>% ggplot(aes(x=month, y=year)) + geom_tile(aes(fill = n), color = "black") +
  scale_fill_gradientn(colors = c("#a8dadc", "#457b9d", "#1d3557")) + 
  theme_classic() + scale_x_continuous(breaks = seq(1, 12, 1), labels = month.abb) + theme_classic() + scale_y_continuous(breaks=seq(2013,2020, by =1)) +
labs(x = "Month(date recieved)", y = "Year(date recieved)", fill = "Count", title = "Complaints about dog", subtitle= "Townsville Animal Complaints")
gt1

Complaints about cats (by month and year)

gt2= ac %>% filter(animalType2 == "cat") %>% group_by(year, month) %>% tally() %>% ggplot(aes(x=month, y=year)) + geom_tile(aes(fill = n), color = "black") +
  scale_fill_gradientn(colors = c("#F4F269", "#606c38", "#283618")) + 
  theme_classic() + scale_x_continuous(breaks = seq(1, 12, 1), labels = month.abb) + theme_classic() + scale_y_continuous(breaks=seq(2013,2020, by =1)) +
labs(x = "Month(date recieved)", y = "Year(date recieved)", fill = "Count", title = "Complaints about cats", subtitle= "Townsville Animal Complaints")
gt2

Complaints types over the years

bp1 = ggplot(ac) + geom_bar(aes(x=year, fill= complaintType)) + facet_wrap(~animalType2, nrow=2) + theme_classic() + scale_fill_brewer(palette="Dark2") + labs(x="Year(date recieved)", y = "Count", title= "Complaints over the years", subtitle= "Townsville Animal Complaints 2013 to 2020")
bp1

trend1= ac %>% filter(between(year, 2014,2019)) %>% group_by(year, complaintType) %>% tally() %>% ggplot(aes(x=year,y=n, group= complaintType, color=complaintType)) + geom_line(stat='identity') + scale_color_brewer(palette="Dark2") + theme_classic() + labs(x="Year(date recieved)", y = "Number of complaints", title= "Complaint types over the years", subtitle= "Townsville Animal Complaints 2014 to 2019")
trend1

Dogs: Complaints type (2014-2019)

vp1= ac %>% filter(animalType2 == "dog" & (between(year, 2014,2019)))  %>% group_by(year,complaintType) %>% tally()
vp2 = vp1 %>% ggplot(aes(x= complaintType, y= n, fill=complaintType, color=complaintType)) + geom_violin(width=2.1,size=0.2) + scale_fill_viridis(discrete=TRUE) + scale_color_viridis(discrete=TRUE) + ylim(0,1600) + theme_minimal() + theme(legend.position="none") + coord_flip() + labs(x="Complaint type", y = "Count", title= "Dogs: Complaints type (2014 to 2019)", subtitle= "Townsville Animal Complaints")
vp2

Complaints about cats (2014-2019)

vp3= ac %>% filter(animalType2 == "cat" & (between(year, 2014,2019)))  %>% group_by(year,complaintType) %>% tally()
vp4 = vp3 %>% ggplot(aes(x= complaintType, y= n, fill=complaintType, color=complaintType)) + geom_violin(width=2.1,size=0.2) + scale_fill_viridis(discrete=TRUE) + scale_color_viridis(discrete=TRUE) + ylim(0,1600) + theme_minimal() + theme(legend.position="none") + coord_flip() + labs(x="Complaint type", y = "Count", title= "Cats: Complaints type (2014 to 2019)", subtitle= "Townsville Animal Complaints")
vp4

Part 3: Brisbane Animal Complaints

Import data

brisbane_complaints = readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-07-21/brisbane_complaints.csv')

Check dates

brisbane_complaints$yearquarter = brisbane_complaints$date_range
unique(brisbane_complaints$date_range)
##  [1] "1st-quarter-2016-17.csv"                                          
##  [2] "april-june-2016.csv"                                              
##  [3] "apr-jun-2019.csv"                                                 
##  [4] "apr-to-jun-2018.csv"                                              
##  [5] "april-to-june-2017.csv"                                           
##  [6] "jan-mar-2019.csv"                                                 
##  [7] "jan-to-mar-2018.csv"                                              
##  [8] "january-to-march-2017.csv"                                        
##  [9] "jul-to-sep-2018.csv"                                              
## [10] "jul-to-sep-2019.csv"                                              
## [11] "july-to-september-2017.csv"                                       
## [12] "oct-to-dec-2018.csv"                                              
## [13] "october-to-december-2016.csv"                                     
## [14] "october-to-december-2017.csv"                                     
## [15] "cars-srsa-open-data-animal-related-complaints-apr-to-jun-2020.csv"
## [16] "cars-srsa-open-data-animal-related-complaints-jan-to-mar-2020.csv"
## [17] "cars-srsa-open-data-animal-related-complaints-oct-to-dec-2019.csv"

Replace date string with year and quarter

brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "1st-quarter-2016-17.csv", "2016-Q1")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "apr-jun-2019.csv", "2019-Q2")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "apr-to-jun-2018.csv", "2018-Q2")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "april-june-2016.csv", "2016-Q2")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "april-to-june-2017.csv", "2017-Q2")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "cars-srsa-open-data-animal-related-complaints-apr-to-jun-2020.csv", "2020-Q2")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "cars-srsa-open-data-animal-related-complaints-jan-to-mar-2020.csv", "2020-Q1")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "cars-srsa-open-data-animal-related-complaints-oct-to-dec-2019.csv", "2019-Q4")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "jan-mar-2019.csv", "2019-Q1")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "jan-to-mar-2018.csv", "2018-Q1")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "january-to-march-2017.csv", "2017-Q1")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "jul-to-sep-2018.csv", "2018-Q3")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "july-to-september-2017.csv", "2017-Q3")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "jul-to-sep-2019.csv", "2017-Q3")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "oct-to-dec-2018.csv", "2018-Q4")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "october-to-december-2016.csv", "2016-Q4")
brisbane_complaints$yearquarter = str_replace_all(brisbane_complaints$yearquarter, "october-to-december-2017.csv", "2017-Q4")
sort(unique(brisbane_complaints$yearquarter),decreasing=FALSE)
##  [1] "2016-Q1" "2016-Q2" "2016-Q4" "2017-Q1" "2017-Q2" "2017-Q3" "2017-Q4"
##  [8] "2018-Q1" "2018-Q2" "2018-Q3" "2018-Q4" "2019-Q1" "2019-Q2" "2019-Q4"
## [15] "2020-Q1" "2020-Q2"

Subset

sum(is.na(brisbane_complaints$responsible_office))
## [1] 31330
bc= subset(brisbane_complaints, select=-c(responsible_office, date_range, nature, city))
##remove 2016 obs 
bc = bc[!grepl("2016", brisbane_complaints$yearquarter),]
sort(unique(bc$yearquarter),decreasing=FALSE)
##  [1] "2017-Q1" "2017-Q2" "2017-Q3" "2017-Q4" "2018-Q1" "2018-Q2" "2018-Q3"
##  [8] "2018-Q4" "2019-Q1" "2019-Q2" "2019-Q4" "2020-Q1" "2020-Q2"
bc = bc %>% mutate_at(vars(animal_type,category,suburb, yearquarter),list(factor)) 
summary(bc)
##        animal_type                   category              suburb     
##  Attack      : 7087   Wandering          :4250   INALA        :  859  
##  Cat         : 1845   Fencing Issues     :4208   BRACKEN RIDGE:  500  
##  Cat Trapping: 2256   Attack On An Animal:2467   WYNNUM       :  456  
##  Dog         :11253   Unregistered       :1912   FOREST LAKE  :  452  
##  Other Animal: 3865   Attack On A Person :1827   ACACIA RIDGE :  364  
##                       (Other)            :6976   (Other)      :23373  
##                       NA's               :4666   NA's         :  302  
##   yearquarter   
##  2017-Q3: 3896  
##  2020-Q2: 2799  
##  2020-Q1: 2146  
##  2018-Q3: 2101  
##  2018-Q4: 1968  
##  2019-Q2: 1954  
##  (Other):11442

Visualisations

Complaints category (Jan 2017 to Jun 2020)

bv1= bc %>% group_by(category) %>% tally() %>% ggplot(aes(x=category, y=n, fill=n)) +  geom_bar(stat="identity") + theme_classic() + scale_fill_viridis() + coord_flip() + labs(x="Complaints types", y = "Count", title= "Complaint Categories", subtitle= "Brisbane Animal Complaints Jan 2017 to June 2020")
bv1

Animal type (Jan 2017 to Jun 2020)

bv2= bc %>% group_by(animal_type) %>% tally() %>% ggplot(aes(x=animal_type, y=n, fill=n)) +  geom_bar(stat="identity") + theme_classic() + scale_fill_viridis() + coord_flip() + labs(x="Animal types", y = "Count", title= "Animal Types", subtitle= "Brisbane Animal Complaints Jan 2017 to June 2020")
bv2

Number of complaints throughout the quarters

bv3= bc %>% group_by(yearquarter) %>% tally() %>% ggplot(aes(x=yearquarter, y=n, group=1)) + geom_line(color="black") + geom_point(color = "white", shape = 22, aes(fill = n), size = 5) + scale_fill_viridis_c(option = "plasma") + theme(axis.text.x = element_text(angle=90)) + labs(x = "Year-quarter", y = "Number of complaints", title = "Number of complaints throughout the quarters",subtitle= "Brisbane Animal Complaints Jan 2017 to June 2020")
bv3

Number of complaints, yearquarter, animal type

bv4= bc %>% group_by(yearquarter,animal_type) %>% tally() %>%
  ggplot( aes(x=yearquarter, y=n, group=animal_type, fill=animal_type)) +
    geom_area() +
    scale_fill_viridis(discrete = TRUE) +
    theme(legend.position="none") +
    ggtitle("Complaints by animal types from 2017-Q1 to 2020-Q2 ") +
    theme_minimal() + scale_x_discrete("2017-Q1 to 2020-Q2",breaks=2) +
    theme(
      legend.position="none",
      panel.spacing = unit(0.2, "lines"),
      strip.text.x = element_text(size = 8),
      plot.title = element_text(size=14)
    ) + 
    facet_wrap(~animal_type)
bv4